To master data merging in SAS, you must understand how the Program Data Vector (PDV) manages variable overwriting and observation counts. Unlike SQL joins, a SAS MERGE will overwrite variables with the same name across datasets; therefore, always drop or rename overlapping non-BY variables (like state) to prevent losing source data. To control the join type, use the (IN=) data set option—it allows you to mimic inner, left, or right joins by filtering for specific flags in your subsetting IF statements. Finally, remember that if datasets have an unequal number of observations per BY-group, SAS will "retain" the last value of the shorter dataset until the entire group is processed, which can lead to unintended data duplication if not managed carefully.
Type : CREATION_INTERNE
Examples use generated data (datalines) or SASHELP.
| 1 | DATA animal; |
| 2 | INPUT common $ animal$; |
| 3 | DATALINES; |
| 4 | a Ant |
| 5 | b Bird |
| 6 | c Cat |
| 7 | d Dog |
| 8 | e Eagle |
| 9 | f Frog |
| 10 | ; |
| 11 | RUN; |
| 12 | |
| 13 | DATA plant; |
| 14 | INPUT common $ plant$; |
| 15 | DATALINES; |
| 16 | a Apple |
| 17 | b Banana |
| 18 | c Coconut |
| 19 | d Dewberry |
| 20 | e Eggplant |
| 21 | f Fig |
| 22 | ; |
| 23 | RUN; |
| 24 | |
| 25 | PROC SORT DATA=animal; BY common; RUN; |
| 26 | PROC SORT DATA=plant; BY common; RUN; |
| 27 | |
| 28 | DATA matchmerge; |
| 29 | MERGE animal plant; |
| 30 | BY common; |
| 31 | RUN; |
| 32 | PROC PRINT DATA=matchmerge; RUN; |
| 1 | DATA one; |
| 2 | INPUT ID state$; |
| 3 | DATALINES; |
| 4 | 1 AZ |
| 5 | 2 MA |
| 6 | 3 WA |
| 7 | 4 WI |
| 8 | ; |
| 9 | RUN; |
| 10 | |
| 11 | DATA many; |
| 12 | INPUT ID city $ state$; |
| 13 | DATALINES; |
| 14 | 1 Phoenix Ariz |
| 15 | 2 Boston Mass |
| 16 | 2 Foxboro Mass |
| 17 | 3 Olympia Mass |
| 18 | 3 Seattle Wash |
| 19 | 3 Spokane Wash |
| 20 | 4 Madison Wis |
| 21 | 4 Milwaukee Wis |
| 22 | 4 Madison Wis |
| 23 | 4 Hurley Wis |
| 24 | ; |
| 25 | RUN; |
| 26 | |
| 27 | PROC SORT DATA=one; BY ID; RUN; |
| 28 | PROC SORT DATA=many; BY ID; RUN; |
| 29 | |
| 30 | DATA three; |
| 31 | MERGE many one; |
| 32 | BY ID; |
| 33 | RUN; |
| 34 | PROC PRINT DATA=three noobs; RUN; |
| 35 | title; |
| 36 | |
| 37 | DATA solution; |
| 38 | MERGE many(drop=state) one; |
| 39 | BY ID; |
| 40 | RUN; |
| 41 | PROC PRINT DATA=solution noobs; RUN; |
| 1 | DATA animalDupes; |
| 2 | INPUT common $ animal1$; |
| 3 | DATALINES; |
| 4 | a Ant |
| 5 | a Ape |
| 6 | b Bird |
| 7 | c Cat |
| 8 | d Dog |
| 9 | e Eagle |
| 10 | ; |
| 11 | RUN; |
| 12 | |
| 13 | DATA plantDupes; |
| 14 | INPUT common $ plant1$; |
| 15 | DATALINES; |
| 16 | a Apple |
| 17 | b Banana |
| 18 | c Coconut |
| 19 | c Celery |
| 20 | d Dewberry |
| 21 | e Eggplant |
| 22 | ; |
| 23 | RUN; |
| 24 | |
| 25 | PROC SORT DATA=animalDupes; BY common; RUN; |
| 26 | PROC SORT DATA=plantDupes; BY common; RUN; |
| 27 | |
| 28 | DATA matchmerge; |
| 29 | MERGE animalDupes plantDupes; |
| 30 | BY common; |
| 31 | RUN; |
| 32 | PROC PRINT DATA=matchmerge; RUN; |
| 1 | DATA animalMissing; |
| 2 | INPUT common $ animal1$; |
| 3 | DATALINES; |
| 4 | a Ant |
| 5 | c Cat |
| 6 | d Dog |
| 7 | e Eagle |
| 8 | ; |
| 9 | RUN; |
| 10 | |
| 11 | DATA plantMissing2; |
| 12 | INPUT common $ plant$; |
| 13 | DATALINES; |
| 14 | a Apple |
| 15 | b Banana |
| 16 | c Coconut |
| 17 | e Eggplant |
| 18 | f Fig |
| 19 | ; |
| 20 | RUN; |
| 21 | |
| 22 | PROC SORT DATA=animalMissing; BY common; RUN; |
| 23 | PROC SORT DATA=plantMissing2; BY common; RUN; |
| 24 | |
| 25 | DATA matchmerge; |
| 26 | MERGE animalMissing plantMissing2; |
| 27 | BY common; |
| 28 | RUN; |
| 29 | |
| 30 | PROC PRINT DATA=matchmerge; RUN; |
| 1 | DATA animalMissing; |
| 2 | INPUT common $ animal$; |
| 3 | DATALINES; |
| 4 | a Ant |
| 5 | c Cat |
| 6 | d Dog |
| 7 | e Eagle |
| 8 | ; |
| 9 | RUN; |
| 10 | |
| 11 | DATA plantMissing2; |
| 12 | INPUT common $ plant$; |
| 13 | DATALINES; |
| 14 | a Apple |
| 15 | b Banana |
| 16 | c Coconut |
| 17 | e Eggplant |
| 18 | f Fig |
| 19 | ; |
| 20 | RUN; |
| 21 | |
| 22 | PROC SORT DATA=animalMissing; BY common; RUN; |
| 23 | PROC SORT DATA=plantMissing2; BY common; RUN; |
| 24 | |
| 25 | DATA matchmerge2; |
| 26 | MERGE animalMissing(in=i) plantMissing2(in=j); |
| 27 | BY common; |
| 28 | IF (i=1) and (j=1); |
| 29 | RUN; |
| 30 | PROC PRINT DATA=matchmerge2; RUN; |
| 1 | DATA fruit; |
| 2 | INPUT ID $ fruit$; |
| 3 | DATALINES; |
| 4 | a apple |
| 5 | c apricot |
| 6 | d banana |
| 7 | e blueberry |
| 8 | c cantaloupe |
| 9 | c coconut |
| 10 | c cherry |
| 11 | c crabapple |
| 12 | c cranberry |
| 13 | ; |
| 14 | RUN; |
| 15 | |
| 16 | DATA color; |
| 17 | INPUT ID $ color$; |
| 18 | DATALINES; |
| 19 | a amber |
| 20 | b brown |
| 21 | b blue |
| 22 | b black |
| 23 | b beige |
| 24 | b bronze |
| 25 | c cocoa |
| 26 | c cream |
| 27 | ; |
| 28 | RUN; |
| 29 | |
| 30 | PROC SORT DATA=fruit; BY ID; RUN; |
| 31 | PROC SORT DATA=color; BY ID; RUN; |
| 32 | |
| 33 | DATA merged; |
| 34 | MERGE fruit color; |
| 35 | BY id; |
| 36 | RUN; |
| 37 | |
| 38 | PROC PRINT DATA=merged; |
| 39 | title 'Merged by ID'; |
| 40 | RUN; |
| 1 | DATA one; |
| 2 | INPUT ID age score; |
| 3 | DATALINES; |
| 4 | 1 8 90 |
| 5 | 1 . 100 |
| 6 | 1 . 95 |
| 7 | 2 9 80 |
| 8 | 2 . 100 |
| 9 | ; |
| 10 | RUN; |
| 11 | |
| 12 | DATA two; |
| 13 | INPUT ID name $ age; |
| 14 | DATALINES; |
| 15 | 1 Sarah 11 |
| 16 | 2 John 10 |
| 17 | ; |
| 18 | RUN; |
| 19 | |
| 20 | PROC SORT DATA=one; BY ID; RUN; |
| 21 | PROC SORT DATA=two; BY ID; RUN; |
| 22 | |
| 23 | DATA merge1; |
| 24 | MERGE one two; |
| 25 | BY id; |
| 26 | RUN; |
| 27 | PROC PRINT DATA=merge1; title 'Merged by ID'; RUN; |
| 28 | |
| 29 | DATA merge2 (drop=temp_age); |
| 30 | MERGE one two; |
| 31 | BY id; |
| 32 | retain temp_age; |
| 33 | IF first.id THEN temp_age = .; |
| 34 | IF age = . THEN age = temp_age; |
| 35 | ELSE temp_age = age; |
| 36 | RUN; |
| 37 | PROC PRINT DATA=merge2; title 'Merged by ID with Age Retained'; RUN; |