The code below illustrates how to combine the NSAF (National Survey of America's Families) data sets into a single analysis file.
You can view the documentation and download the NSAF data files from https://www.icpsr.umich.edu/web/ICPSR/series/00216
Some of the data files have variables with the same name but different information in them. The %rname macro appends a file-specific suffix to every variable name so that variables from one file do not overwrite same-named variables from another file during the merge.
options nocenter nodate formchar="|----|+|---+=|-/<>" ; %macro rname(libname, data, outdata, suffix); proc sql noprint; select name into :varlist separated by " " from sashelp.vcolumn where libname=upcase("&libname") and memname=upcase("&data"); quit; %let k=1; %local change ; %let m = %scan(&varlist, &k); %do %while(&m NE); %let k = %eval(&k + 1); %let change = &change &m = &m&suffix; %let m = %scan(&varlist, &k); %end; data &outdata ; set &data; rename &change ; run; %mend ; * NOTE: You want to do the sort after you drop the extra variables to minimize the sort time; * NOTE: You do NOT want to drop the probability weight nor the replicate weights; data temp1; set "D:focalchd"; keep hhid persid respid ufamid ucpsid fhosp fdent femer fment fdoct site file; file = 1; run; proc print data = temp1 (obs = 5); run; %rname(work, temp1, temp1s, _1); * There are three ways shown to check for unique IDs: 1) sort with the nodup option, 2) creating a flag variable and 3) creating different data sets; * You need to check the log to make sure that no duplicate observations were deleted.; * If observations were deleted, it means that the variables listed on the by statement do not uniquely identify the observations and you need to choose new variables that will.; proc sort data = temp1s nodup; by hhid_1 persid_1; run; * NOTE: You need to run the proc contents to see how many characters the id variables are using; proc contents data = temp1s; run; * NOTE: The input function "destrings" the variables. 
These variables need to be in numeric format to avoid problems with the merge; data temp1s; set temp1s; hhid=input(hhid_1,best8.); persid=input(persid_1,best10.); respid=input(respid_1,best10.); ufamid=input(ufamid_1,best10.); ucpsid=input(ucpsid_1,best10.); drop hhid_1 persid_1 respid_1 ufamid_1 ucpsid_1; run; data temp2; set "D:adult_pr"; keep hhid persid lwhunp lfdstmp lgenass site file; file = 1; run; %rname(work, temp2, temp2s, _2); proc sort data = temp2s; by hhid_2 persid_2; run; data temp2s; set temp2s; hhid=input(hhid_2,best8.); persid=input(persid_2,best10.); drop hhid_2 persid_2; run; proc sort data = temp2s; by hhid persid ; run; proc print data = temp2s (obs = 5); run; data flag; set temp2s; by hhid persid; if first.hhid or first.persid then flag = 0; else flag = 1; run; proc print data = flag; where flag = 1; run; data temp3; set "D:adult_rn"; keep hhid persid site sex typeint fhosp file; file = 1; run; %rname(work, temp3, temp3s, _3); data temp3s; set temp3s; hhid=input(hhid_3,best8.); persid=input(persid_3,best10.); drop hhid_3 persid_3; run; proc sort data = temp3s; by hhid persid; run; data nodup dup; set temp3s; by hhid persid; if first.hhid or first.persid then output nodup; else output dup; run; * NOTE: If everything worked correctly, all of the observations will be in the nodup file and none of them will be in the dup file.; proc contents data = dup; run; data temp4; set "D:adult_rb"; keep hhid persid site age sex state uregion file; file = 1; run; %rname(work, temp4, temp4s, _4); proc sort data = temp4s nodup; by hhid_4 persid_4; run; data temp4s; set temp4s; hhid=input(hhid_4,best8.); persid=input(persid_4,best10.); drop hhid_4 persid_4; run; * NOTE: The order in which you merge the data files MATTERS!!! You need to merge the file with the least restrictive merge last! In this case, all of the files have two variables on which to merge, except the household data file, which has only one merge variable. 
Because it only has one merge variable, the household data set has the least restrictive merge, and needs to be merged last.; * the libname statements are due to a stat-transfer error and is a temporary work around; * the second data set putting the data back into the temporary directory is necessary for the macro to work correctly; * NOTE: You need to have the file as an .sd2 file for this to work; libname in v6 "D:"; libname out v8 "D:temp"; data out.temp5; set in.famresp; keep hhid respid site kafdctim lcredit kaskcare file; file = 1; run; data temp5; set out.temp5; run; %rname(work, temp5, temp5s, _5); proc sort data = temp5s nodup; by hhid_5 respid_5; run; data temp5s; set temp5s; hhid=input(hhid_5,best8.); respid=input(respid_5,best10.); drop hhid_5 respid_5; run; data temp6; set "D:person"; keep hhid persid sex site uexprrp upfrel xsupnum file; file = 1; run; %rname(work, temp6, temp6s, _6); proc sort data = temp6s nodup; by hhid_6 persid_6; run; data temp6s; set temp6s; hhid=input(hhid_6,best8.); persid=input(persid_6,best10.); drop hhid_6 persid_6; run; data temp7; set "D:househld"; keep hhid varstrat varunit state ufips uhhcount site file; file = 1; run; %rname(work, temp7, temp7s, _7); proc sort data = temp7s nodup; by hhid_7; run; data temp7s; set temp7s; hhid=input(hhid_7,best8.); drop hhid_7; run; data out.temp8; set in.socfam; keep hhid ufamid site jeaf jab2pov jsupkid jchildf file; file = 1; run; data temp8; set out.temp8; run; %rname(work, temp8, temp8s, _8); proc sort data = temp8s nodup; by hhid_8 ufamid_8; run; data temp8s; set temp8s; hhid=input(hhid_8,best8.); ufamid = input(ufamid_8,best10.); drop hhid_8 ufamid_8; run; data temp9; set "D:cpsfam"; keep hhid ucpsid site ublinc ublga ubloth uincrpov uwrk file; file = 1; run; %rname(work, temp9, temp9s, _9); proc sort data = temp9s nodup; by hhid_9 ucpsid_9; run; data temp9s; set temp9s; hhid=input(hhid_9,best8.); ucpsid = input(ucpsid_9,best10.); drop hhid_9 ucpsid_9; run; * NOTE: You have to 
give the in= variables a new name later on in the data step; * NOTE: You cannot name the data set "merge"; * merging focalchd adult_pr adult_rn adult_rb person; data merge1; merge temp1s(in=t1) temp2s(in=t2) temp3s(in=t3) temp4s(in=t4) temp6s(in=t6); by hhid persid; t1a = t1; t2a = t2; t3a = t3; t4a = t4; t6a = t6; run; proc contents data = merge1; run; proc sort data = merge1; by hhid ufamid; run; * merging in socfam data; data merge2; merge merge1 temp8s(in=t8); by hhid ufamid; t8a = t8; run; proc sort data = merge2; by hhid ucpsid; run; * merging in cpsfam data; data merge3; merge merge2 temp9s(in=t9); by hhid ucpsid; t9a = t9; run; proc sort data = merge3; by hhid respid; run; * merging in famresp data; data merge4; merge merge3 temp5s(in=t5); by hhid respid; t5a = t5; run; proc sort data = merge4; by hhid; run; * merging in household data; data merge5; merge merge4 temp7s(in=t7); by hhid; t7a = t7; run; * The procs below are used for verifying that the merge worked as expected; proc freq data = merge5; tables t1a*t2a*t3a*t4a*t5a*t6a*t7a*t8a*t9a / list missing; run; proc contents data = merge5; run; options nolabel; proc means data = merge5 ; run; proc freq data = merge5; tables t1a*t2a*t3a*t4a*t5a*t6a*t7a*t8a*t9a/list missing; run; * saving the temporary data set down to the hard drive and renaming it; data "D:nsaf99"; set merge5; run; /* NOTE: There are 193,805 observations in the final data set. The table below is from the proc freq above. It indicates which files contributed observations. For example, the first line indicates that files 7 and 9 contributed 17,573 observations to the final data set. The FREQ Procedure Cumulative Cumulative t1a t2a t3a t4a t5a t6a t7a t8a t9a Frequency Percent Frequency Percent ---------------------------------------------------------------------------------------- . . . . 0 . 1 . 1 17573 9.07 17573 9.07 . . . . 0 . 1 1 0 14701 7.59 32274 16.65 . . . . 1 . 1 . . 
16765 8.65 49039 25.30 0 0 0 0 0 1 1 0 0 34158 17.62 83197 42.93 0 1 0 0 0 1 1 0 0 22107 11.41 105304 54.34 0 1 1 0 0 1 1 0 0 35798 18.47 141102 72.81 0 1 1 1 0 1 1 0 0 16765 8.65 157867 81.46 1 0 0 0 1 1 1 1 1 35889 18.52 193756 99.97 1 1 0 0 1 1 1 1 1 25 0.01 193781 99.99 1 1 1 0 1 1 1 1 1 24 0.01 193805 100.00 */