Below is the Stata code that can be used to merge the NSAF data files.
set more off
* NOTE: Commands such as renames, renvars and tablist may have to be
* downloaded. Use search <command> to find the command on the internet
* and follow the instructions given to download it.
* NOTE: Three ways are shown for renaming variables. You can use any of these
* methods. They are integrated into the code so that it is clear how each method
* would be used.
use "D:focalchd.dta", clear
* keep only the variables that you want; otherwise, the files will get to be
* too big for Stata and you will have to move to Stata S/E
keep hhid persid respid ufamid ucpsid fhosp fdent femer fment fdoct site
order hhid persid respid ufamid ucpsid fhosp fdent femer fment fdoct
* rename all vars to have _1 at end to avoid overwriting variables with the same name during merge
foreach var of varlist * {
rename `var' `var'_1
}
* need to destring the merge variables to avoid problems during the merge
* need to keep all of these variables for later merges with data files
* that have only one of these variables available to merge on
destring hhid_1 persid_1 respid_1 ufamid_1 ucpsid_1, gen(hhidn persidn respidn ufamidn ucpsidn)
compress
gen _file1 = 1
count
* 35938
isid hhidn persidn
sort hhidn persidn
save "D:focalchds.dta", replace
* you can also use the renames command to add the suffix
use "D:adult_pr.dta", clear
keep hhid persid lwhunp lfdstmp lgenass site
renames *, suffix(_2)
destring hhid_2 persid_2, gen(hhidn persidn)
compress
gen _file2 = 1
count
* 74719
sort hhidn persidn
save "D:adult_prs.dta", replace
* You can also use the renvars command to add the suffix
use "D:adult_rn.dta", clear
keep hhid persid site sex typeint fhosp
renvars *, postfix(_3)
destring hhid_3 persid_3, gen(hhidn persidn)
compress
gen _file3 = 1
count
* 52587
isid hhidn persidn
sort hhidn persidn
save "D:adult_rns.dta", replace
use "D:adult_rb.dta", clear
keep hhid persid site age sex state uregion
renvars *, postfix(_4)
destring hhid_4 persid_4, gen(hhidn persidn)
compress
gen _file4 = 1
count
* 16765
isid hhidn persidn
sort hhidn persidn
save "D:adult_rbs.dta", replace
use "D:famresp.dta", clear
keep hhid respid site kafdctim lcredit kaskcare
renvars *, postfix(_5)
destring hhid_5 respid_5, gen(hhidn respidn)
* compress
gen _file5 = 1
count
* 46705
isid hhidn respidn
sort hhidn respidn
save "D:famresps.dta", replace
use "D:person.dta", clear
keep hhid persid sex site uexprrp upfrel xsupnum
renvars *, postfix(_6)
destring hhid_6 persid_6, gen(hhidn persidn)
compress
gen _file6 = 1
count
* 144766
isid hhidn persidn
sort hhidn persidn
save "D:persons.dta", replace
use "D:socfam.dta", clear
keep hhid ufamid site jeaf jab2pov jsupkid jchildf
foreach var of varlist * {
rename `var' `var'_7
}
destring hhid_7 ufamid_7, gen(hhidn ufamidn)
compress
gen _file7 = 1
count
* 44303
isid hhidn ufamidn
sort hhidn ufamidn
save "D:socfams.dta", replace
use "D:cpsfam.dta", clear
keep hhid ucpsid site ublinc ublga ubloth uincrpov uwrk
foreach var of varlist * {
rename `var' `var'_8
}
destring hhid_8 ucpsid_8, gen(hhidn ucpsidn)
compress
gen _file8 = 1
count
* 44303
isid hhidn ucpsidn
sort hhidn ucpsidn
save "D:cpsfams.dta", replace
use "D:househld.dta", clear
keep hhid varstrat varunit state ufips uhhcount site
foreach var of varlist * {
rename `var' `var'_9
}
destring hhid_9, gen(hhidn)
compress
gen _file9 = 1
count
* 42360
isid hhidn
sort hhidn
save "D:households.dta", replace
/*
This code counts the number of persons in each size of household
use "D:persons.dta", clear
gen x = 1
egen s = sum(x), by(hhidn)
tab s
s | Freq. Percent Cum.
------------+-----------------------------------
1 | 4,259 2.94 2.94
2 | 15,942 11.01 13.95
3 | 29,592 20.44 34.40
4 | 46,100 31.84 66.24
5 | 27,905 19.28 85.52
6 | 12,030 8.31 93.83
7 | 4,914 3.39 97.22
8 | 1,936 1.34 98.56
9 | 972 0.67 99.23
10 | 550 0.38 99.61
11 | 297 0.21 99.81
12 | 156 0.11 99.92
13 | 52 0.04 99.96
14 | 28 0.02 99.98
15 | 15 0.01 99.99
18 | 18 0.01 100.00
------------+-----------------------------------
Total | 144,766 100.00
*/
*********************************************************************
* NOTE: The order in which you merge the data files MATTERS!!!
* You need to merge the file with the least restrictive merge last!
* In this case, all of the files have two variables on which to merge,
* except the household data file, which has only one merge variable.
* Because it only has one merge variable, the household data set has the
* least restrictive merge, and needs to be merged last.
use "D:focalchds.dta"
* sort hhidn persidn
merge hhidn persidn using "D:adult_prs.dta", _merge(merge1)
save "D:merge1.dta", replace
tab merge1
* use "D:merge1.dta"
sort hhidn persidn
merge hhidn persidn using "D:adult_rns.dta", _merge(merge2)
save "D:merge2.dta", replace
tab merge2
* use "D:merge2.dta"
sort hhidn persidn
merge hhidn persidn using "D:adult_rbs.dta", _merge(merge3)
save "D:merge3.dta", replace
tab merge3
* use "D:merge3.dta", clear
sort hhidn respidn
merge hhidn respidn using "D:famresps.dta", _merge(merge4)
save "D:merge4.dta", replace
tab merge4
* use "D:merge4.dta", clear
sort hhidn persidn
merge hhidn persidn using "D:persons.dta", _merge(merge5)
save "D:merge5.dta", replace
tab merge5
* use "D:merge5.dta", clear
sort hhidn ufamidn
merge hhidn ufamidn using "D:socfams.dta", _merge(merge6)
save "D:merge6.dta", replace
tab merge6
* use "D:merge6.dta", clear
sort hhidn ucpsidn
merge hhidn ucpsidn using "D:cpsfams.dta", _merge(merge7)
save "D:merge7.dta", replace
tab merge7
* use "D:merge7.dta", clear
sort hhidn
merge hhidn using "D:households.dta", _merge(merge8)
save "D:merge8.dta", replace
tab merge8
compress
format hhidn persidn %20.0f
save "D:nsaf99.dta", replace
* The commands below are used for verifying that the merge worked as expected.
describe
tab1 merge*
tablist _file*
summ
***************
/*
There are 193,805 observations in the final data set.
The table below is the output from the tablist _file* command indicates which files
* contributed observations. For example, the first line of the table indicates
* that 35,889 observations were contributed from files 1, 5, 6, 7, 8 and 9.
tablist _file*
+----------------------------------------------------------------------------------------+
| _file1 _file2 _file3 _file4 _file5 _file6 _file7 _file8 _file9 Freq |
|----------------------------------------------------------------------------------------|
| 1 . . . 1 1 1 1 1 35889 |
| . 1 1 . . 1 . . 1 35798 |
| . . . . . 1 . . 1 34158 |
| . 1 . . . 1 . . 1 22107 |
| . . . . . . . 1 1 17573 |
|----------------------------------------------------------------------------------------|
| . . . . 1 . . . 1 16765 |
| . 1 1 1 . 1 . . 1 16765 |
| . . . . . . 1 . 1 14701 |
| 1 1 . . 1 1 1 1 1 25 |
| 1 1 1 . 1 1 1 1 1 24 |
+----------------------------------------------------------------------------------------+
The output below is from the tab1 merge* command above.
tab1 merge*
-> tabulation of merge1
merge1 | Freq. Percent Cum.
------------+-----------------------------------
1 | 35,889 32.45 32.45
2 | 74,670 67.51 99.96
3 | 49 0.04 100.00
------------+-----------------------------------
Total | 110,608 100.00
-> tabulation of merge2
merge2 | Freq. Percent Cum.
------------+-----------------------------------
1 | 58,021 52.46 52.46
3 | 52,587 47.54 100.00
------------+-----------------------------------
Total | 110,608 100.00
-> tabulation of merge3
merge3 | Freq. Percent Cum.
------------+-----------------------------------
1 | 93,843 84.84 84.84
3 | 16,765 15.16 100.00
------------+-----------------------------------
Total | 110,608 100.00
-> tabulation of merge4
merge4 | Freq. Percent Cum.
------------+-----------------------------------
1 | 74,670 58.62 58.62
2 | 16,765 13.16 71.79
3 | 35,938 28.21 100.00
------------+-----------------------------------
Total | 127,373 100.00
-> tabulation of merge5
merge5 | Freq. Percent Cum.
------------+-----------------------------------
1 | 16,765 10.38 10.38
2 | 34,158 21.15 31.53
3 | 110,608 68.47 100.00
------------+-----------------------------------
Total | 161,531 100.00
-> tabulation of merge6
merge6 | Freq. Percent Cum.
------------+-----------------------------------
1 | 125,593 71.27 71.27
2 | 14,701 8.34 79.61
3 | 35,938 20.39 100.00
------------+-----------------------------------
Total | 176,232 100.00
-> tabulation of merge7
merge7 | Freq. Percent Cum.
------------+-----------------------------------
1 | 140,294 72.39 72.39
2 | 17,573 9.07 81.46
3 | 35,938 18.54 100.00
------------+-----------------------------------
Total | 193,805 100.00
-> tabulation of merge8
merge8 | Freq. Percent Cum.
------------+-----------------------------------
3 | 193,805 100.00 100.00
------------+-----------------------------------
Total | 193,805 100.00
*/
