Section 11.2: Cox-Snell Residuals for Assessing the Fit of a Cox Model
A data step creates a data set called bone_marrow1, and it can be downloaded here. We will use this dataset in this section.
data bone_marrow1; set bone_marrow; z1 = z1 -28; z2 = z2- 28; z1xz2 = z1 * z2; g1 = ( g = 1 ); g2 = ( g = 2 ); g3 = ( g = 3 ); cons = 1; z7c = z7 / 30 - 9; run;
Fig. 11.1 on page 330.
proc phreg data = bone_marrow1; model t2*dfree(0) = z1 z2 z1xz2 g2 g3 z7c z8 z10 ; output out = figure11_1 LOGSURV = h /method = ch; /*-logsurv is the cox-snell residual*/ run; data figure11_1a; set figure11_1; h = -h; cons = 1; run; proc phreg data = figure11_1a ; model h*dfree(0) = cons; output out = figure11_1b logsurv = ls /method = ch; run; data figure11_1c; set figure11_1b; haz = - ls; run; proc sort data = figure11_1c; by h; run; title "Figure 11.1"; axis1 order = (0 to 3 by .5) minor = none; axis2 order = (0 to 3 by .5) minor = none label = ( a=90); symbol1 i = stepjl c= blue; symbol2 i = join c = red l = 3; proc gplot data = figure11_1c; plot haz*h =1 h*h =2 /overlay haxis=axis1 vaxis= axis2; label haz = "Estimated Cumulative Hazard Rates"; label h = "Residual"; run; quit;
Figure 11.2 on page 331.
proc sort data = figure11_1a; by z10; run; proc phreg data = figure11_1a ; model h*dfree(0) = cons; output out = fill_2b logsurv = ls /method = ch; by z10; run; data fill_2b1; set fill_2b; if z10 = 0 then haz1 = -ls; if z10 = 1 then haz2 = -ls; run; proc sort data = fill_2b1; by h; run; title "Figure 11.2"; symbol1 i = stepjl c= blue; symbol2 i = stepjl c = red l = 3; symbol3 i = join c = black; proc gplot data = fill_2b1; plot haz1*h = 1 haz2*h = 2 h*h=3 /overlay haxis=axis1 vaxis=axis2 ; label haz1 = "Log Cumulative Hazard Rate"; label h= "Residual"; run; quit;
Figure 11.3 on page 332.
proc phreg data = bone_marrow1 ; model t2*dfree(0) = z1 z2 z1xz2 g2 g3 z7 z8 ; strata z10; output out = figure11_3 logsurv = h /method=ch; run; data figure11_3a; set figure11_3; cons = 1; h = -h; run; proc sort data = figure11_3a; by z10; run; proc phreg data = figure11_3a ; model h*dfree(0) = cons; output out = fig11_t logsurv = ls /method = ch; by z10; run; data fig11_t; set fig11_t; if z10 = 0 then haz1 = - ls; if z10 = 1 then haz2 = - ls; run; proc sort data = fig11_t; by h; run; options label; title "Figure 11.3"; axis1 order = (0 to 3 by .5) minor = none; axis2 order = (0 to 3 by .5) minor = none label = ( a=90); symbol1 i = stepjl c= blue; symbol2 i = stepjl c = red l = 3; symbol3 i = join c = black; proc gplot data = fig11_t; plot haz1*h = 1 haz2*h = 2 h*h=3 /overlay haxis=axis1 vaxis=axis2 ; label haz1 = "Estimated Cumulative Hazard Rate"; label h= "Residual"; run; quit;
Section 11.3: Determining the Functional Form of a Covariate: Martingale Residuals
data sec1_10; input type dtype time ind kscore wtime; cards; 1 1 28 1 90 24 1 1 32 1 30 7 1 1 49 1 40 8 1 1 84 1 60 10 1 1 357 1 70 42 1 1 933 0 90 9 1 1 1078 0 100 16 1 1 1183 0 90 16 1 1 1560 0 80 20 1 1 2114 0 80 27 1 1 2144 0 90 5 1 2 2 1 20 34 1 2 4 1 50 28 1 2 72 1 80 59 1 2 77 1 60 102 1 2 79 1 70 71 2 1 42 1 80 19 2 1 53 1 90 17 2 1 57 1 30 9 2 1 63 1 60 13 2 1 81 1 50 12 2 1 140 1 100 11 2 1 81 1 50 12 2 1 252 1 90 21 2 1 524 1 90 39 2 1 210 0 90 16 2 1 476 0 90 24 2 1 1037 0 90 84 2 2 30 1 90 73 2 2 36 1 80 61 2 2 41 1 70 34 2 2 52 1 60 18 2 2 62 1 90 40 2 2 108 1 70 65 2 2 132 1 60 17 2 2 180 0 100 61 2 2 307 0 100 24 2 2 406 0 100 48 2 2 446 0 100 52 2 2 484 0 90 84 2 2 748 0 90 171 2 2 1290 0 90 20 2 2 1345 0 80 98 ; run; proc phreg data = sec1_10; model time*ind(0) = wtime dtype type kscore; output out = figure11_4 RESMART = mgale ; run; proc loess data=figure11_4; ods output OutputStatistics=figure11_4a; model mgale = wtime / smooth=0.6 direct; run; quit; proc sort data = figure11_4a; by wtime; run; axis1 order = (-2.0 to 1 by .5) offset = (0, 2) label= (a = 90) minor = none; axis2 order = (0 to 200 by 50) minor = none; symbol1 i = none v = dot h = 1.5 c = blue; symbol2 i = join v = none c = red; title "Figure 11.4"; proc gplot data = figure11_4a; format depvar f4.1; format wtime f4.1; plot depvar*wtime = 1 pred*wtime = 2 /haxis = axis2 vaxis = axis1 overlay; label depvar = "Martingle Residual"; label wtime = "Waiting Time to Transplant (months)"; run; quit;
Figure 11.5 on page 336.
%macro profile(data, dep, indep, step); ods listing close; proc sql; drop table _profile_; quit; proc sql; create table _profile_ (type = data , logr char(12), step char(12)); quit; %do i = 10 %to 110 %by &step; data _temp_; set &data; if wtime < &i then z2 = 0; else z2 = 1; run; proc phreg data = _temp_ ; model &dep = &indep z2; ods output FitStatistics = _ll_; run; data _null_; set _ll_; if _n_ = 1 then call symput('logr', withcovariates); run; proc sql; insert into _profile_ values("&logr", "&i"); quit; %end; data _profile_; set _profile_; logr1 = - input(logr, 8.)/2; step1 = input(step, 8.); run; ods listing; proc gplot data = _profile_; title "Figure 11.5"; symbol i = join c = blue ; axis1 minor = none label = (a=90); axis2 minor = none order = (0 to 120 by 10); format logr1 3.0; format step1 4.0; plot logr1*step1 = 1 /haxis = axis2 vaxis = axis1; label logr1 = "Profile Likelihood"; label step1 = "Cut Point"; run; quit; %mend; %profile(figure11_4, time*ind(0), dtype type kscore, 1);
Section 11.4: Graphical Checks of the Proportional Hazards Assumption
Figure 11.6 on page 339.
data sec1_9; input time type ind; cards; 0.030 1 1 0.493 1 1 0.855 1 1 1.184 1 1 1.283 1 1 1.480 1 1 1.776 1 1 2.138 1 1 2.500 1 1 2.763 1 1 2.993 1 1 3.224 1 1 3.421 1 1 4.178 1 1 4.441 1 0 5.691 1 1 5.855 1 0 6.941 1 0 6.941 1 1 7.993 1 0 8.882 1 1 8.882 1 1 9.145 1 0 11.480 1 1 11.513 1 1 12.105 1 0 12.796 1 1 12.993 1 0 13.849 1 0 16.612 1 0 17.138 1 0 20.066 1 1 20.329 1 0 22.368 1 0 26.776 1 0 28.717 1 0 28.717 1 0 32.928 1 0 33.783 1 0 34.221 1 0 34.770 1 0 39.593 1 0 41.118 1 0 45.003 1 0 46.053 1 0 46.941 1 0 48.289 1 0 57.401 1 0 58.322 1 0 60.625 1 0 0.658 2 1 0.822 2 1 1.414 2 1 2.500 2 1 3.322 2 1 3.816 2 1 4.737 2 1 4.836 2 0 4.934 2 1 5.033 2 1 5.757 2 1 5.855 2 1 5.987 2 1 6.151 2 1 6.217 2 1 6.447 2 0 8.651 2 1 8.711 2 1 9.441 2 0 10.329 2 1 11.480 2 1 12.007 2 1 12.007 2 0 12.237 2 1 12.401 2 0 13.059 2 0 14.474 2 0 15.000 2 0 15.461 2 1 15.757 2 1 16.480 2 1 16.711 2 1 17.204 2 0 17.237 2 1 17.303 2 0 17.664 2 0 18.092 2 1 18.092 2 0 18.750 2 0 20.625 2 0 23.158 2 1 27.730 2 0 31.184 2 0 32.434 2 0 35.921 2 0 42.237 2 0 44.638 2 0 46.480 2 0 47.467 2 0 48.322 2 0 56.086 2 1 ; end; data sec1_9a; set sec1_9; cons = 1; type1 = type - 1; run; proc phreg data = sec1_9a ; model time*ind(0) = cons; strata type1; output out = figure11_6 logsurv = ls /method = ch; run; data figure11_6a; set figure11_6 ; logh = log (-ls); if type1 = 0 then logh1 = logh; if type1 = 1 then logh2 = logh; run; proc sort data = figure11_6a; by time; run; options label; goptions reset = all; title "Figure 11.6"; axis1 order = (0 to 25 by 5) minor = none; axis2 order = (-4 to 0 by 1) minor = none label = ( a=90); symbol1 i = stepjl c= blue; symbol2 i = stepjl c = red l = 3; proc gplot data = figure11_6a; plot logh1*time = 1 logh2*time = 2 /overlay haxis=axis1 vaxis=axis2 ; label logh1 = "Log Cumulative Hazard Rate"; label time= "Time on Study"; run; quit;
Figure 11.7 on page 340.
data figure11_6b; set figure11_6a; retain l1 l2 -5; if logh1 ~= . then l1 = logh1; if logh2 ~= . then l2 = logh2; diff = l2 - l1; run; goptions reset = all; goptions ftext = swiss htitle = 5 htext = 3 gunit = pct border cback = white hsize = 4in vsize = 4in; filename outgraph 'e:survival_analysissashttps://stats.idre.ucla.edu/wp-content/uploads/2016/02/ch11_7.gif'; goptions gsfname = outgraph dev = gif570; axis1 order = (0 to 25 by 5) minor = none; axis2 order = (-1.5 to 1 by .5) minor = none label = ( a=90); symbol1 i = stepjl c= blue; title "Figure 11.7"; proc gplot data = figure11_6b; plot diff*time /haxis= axis1 vaxis=axis2 vref=0; label diff = "Difference in Cumulative Hazard Rates"; label time = "Time on Study"; run; quit;
Figure 11.8 on page 341.
data figure11_6c; set figure11_6b; retain h1 h2 0; if type1 = 0 then h1 = -ls; if type1 = 1 then h2 = -ls; run; proc sort data = figure11_6c; by l1; run; symbol1 c = blue i = stepjl; symbol2 c = black i = join; title "Figure 11.8"; axis1 order = (0 to 1 by .2) minor = none; axis2 order = (0 to 1 by .2) minor = none label = ( a=90); proc gplot data = figure11_6c; plot h2*h1 = 1 h1*h1=2 /overlay haxis = axis1 vaxis = axis2; label h2 = "Auto"; label h1 = "Allo"; run; quit;
Figure 11.9 on page 342.
proc phreg data = bone_marrow1 ; model t2*dfree(0) = cons; strata z10; output out = figure11_9 logsurv = ls /method = ch; run; data figure11_9a; set figure11_9 ; logh = log (-ls); if z10 = 0 then logh1 = logh; if z10 = 1 then logh2 = logh; run; proc sort data = figure11_9a; by t2; run; title "Figure 11.9"; axis1 order = (0 to 1200 by 200) minor = none; axis2 order = (-4 to 0 by 1) minor = none label = ( a=90); symbol1 i = stepjl c= blue; symbol2 i = stepjl c = red l = 3; proc gplot data = figure11_9a; plot logh1*t2 = 1 logh2*t2 = 2 /overlay haxis=axis1 vaxis=axis2 ; label logh1 = "Log Cumulative Hazard Rate"; label t2= "Time on Study"; run; quit;
Figure 11.10 on page 343.
data figure11_9b; set figure11_9a; retain l1 l2 -5; if logh1 ~= . then l1 = logh1; if logh2 ~= . then l2 = logh2; diff = l2 - l1; run; axis1 order = (0 to 1200 by 200) minor = none; axis2 order = (0 to 1.6 by .4) minor = none label = ( a=90); symbol1 i = stepjl c= blue; title "Figure 11.10"; proc gplot data = figure11_9b; plot diff*t2 /haxis= axis1 vaxis=axis2; label diff = "Difference in Cumulative Hazard Rates"; label t2 = "Time on Study"; run; quit;
Figure 11.11 on page 344.
data figure11_11; set figure11_9b; retain h1 h2 0; if z10 = 0 then h1 = -ls; if z10 = 1 then h2 = -ls; run; proc sort data = figure11_11; by l1; run; symbol1 c = blue i = stepjl; symbol2 c = black i = join; title "Figure 11.11"; axis1 order = (0 to 1 by .2) minor = none; axis2 order = (0 to 1 by .2) minor = none label = ( a=90); proc gplot data = figure11_11; plot h2*h1 = 1 h1*h1=2 /overlay haxis = axis1 vaxis = axis2; label h2 = "MTX Group"; label h1 = "No MTX Group"; run; quit;
Figure 11.12 on page 345.
data bone_z7cat; set bone_marrow1; if z7c <= -5 then cz7 = 0; if -5 < z7c <= -3.06 then cz7 = 1; if -3.06 < z7c <= 0 then cz7 = 2; if 0 < z7c then cz7 = 3; run; proc phreg data = bone_z7cat; model t2*dfree(0) = z1 z2 z1xz2 g2 g3 z8 z10 ; strata cz7; baseline out = figure11_12 LOGSURV = h ; run; data figure11_12a; set figure11_12; if h ~= 0 then lgh = log(-h); run; proc sort data = figure11_12a; by t2; run; title "Figure 11.12"; axis1 order = (0 to 1200 by 200) offset = (5,2) minor = none; axis2 order = (-4.5 to .5 by 1) offset = (5,5) minor = none label = ( a=90); symbol1 i = stepjl c= blue; symbol2 i = stepjl c = red l = 3; symbol3 i = stepjl c = green l = 2; symbol4 i = stepjl c = black l = 20; legend label=none value=(h=2 font=swiss '<=-5' '-5, -3.06' '-3.06, 0' '>0') position=(bottom right inside) mode=share cborder=black; proc gplot data = figure11_12a; plot lgh*t2 = cz7 /legend=legend haxis=axis1 vaxis=axis2 ; label lgh = "Log Cumulative Hazard Rate"; label t2= "Time on Study"; run; quit;
Figure 11.13 on page 346.
proc sort data = figure11_12; by t2; proc transpose data = figure11_12 out = fig11_13(keep = t2 group0-group3) prefix=group; by t2; id cz7; var h ; run; proc print data=fig11_13; run; data fig11_13a; set fig11_13; retain lgh1 lgh2 lgh3 lgh4 0; if group0 ~= . then lgh1 = log(-group0); if group1 ~= . then lgh2 = log(-group1); if group2 ~= . then lgh3 = log(-group2); if group3 ~= . then lgh4 = log(-group3); diff2 = lgh2 - lgh1; diff3 = lgh3 - lgh1; diff4 = lgh4 - lgh1; run; axis1 order = (0 to 1200 by 200) minor = none; axis2 order = (-1 to 2 by .5) minor = none label = ( a=90); symbol1 i = stepjl c= black; symbol2 i = stepjl c = red l = 3; symbol3 i = stepjl c = green l = 2; legend label=none value=(h=2 font=swiss '2 vs. 1' '3 vs. 1' '4 vs. 1') position=(bottom right inside) mode=share cborder=black; title "Figure 11.13"; proc gplot data = fig11_13a; plot diff2*t2 = 1 diff3*t2 = 2 diff4*t2 = 3 / legend = legend overlay haxis= axis1 vaxis=axis2; label diff2 = "Difference in Cumulative Hazard Rates"; label t2 = "Time on Study"; run; quit;
Figure 11.14 on page 347.
data fig11_14; set fig11_13; retain h1 h2 h3 h4 0; if group0 ~= . then h1 = -group0; if group1 ~= . then h2 = -group1; if group2 ~= . then h3 = -group2; if group3 ~= . then h4 = -group3; run; proc sort data = fig11_14; by h1; run; symbol1 i = stepjl c= black; symbol2 i = stepjl c = red l = 3; symbol3 i = stepjl c = green l = 2; title "Figure 11.14"; axis1 order = (0 to .8 by .2) minor = none; axis2 order = (0 to 1.4 by .2) minor = none label = ( a=90); legend label=none value=(h=2 font=swiss 'Strata 2' 'Strata 3' 'Strata 4') position=(bottom right inside) mode=share cborder=black; proc gplot data = fig11_14; plot h2*h1 = 1 h3*h1 = 2 h4*h1 = 3 /legend = legend overlay haxis = axis1 vaxis = axis2; label h2 = "Estimated Cumulative Hazard Rate for jth Strata"; label h1 = "Estimated Cumulative Hazard Rate for Strata with centered Waiting Time <=-5"; run; quit;
Figure 11.15 on page 348.
data sec1_9a; set sec1_9; cons = 1; run; proc phreg data = sec1_9a ; model time*ind(0)=cons ; output out = try15 logsurv=h; run; proc sql noprint; select count(time) into :t1-:t2 from sec1_9a group by type; quit; proc sort data = sec1_9; by time ind; proc sort data = try15; by time ind; data try15a; merge sec1_9 try15; by time; retain n1 n2 h1 h2 c1 c2 0; if h ~=. then do; if type = 1 then do ; c1 = c1 + 1; n1 = n1 + ind; h1 = -h + h1; end; else if type = 2 then do; c2 = c2 + 1; n2 = n2 + ind; h2 = -h + h2; end; end; hh1 = h1 - h*(&t1-c1); hh2 = h2 - h*(&t2-c2); run; axis1 order = (0 to 30 by 5) label=('Number of Failures') minor=none; axis2 order = (0 to 30 by 5) label=(a=90 'Estimated Cumulative Hazard Rates') minor=none; symbol1 i = join c = red ; symbol2 i = join c = blue; symbol3 i = join c = black; title 'Figure 11.15'; proc gplot data = try15a; plot hh1*n1 hh2*n2 n1*n1 /overlay haxis = axis1 vaxis = axis2; run; quit;
Figure 11.16 on page 349.
proc phreg data = bone_marrow1; model t2*dfree(0) = z1 z2 z1xz2 g2 g3 z7; baseline out = fig11_16 LOGSURV = h ; run; proc sql noprint; select count(t2) into :t1-:t2 from bone_marrow1 group by z10; quit; proc sort data = bone_marrow1; by t2; data try16a; merge bone_marrow1 fig11_16 ; by t2; retain n1 n2 c1 c2 h1 h2 0; if h ~=. then do; if z10 = 0 then do ; c1 = c1 + 1; n1 = n1 + dfree; h1 = h1 - h; end; if z10 = 1 then do; c2 = c2 + 1; n2 = n2 + dfree; h2 = h2 - h; end; end; hh1 = h1 - h*(&t1-c1); hh2 = h2 - h*(&t2-c2); run; axis1 order = (0 to 60 by 10) minor = none label=('Number of Failures'); axis2 order = (0 to 60 by 20) minor = none label=(a=90 'Estimated Cumulative Hazard Rates'); symbol1 i = join c = red ; symbol2 i = join c = blue; symbol3 i = join c = black; title 'Figure 11.16'; proc gplot data = try16a; plot hh1*n1 hh2*n2 n1*n1 /overlay haxis = axis1 vaxis = axis2; run; quit;
Figure 11.17 is omitted here.
Figure 11.18 on page 352.
Section 11.5: Deviance Residuals
Section 11.6: Checking the Influence of Individual Observations
Section 11.7: Assessing the Fit of the Additive Model