An Introduction to Categorical Analysis by Alan Agresti Chapter 7 – Building and Applying Logit and Log-linear Models

We will present both the SAS proc catmod and proc genmod code in this chapter most of the time.

Table 7.2 based on the drug data set, table 7.1 on page 178.

data drug;
  input R G A C M count;
  label R='Race'
        G='Gender' 
        A='alcohol' 
        C='smoke'
        M='marijuana';
cards;
1  1  1   1   1   405
1  1  1   1   0   268 
1  1  1   0   1    13
1  1  1   0   0   218
1  1  0   1   1     1
1  1  0   1   0    17
1  1  0   0   1     1
1  1  0   0   0   117
1  0  1   1   1   453
1  0  1   1   0   228 
1  0  1   0   1    28
1  0  1   0   0   201
1  0  0   1   1     1
1  0  0   1   0    17
1  0  0   0   1     1
1  0  0   0   0   133
0  1  1   1   1    23
0  1  1   1   0    23 
0  1  1   0   1     2
0  1  1   0   0    19
0  1  0   1   1     0
0  1  0   1   0     1
0  1  0   0   1     0
0  1  0   0   0    12
0  0  1   1   1    30
0  0  1   1   0    19 
0  0  1   0   1     1
0  0  1   0   0    18
0  0  0   1   1     1
0  0  0   1   0     8
0  0  0   0   1     0
0  0  0   0   0    17
;
run;

The catmod proc will drop any observations where the count is zero. In order to solve this problem we create a new count variable where we add a very small amount (in this case .0005) to all the counts. This will not change any of the Maximum Likelihood estimates, it will simply ensure that all the observations are used in the calculations.

data drugmore;
  set drug;
  count1= count+.0005;
run;

Table 7.2, p. 179 using proc catmod.

ods listing close;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r a c m g r ;
  ods output anova=temp1;
run;
quit;
data temp1;
  set temp1;
  model = 'RG_A_C_M';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r|a|c|m @2;
  ods output anova=temp2;
run;
quit;
data temp2;
  set temp2;
  model = 'homogeneous';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r|a|c|m @3;
  ods output anova=temp3;
run;
quit;
data temp3;
  set temp3;
  model = 'three-way';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c g|m  a|m c|m;
  ods output anova=temp4;
run;
quit;
data temp4;
  set temp4;
  model = 'homogeneous - AC';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c g|m a|c  c|m;
  ods output anova=temp5;
run;
quit;
data temp5;
  set temp5;
  model = 'homogeneous - AM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c g|m a|c a|m ;
  ods output anova=temp6;
run;
quit;
data temp6;
  set temp6;
  model = 'homogeneous - CM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m  g|c g|m a|c a|m c|m;
  ods output anova=temp7;
run;
quit;
data temp7;
  set temp7;
  model = 'homogeneous - AG';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|c r|m g|a g|c g|m a|c a|m c|m;
  ods output anova=temp8;
run;
quit;
data temp8;
  set temp8;
  model = 'homogeneous - AR';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|m a|c a|m c|m;
  ods output anova=temp9;
run;
quit;
data temp9;
  set temp9;
  model = 'homogeneous - CG';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|m g|a g|c g|m a|c a|m c|m;
  ods output anova=temp10;
run;
quit;
data temp10;
  set temp10;
  model = 'homogeneous - CR';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c  a|c a|m c|m;
  ods output anova=temp11;
run;
quit;
data temp11;
  set temp11;
  model = 'homogeneous - GM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c  g|a g|c g|m a|c a|m c|m;
  ods output anova=temp12;
run;
quit;
data temp12;
  set temp12;
  model = 'homogeneous - MR';
run;
 
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|m g|a g|m a|c a|m c|m;
  ods output anova=temp13;
run;
quit;
data temp13;
  set temp13;
  model = 'RG_RA_RM_GA_GM_AC_AM_CM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a  g|a g|m a|c a|m c|m;
  ods output anova=temp14;
run;
quit;
data temp14;
  set temp14;
  model = 'RG_RA_GA_GM_AC_AM_CM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r r|a  g|a a|c a|m c|m;
  ods output anova=temp15;
run;
quit;
data temp15;
  set temp15;
  model = 'RG_RA_RM_GA_AC_AM_CM';
run;
ods output close;
ods listing;
data combo;
  set temp1 temp2 temp3 temp4 temp5 temp6 temp7 TEMP8 TEMP9 TEMP10 TEMP11 temp12 temp13 temp14 temp15;
  rename chisq=G2;
  rename probchisq=P_value;
run;
proc print data=combo noobs;
  where source='Likelihood Ratio';
  var model G2 df P_value;
run;

Model G2 DF P_value

RG_A_C_M 1325.09 25 <.0001 homogeneous 15.32 16 0.5011 three-way 5.25 6 0.5119 homogeneous – AC 201.19 17 <.0001 homogeneous – AM 106.93 17 <.0001 homogeneous – CM 513.45 17 <.0001 homogeneous – AG 18.70 17 0.3460 homogeneous – AR 20.31 17 0.2589 homogeneous – CG 16.30 17 0.5026 homogeneous – CR 15.77 17 0.5404 homogeneous – GM 25.14 17 0.0915 homogeneous – MR 18.91 17 0.3337 RG_RA_RM_GA_GM_AC_AM_CM 16.72 18 0.5425 RG_RA_GA_GM_AC_AM_CM 19.89 19 0.4012 RG_RA_RM_GA_AC_AM_CM 28.79 20 0.0920

The code using proc genmod is shown below.

proc genmod data = drug;
  class a c r g  m ;
  model count =a c r|g  m /dist = poi link = log ;
  ods output modelfit = mf1;
run;

proc genmod data = drug;
  class a c r g m ;
  model count = a|c a|r  a|g a|m c|r  c|g c|m r|g r|m g|m
                /dist = poi link = log ;
 ods output modelfit = mf2;
run;
proc genmod data = drug;
  class a c r g m;
    model count =  g|r|a|c|m @2 /dist = poi link = log ;
	ods output modelfit = mf2;
run;

proc genmod data = drug;
  class a c r g m;
    model count =  g|r|a|c|m @3 /dist = poi link = log ;
	ods output modelfit = mf3;
run;

proc genmod data = drug;/*-ac*/
  class a c r g m;
  model count = r|g r|a r|c r|m g|a g|c g|m  a|m c|m /dist = poi link = log ;
 ods output modelfit = mf4;
run;
proc genmod data = drug;/*-am*/
  class a c r g m;
  model count = r|g r|a r|c r|m g|a g|c g|m a|c  c|m /dist = poi link = log ;
 ods output modelfit = mf5;
run;

proc genmod data = drug;/*-cm*/
  class a c r g m;
  model count =r|g r|a r|c r|m g|a g|c g|m a|c a|m  /dist = poi link = log ;
 ods output modelfit = mf6;
run;
proc genmod data = drug;/*-ag*/
  class a c r g m;
  model count= r|g r|a r|c r|m  g|c g|m a|c a|m c|m/dist = poi link = log ;
 ods output modelfit = mf7;
run;
proc genmod data = drug;/*-ar*/
  class a c r g m;
  model count= r|g r|c r|m g|a g|c g|m a|c a|m c|m /dist = poi link = log ;
 ods output modelfit = mf8;
run;
proc genmod data = drug;/*-cg*/
  class a c r g m;
  model count= r|g r|a r|c r|m g|a g|m a|c a|m c|m /dist = poi link = log ;
 ods output modelfit = mf9;
run;
proc genmod data = drug;/*-cr*/
  class a c r g m;
  model count=r|g r|a r|m g|a g|c g|m a|c a|m c|m /dist = poi link = log ;
 ods output modelfit = mf10;
run;
proc genmod data = drug;/*-gm*/
 class a c r g m;
  model count=r|g r|a r|c r|m g|a g|c  a|c a|m c|m /dist = poi link = log ;
 ods output modelfit = mf11;
run;
proc genmod data = drug;/*-mr*/
 class a c r g m;
  model count= r|g r|a r|c  g|a g|c g|m a|c a|m c|m /dist = poi link = log ;
 ods output modelfit = mf12;
run;
proc genmod data = drug;/*ac am cm ag ar gm gr mr*/
  class a c r g m;
  model count = r|g r|a r|m g|a g|m a|c a|m c|m
				/dist = poi link = log ;
 ods output modelfit = mf13;
run;
proc genmod data = drug;/*ac am cm ag ar gm gr*/
   class a c r g m;
  model count =r|g r|a  g|a g|m a|c a|m c|m 
				/dist = poi link = log ;
 ods output modelfit = mf14;
run;
proc genmod data = drug;/*ac am cm ag ar gr */
  class a c r g m;
  model count = g|r r|a  g|a a|c a|m c|m
				/dist = poi link = log ;
 ods output modelfit = mf15;
run;

Inputting Survey data, table 7.3, p. 181.

data sex;
  input premar birth count @@;
  assoc = premar*birth; 
cards;
1 4  38   1 3  60   1 2 68   1 1 81
2 4  14   2 3  29   2 2 26   2 1 24
3 4  42   3 3  74   3 2 41   3 1 18
4 4 157   4 3 161   4 2 57   4 1 36
;
run;

Table 7.3, p. 181 and the goodness-of-fit statistics in the second paragraph.

proc genmod data = sex;
  class premar birth;
  model count = premar birth /dist = poi link = log ;
  output out = table7_3 pred=pred STDRESCHI = r;
run;
proc format;
  value premar 1='Always wrong' 2='Usually Wrong' 3='Sometimes Wrong' 4='Never wrong';
  value birth 1='Strongly Disagree' 2='Disagree' 3='Agree' 4='Strongly Agree';
run;
proc print data = table7_3 noobs;
  format premar premar. birth birth.;
run;

Criteria For Assessing Goodness Of Fit

Criterion DF Value Value/DF

Deviance 9 127.6529 14.1837 Scaled Deviance 9 127.6529 14.1837 Pearson Chi-Square 9 128.6836 14.2982 Scaled Pearson X2 9 128.6836 14.2982 Log Likelihood 2983.6850

premar birth count assoc pred r

Always wrong Strongly Agree 38 4 66.951 -4.83965 Always wrong Agree 60 3 86.423 -4.11671 Always wrong Disagree 68 2 51.214 3.07671 Always wrong Strongly Disagree 81 1 42.411 7.60318 Usually Wrong Strongly Agree 14 8 25.208 -2.75682 Usually Wrong Agree 29 6 32.540 -0.81148 Usually Wrong Disagree 26 4 19.283 1.81148 Usually Wrong Strongly Disagree 24 2 15.969 2.32832 Sometimes Wrong Strongly Agree 42 12 47.435 -1.02637 Sometimes Wrong Agree 74 9 61.231 2.24730 Sometimes Wrong Disagree 41 6 36.285 0.97623 Sometimes Wrong Strongly Disagree 18 3 30.049 -2.68175 Never wrong Strongly Agree 157 16 111.405 6.78455 Never wrong Agree 161 12 143.806 2.38456 Never wrong Disagree 57 8 85.218 -4.60386 Never wrong Strongly Disagree 36 4 70.571 -6.06333

Using proc catmod, we get the same predicted values and the likelihood ratio test statistic.

proc catmod data=sex;
  weight count;
  model premar*birth= _response_/ predict=freq noprofile noiter noresponse ;
  loglin premar birth;
run;
quit;

Maximum Likelihood Analysis of Variance

Source DF Chi-Square Pr > ChiSq ————————————————– premar 3 212.44 <.0001 birth 3 66.20 <.0001

Likelihood Ratio 9 127.65 <.0001

Maximum Likelihood Predicted Values for Frequencies

——-Observed—— ——Predicted—— Standard Standard premar birth Frequency Error Frequency Error Residual —————————————————————————– 1 1 81 8.597365 42.41145 3.835381 38.58855 1 2 68 7.937662 51.21382 4.314471 16.78618 1 3 60 7.490815 86.42333 6.095867 -26.4233 1 4 38 6.036605 66.9514 5.130779 -28.9514 2 1 24 4.835077 15.96868 1.94804 8.031317 2 2 26 5.026925 19.28294 2.265306 6.717063 2 3 29 5.300169 32.53996 3.516572 -3.53996 2 4 14 3.713265 25.20842 2.827013 -11.2084 3 1 18 4.201203 30.0486 2.981299 -12.0486 3 2 41 6.259766 36.2851 3.396646 4.714903 3 3 74 8.251448 61.2311 4.989831 12.7689 3 4 42 6.332064 47.43521 4.118554 -5.43521 4 1 36 5.882213 70.57127 5.716958 -34.5713 4 2 57 7.313779 85.21814 6.309364 -28.2181 4 3 161 11.53289 143.8056 8.335372 17.19438 4 4 157 11.41846 111.405 7.268973 45.59503

Linear-by-linear association in the Survey data.

The ML estimate of the association parameter, beta, and its standard error are the estimate and standard error of assoc in the table labeled Analysis of Parameter Estimates which is followed by the Wald test and 95% Wald confidence interval, middle of p. 183.

proc genmod data=sex; 
  format premar premar. birth birth.;
  class premar birth;
  model count = premar birth assoc/ dist=poi link=log ;
  output out=temp p=predict ;
run;

The GENMOD Procedure

Model Information Data Set WORK.SEX Distribution Poisson Link Function Log Dependent Variable count Observations Used 16 Class Level Information

Class Levels Values premar 4 Always wrong Never wrong Sometimes Wrong Usually Wrong birth 4 Agree Disagree Strongly Agree Strongly Disagree Criteria For Assessing Goodness Of Fit

Criterion DF Value Value/DF Deviance 8 11.5337 1.4417 Scaled Deviance 8 11.5337 1.4417 Pearson Chi-Square 8 11.5085 1.4386 Scaled Pearson X2 8 11.5085 1.4386 Log Likelihood 3041.7446

Algorithm converged. Analysis Of Parameter Estimates

Standard Wald 95% Chi- Parameter DF Estimate Error Confidence Limits Square Pr > ChiSq Intercept 1 2.4609 0.1323 2.2015 2.7202 345.91 <.0001 premar Always wrong 1 1.6460 0.1347 1.3819 1.9100 149.24 <.0001 premar Never wrong 1 -0.1077 0.1988 -0.4974 0.2820 0.29 0.5880 premar Sometimes Wrong 1 -0.1241 0.1481 -0.4143 0.1661 0.70 0.4021 premar Usually Wrong 0 0.0000 0.0000 0.0000 0.0000 . . birth Agree 1 -0.7245 0.1620 -1.0420 -0.4070 20.00 <.0001 birth Disagree 1 -0.4641 0.1195 -0.6984 -0.2298 15.08 0.0001 birth Strongly Agree 1 -1.8797 0.2491 -2.3679 -1.3914 56.94 <.0001 birth Strongly Disagre 0 0.0000 0.0000 0.0000 0.0000 . . assoc 1 0.2858 0.0282 0.2305 0.3412 102.46 <.0001 Scale 0 1.0000 0.0000 1.0000 1.0000 NOTE: The scale parameter was held fixed.

Table 7.4, p. 184.

proc print data = temp;
  format premar premar. birth birth.;
  var premar birth count predict;  
run;

Obs premar birth count predict

1 Always wrong Strongly Agree 38 29.094 2 Always wrong Agree 60 69.396 3 Always wrong Disagree 68 67.654 4 Always wrong Strongly Disagree 81 80.857 5 Usually Wrong Strongly Agree 14 17.600 6 Usually Wrong Agree 29 31.543 7 Usually Wrong Disagree 26 23.107 8 Usually Wrong Strongly Disagree 24 20.750 9 Sometimes Wrong Strongly Agree 42 48.773 10 Sometimes Wrong Agree 74 65.681 11 Sometimes Wrong Disagree 41 36.152 12 Sometimes Wrong Strongly Disagree 18 24.394 13 Never wrong Strongly Agree 157 155.533 14 Never wrong Agree 161 157.379 15 Never wrong Disagree 57 65.088 16 Never wrong Strongly Disagree 36 33.000

Section 7.2.3 using the contrast statement in proc genmod to obtain the likelihood-ratio test and Wald test statistics.

proc genmod data = sex; 
   class premar birth;
   model  count = premar birth assoc / dist=poi link=log ;
   output out = table7_4 pred = pred;
   contrast "asso" premar 0 birth 0 assoc 1 ;
   contrast "asso_wald" premar 0 birth 0 assoc 1 /wald;
run;

Contrast Results

Chi- Contrast DF Square Pr > ChiSq Type asso 1 116.12 <.0001 LR asso_wald 1 102.46 <.0001 Wald

Inputting Job Satisfaction data, table 7.5, p. 186.

data cmh;
input gender $ income satisf count @@;
cards;
F  3 1 1    F 3  3 3    F  3 4 11    F  3 5 2
F 10 1 2    F 10 3 3    F 10 4 17    F 10 5 3
F 20 1 0    F 20 3 1    F 20 4  8    F 20 5 5
F 35 1 0    F 35 3 2    F 35 4  4    F 35 5 2
M  3 1 1    M  3 3 1    M  3 4  2    M  3 5 1
M 10 1 0    M 10 3 3    M 10 4  5    M 10 5 1
M 20 1 0    M 20 3 0    M 20 4  7    M 20 5 3
M 35 1 0    M 35 3 1    M 35 4  9    M 35 5 6
;
run;

Proc catmod will drop any observations where the count equals zero. To fix this problem we simply create a new count variable where we add a very small amount (in this case .00001) to all the counts. This will not change any of the Maximum Likelihood estimates, it will simply ensure that all the observations are used in the calculations.

data cmh1;
  set cmh;
  count1 = count + .00001;
run;

The Likelihood Ratio statistic in the output is the G^2 statistic for the model (GI, GS), Section 7.3.2, p. 186.

proc catmod data=cmh1;
  weight count1;
  model gender*income*satisf= _response_ /noiter noprofile noparm noresponse;
  loglin gender|income gender|satisf ;
  ods output anova=temp1;
run;
quit;
data _null_;
  set temp1;
  if source = 'Likelihood Ratio' then call symput('chisq1', chisq);
  if source = 'Likelihood Ratio' then call symput('df1', df);
run;
%put chisq=&chisq1 and df=&df1; /* this will make the two number appear in the log file. */

The CATMOD Procedure Data Summary

Response gender*income*satisf Response Levels 32 Weight Variable count1 Populations 1 Data Set CMH1 Total Frequency 104 Frequency Missing 0 Observations 32 Maximum Likelihood Analysis Maximum likelihood computations converged. Maximum Likelihood Analysis of Variance

Source DF Chi-Square Pr > ChiSq ————————————————– gender 1 2.72 0.0991 income 3 2.47 0.4808 gender*income 3 11.52 0.0092 satisf 3 49.51 <.0001 gender*satisf 3 1.28 0.7330

Likelihood Ratio 18 19.37 0.3696

This can also be produced by proc genmod.

proc genmod data= cmh;
  class gender income satisf;
   model count = gender|income gender|satisf  /dist = poi link = log;
run;

Note 1: We have created two macro variables in order to use a Chi-squared test to compare the two models.

Note 2: In the previous data step we created a new count variable in order to get rid of cells with zero entries. However, if the amount added to all the counts was too small proc catmod might have problems with collinearity when calculating the degrees of freedom for the G^2 statistic. Our suggestion is to try adding various amounts to all the counts starting with .1 then .01 then .001 and so on until the degrees of freedom start to drop. The point is that in order to get the best approximation for the G^2 statistic you want to add the smallest possible amount without losing degrees of freedom due to collinearity.

data cmh2;
  set cmh1;
  count2=count+.001;
run;
proc catmod data=cmh2;
  weight count2;
  model gender*income*satisf= _response_ /noiter noprofile noparm noresponse;
  loglin gender|income gender|satisf income|satisf;
  ods output anova=temp2;
run;
quit;
data _null_;
  set temp2;
  if source = 'Likelihood Ratio' then call symput('chisq2', chisq);
  if source = 'Likelihood Ratio' then call symput('df2', df);
run;
%put chisq=&chisq2 and df=&df2; /* this will make the numbers appear in the log file */

The CATMOD Procedure Data Summary Response gender*income*satisf Response Levels 32 Weight Variable count2 Populations 1 Data Set CMH2 Total Frequency 104.032 Frequency Missing 0 Observations 32 Maximum Likelihood Analysis Maximum likelihood computations converged. Maximum Likelihood Analysis of Variance

Source DF Chi-Square Pr > ChiSq ————————————————– gender 1 1.58 0.2089 income 3 0.99 0.8025 gender*income 3 10.47 0.0150 satisf 3 31.38 <.0001 gender*satisf 3 0.11 0.9902 income*satisf 9 6.46 0.6932

Likelihood Ratio 9 7.07 0.6301

The following is the code for the proc genmod.

proc genmod data= cmh;
  class gender income satisf;
  model count = gender|income gender|satisf income|satisf  / dist = poi link = log;
run;

Using a Chi-squared test to compare the two models, (GI, GS, IS) to (GI, GS), section 7.3.2, p. 186. We should notice that the comparison of the two models can be done by using contrast statement. But, with this particular model, it didn’t converge because that the negative Hessian is not positive definite and this prevents SAS further estimate any other statements. This is why we have to do the comparison manually.

data chitest;
  chi_diff= &chisq1-&chisq2;
  df_diff= &df1-&df2;
  p_value = 1-cdf('CHISQ', (&chisq1-&chisq2), (&df1-&df2) );
run;
proc print data = chitest;
run;

Obs    chi_diff    df_diff    p_value
 1      12.3000       9       0.19692

Generating the scores {1, 2, 3, 4} for income and satisfaction as well as creating the association variable, p. 187.

data score;
  set cmh1;
  if income=3 then score1 = 1;
  if income=10 then score1= 2;
  if income=20 then score1=3;
  if income=35 then score1=4;
  score2 = 1;
  if satisf ne 1 then score2 = satisf - 1;
  assoc = score1*score2;
run;

The estimate and standard error for assoc in the output is the ML estimate and ASE for beta-hat followed by the Wald test and 95% Wald confidence interval, p. 187 and 188.

proc genmod data= score;
  class  female income satisf  ;
  model count = female|income female|satisf  assoc / expected dist = poi link = log;
  contrast "assoc" assoc 1;
  contrast "assoc_wald" assoc 1 /wald;
run;

The GENMOD Procedure

Criteria For Assessing Goodness Of Fit

Criterion DF Value Value/DF

Deviance 17 12.3268 0.7251 Scaled Deviance 17 12.3268 0.7251 Pearson Chi-Square 17 12.8853 0.7580 Scaled Pearson X2 17 12.8853 0.7580 Log Likelihood 70.2108

Algorithm converged.

Analysis Of Parameter Estimates

Parameter Intercept female 0 female 1 income 3 income income income female*income 0 3 female*income 0 female*income 0 female*income 0 female*income 1 3 female*income 1 female*income 1 female*income 1 satisf 1 satisf 3 satisf 4 satisf 5 female*satisf 0 1 female*satisf 0 3 female*satisf 0 4 female*satisf 0 5 female*satisf 1 1 female*satisf 1 3 female*satisf 1 4 female*satisf 1 5 assoc Scale Standard Wald 95% Confidence Chi- DF Estimate Error Limits Square Pr > ChiSq 1 -5.2238 2.3367 -9.8036 -0.6440 5.00 0.0254 1 0.8040 0.5420 -0.2583 1.8663 2.20 0.1380 0 0.0000 0.0000 0.0000 0.0000 . . 1 4.2551 1.4863 1.3421 7.1681 8.20 0.0042 10 1 3.5434 1.0790 1.4285 5.6582 10.78 0.0010 20 1 1.7928 0.6845 0.4512 3.1343 6.86 0.0088 35 0 0.0000 0.0000 0.0000 0.0000 . . 1 -1.8948 0.6889 -3.2450 -0.5446 7.56 0.0060 10 1 -1.6969 0.5912 -2.8557 -0.5381 8.24 0.0041 20 1 -1.0186 0.6013 -2.1971 0.1598 2.87 0.0902 35 0 0.0000 0.0000 0.0000 0.0000 . . 0 0.0000 0.0000 0.0000 0.0000 . . 10 0 0.0000 0.0000 0.0000 0.0000 . . 20 0 0.0000 0.0000 0.0000 0.0000 . . 35 0 0.0000 0.0000 0.0000 0.0000 . . 1 1.0097 1.1165 -1.1785 3.1980 0.82 0.3658 1 1.4297 0.8213 -0.1800 3.0393 3.03 0.0817 1 2.1307 0.5169 1.1176 3.1439 16.99 <.0001 0 0.0000 0.0000 0.0000 0.0000 . . 1 -0.3044 1.2714 -2.7962 2.1875 0.06 0.8108 1 0.0210 0.7408 -1.4310 1.4730 0.00 0.9774 1 -0.1920 0.5105 -1.1924 0.8085 0.14 0.7069 0 0.0000 0.0000 0.0000 0.0000 . . 0 0.0000 0.0000 0.0000 0.0000 . . 0 0.0000 0.0000 0.0000 0.0000 . . 0 0.0000 0.0000 0.0000 0.0000 . . 0 0.0000 0.0000 0.0000 0.0000 . . 1 0.3878 0.1547 0.0846 0.6910 6.28 0.0122 0 1.0000 0.0000 1.0000 1.0000

Contrast Results

Chi- Contrast DF Square Pr > ChiSq Type

assoc 1 7.04 0.0080 LR assoc_wald 1 6.28 0.0122 Wald

Section 7.3.5, section 7.3.6 and section 7.3.7 on page 188 and page 190. The sample correlation between income and job satisfaction for female and male is shown below.

proc sort data = score;
by female;
proc corr data = score noprob nosimple;
  by female;
  weight count;
  var score1 score2 ;
run;

The CORR Procedure

   2        Variables:    score1   score2
     Weight Variable:     count
female=0
Pearson Correlation Coefficients, N = 16
              score1        score2
score1       1.00000       0.38145
score2       0.38145       1.00000

female=1

Pearson Correlation Coefficients, N = 16

              score1        score2
score1       1.00000       0.17086
score2       0.17086       1.00000

Means by gender and by levels of income using the scores {1, 2, 3, 4} for job satisfaction, p. 189.

proc sort data=score out=sorted;
  by gender income;
run;
proc means data=sorted mean;
  freq count;
  class gender income;
  var score2;
run;

gender income N Obs Mean ——————————————————– F 3 17 2.8235294

10 25 2.8400000

20 14 3.2857143

35 8 3.0000000

M 3 5 2.6000000

10 9 2.7777778

20 10 3.3000000

35 16 3.3125000 ——————————————————–

Generalized correlation statistic in the middle of page 188. SAS computes three different generalized Cochran-Mantel-Haenszel Statistics. The first line of output below is the first generalization in section 7.3.5. The result on testing difference among the four income levels in their mean job satisfaction (result on page 189, the third paragraph) is also shown below. The last line of the output is the result on page 190 on general association statistic.

proc freq data = score;
  weight count;
  tables female*score1*score2 /cmh noprint;
run;

The FREQ Procedure

Summary Statistics for score1 by score2
Controlling for female

  Cochran-Mantel-Haenszel Statistics (Based on Table Scores)

Statistic    Alternative Hypothesis    DF       Value      Prob
---------------------------------------------------------------
    1        Nonzero Correlation        1      6.6235    0.0101
    2        Row Mean Scores Differ     3      9.2259    0.0264
    3        General Association        9     10.2001    0.3345

Total Sample Size = 104

Using the scores {3, 10, 20, 35} for income and {1, 3, 4, 5} for satisfaction, we have the generalized correlation different from what we have above.

proc freq data = score;
  weight count;
  tables female*income*satisf /cmh noprint;
run;

The FREQ Procedure

Summary Statistics for income by satisf
Controlling for female

  Cochran-Mantel-Haenszel Statistics (Based on Table Scores)

Statistic    Alternative Hypothesis    DF       Value      Prob
---------------------------------------------------------------
    1        Nonzero Correlation        1      6.1563    0.0131
    2        Row Mean Scores Differ     3      9.0342    0.0288
    3        General Association        9     10.2001    0.3345

Total Sample Size = 104

Section 7.4.2 Clinical Trials Example. Inputting Clinical Trial data, table 7.7, p. 193.

data trial;
  input center treat response count;
cards;
       1         1         1         0  
       1         1         0         5  
       1         0         1         0  
       1         0         0         9  
       2         1         1         1  
       2         1         0        12  
       2         0         1         0  
       2         0         0        10  
       3         1         1         0  
       3         1         0         7  
       3         0         1         0  
       3         0         0         5  
       4         1         1         6  
       4         1         0         3  
       4         0         1         2  
       4         0         0         6  
       5         1         1         5  
       5         1         0         9  
       5         0         1         2  
       5         0         0        12  
;
run;

Allowing zero cell entries. proc genmod example on page 193.

proc genmod data = trial descending;
   class center treat;
   weight count;
   model response = center treat /dist=multinomial ;
run;

Analysis Of Parameter Estimates

Standard Wald 95% Confidence Chi- Parameter DF Estimate Error Limits Square Pr > ChiSq

Intercept1 1 -0.4763 0.5058 -1.4677 0.5151 0.89 0.3463 center 1 1 -25.5795 172969.8 -339040 338989.0 0.00 0.9999 center 2 1 -2.1802 1.1327 -4.4003 0.0399 3.70 0.0543 center 3 1 -25.8964 187690.2 -367892 367840.1 0.00 0.9999 center 4 1 1.0631 0.7011 -0.3110 2.4373 2.30 0.1294 center 5 0 0.0000 0.0000 0.0000 0.0000 . . treat 0 1 -1.5460 0.7017 -2.9212 -0.1708 4.85 0.0276 treat 1 0 0.0000 0.0000 0.0000 0.0000 . . Scale 0 1.0000 0.0000 1.0000 1.0000

The results by adding a constant less than .001 and by collapsing center 1, center 2 and center 3.

data trial_add;
  set trial;
  count1 = count + .001;
run;
proc genmod data=trial_add ;
  class center treat ;
  weight count1;
  model response = center treat / dist=multinomial type3;
run;

data collapse;
  set trial_add;
  if center = 1 or center = 2 or center = 3 then ccenter = 1;
  else if center =4 then ccenter= 2;
  else if center = 5 then ccenter = 3;
run;

proc genmod data=collapse ;
  class ccenter treat ;
  weight count1;
  model response = ccenter treat / dist=multinomial type3;
run;

Model 1:

Criteria For Assessing Goodness Of Fit

Criterion DF Value Value/DF

Log Likelihood -28.9239

Analysis Of Parameter Estimates

Standard Wald 95% Confidence Chi- Parameter DF Estimate Error Limits Square Pr > ChiSq

Intercept1 1 0.4765 0.5057 -0.5147 1.4678 0.89 0.3461 center 1 1 7.6727 22.3676 -36.1669 51.5123 0.12 0.7316 center 2 1 2.1783 1.1318 -0.0400 4.3966 3.70 0.0543 center 3 1 7.8259 22.3681 -36.0147 51.6666 0.12 0.7264 center 4 1 -1.0628 0.7009 -2.4366 0.3111 2.30 0.1295 center 5 0 0.0000 0.0000 0.0000 0.0000 . . treat 0 1 1.5447 0.7013 0.1702 2.9191 4.85 0.0276 treat 1 0 0.0000 0.0000 0.0000 0.0000 . . Scale 0 1.0000 0.0000 1.0000 1.0000

LR Statistics For Type 3 Analysis

Chi- Source DF Square Pr > ChiSq

center 4 23.36 0.0001 treat 1 5.48 0.0192

Model 2:

Criteria For Assessing Goodness Of Fit

Criterion DF Value Value/DF

Log Likelihood -29.6055

Analysis Of Parameter Estimates

Standard Wald 95% Confidence Chi- Parameter DF Estimate Error Limits Square Pr > ChiSq

Intercept1 1 0.4726 0.5053 -0.5178 1.4631 0.87 0.3496 ccenter 1 1 2.8953 1.1144 0.7111 5.0796 6.75 0.0094 ccenter 2 1 -1.0645 0.7017 -2.4398 0.3108 2.30 0.1293 ccenter 3 0 0.0000 0.0000 0.0000 0.0000 . . treat 0 1 1.5574 0.6994 0.1867 2.9282 4.96 0.0260 treat 1 0 0.0000 0.0000 0.0000 0.0000 . . Scale 0 1.0000 0.0000 1.0000 1.0000

LR Statistics For Type 3 Analysis

Chi- Source DF Square Pr > ChiSq

ccenter 2 22.00 <.0001 treat 1 5.61 0.0179