Achapter7.htm

SAS Textbook Examples
An Introduction to Categorical Analysis by Alan Agresti
Chapter 7 – Building and Applying Logit and Log-linear Models

We will try to use both SAS proc catmod and proc genmod for the results in this chapter.

Inputting the drug data set, table 6.3, p. 152.

data drug;
  input R G A C M count;
  label R='Race'
        G='Gender' 
        A='alcohol' 
        C='smoke'
        M='marijuana';
cards;
1  1  1   1   1   405
1  1  1   1   0   268 
1  1  1   0   1    13
1  1  1   0   0   218
1  1  0   1   1     1
1  1  0   1   0    17
1  1  0   0   1     1
1  1  0   0   0   117
1  0  1   1   1   453
1  0  1   1   0   228 
1  0  1   0   1    28
1  0  1   0   0   201
1  0  0   1   1     1
1  0  0   1   0    17
1  0  0   0   1     1
1  0  0   0   0   133
0  1  1   1   1    23
0  1  1   1   0    23 
0  1  1   0   1     2
0  1  1   0   0    19
0  1  0   1   1     0
0  1  0   1   0     1
0  1  0   0   1     0
0  1  0   0   0    12
0  0  1   1   1    30
0  0  1   1   0    19 
0  0  1   0   1     1
0  0  1   0   0    18
0  0  0   1   1     1
0  0  0   1   0     8
0  0  0   0   1     0
0  0  0   0   0    17
;
run;

The catmod proc will drop any observations where the count is zero. In order to solve this problem we create a new count variable where we add a very small amount (in this case .0005) to all the counts. This will not change any of the Maximum Likelihood estimates, it will simply ensure that all the observations are used in the calculations.

data drugmore;
  set drug;
  count1= count+.0005;
run;

Table 7.2, p. 179.

ods listing close;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r a c m g r ;
  ods output anova=temp1;
run;
quit;
data temp1;
  set temp1;
  model = 'RG_A_C_M';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r|a|c|m @2;
  ods output anova=temp2;
run;
quit;
data temp2;
  set temp2;
  model = 'homogeneous';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r|a|c|m @3;
  ods output anova=temp3;
run;
quit;
data temp3;
  set temp3;
  model = 'three-way';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c g|m  a|m c|m;
  ods output anova=temp4;
run;
quit;
data temp4;
  set temp4;
  model = 'homogeneous - AC';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c g|m a|c  c|m;
  ods output anova=temp5;
run;
quit;
data temp5;
  set temp5;
  model = 'homogeneous - AM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c g|m a|c a|m ;
  ods output anova=temp6;
run;
quit;
data temp6;
  set temp6;
  model = 'homogeneous - CM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m  g|c g|m a|c a|m c|m;
  ods output anova=temp7;
run;
quit;
data temp7;
  set temp7;
  model = 'homogeneous - AG';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|c r|m g|a g|c g|m a|c a|m c|m;
  ods output anova=temp8;
run;
quit;
data temp8;
  set temp8;
  model = 'homogeneous - AR';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|m a|c a|m c|m;
  ods output anova=temp9;
run;
quit;
data temp9;
  set temp9;
  model = 'homogeneous - CG';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|m g|a g|c g|m a|c a|m c|m;
  ods output anova=temp10;
run;
quit;
data temp10;
  set temp10;
  model = 'homogeneous - CR';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c r|m g|a g|c  a|c a|m c|m;
  ods output anova=temp11;
run;
quit;
data temp11;
  set temp11;
  model = 'homogeneous - GM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|c  g|a g|c g|m a|c a|m c|m;
  ods output anova=temp12;
run;
quit;
data temp12;
  set temp12;
  model = 'homogeneous - MR';
run;
 
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a r|m g|a g|m a|c a|m c|m;
  ods output anova=temp13;
run;
quit;
data temp13;
  set temp13;
  model = 'RG_RA_RM_GA_GM_AC_AM_CM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin r|g r|a  g|a g|m a|c a|m c|m;
  ods output anova=temp14;
run;
quit;
data temp14;
  set temp14;
  model = 'RG_RA_GA_GM_AC_AM_CM';
run;
proc catmod data=drugmore;
  weight count1;
  model R*G*A*C*M= _response_ ;
  loglin g|r r|a  g|a a|c a|m c|m;
  ods output anova=temp15;
run;
quit;
data temp15;
  set temp15;
  model = 'RG_RA_RM_GA_AC_AM_CM';
run;
ods output close;
ods listing;
data combo;
  set temp1 temp2 temp3 temp4 temp5 temp6 temp7 TEMP8 TEMP9 TEMP10 TEMP11 temp12 temp13 temp14 temp15;
  rename chisq=G2;
  rename probchisq=P_value;
run;
proc print data=combo noobs;
  where source='Likelihood Ratio';
  var model G2 df P_value;
run;

Model G2 DF P_value

RG_A_C_M 1325.09 25 <.0001 homogeneous 15.32 16 0.5011 three-way 5.25 6 0.5119 homogeneous – AC 201.19 17 <.0001 homogeneous – AM 106.93 17 <.0001 homogeneous – CM 513.45 17 <.0001 homogeneous – AG 18.70 17 0.3460 homogeneous – AR 20.31 17 0.2589 homogeneous – CG 16.30 17 0.5026 homogeneous – CR 15.77 17 0.5404 homogeneous – GM 25.14 17 0.0915 homogeneous – MR 18.91 17 0.3337 RG_RA_RM_GA_GM_AC_AM_CM 16.72 18 0.5425 RG_RA_GA_GM_AC_AM_CM 19.89 19 0.4012 RG_RA_RM_GA_AC_AM_CM 28.79 20 0.0920

Inputting Survey data, table 7.3, p. 181.

data sex;
  input premar birth count @@;
  assoc = premar*birth; 
cards;
1 4  38   1 3  60   1 2 68   1 1 81
2 4  14   2 3  29   2 2 26   2 1 24
3 4  42   3 3  74   3 2 41   3 1 18
4 4 157   4 3 161   4 2 57   4 1 36
;
run;

Table 7.3, p. 181.

proc format;
  value premar 1='Always wrong' 2='Usually Wrong' 3='Sometimes Wrong' 4='Never wrong';
  value birth 1='Strongly Disagree' 2='Disagree' 3='Agree' 4='Strongly Agree';
run;
proc genmod data=sex; 
  format premar premar. birth birth.;
  class premar birth;
  model count = premar birth / dist=poi link=log ;
  output out=temp p=predict reslik=adjres;
run;
proc print data = temp;
  format premar premar. birth birth.;
  var premar birth count predict adjres;  
run;

Obs premar birth count predict adjres

1 Always wrong Strongly Agree 38 66.951 -5.07551 2 Always wrong Agree 60 86.423 -4.23386 3 Always wrong Disagree 68 51.214 2.99125 4 Always wrong Strongly Disagree 81 42.411 7.09507 5 Usually Wrong Strongly Agree 14 25.208 -2.92676 6 Usually Wrong Agree 29 32.540 -0.82054 7 Usually Wrong Disagree 26 19.283 1.74615 8 Usually Wrong Strongly Disagree 24 15.969 2.20806 9 Sometimes Wrong Strongly Agree 42 47.435 -1.03860 10 Sometimes Wrong Agree 74 61.231 2.20962 11 Sometimes Wrong Disagree 41 36.285 0.96337 12 Sometimes Wrong Strongly Disagree 18 30.049 -2.83017 13 Never wrong Strongly Agree 157 111.405 6.62604 14 Never wrong Agree 161 143.806 2.36828 15 Never wrong Disagree 57 85.218 -4.73723 16 Never wrong Strongly Disagree 36 70.571 -6.36454

The Likelihood Ratio statistic in the output is the goodness-of-fit test statistic, G^2, for the independence model in the middle of p. 181.

proc catmod data=sex;
  weight count;
  model premar*birth= _response_/ noiter noprofile noparm noresponse;
  loglin premar birth;
run;
quit;

The CATMOD Procedure

Data Summary

Response           premar*birth     Response Levels   16
Weight Variable    count            Populations        1
Data Set           SEX              Total Frequency  926
Frequency Missing  0                Observations      16

                   Maximum Likelihood Analysis

           Maximum likelihood computations converged.

Maximum Likelihood Analysis of Variance

Source DF Chi-Square Pr > ChiSq ————————————————– premar 3 212.44 <.0001 birth 3 66.20 <.0001

Likelihood Ratio 9 127.65 <.0001

Linear-by-linear association in the Survey data.
The ML estimate of the association parameter ,beta , and its standard error are the estimate and standard error of assoc in the table labeled Analysis of Parameter Estimates which is followed by the Wald test and 95% Wald confidence interval, middle of p. 183.

proc genmod data=sex; 
  format premar premar. birth birth.;
  class premar birth;
  model count = premar birth assoc/ dist=poi link=log ;
  output out=temp p=predict ;
run;

The GENMOD Procedure

Model Information

Data Set              WORK.SEX
Distribution           Poisson
Link Function              Log
Dependent Variable       count
Observations Used           16

Class Level Information

Class Levels Values

premar           4    Always wrong Never wrong Sometimes Wrong Usually
                      Wrong
birth            4    Agree Disagree Strongly Agree Strongly Disagree

Criteria For Assessing Goodness Of Fit

Criterion DF Value Value/DF

Deviance 8 11.5337 1.4417 Scaled Deviance 8 11.5337 1.4417 Pearson Chi-Square 8 11.5085 1.4386 Scaled Pearson X2 8 11.5085 1.4386 Log Likelihood 3041.7446

Algorithm converged.

Analysis Of Parameter Estimates

Standard Wald 95% Chi- Parameter DF Estimate Error Confidence Limits Square Pr > ChiSq

Intercept                     1    2.4609    0.1323    2.2015    2.7202   345.91      <.0001
premar     Always wrong       1    1.6460    0.1347    1.3819    1.9100   149.24      <.0001
premar     Never wrong        1   -0.1077    0.1988   -0.4974    0.2820     0.29      0.5880
premar     Sometimes Wrong    1   -0.1241    0.1481   -0.4143    0.1661     0.70      0.4021
premar     Usually Wrong      0    0.0000    0.0000    0.0000    0.0000      .         .
birth      Agree              1   -0.7245    0.1620   -1.0420   -0.4070    20.00      <.0001
birth      Disagree           1   -0.4641    0.1195   -0.6984   -0.2298    15.08      0.0001
birth      Strongly Agree     1   -1.8797    0.2491   -2.3679   -1.3914    56.94      <.0001
birth      Strongly Disagre   0    0.0000    0.0000    0.0000    0.0000      .         .
assoc                         1    0.2858    0.0282    0.2305    0.3412   102.46      <.0001
Scale                         0    1.0000    0.0000    1.0000    1.0000

NOTE: The scale parameter was held fixed.

Table 7.4, p. 184.

proc print data = temp;
  format premar premar. birth birth.;
  var premar birth count predict;  
run;

Obs premar birth count predict

1 Always wrong Strongly Agree 38 29.094 2 Always wrong Agree 60 69.396 3 Always wrong Disagree 68 67.654 4 Always wrong Strongly Disagree 81 80.857 5 Usually Wrong Strongly Agree 14 17.600 6 Usually Wrong Agree 29 31.543 7 Usually Wrong Disagree 26 23.107 8 Usually Wrong Strongly Disagree 24 20.750 9 Sometimes Wrong Strongly Agree 42 48.773 10 Sometimes Wrong Agree 74 65.681 11 Sometimes Wrong Disagree 41 36.152 12 Sometimes Wrong Strongly Disagree 18 24.394 13 Never wrong Strongly Agree 157 155.533 14 Never wrong Agree 161 157.379 15 Never wrong Disagree 57 65.088 16 Never wrong Strongly Disagree 36 33.000

Inputting Job Satisfaction data, table 7.5, p. 186.

data cmh;
input gender $ income satisf count @@;
cards;
F  3 1 1    F 3  3 3    F  3 4 11    F  3 5 2
F 10 1 2    F 10 3 3    F 10 4 17    F 10 5 3
F 20 1 0    F 20 3 1    F 20 4  8    F 20 5 5
F 35 1 0    F 35 3 2    F 35 4  4    F 35 5 2
M  3 1 1    M  3 3 1    M  3 4  2    M  3 5 1
M 10 1 0    M 10 3 3    M 10 4  5    M 10 5 1
M 20 1 0    M 20 3 0    M 20 4  7    M 20 5 3
M 35 1 0    M 35 3 1    M 35 4  9    M 35 5 6
;
run;

Proc catmod will drop any observations where the count equals zero. To fix this problem we simply create a new count variable where we add a very small amount (in this case .00001) to all the counts. This will not change any of the Maximum Likelihood estimates, it will simply ensure that all the observations are used in the calculations.

data cmh1;
  set cmh;
  count1 = count + .00001;
run;

The Likelihood Ratio statistic in the output is the G^2 statistic for the model (GI, GS), Section 7.3.2, p. 186.

proc catmod data=cmh1;
  weight count1;
  model gender*income*satisf= _response_ /noiter noprofile noparm noresponse;
  loglin gender|income gender|satisf ;
  ods output anova=temp1;
run;
quit;
data _null_;
  set temp1;
  if source = 'Likelihood Ratio' then call symput('chisq1', chisq);
  if source = 'Likelihood Ratio' then call symput('df1', df);
run;
%put chisq=&chisq1 and df=&df1; /* this will make the two number appear in the log file. */

The CATMOD Procedure
                           Data Summary

Response           gender*income*satisf     Response Levels      32
Weight Variable    count1                   Populations           1
Data Set           CMH1                     Total Frequency     104
Frequency Missing  0                        Observations         32

                   Maximum Likelihood Analysis

           Maximum likelihood computations converged.

Maximum Likelihood Analysis of Variance

Source DF Chi-Square Pr > ChiSq ————————————————– gender 1 2.72 0.0991 income 3 2.47 0.4808 gender*income 3 11.52 0.0092 satisf 3 49.51 <.0001 gender*satisf 3 1.28 0.7330

Likelihood Ratio 18 19.37 0.3696

The Likelihood Ratio statistic in the output is the G^2 statistic for the model (GI, GS, IS), Section 7.3.2, p. 186.
Note: We have created two macro variables in order to use a Chi-squared test to compare the two models.
Note2: In the previous data step we created a new count variable in order to get rid of cells with zero entries. However, if the amount added to all the counts was too small proc catmod might have problems with collinearity when calculating the degrees of freedom for the G^2 statistic. Our suggestion is to try adding various amounts to all the counts starting with .1 then .01 then .001 and so on until the degrees of freedom start to drop. The point is that in order to get the best approximation for the G^2 statistic you want to add the smallest possible amount without losing degrees of freedom due to collinearity.
Note3: To obtain the correct degrees of freedom you can either calculate the number by hand as shown in the book or you can run a proc genmod and get the df from the Goodness of fit table in the output.

data cmh2;
  set cmh1;
  count2=count+.001;
run;
proc catmod data=cmh2;
  weight count2;
  model gender*income*satisf= _response_ /noiter noprofile noparm noresponse;
  loglin gender|income gender|satisf income|satisf;
  ods output anova=temp2;
run;
quit;
data _null_;
  set temp2;
  if source = 'Likelihood Ratio' then call symput('chisq2', chisq);
  if source = 'Likelihood Ratio' then call symput('df2', df);
run;
%put chisq=&chisq2 and df=&df2; /* this will make the numbers appear in the log file */

The CATMOD Procedure
                           Data Summary

Response           gender*income*satisf     Response Levels      32
Weight Variable    count2                   Populations           1
Data Set           CMH2                     Total Frequency 104.032
Frequency Missing  0                        Observations         32

                   Maximum Likelihood Analysis

           Maximum likelihood computations converged.

Maximum Likelihood Analysis of Variance

Source DF Chi-Square Pr > ChiSq ————————————————– gender 1 1.58 0.2089 income 3 0.99 0.8025 gender*income 3 10.47 0.0150 satisf 3 31.38 <.0001 gender*satisf 3 0.11 0.9902 income*satisf 9 6.46 0.6932

Likelihood Ratio 9 7.07 0.6301

The following is the code for the proc genmod to get the df in the Goodness of fit table in the output. This is a nice way to check the df because proc genmod can handle zero cells without any problems.

proc genmod data=cmh;
  class gender income satisf;
  model count = gender*income gender*satisf income*satisf gender income satisf/d=p;
run;

Using a Chi-squared test to compare the two models, (GI, GS, IS) to (GI, GS), section 7.3.2, p. 186.

data chitest;
  chi_diff= &chisq1-&chisq2;
  df_diff= &df1-&df2;
  p_value = 1-cdf('CHISQ', (&chisq1-&chisq2), (&df1-&df2) );
run;
proc print data = chitest;
run;

Obs    chi_diff    df_diff    p_value

 1      12.3000       9       0.19692

Generating the scores {1, 2, 3, 4} for income and satisfaction as well as creating the association variable, p. 187.

data score;
  set cmh1;
  if income=3 then score1 = 1;
  if income=10 then score1= 2;
  if income=20 then score1=3;
  if income=35 then score1=4;
  score2 = 1;
  if satisf ne 1 then score2 = satisf - 1;
  assoc = score1*score2;
run;

The estimate and standard error for assoc in the output is the ML estimate and ASE for beta-hat followed by the Wald test and 95% Wald confidence interval, p. 188.

proc genmod data=score;
  class income gender satisf;
  model count=income gender satisf assoc /  dist=p;
run;

The GENMOD Procedure

Model Information

Data Set              WORK.SCORE
Distribution             Poisson
Link Function                Log
Dependent Variable         count
Observations Used             32

Class Level Information

Class Levels Values

income           4    3 10 20 35
gender           2    F M
satisf           4    1 3 4 5

Criteria For Assessing Goodness Of Fit

Criterion DF Value Value/DF

Deviance 23 25.1215 1.0922 Scaled Deviance 23 25.1215 1.0922 Pearson Chi-Square 23 22.7298 0.9883 Scaled Pearson X2 23 22.7298 0.9883 Log Likelihood 63.8134

Algorithm converged.

Analysis Of Parameter Estimates

Standard Wald 95% Confidence Chi- Parameter DF Estimate Error Limits Square Pr > ChiSq

Intercept           1     -5.1408      2.2573     -9.5651     -0.7166       5.19        0.0228
income       3      1      3.4908      1.3815      0.7831      6.1985       6.38        0.0115
income       10     1      2.8073      0.9872      0.8724      4.7422       8.09        0.0045
income       20     1      1.2626      0.5749      0.1359      2.3894       4.82        0.0281
income       35     0      0.0000      0.0000      0.0000      0.0000        .           .
gender       F      1      0.4700      0.2016      0.0750      0.8650       5.44        0.0197
gender       M      0      0.0000      0.0000      0.0000      0.0000        .           .
satisf       1      1      0.9502      1.0904     -1.1870      3.0874       0.76        0.3835
satisf       3      1      1.4575      0.8084     -0.1270      3.0420       3.25        0.0714
satisf       4      1      2.0689      0.4918      1.1051      3.0327      17.70        <.0001
satisf       5      0      0.0000      0.0000      0.0000      0.0000        .           .
assoc               1      0.3951      0.1482      0.1046      0.6856       7.10        0.0077
Scale               0      1.0000      0.0000      1.0000      1.0000

NOTE: The scale parameter was held fixed.

Using the scores {3, 10, 20, 35} for income and {1, 3, 4, 5} for satisfaction to create an association variable, bottom of p. 188.

data cmh1;
  set cmh1;
  assoc1 = income*satisf;
run;
proc genmod data=cmh1;
  class income gender satisf;
  model count=income gender satisf assoc1/ dist=p;
run;

The GENMOD Procedure

Model Information

Data Set              WORK.CMH1
Distribution            Poisson
Link Function               Log
Dependent Variable        count
Observations Used            32

Class Level Information

Class Levels Values

income           4    3 10 20 35
gender           2    F M
satisf           4    1 3 4 5

Criteria For Assessing Goodness Of Fit

Criterion DF Value Value/DF

Deviance 23 25.0580 1.0895 Scaled Deviance 23 25.0580 1.0895 Pearson Chi-Square 23 22.5617 0.9809 Scaled Pearson X2 23 22.5617 0.9809 Log Likelihood 63.8452

Algorithm converged.

Analysis Of Parameter Estimates

Standard Wald 95% Confidence Chi- Parameter DF Estimate Error Limits Square Pr > ChiSq

Intercept           1     -4.7644      2.1925     -9.0617     -0.4672       4.72        0.0298
income       3      1      4.3179      1.7581      0.8720      7.7637       6.03        0.0141
income       10     1      3.8514      1.4213      1.0658      6.6370       7.34        0.0067
income       20     1      2.1447      0.9113      0.3585      3.9309       5.54        0.0186
income       35     0      0.0000      0.0000      0.0000      0.0000        .           .
gender       F      1      0.4700      0.2016      0.0750      0.8650       5.44        0.0197
gender       M      0      0.0000      0.0000      0.0000      0.0000        .           .
satisf       1      1      0.0622      0.8079     -1.5214      1.6457       0.01        0.9387
satisf       3      1      0.6328      0.5553     -0.4556      1.7213       1.30        0.2545
satisf       4      1      1.6466      0.3673      0.9267      2.3665      20.10        <.0001
satisf       5      0      0.0000      0.0000      0.0000      0.0000        .           .
assoc1              1      0.0341      0.0132      0.0082      0.0601       6.64        0.0100
Scale               0      1.0000      0.0000      1.0000      1.0000

NOTE: The scale parameter was held fixed.

Means by gender and by levels of income using the scores {1, 2, 3, 4} for job satisfaction, p. 189.

proc sort data=score out=sorted;
  by gender income;
run;
proc means data=sorted;
  freq count;
  class gender income;
  var score2;
run;

The MEANS Procedure Analysis Variable : score2

gender income N Obs N Mean Std Dev Minimum ——————————————————————————————– F 3 17 17 2.8235294 0.7276069 1.0000000 10 25 25 2.8400000 0.7461010 1.0000000 20 14 14 3.2857143 0.6112498 2.0000000 35 8 8 3.0000000 0.7559289 2.0000000

M 3 5 5 2.6000000 1.1401754 1.0000000 10 9 9 2.7777778 0.6666667 2.0000000 20 10 10 3.3000000 0.4830459 3.0000000 35 16 16 3.3125000 0.6020797 2.0000000 ——————————————————————————————–

Analysis Variable : score2

gender income N Obs Maximum ————————————————– F 3 17 4.0000000 10 25 4.0000000 20 14 4.0000000 35 8 4.0000000

M 3 5 4.0000000 10 9 4.0000000 20 10 4.0000000 35 16 4.0000000 ————————————————–

Table 7.6, p. 190.

proc freq data=cmh; 
  weight count;
  tables gender*income*satisf / nocol norow nopercent cmh;
run;

The FREQ Procedure

Table 1 of income by satisf Controlling for gender=F

income     satisf
Frequency|       1|       3|       4|       5|  Total
———+——–+——–+——–+——–+
       3 |      1 |      3 |     11 |      2 |     17
———+——–+——–+——–+——–+
      10 |      2 |      3 |     17 |      3 |     25
———+——–+——–+——–+——–+
      20 |      0 |      1 |      8 |      5 |     14
———+——–+——–+——–+——–+
      35 |      0 |      2 |      4 |      2 |      8
———+——–+——–+——–+——–+
Total           3        9       40       12       64

Table 2 of income by satisf
Controlling for gender=M

income     satisf
Frequency|       1|       3|       4|       5|  Total
———+——–+——–+——–+——–+
       3 |      1 |      1 |      2 |      1 |      5
———+——–+——–+——–+——–+
      10 |      0 |      3 |      5 |      1 |      9
———+——–+——–+——–+——–+
      20 |      0 |      0 |      7 |      3 |     10
———+——–+——–+——–+——–+
      35 |      0 |      1 |      9 |      6 |     16
———+——–+——–+——–+——–+
Total           1        5       23       11       40

The FREQ Procedure

Summary Statistics for income by satisf Controlling for gender

Cochran-Mantel-Haenszel Statistics (Based on Table Scores)

Statistic Alternative Hypothesis DF Value Prob ————————————————————— 1 Nonzero Correlation 1 6.1563 0.0131 2 Row Mean Scores Differ 3 9.0342 0.0288 3 General Association 9 10.2001 0.3345

Total Sample Size = 104

Inputting Clinical Trial data, table 7.7, p. 193.

data trial;
  input center treat response count;
cards;
       1         1         1         0  
       1         1         0         5  
       1         0         1         0  
       1         0         0         9  
       2         1         1         1  
       2         1         0        12  
       2         0         1         0  
       2         0         0        10  
       3         1         1         0  
       3         1         0         7  
       3         0         1         0  
       3         0         0         5  
       4         1         1         6  
       4         1         0         3  
       4         0         1         2  
       4         0         0         6  
       5         1         1         5  
       5         1         0         9  
       5         0         1         2  
       5         0         0        12  
;
run;

Allowing zero cell entries. We could not recreate the error mentioned in the book. This is perhaps due to improvements in SAS and its abilities in handling troublesome data. Even though the parameter estimate for center 5 is zero and the parameter estimates for center 1 and 3 are very close to -26 the standard errors are very small (<0.5).

proc genmod data=trial;
  class center treat response;
  model count= center treat response center*response / dist=p;
run;

The GENMOD Procedure

Model Information

Data Set              WORK.TRIAL
Distribution             Poisson
Link Function                Log
Dependent Variable         count
Observations Used             20

Class Level Information

Class Levels Values

center             5    1 2 3 4 5
treat              2    0 1
response           2    0 1

Criteria For Assessing Goodness Of Fit

Criterion DF Value Value/DF

Deviance 9 7.8904 0.8767 Scaled Deviance 9 7.8904 0.8767 Pearson Chi-Square 9 7.3331 0.8148 Scaled Pearson X2 9 7.3331 0.8148 Log Likelihood 89.6010

WARNING: Negative of Hessian not positive definite.

Analysis Of Parameter Estimates

Standard Wald 95% Confidence Chi- Parameter DF Estimate Error Limits Square Pr > ChiSq

Intercept                  1     1.2738     0.3912     0.5070     2.0406     10.60       0.0011
center            1        1   -25.2805     0.3450   -25.9567   -24.6042   5368.46       <.0001
center            2        1    -1.9459     1.0690    -4.0412     0.1494      3.31       0.0687
center            3        1   -25.4639     0.3619   -26.1732   -24.7547   4951.51       <.0001
center            4        1     0.1335     0.5175    -0.8808     1.1479      0.07       0.7964
center            5        0     0.0000     0.0000     0.0000     0.0000       .          .
treat             0        1    -0.0426     0.2063    -0.4470     0.3618      0.04       0.8366
treat             1        0     0.0000     0.0000     0.0000     0.0000       .          .
response          0        1     1.0986     0.4364     0.2432     1.9540      6.34       0.0118
response          1        0     0.0000     0.0000     0.0000     0.0000       .          .
center*response   1   0    0    24.8750     0.0000    24.8750    24.8750       .          .
center*response   1   1    0     0.0000     0.0000     0.0000     0.0000       .          .
center*response   2   0    1     1.9924     1.1117    -0.1865     4.1714      3.21       0.0731
center*response   2   1    0     0.0000     0.0000     0.0000     0.0000       .          .

The GENMOD Procedure

Analysis Of Parameter Estimates

Standard Wald 95% Confidence Chi- Parameter DF Estimate Error Limits Square Pr > ChiSq

center*response   3   0    0    24.9043     0.0000    24.9043    24.9043       .          .
center*response   3   1    0     0.0000     0.0000     0.0000     0.0000       .          .
center*response   4   0    1    -0.9808     0.6531    -2.2610     0.2993      2.26       0.1332
center*response   4   1    0     0.0000     0.0000     0.0000     0.0000       .          .
center*response   5   0    0     0.0000     0.0000     0.0000     0.0000       .          .
center*response   5   1    0     0.0000     0.0000     0.0000     0.0000       .          .
Scale                      0     1.0000     0.0000     1.0000     1.0000

NOTE: The scale parameter was held fixed.

SAS Textbook Examples An Introduction to Categorical Analysis by Alan Agresti Chapter 7 – Building and Applying Logit and Log-linear Models

SAS Textbook Examples
An Introduction to Categorical Analysis by Alan Agresti
Chapter 7 – Building and Applying Logit and Log-linear Models