Binomial vs Binary data structure

Author
Affiliation

Filippo Gambarota

University of Padova

Published

December 13, 2025

devtools::load_all()
ℹ Loading GLMphd
library(dplyr)

Caricamento pacchetto: 'dplyr'

I seguenti oggetti sono mascherati da 'package:stats':

    filter, lag

I seguenti oggetti sono mascherati da 'package:base':

    intersect, setdiff, setequal, union
data("teddy")

# Binary format ----
# Row-level data: keep only the predictor and the outcome, and drop the
# "Ex-drinker" category of Alcool_status before fitting.

teddybin <- teddy |>
  select(Alcool_status, Depression_pp01) |>
  filter(Alcool_status != "Ex-drinker")

# Logistic regression fitted directly on the row-level outcome
fit_binary <- glm(
  Depression_pp01 ~ Alcool_status,
  family = binomial(link = "logit"),
  data = teddybin
)

# Binomial format ----
# Aggregated data: one row per level of Alcool_status holding the number of
# "yes" cases (n_yes), the group size (tot) and the number of "no" cases (n_no).
# teddybin has already had the "Ex-drinker" rows removed when it was created,
# so it is not filtered a second time here.

# n_yes is obtained with sum(), which relies on Depression_pp01 being
# coded 0/1 (assumption -- confirm against the teddy dataset documentation).
teddyb <- teddybin |>
  group_by(Alcool_status) |>
  summarise(
    n_yes = sum(Depression_pp01),
    tot = n(),
    n_no = tot - n_yes
  )

# Same model as the binary fit, with the response given as a two-column
# matrix of (successes, failures) per group
fit_binomial <- glm(
  cbind(n_yes, n_no) ~ Alcool_status,
  data = teddyb,
  family = binomial(link = "logit")
)

# Put the estimates and standard errors of the two fits side by side to check
# that the binary and binomial data formats lead to the same coefficients
car::compareCoefs(fit_binary, fit_binomial)
Calls:
1: glm(formula = Depression_pp01 ~ Alcool_status, family = binomial(link = 
  "logit"), data = teddybin)
2: glm(formula = cbind(n_yes, n_no) ~ Alcool_status, family = binomial(link 
  = "logit"), data = teddyb)

                 Model 1 Model 2
(Intercept)       -2.055  -2.055
SE                 0.191   0.191
                                
Alcool_statusYes   0.537   0.537
SE                 0.336   0.336
                                
# Aggregated fit: the data have two rows and the model two parameters, so the
# fit is saturated (0 residual degrees of freedom, residual deviance
# numerically zero)
summary(fit_binomial)

Call:
glm(formula = cbind(n_yes, n_no) ~ Alcool_status, family = binomial(link = "logit"), 
    data = teddyb)

Coefficients:
                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)       -2.0550     0.1908 -10.772   <2e-16 ***
Alcool_statusYes   0.5371     0.3355   1.601    0.109    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance:  2.4499e+00  on 1  degrees of freedom
Residual deviance: -3.2863e-14  on 0  degrees of freedom
AIC: 13.58

Number of Fisher Scoring iterations: 3
# Row-level fit: the coefficient table is the same as for the aggregated fit,
# but deviances, degrees of freedom and AIC are computed on the individual
# observations and therefore differ
summary(fit_binary)

Call:
glm(formula = Depression_pp01 ~ Alcool_status, family = binomial(link = "logit"), 
    data = teddybin)

Coefficients:
                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)       -2.0550     0.1908 -10.772   <2e-16 ***
Alcool_statusYes   0.5371     0.3355   1.601    0.109    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 279.52  on 361  degrees of freedom
Residual deviance: 277.07  on 360  degrees of freedom
AIC: 281.07

Number of Fisher Scoring iterations: 4