devtools::load_all() # if using the rproject downloaded from the slides
# source("utils-glm.R") # if using a standard setup
library(here)
library(tidyr) # for data manipulation
library(dplyr) # for data manipulation
library(ggplot2) # plotting
library(car) # general utilities
library(effects) # for extracting and plotting effects
library(emmeans) # for marginal means
library(patchwork)
Lab 3
data("drop")
dat <- drop
Overview
This dataset (dropout.csv) contains data about dropout during high school for 500 adolescents. We want to understand the impact of the parenting style (permissive, neglectful, authoritative, authoritarian) and of the academic performance (high, low) on the probability of dropout (0 = no dropout, 1 = dropout).
- Importing data and overall check
- Exploratory data analysis of the predictors and of the relationships between predictors and dropout
- Compute the odds ratio manually comparing the academic performances for each parenting style
- Model fitting with glm() using the dataset in the binary form
- Model fitting with glm() using the dataset in the aggregated form
- Plotting and interpreting effects of both models
  - is there any difference? Try to understand why
- Write a brief paragraph reporting the effects with your interpretation
1. Importing data and overall check
str(dat)
'data.frame': 500 obs. of 4 variables:
$ id : int 1 2 3 4 5 6 7 8 9 10 ...
$ parenting: chr "permissive" "neglectful" "authoritative" "neglectful" ...
$ academic : chr "high" "low" "high" "low" ...
$ drop : int 0 0 0 1 0 0 0 0 0 0 ...
Check for NA values:
sapply(dat, function(x) sum(is.na(x)))
id parenting academic drop
0 0 0 0
Everything seems good, we do not have NA values.
Let’s convert the categorical variables into factors, setting the appropriate order of the levels:
- parenting: neglectful, permissive, authoritative, authoritarian
- academic: low, high
dat$parenting <- factor(dat$parenting, levels = c("neglectful",
                                                  "permissive",
                                                  "authoritative",
                                                  "authoritarian"))
dat$academic <- factor(dat$academic, levels = c("low", "high"))
levels(dat$parenting)
[1] "neglectful" "permissive" "authoritative" "authoritarian"
levels(dat$academic)
[1] "low" "high"
2. Exploratory data analysis
summary(dat) # not really meaningful
id parenting academic drop
Min. : 1.0 neglectful :119 low :237 Min. :0.000
1st Qu.:125.8 permissive :152 high:263 1st Qu.:0.000
Median :250.5 authoritative:124 Median :0.000
Mean :250.5 authoritarian:105 Mean :0.166
3rd Qu.:375.2 3rd Qu.:0.000
Max. :500.0 Max. :1.000
With categorical variables we need to use absolute/relative frequencies and contingency tables.
Let’s start by univariate EDA:
# distribution of parenting styles
table(dat$parenting)
neglectful permissive authoritative authoritarian
119 152 124 105
table(dat$parenting)/nrow(dat)
neglectful permissive authoritative authoritarian
0.238 0.304 0.248 0.210
# distribution of academic performance
table(dat$academic)
low high
237 263
table(dat$academic)/nrow(dat)
low high
0.474 0.526
# overall dropout rate
table(dat$drop)
0 1
417 83
table(dat$drop)/nrow(dat)
0 1
0.834 0.166
# mean(dat$drop) # directly
Let’s create an overall plot:
plt_par <- dat |>
  ggplot(aes(x = parenting)) +
  geom_bar()

plt_academic <- dat |>
  ggplot(aes(x = academic)) +
  geom_bar()

plt_drop <- dat |>
  ggplot(aes(x = factor(drop))) +
  geom_bar()

plt_par / plt_academic / plt_drop
How to interpret?
Let’s now explore the bivariate relationships:
table(dat$parenting, dat$academic)
low high
neglectful 95 24
permissive 77 75
authoritative 20 104
authoritarian 45 60
table(dat$academic, dat$parenting)
neglectful permissive authoritative authoritarian
low 95 77 20 45
high 24 75 104 60
We can create tables with relative frequencies:
prop.table(table(dat$parenting, dat$academic), 1) # by row
low high
neglectful 0.7983193 0.2016807
permissive 0.5065789 0.4934211
authoritative 0.1612903 0.8387097
authoritarian 0.4285714 0.5714286
prop.table(table(dat$parenting, dat$academic), 2) # by column
low high
neglectful 0.40084388 0.09125475
permissive 0.32489451 0.28517110
authoritative 0.08438819 0.39543726
authoritarian 0.18987342 0.22813688
…and some plots:
dat |>
  ggplot(aes(x = academic, fill = parenting)) +
  geom_bar(position = position_dodge(),
           col = "black")
Of course, we can compute the relative frequencies in multiple ways (total, row or column wise).
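For completeness, a quick sketch of the "total" version, dividing each cell by the grand total (not shown in the output above):
# joint relative frequencies: each cell divided by the grand total
prop.table(table(dat$parenting, dat$academic))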
Then the bivariate relationships with the drop variable:
table(dat$parenting, dat$drop)
0 1
neglectful 76 43
permissive 136 16
authoritative 117 7
authoritarian 88 17
prop.table(table(dat$parenting, dat$drop), 1)
0 1
neglectful 0.63865546 0.36134454
permissive 0.89473684 0.10526316
authoritative 0.94354839 0.05645161
authoritarian 0.83809524 0.16190476
table(dat$academic, dat$drop)
0 1
low 174 63
high 243 20
prop.table(table(dat$academic, dat$drop), 1)
0 1
low 0.73417722 0.26582278
high 0.92395437 0.07604563
And the plots:
barplot(prop.table(table(dat$parenting, dat$drop), 1),
beside = TRUE,
col = c("firebrick", "lightblue", "darkgreen", "pink"))
legend(7, 0.8, legend = levels(dat$parenting),
fill = c("firebrick", "lightblue", "darkgreen", "pink"))
barplot(prop.table(table(dat$academic, dat$drop), 1),
beside = TRUE,
col = c("red", "blue"))
legend(4, 0.5, legend = levels(dat$academic),
fill = c("red", "blue"))
Finally we can represent the full relationship:
dat |>
  group_by(parenting, academic) |>
  summarise(drop = mean(drop)) |>
  ggplot(aes(x = parenting, y = drop, color = academic, group = academic)) +
  geom_point() +
  geom_line()
Comments? Main effects? Interactions?
3. Compute the odds ratio manually comparing the academic performances for each parenting style
First, we compute the probability of dropout for each category:
agg <- aggregate(drop ~ parenting + academic, FUN = mean, data = dat)
agg
parenting academic drop
1 neglectful low 0.37894737
2 permissive low 0.19480519
3 authoritative low 0.15000000
4 authoritarian low 0.20000000
5 neglectful high 0.29166667
6 permissive high 0.01333333
7 authoritative high 0.03846154
8 authoritarian high 0.13333333
Then we can compute the odds from the probabilities, and the odds ratios:
odds <- function(p) p / (1 - p)
agg$odds <- odds(agg$drop)

ors <- agg$odds[agg$academic == "high"] / agg$odds[agg$academic == "low"]
names(ors) <- unique(agg$parenting)
ors
neglectful permissive authoritative authoritarian
0.67483660 0.05585586 0.22666667 0.61538462
Comments?
4. Model fitting with glm() using the dataset in the binary form
Let’s start fitting the null model:
fit0 <- glm(drop ~ 1, data = dat, family = binomial(link = "logit"))
summary(fit0)
Call:
glm(formula = drop ~ 1, family = binomial(link = "logit"), data = dat)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6025 -0.6025 -0.6025 -0.6025 1.8951
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.6142 0.1202 -13.43 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 449.49 on 499 degrees of freedom
Residual deviance: 449.49 on 499 degrees of freedom
AIC: 451.49
Number of Fisher Scoring iterations: 3
The intercept is the log of the overall odds of dropout; exponentiating returns the odds, and the inverse logit returns the probability:
exp(coef(fit0))
(Intercept)
0.1990408
plogis(coef(fit0))
(Intercept)
0.166
mean(dat$drop)
[1] 0.166
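Equivalently (a quick check, not in the original output), the intercept should match the log odds of the observed dropout rate:
qlogis(mean(dat$drop)) # log odds of the overall dropout rate, same as coef(fit0)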
Let’s now fit a model with the two main effects:
fit1 <- glm(drop ~ academic + parenting, data = dat, family = binomial(link = "logit"))
summary(fit1)
Call:
glm(formula = drop ~ academic + parenting, family = binomial(link = "logit"),
data = dat)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.0161 -0.5696 -0.3506 -0.3036 2.4902
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.3921 0.1985 -1.975 0.048216 *
academichigh -1.0221 0.3030 -3.373 0.000742 ***
parentingpermissive -1.3446 0.3335 -4.031 5.55e-05 ***
parentingauthoritative -1.6402 0.4670 -3.512 0.000444 ***
parentingauthoritarian -0.7550 0.3412 -2.212 0.026935 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 449.49 on 499 degrees of freedom
Residual deviance: 392.66 on 495 degrees of freedom
AIC: 402.66
Number of Fisher Scoring iterations: 5
Comments?
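To ease interpretation, we can exponentiate the coefficients and read them as odds ratios; a quick sketch (not part of the original output):
exp(coef(fit1)) # odds ratios
exp(confint(fit1)) # profile-likelihood 95% CIs on the odds-ratio scale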
Let’s now fit the interaction model:
fit2 <- glm(drop ~ academic * parenting, data = dat, family = binomial(link = "logit"))
summary(fit2)
Call:
glm(formula = drop ~ academic * parenting, family = binomial(link = "logit"),
data = dat)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.9760 -0.6583 -0.2801 -0.1638 2.9385
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.49402 0.21149 -2.336 0.01950 *
academichigh -0.39328 0.49639 -0.792 0.42820
parentingpermissive -0.92507 0.35710 -2.590 0.00958 **
parentingauthoritative -1.24058 0.66097 -1.877 0.06053 .
parentingauthoritarian -0.89228 0.42850 -2.082 0.03731 *
academichigh:parentingpermissive -2.49170 1.15811 -2.152 0.03144 *
academichigh:parentingauthoritative -1.09099 0.94793 -1.151 0.24976
academichigh:parentingauthoritarian -0.09222 0.72769 -0.127 0.89915
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 449.49 on 499 degrees of freedom
Residual deviance: 384.58 on 492 degrees of freedom
AIC: 400.58
Number of Fisher Scoring iterations: 6
5. Model fitting with glm() using the dataset in the aggregated form
In this case we can easily fit the same model using the aggregated form: a dataset that, instead of individual 0s and 1s, stores the number of 1s (and 0s) for each condition.
dat_agg <- dat |>
  group_by(academic, parenting) |>
  summarise(drop_1 = sum(drop),
            drop_0 = sum(drop == 0)) |>
  data.frame()

dat_agg$drop_tot <- dat_agg$drop_1 + dat_agg$drop_0
Now we have a column with the number of 1s and a column with the number of 0s, from which we also computed the total:
dat_agg
academic parenting drop_1 drop_0 drop_tot
1 low neglectful 36 59 95
2 low permissive 15 62 77
3 low authoritative 3 17 20
4 low authoritarian 9 36 45
5 high neglectful 7 17 24
6 high permissive 1 74 75
7 high authoritative 4 100 104
8 high authoritarian 8 52 60
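As a quick sanity check (a minimal sketch), the two forms should describe the same 500 students and 83 dropouts:
sum(dat_agg$drop_tot) == nrow(dat) # should be TRUE
sum(dat_agg$drop_1) == sum(dat$drop) # should be TRUE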
The two datasets (dat and dat_agg) contain the same information. Let’s now fit the same models as before:
fit0_agg <- glm(cbind(drop_1, drop_0) ~ 1, data = dat_agg, family = binomial(link = "logit"))
summary(fit0_agg)
Call:
glm(formula = cbind(drop_1, drop_0) ~ 1, family = binomial(link = "logit"),
data = dat_agg)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.6142 0.1202 -13.43 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 64.902 on 7 degrees of freedom
Residual deviance: 64.902 on 7 degrees of freedom
AIC: 95.33
Let’s now fit a model with the two main effects:
fit1_agg <- glm(cbind(drop_1, drop_0) ~ academic + parenting, data = dat_agg, family = binomial(link = "logit"))
summary(fit1_agg)
Call:
glm(formula = cbind(drop_1, drop_0) ~ academic + parenting, family = binomial(link = "logit"),
data = dat_agg)
Deviance Residuals:
1 2 3 4 5 6 7 8
-0.4840 1.0677 0.4590 -0.6573 1.1269 -2.0280 -0.3309 0.7549
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.3921 0.1985 -1.975 0.048216 *
academichigh -1.0221 0.3030 -3.373 0.000742 ***
parentingpermissive -1.3446 0.3335 -4.031 5.54e-05 ***
parentingauthoritative -1.6402 0.4670 -3.512 0.000444 ***
parentingauthoritarian -0.7550 0.3412 -2.212 0.026935 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 64.902 on 7 degrees of freedom
Residual deviance: 8.079 on 3 degrees of freedom
AIC: 46.507
Number of Fisher Scoring iterations: 4
Comments?
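As a quick check (not in the original output), the binary and aggregated fits should give identical coefficients:
cbind(binary = coef(fit1), aggregated = coef(fit1_agg))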
Let’s now fit the interaction model:
fit2_agg <- glm(cbind(drop_1, drop_0) ~ academic * parenting, data = dat_agg, family = binomial(link = "logit"))
summary(fit2_agg)
Call:
glm(formula = cbind(drop_1, drop_0) ~ academic * parenting, family = binomial(link = "logit"),
data = dat_agg)
Deviance Residuals:
[1] 0 0 0 0 0 0 0 0
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.49402 0.21149 -2.336 0.01950 *
academichigh -0.39328 0.49639 -0.792 0.42820
parentingpermissive -0.92507 0.35710 -2.590 0.00958 **
parentingauthoritative -1.24058 0.66097 -1.877 0.06053 .
parentingauthoritarian -0.89228 0.42850 -2.082 0.03731 *
academichigh:parentingpermissive -2.49170 1.15876 -2.150 0.03153 *
academichigh:parentingauthoritative -1.09099 0.94793 -1.151 0.24976
academichigh:parentingauthoritarian -0.09222 0.72769 -0.127 0.89915
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 6.4902e+01 on 7 degrees of freedom
Residual deviance: 1.1546e-14 on 0 degrees of freedom
AIC: 44.428
Number of Fisher Scoring iterations: 4
Do you notice any difference with the previous models?
6. Plotting and interpreting effects of both models
Let’s start by plotting the full model (in both forms):
plot(allEffects(fit2))
plot(allEffects(fit2_agg))
Let’s compare the coefficients:
car::compareCoefs(fit2, fit2_agg)
Calls:
1: glm(formula = drop ~ academic * parenting, family = binomial(link =
"logit"), data = dat)
2: glm(formula = cbind(drop_1, drop_0) ~ academic * parenting, family =
binomial(link = "logit"), data = dat_agg)
Model 1 Model 2
(Intercept) -0.494 -0.494
SE 0.211 0.211
academichigh -0.393 -0.393
SE 0.496 0.496
parentingpermissive -0.925 -0.925
SE 0.357 0.357
parentingauthoritative -1.241 -1.241
SE 0.661 0.661
parentingauthoritarian -0.892 -0.892
SE 0.429 0.429
academichigh:parentingpermissive -2.49 -2.49
SE 1.16 1.16
academichigh:parentingauthoritative -1.091 -1.091
SE 0.948 0.948
academichigh:parentingauthoritarian -0.0922 -0.0922
SE 0.7277 0.7277
Now let’s interpret the effects. The “new” component is the interaction between two categorical variables. If the coefficient for a single categorical predictor is a log odds ratio, the interaction coefficient is the difference between two log odds ratios. Exponentiated, i.e., back on the odds scale, the parameter is the ratio between two odds ratios.
This is the odds ratio for the academic effect with neglectful parenting (i.e., the reference level):
coefs <- coef(fit2_agg)

coefs["academichigh"] # log odds ratio
academichigh
-0.3932847
exp(coefs["academichigh"]) # odds ratio
academichigh
0.6748366
agg
parenting academic drop odds
1 neglectful low 0.37894737 0.61016949
2 permissive low 0.19480519 0.24193548
3 authoritative low 0.15000000 0.17647059
4 authoritarian low 0.20000000 0.25000000
5 neglectful high 0.29166667 0.41176471
6 permissive high 0.01333333 0.01351351
7 authoritative high 0.03846154 0.04000000
8 authoritarian high 0.13333333 0.15384615
low <- agg$odds[agg$parenting == "neglectful" & agg$academic == "low"]
high <- agg$odds[agg$parenting == "neglectful" & agg$academic == "high"]

high/low
[1] 0.6748366
log(high/low)
[1] -0.3932847
Then academichigh:parentingpermissive is the difference between the log odds ratios (high vs. low academic performance) for the permissive and the neglectful parenting styles.
coefs["academichigh:parentingpermissive"]
academichigh:parentingpermissive
-2.491696
exp(coefs["academichigh:parentingpermissive"])
academichigh:parentingpermissive
0.08276945
low_neg <- agg$odds[agg$parenting == "neglectful" & agg$academic == "low"]
high_neg <- agg$odds[agg$parenting == "neglectful" & agg$academic == "high"]
low_per <- agg$odds[agg$parenting == "permissive" & agg$academic == "low"]
high_per <- agg$odds[agg$parenting == "permissive" & agg$academic == "high"]
log((high_per / low_per)) - log((high_neg / low_neg))
[1] -2.491696
(high_per / low_per) / (high_neg / low_neg)
[1] 0.08276945
Similarly to a single odds ratio, the ratio between two odds ratios can be interpreted in the same way:
- OR1 / OR2 > 1: the odds ratio for the numerator condition is x times higher than the odds ratio for the denominator condition
- OR1 / OR2 < 1: the odds ratio for the numerator condition is x times lower than the odds ratio for the denominator condition
Here, for instance, 0.083 means that the high vs. low odds ratio under permissive parenting is about 1/0.083 ≈ 12 times smaller than under neglectful parenting.
Of course, the best way is using the predict() function:
preds <- expand.grid(parenting = c("neglectful", "permissive"),
                     academic = c("low", "high"))
preds$pr <- predict(fit2_agg, newdata = preds)
with(preds, (exp(pr)[4] / exp(pr)[2]) / (exp(pr)[3] / exp(pr)[1]))
[1] 0.08276945
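The emmeans package (loaded above but not otherwise used here) offers a more convenient route; a sketch of one possible call:
# estimated marginal means on the link scale, split by parenting style
emm <- emmeans(fit2_agg, ~ academic | parenting)
# high vs. low contrasts back-transformed to odds ratios
pairs(emm, reverse = TRUE, type = "response")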
Why is the residual deviance different between the aggregated and the binary model?
deviance(fit2)
[1] 384.5843
deviance(fit2_agg)
[1] 1.154632e-14
This is the main difference between the two approaches. We should not compare the raw deviances of the two models (e.g., concluding that the aggregated form is better because its deviance is closer to 0); we always need to compare each model against its own null deviance.
anova(fit0, fit2, test = "LRT")
Analysis of Deviance Table
Model 1: drop ~ 1
Model 2: drop ~ academic * parenting
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 499 449.49
2 492 384.58 7 64.902 1.573e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit0_agg, fit2_agg, test = "LRT")
Analysis of Deviance Table
Model 1: cbind(drop_1, drop_0) ~ 1
Model 2: cbind(drop_1, drop_0) ~ academic * parenting
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 7 64.902
2 0 0.000 7 64.902 1.573e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
As you can see, the deviance difference (64.902) is identical; the two residual deviances are simply on different scales. The two models explain the same amount of (relative) deviance.
Why?
The reason is that the residual deviance is computed against the observed 0s and 1s in the binary case, and against the observed counts in the aggregated case.
# aggregated model deviance
-2 * sum(log(dbinom(dat_agg$drop_1, dat_agg$drop_tot, fitted(fit2_agg))) -
           log(dbinom(dat_agg$drop_1, dat_agg$drop_tot, dat_agg$drop_1 / dat_agg$drop_tot)))
[1] 0
# binary model deviance
-2 * sum(log(dbinom(dat$drop, 1, fitted(fit2))) -
           log(dbinom(dat$drop, 1, dat$drop)))
[1] 384.5843
In a way, it is harder to predict individual 0s and 1s than counts, so the residuals and the residual deviance will always be higher in the binary form. Model coefficients, standard errors and tests are the same.
Where are the two approaches not interchangeable? It depends on the type of predictors. Let’s add a new column to our binary dataset with the age of each student:
dat$age <- round(runif(nrow(dat), 12, 18)) # simulated ages between 12 and 18
Now, if we want to include age as a predictor, we need to use the binary form because we have one value per student: the predictor lives at the level of the individual 0-1 values.
fit3 <- glm(drop ~ academic * parenting + age, data = dat, family = binomial(link = "logit"))
summary(fit3)
Call:
glm(formula = drop ~ academic * parenting + age, family = binomial(link = "logit"),
data = dat)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.0978 -0.6638 -0.3073 -0.1646 2.8690
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.98078 1.14444 0.857 0.3914
academichigh -0.42094 0.49898 -0.844 0.3989
parentingpermissive -0.91761 0.35800 -2.563 0.0104 *
parentingauthoritative -1.16681 0.66420 -1.757 0.0790 .
parentingauthoritarian -0.89637 0.42962 -2.086 0.0369 *
age -0.09758 0.07458 -1.308 0.1907
academichigh:parentingpermissive -2.47288 1.15933 -2.133 0.0329 *
academichigh:parentingauthoritative -1.15356 0.95100 -1.213 0.2251
academichigh:parentingauthoritarian -0.08127 0.72984 -0.111 0.9113
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 449.49 on 499 degrees of freedom
Residual deviance: 382.86 on 491 degrees of freedom
AIC: 400.86
Number of Fisher Scoring iterations: 6
When we have predictors at the 0-1 levels, we need to use the binary form.
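To see why, consider what aggregating would require: grouping by every predictor, so each subject-level covariate multiplies the number of cells. A minimal sketch (dat_age_agg is a hypothetical name, not part of the lab):
# grouping by age as well: even with age rounded to whole years the 8 cells
# multiply; a truly continuous covariate would leave ~one row per student
dat_age_agg <- dat |>
  group_by(academic, parenting, age) |>
  summarise(drop_1 = sum(drop),
            drop_0 = sum(drop == 0),
            .groups = "drop")
nrow(dat_age_agg) # many more (and much smaller) cells than the 8 above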
A little (visual) demonstration:
x <- seq(0, 1, 0.01)

# note: this simulated dataset overwrites the dropout data from above
dat <- data.frame(
  x = rep(x, 10)
)
dat$lp <- plogis(qlogis(0.01) + 8 * dat$x)
dat$y <- rbinom(nrow(dat), 1, dat$lp)
head(dat)
x lp y
1 0.00 0.01000000 0
2 0.01 0.01082386 0
3 0.02 0.01171478 0
4 0.03 0.01267810 0
5 0.04 0.01371954 0
6 0.05 0.01484523 0
Let’s fit the model in the binary form:
# model prediction
fit <- glm(y ~ x, data = dat, family = binomial())

# equivalent to predict()
pi <- plogis(coef(fit)[1] + coef(fit)[2] * unique(dat$x))
Let’s fit the model in the binomial form:
# aggregated form
dat_agg <- aggregate(y ~ x, FUN = sum, data = dat)
dat_agg$n <- 10 # total trials
dat_agg$f <- dat_agg$n - dat_agg$y # failures
dat_agg$p <- dat_agg$y / dat_agg$n
head(dat_agg)
x y n f p
1 0.00 0 10 10 0
2 0.01 0 10 10 0
3 0.02 0 10 10 0
4 0.03 0 10 10 0
5 0.04 0 10 10 0
6 0.05 0 10 10 0
fit2 <- glm(cbind(y, f) ~ x, data = dat_agg, family = binomial()) # overwrites the earlier fit2
pi <- plogis(coef(fit2)[1] + coef(fit2)[2] * dat_agg$x)
The residuals (and thus the residual deviance) will always be larger in the binary model, but the coefficients are the same:
par(mfrow = c(1,2))
jit <- runif(nrow(dat), -0.03, 0.03)
plot((y + jit) ~ x, data = dat, ylab = "y", xlab = "x",
     main = "Binary Form")
lines(unique(dat$x), pi, lwd = 2, col = "red")

plot(y/n ~ x, data = dat_agg, ylab = "y",
     main = "Binomial Form")
lines(dat_agg$x, pi, lwd = 2, col = "red")
This is also the reason why binary models have strange-looking residuals:
# also residuals
par(mfrow = c(1,2))
plot(fitted(fit), residuals(fit), main = "Binary Form")
plot(fitted(fit2), residuals(fit2), main = "Binomial Form")