# Preprocessing of the facial expression dataset (https://osf.io/download/tph5f/).
library(tidyverse)
library(lme4)
library(here)
# Project page on OSF: https://osf.io/zhtbj/?view_only=
# To fetch a fresh copy of the raw file, run once:
# download.file("https://osf.io/download/tph5f/", "data-raw/emoint.csv")
# Read the raw wide-format CSV, resolving the path from the project root.
dat <- here("data-raw/emoint.csv") |>
  read.csv()
# Lookup table for participants' responses: numeric response code (stored as
# a string key) -> emotion label.
# 1 = neutral
# 2 = anger
# 3 = disgust
# 4 = fear
# 5 = happiness
# 6 = sadness
# 7 = surprise
resp_code <- c(
  "1" = "neutral",
  "2" = "anger",
  "3" = "disgust",
  "4" = "fear",
  "5" = "happiness",
  "6" = "sadness",
  "7" = "surprise"
)
# Lookup table for the displayed emotion: emotion code -> emotion label.
# The labels must be spelled exactly as in `resp_code`, because the two
# labels are compared to score accuracy. Both "disop" and "discl" map to
# "disgust".
emotion_code <- c(
  "fear" = "fear",
  "disop" = "disgust",
  "discl" = "disgust",
  "hap" = "happiness",
  "sad" = "sadness",
  "sur" = "surprise",
  "angcl" = "anger",
  "neutral" = "neutral"
)
# Reshape from wide to long: every column after the first three becomes one
# row per participant, with the old column name split on "_" into `face`,
# `emotion` and `intensity`, and the cell value stored in `response`.
dat_clean <- dat |>
  # keep the first three (participant-level) columns, pivot everything else
  pivot_longer(cols = -(1:3), values_to = "response") |>
  separate(
    name,
    into = c("face", "emotion", "intensity"),
    sep = "_",
    # names with fewer than three pieces (presumably the neutral stimuli,
    # which carry no intensity) are padded with NA on the right; same result
    # as the default, but without the "Expected 3 pieces" warning
    fill = "right"
  ) |>
  mutate(
    # intensity as number
    intensity = as.numeric(intensity),
    # neutral as maximal intensity, avoid NA
    intensity = ifelse(emotion == "neutral", 100, intensity)
  )
# give the three participant-level columns short, explicit names
names(dat_clean)[1:3] <- c("id", "gender", "age")
# Recode responses and displayed emotions into labels through the
# named-vector lookup tables, then score accuracy.
# see https://adv-r.hadley.nz/subsetting.html?q=look#lookup-tables
dat_clean <- dat_clean |>
  mutate(
    # `response` is numeric, so it must be converted to character to look up
    # by key; a numeric index would select by position and only match the
    # keys by coincidence of ordering. unname() drops the lookup keys so the
    # column is a plain character vector.
    response_lbl = unname(resp_code[as.character(response)]),
    # displayed emotion expressed with the same labels as the response;
    # codes missing from `emotion_code` yield NA
    emotion_lbl = unname(emotion_code[emotion]),
    # binary accuracy: 1 if the response label matches the displayed emotion
    acc = as.integer(response_lbl == emotion_lbl)
  )
head(dat_clean)
#> # A tibble: 6 × 10
#>      id gender age   face  emotion intensity response response_lbl emotion_lbl
#>   <int> <chr>  <chr> <chr> <chr>       <dbl>    <int> <chr>        <chr>
#> 1     1 f      32    f1    fear           60        4 fear         fear
#> 2     1 f      32    m3    disop          60        3 disgust      disgust
#> 3     1 f      32    m3    hap            70        5 happiness    happiness
#> 4     1 f      32    m1    hap           100        5 happiness    happiness
#> 5     1 f      32    m4    disop          60        6 sadness      disgust
#> 6     1 f      32    m1    fear           20        1 neutral      fear
#> # ℹ 1 more variable: acc <int>
# Persist the cleaned long-format data as an RDS file under the project root.
saveRDS(
  object = dat_clean,
  file = here("data/emoint.rds")
)