2025-03-28

Author

Filippo Gambarota

Preprocessing of the facial expression dataset (https://osf.io/download/tph5f/).

library(tidyverse)
library(lme4)
library(here)

# source of the raw data:
# https://osf.io/zhtbj/?view_only=
# download.file("https://osf.io/download/tph5f/", "data-raw/emoint.csv")

# load the raw csv; the path is resolved through here()
dat <- here("data-raw/emoint.csv") |>
  read.csv()

# Lookup tables used to recode the raw data into emotion labels.
#
# conversion for responses (numeric code -> label)
# 1 = neutral
# 2 = anger
# 3 = disgust
# 4 = fear
# 5 = happiness
# 6 = sadness
# 7 = surprise

resp_code <- c(
  "1" = "neutral",
  "2" = "anger",
  "3" = "disgust",
  "4" = "fear",
  "5" = "happiness",
  "6" = "sadness",
  "7" = "surprise"
)

# conversion for the displayed emotion (tag -> label)
# The labels must be spelled exactly as in resp_code, because accuracy is
# computed by comparing the two recoded labels for equality. Both "disop" and
# "discl" map onto the single label "disgust".

emotion_code <- c(
  "fear" = "fear",
  "disop" = "disgust",
  "discl" = "disgust",
  "hap" = "happiness",
  "sad" = "sadness",
  "sur" = "surprise",
  "angcl" = "anger",
  "neutral" = "neutral"
)

# Wide -> long: every column from the 4th onward becomes one row per
# participant, with the former column name in `name` and the cell value in
# `response`.
dat_clean <- pivot_longer(dat, 4:ncol(dat), values_to = "response")

# The former column name is split on "_" into face, emotion and intensity.
dat_clean <- separate(
  dat_clean,
  name,
  into = c("face", "emotion", "intensity"),
  sep = "_"
)

# intensity as number; neutral faces are set to the maximal intensity (100)
# so that they do not end up as NA
dat_clean <- mutate(
  dat_clean,
  intensity = as.numeric(intensity),
  intensity = ifelse(emotion == "neutral", 100, intensity)
)

# the first three columns are renamed by position
names(dat_clean)[1:3] <- c("id", "gender", "age")

# recoding response with labels
# see https://adv-r.hadley.nz/subsetting.html?q=look#lookup-tables
# The numeric code is converted to character so the lookup matches on the
# names of resp_code rather than on element position: a code that is not in
# the table then yields NA instead of dropping or shifting elements.
# unname() prevents the lookup keys from being stored as element names.

dat_clean$response_lbl <- unname(resp_code[as.character(dat_clean$response)])

# recoding the displayed emotion onto the same label set as the response

dat_clean$emotion_lbl <- unname(emotion_code[dat_clean$emotion])

# binary accuracy: 1 if emotion_lbl == response_lbl, 0 otherwise
# (NA when either label is missing)

dat_clean$acc <- as.integer(dat_clean$response_lbl == dat_clean$emotion_lbl)

# quick look at the first six rows of the cleaned data
head(dat_clean, n = 6L)
# A tibble: 6 × 10
     id gender age   face  emotion intensity response response_lbl emotion_lbl
  <int> <chr>  <chr> <chr> <chr>       <dbl>    <int> <chr>        <chr>      
1     1 f      32    f1    fear           60        4 fear         fear       
2     1 f      32    m3    disop          60        3 disgust      disgust    
3     1 f      32    m3    hap            70        5 happiness    happiness  
4     1 f      32    m1    hap           100        5 happiness    happiness  
5     1 f      32    m4    disop          60        6 sadness      disgust    
6     1 f      32    m1    fear           20        1 neutral      fear       
# ℹ 1 more variable: acc <int>
# store the cleaned dataset as an RDS file; the path is resolved through here()
saveRDS(object = dat_clean, file = here("data/emoint.rds"))