Note: Set the chunk option ‘eval’ to ‘TRUE’ to run this code
Load packages
library(here) # makes pathways easier
library(tidyverse) # for data manipulation
library(glmmTMB) # for beta regression with random effects
library(buildmer) # for backward variable elimination
Data prep
# Read the coded voicing data from the project root (resolved with here())
# and keep only rows with a voicing value; rows where voic_hum is NA are the
# deleted tokens.
sal <- here("data_voicing_coded.csv") %>%
  read_csv() %>%
  filter(!is.na(voic_hum))
Fit 40 beta regression models and save the p-values of the FRC terms (where FRC remains in the minimal adequate model)
Note: This block takes hours to run
# Sensitivity analysis over two data-inclusion settings:
#   n            - minimum number of tokens (n_ratio_cond) upon which a token's
#                  FRC (ratio_cond) must be based; 3 through 10
#   sd_threshold - half-width, in quarter standard deviations of FRC, of the
#                  band centered on the median FRC whose tokens are removed;
#                  0 through 4 (0 removes no band)
# For each of the 8 x 5 = 40 combinations a beta regression is fitted with
# buildmer's stepwise elimination, and every conditional-model coefficient of
# the resulting model whose name contains "ratio_cond" is stored in
# `collector`, tagged with the two settings (n_frc, sd_fourth).
collector <- tibble()
for (n in 3:10) {
  cat("threshold = ", n, "\n")
  for (sd_threshold in 0:4) {
    cat("\tSD = ", sd_threshold, "\n")

    # keep tokens whose FRC is based on at least n tokens
    enough_tokens <- sal %>%
      filter(n_ratio_cond >= n)

    # median and half-width of the excluded band, both computed on the
    # thresholded subset
    frc_median <- median(enough_tokens$ratio_cond)
    half_band <- (sd(enough_tokens$ratio_cond) / 4) * sd_threshold

    model_data <- enough_tokens %>%
      # drop tokens whose FRC lies strictly inside the band around the median
      filter(
        ratio_cond <= frc_median - half_band |
          ratio_cond >= frc_median + half_band
      ) %>%
      # move voicing values slightly off of 0 and 1
      mutate(
        voic_hum = case_when(
          voic_hum == 0 ~ voic_hum + 0.001,
          voic_hum == 1 ~ voic_hum - 0.001,
          TRUE ~ voic_hum
        )
      ) %>%
      # z-score speech rate within each file (per speaker); scale() returns a
      # one-column matrix, so convert it to a plain numeric vector
      group_by(file) %>%
      mutate(post_spk_rate_dip = as.numeric(scale(post_spk_rate_dip))) %>%
      ungroup()

    # all main effects and two-way interactions, plus by-file and by-word
    # random intercepts and FRC slopes; buildmer does the stepwise elimination
    model1 <- buildmer::buildglmmTMB(
      voic_hum ~
        (ratio_cond +
          fol_phon_bin5 +
          stress +
          log(wd_freq + 1) +
          pre_phon_bin2 +
          post_spk_rate_dip +
          wd_class_bin)^2 +
        (1 + ratio_cond | file) + (1 + ratio_cond | wd_upper),
      family = beta_family(link = "logit"),
      data = model_data
    )

    # conditional-model coefficient table with term names in a `vars` column;
    # keep only the terms that involve FRC
    coefficients <- summary(model1)$coefficients$cond %>%
      as_tibble(rownames = "vars") %>%
      rename(p = `Pr(>|z|)`) %>%
      filter(str_detect(vars, "ratio_cond")) %>%
      mutate(n_frc = n, sd_fourth = sd_threshold)

    # append after every fit so results gathered so far stay available in
    # `collector` if this long-running loop is interrupted
    if (nrow(coefficients) > 0) {
      collector <- bind_rows(collector, coefficients)
    }
  } # next standard deviation threshold
} # next threshold of number of tokens upon which FRC is based
# Write the collected FRC coefficients to coefficients.csv in the project
# root. The target is passed positionally because readr deprecated the
# `path` argument name in favour of `file`; the positional form works with
# either.
write_csv(collector, here("coefficients.csv"))
# Read the data frame back in (if needed because it's no longer in working
# memory)
collector <- read_csv(here("coefficients.csv"))
Create a temporary data frame with a subset of the data
The FRC score must be based on at least 5 tokens in the reference corpus
Tokens whose FRC lies within a quarter standard deviation on either side of the median FRC (a band half a standard deviation wide) are excluded
# Build `temp`, a subset of `sal`:
#   - FRC (ratio_cond) must be based on at least 5 tokens (n_ratio_cond)
#   - tokens whose FRC lies strictly within a quarter standard deviation on
#     either side of the median FRC are dropped (median and SD are computed
#     on the subset that passed the token-count filter)
temp <- sal %>%
  filter(n_ratio_cond >= 5) %>%
  filter(
    (ratio_cond <= median(.$ratio_cond) - (sd(.$ratio_cond) / 4) * 1) |
      (ratio_cond >= median(.$ratio_cond) + (sd(.$ratio_cond) / 4) * 1)
  ) %>%
  # move voicing values slightly off of 0 and 1
  mutate(
    voic_hum = case_when(
      voic_hum == 0 ~ voic_hum + 0.001,
      voic_hum == 1 ~ voic_hum - 0.001,
      TRUE ~ voic_hum
    )
  ) %>%
  # z-score speech rate within each file; scale() returns a one-column
  # matrix, so convert it to a plain numeric vector instead of storing a
  # matrix column
  group_by(file) %>%
  mutate(post_spk_rate_dip = as.numeric(scale(post_spk_rate_dip))) %>%
  ungroup() %>%
  mutate(
    # relabel the following-sound factor with "Voiced C" as the first level;
    # any value other than "voicedC" or "other" becomes NA
    fol_phon_bin5 = factor(
      fol_phon_bin5,
      levels = c("voicedC", "other"),
      labels = c("Voiced C", "Other sound or pause")
    ),
    # rescale voicing from a proportion to a 0-100 scale
    # NOTE(review): a beta regression needs the response strictly inside
    # (0, 1), so any beta model fitted on `temp` must scale voic_hum back
    # down first - confirm which consumers expect the 0-100 scale
    voic_hum = voic_hum * 100
  )
Table 1
# Table 1: beta regression (logit link) of voicing on FRC and the other
# predictors, with all two-way interactions, plus by-file and by-word random
# intercepts and FRC slopes; buildmer performs the stepwise elimination.
#
# `temp` stores voic_hum multiplied by 100, but beta_family() requires a
# response strictly between 0 and 1, so convert it back to a proportion for
# the model. `temp` itself is left untouched.
temp_prop <- temp %>%
  mutate(voic_hum = voic_hum / 100)

model1 <- buildmer::buildglmmTMB(
  voic_hum ~
    (ratio_cond +
      fol_phon_bin5 +
      stress +
      log(wd_freq + 1) +
      pre_phon_bin2 +
      post_spk_rate_dip +
      wd_class_bin)^2 +
    (1 + ratio_cond | file) + (1 + ratio_cond | wd_upper),
  family = beta_family(link = "logit"),
  data = temp_prop
)
summary(model1)