Load packages.
# Install packages (run once if needed).
# devtools::install_github("https://github.com/tdienlin/td@v.0.0.2.5")

# Packages required for the analyses below.
packages <- c(
  "broom.mixed", "brms", "devtools", "GGally", "ggplot2",
  "gridExtra", "kableExtra", "knitr", "lavaan", "lme4",
  "magrittr", "mice", # "mvnormalTest",
  "PerFit", "performance", "psych", "quanteda.textstats", "semTools",
  "tidyverse"
)

# Attach all packages plus the custom "td" package (attach order matters
# for masking, so the vector order is kept as-is).
lapply(c(packages, "td"), library, character.only = TRUE)
Provide session info to make results reproducible.
# Record R and package versions for reproducibility (output below).
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] td_0.0.1 forcats_0.5.2 stringr_1.4.1 dplyr_1.0.10 purrr_0.3.5 readr_2.1.3 tidyr_1.2.1 tibble_3.1.8 tidyverse_1.3.2 semTools_0.5-6
## [11] quanteda.textstats_0.96 psych_2.2.9 performance_0.10.2 PerFit_1.4.6 mirt_1.37.1 lattice_0.20-45 ltm_1.2-0 polycor_0.8-1 msm_1.7 MASS_7.3-58.1
## [21] mice_3.15.0 magrittr_2.0.3 lme4_1.1-31 Matrix_1.5-1 lavaan_0.6-12 knitr_1.41 kableExtra_1.3.4 gridExtra_2.3 GGally_2.1.2 ggplot2_3.4.0
## [31] devtools_2.4.5 usethis_2.1.6 brms_2.18.0 Rcpp_1.0.9 broom.mixed_0.2.9.4
##
## loaded via a namespace (and not attached):
## [1] coda_0.19-4 stopwords_2.3 dygraphs_1.1.1.6 data.table_1.14.6 rpart_4.1.19 inline_0.3.19 RCurl_1.98-1.9 generics_0.1.3 callr_3.7.3 future_1.29.0 nsyllable_1.0.1
## [12] tzdb_0.3.0 webshot_0.5.4 xml2_1.3.3 lubridate_1.9.0 httpuv_1.6.6 StanHeaders_2.21.0-7 assertthat_0.2.1 gargle_1.2.1 xfun_0.35 hms_1.1.2 jquerylib_0.1.4
## [23] bayesplot_1.10.0 evaluate_0.18 promises_1.2.0.1 fansi_1.0.3 readxl_1.4.1 dbplyr_2.2.1 igraph_1.3.5 DBI_1.1.3 htmlwidgets_1.5.4 reshape_0.8.9 tensorA_0.36.2
## [34] googledrive_2.0.0 ellipsis_0.3.2 crosstalk_1.2.0 ks_1.14.0 backports_1.4.1 pbivnorm_0.6.0 insight_0.18.8 permute_0.9-7 markdown_1.4 RcppParallel_5.1.5 deldir_1.0-6
## [45] vctrs_0.5.1 remotes_2.4.2 abind_1.4-5 cachem_1.0.6 withr_2.5.0 checkmate_2.1.0 vegan_2.6-4 xts_0.12.2 prettyunits_1.1.1 mclust_6.0.0 mnormt_2.1.1
## [56] svglite_2.1.0 cluster_2.1.4 crayon_1.5.2 pkgconfig_2.0.3 nlme_3.1-160 pkgload_1.3.2 nnet_7.3-18 rlang_1.0.6 globals_0.16.1 lifecycle_1.0.3 miniUI_0.1.1.1
## [67] colourpicker_1.2.0 modelr_0.1.10 cellranger_1.1.0 distributional_0.3.1 matrixStats_0.63.0 loo_2.5.1 boot_1.3-28 zoo_1.8-11 reprex_2.0.2 base64enc_0.1-3 processx_3.8.0
## [78] googlesheets4_1.0.1 png_0.1-7 viridisLite_0.4.1 bitops_1.0-7 KernSmooth_2.23-20 parallelly_1.32.1 jpeg_0.1-9 shinystan_2.6.0 scales_1.2.1 memoise_2.0.1 plyr_1.8.8
## [89] threejs_0.3.3 compiler_4.2.2 hdrcde_3.4 rstantools_2.2.0 RColorBrewer_1.1-3 cli_3.4.1 urlchecker_1.0.1 listenv_0.8.0 pbapply_1.6-0 ps_1.7.2 Brobdingnag_1.2-9
## [100] htmlTable_2.4.1 Formula_1.2-4 mgcv_1.8-41 tidyselect_1.2.0 stringi_1.7.8 yaml_2.3.6 latticeExtra_0.6-30 bridgesampling_1.1-2 grid_4.2.2 sass_0.4.3 fastmatch_1.1-3
## [111] tools_4.2.2 timechange_0.1.1 parallel_4.2.2 rstudioapi_0.14 foreign_0.8-83 GPArotation_2022.10-2 quanteda_3.2.3 posterior_1.3.1 farver_2.1.1 irtoys_0.2.2 digest_0.6.30
## [122] shiny_1.7.3 pracma_2.4.2 broom_1.0.1 later_1.3.0 fda_6.0.5 httr_1.4.4 Deriv_4.1.3 colorspace_2.0-3 rvest_1.0.3 fs_1.5.2 rainbow_3.7
## [133] splines_4.2.2 sm_2.2-5.7.1 expm_0.999-6 shinythemes_1.2.0 sessioninfo_1.2.2 systemfonts_1.0.4 xtable_1.8-4 jsonlite_1.8.3 nloptr_2.0.3 fds_1.8 rstan_2.21.7
## [144] dcurver_0.9.2 R6_2.5.1 profvis_0.3.7 Hmisc_4.7-2 pillar_1.8.1 htmltools_0.5.3 mime_0.12 glue_1.6.2 fastmap_1.1.0 minqa_1.2.5 DT_0.26
## [155] deSolve_1.34 codetools_0.2-18 pkgbuild_1.3.1 pcaPP_2.0-3 mvtnorm_1.1-3 furrr_0.3.1 utf8_1.2.2 bslib_0.4.1 gtools_3.9.3 shinyjs_2.1.0 interp_1.1-3
## [166] survival_3.4-0 admisc_0.30 rmarkdown_2.18 munsell_0.5.0 haven_2.5.1 reshape2_1.4.4 gtable_0.3.1
Find variables in dataset.
# Find the wave-specific variable names of an item.
#
# Looks up all variables whose label equals `name` in the variable
# documentation and returns them as a character vector named by the
# wave prefix (the part before the first underscore of each name).
#
# Fix: the original wrapped the function body in `( )` instead of
# `{ }`, which works in R but is fragile and unidiomatic.
#
# @param name Item label to look up (column `Label` in `data`).
# @param data Variable documentation; defaults to the global `doc`.
# @return Named character vector of variable names, one per wave.
find_var <- function(name, data = doc) {
  vars <- data %>%
    filter(Label == name) %>%
    pull(Variable)
  set_names(vars, sub("\\_.*", "", vars))
}
Extract characteristics from fitted lmer models.
# Extract summary characteristics from a fitted lmer model.
#
# Fix: the original assigned the result to `specs` as the last
# statement, so the value was only returned invisibly; the data.frame
# is now the function's value directly.
#
# @param model A fitted lmer model with wave-level random effects.
# @return A data.frame with the residual SD (`sc` attribute of
#   VarCorr), the minimum and maximum wave-level coefficient, and the
#   mean wave-level intercept.
get_specs <- function(model){
  # Wave-level coefficients (random + fixed effects per wave).
  dat <- coefficients(model)$wave
  data.frame(
    sd = attr(VarCorr(model), "sc"),
    min = min(dat),
    max = max(dat),
    mean = mean(dat$`(Intercept)`)
  )
}
Get data from fitted lmer objects for descriptives.
# Turn the wave-level coefficients of a fitted lmer model into a
# two-column data frame (wave, value) suitable for plotting.
get_dat <- function(model){
  dat <- coefficients(model)$wave
  dat <- tibble::rownames_to_column(dat, "wave")
  dat <- rename(dat, value = "(Intercept)")
  mutate(dat,
         wave = as.integer(wave),
         value = as.numeric(value))
}
Determine average reliability for measures across waves
# Average omega reliability across waves.
#
# Takes a lavaan CFA fitted with one group per wave, extracts the
# reliability coefficients (semTools), reshapes them into a 5 x waves
# table, and averages the omega row across waves.
get_rel <- function(data, waves=24){
  rel <- reliability(data) %>%
    unlist() %>%
    matrix(5, waves) %>%
    as.data.frame()
  rel <- set_rownames(rel, c("alpha", "omega", "omega2", "omega3", "avevar"))
  summarise(rel, omega = rowMeans(rel["omega", ]))
}
Make graph of variables’ development.
# Plot the development of a variable across waves.
#
# @param model Either a fitted lmer model (`lmer = TRUE`) or a data
#   frame with columns wave, value, and dimension.
# @param title Plot title.
# @param ll,ul Lower and upper y-axis limits.
# @param line Add a loess smoothing line?
# @param points Add individual points?
# @param labels Currently unused; kept for call compatibility.
# @param lmer Is `model` a fitted lmer object?
# @param legend Show the legend inside the plotting area?
# @return A ggplot object.
make_graph <- function(model, title, ll, ul, line = TRUE, points = TRUE, labels = FALSE, lmer=FALSE, legend=FALSE){
  dat <- if (isTRUE(lmer)) get_dat(model) else model
  graph <- ggplot(dat, aes(wave, value, color = dimension))
  if (legend) {
    graph <- graph + theme(
      legend.position = c(0.85, 0.8),
      legend.title = element_blank()
    )
  } else {
    graph <- graph + theme(legend.position = "none")
  }
  graph <- graph +
    theme(axis.title.x = element_blank()) +
    coord_cartesian(ylim = c(ll, ul)) +
    ggtitle(title)
  if (line) graph <- graph + geom_smooth(se = FALSE, method = "loess")
  if (points) graph <- graph + geom_point()
  graph +
    scale_color_manual(values = c("dodgerblue3", "deepskyblue2",
                                  "magenta2", "green2", "red"))
}
# Format regression results for the online material.
#
# @param object Pooled results from multiply imputed data
#   (imputation = TRUE; CI columns already named `2.5 %`/`97.5 %`) or
#   tidied results with conf.low/conf.high columns.
# @param imputation Do the results come from multiply imputed data?
# @return Data frame with term, estimate, 95% CI, and formatted p-value.
print_res <- function(object, imputation = TRUE){
  res <- as.data.frame(object)
  if (isTRUE(imputation)) {
    res <- select(res, term, estimate, `2.5 %`, `97.5 %`, p.value)
  } else {
    # Harmonize the CI column names with the imputation output.
    res <- select(res, term, estimate,
                  `2.5 %` = conf.low, `97.5 %` = conf.high, p.value)
  }
  mutate(res, p.value = td::my_round(p.value, "p"))
}
Get data of lmer objects for specific results.
# get data
# Combine model results for the three well-being outcomes (negative
# affect, positive affect, life satisfaction) into one data frame for
# plotting: stack results, label within/between effects, and keep only
# the predictors that belong to the requested `type` panel.
#
# @param data_aff_neg,data_aff_pos,data_life_sat Either fitted
#   lmerModLmerTest objects or already-pooled result data frames.
# @param variance Keep "within" or "between" effects.
# @param type Predictor panel to keep (e.g. "Activity", "Channels").
# @param analysis Label stored in the Analysis column.
# @return Data frame with dv, iv, Variance, Type, Analysis, estimate,
#   conf.low, conf.high, p.value.
get_dat_res <- function(data_aff_neg, data_aff_pos, data_life_sat, variance = "within", type = "channels", analysis = "Standardized"){
# NOTE(review): `class(x) == "lmerModLmerTest"` would be more robust as
# inherits(x, "lmerModLmerTest"); left unchanged here.
if(isTRUE(class(data_aff_neg) == "lmerModLmerTest")) {
# Fitted models: tidy each with CIs and stack them, one dv per block.
dat_fig_results <-
broom.mixed::tidy(data_aff_neg, conf.int = T) %>%
mutate(dv = "aff_neg") %>%
rbind(
broom.mixed::tidy(data_aff_pos, conf.int = T) %>%
mutate(dv = "aff_pos")) %>%
rbind(
broom.mixed::tidy(data_life_sat, conf.int = T) %>%
mutate(dv = "life_sat"))
} else{
# Pooled data frames: stack and harmonize the CI column names.
dat_fig_results <-
data_aff_neg %>%
mutate(dv = "aff_neg") %>%
rbind(data_aff_pos %>%
mutate(dv = "aff_pos")) %>%
rbind(data_life_sat %>%
mutate(dv = "life_sat")) %>%
rename(conf.low = `2.5 %`, conf.high = `97.5 %`)
}
# Terms ending in "_w" are within-person effects, everything else is
# between; strip the _w/_b suffix to recover the predictor name.
dat_fig_results %<>%
mutate(
Variance = ifelse(grepl(".*_w\\>", .$term), "within", "between"),
iv = gsub("_(w|b)\\>", "", .$term)
) %>%
mutate(
Variance = factor(.$Variance,
levels = c("within", "between")),
dv = factor(.$dv,
levels = c("life_sat", "aff_pos", "aff_neg"),
labels = c("Life satisfaction", "Positive affect", "Negative affect")),
Type = type,
Analysis = analysis
) %>%
select(dv, iv, Variance, Type, Analysis, estimate, conf.low, conf.high, p.value) %>%
filter(Variance == variance)
# select Social Media type of activity
# Each branch below keeps the predictors of one panel and relabels
# them for plotting (factor levels set the display order).
if(type == "Activity") {
dat_fig_results %<>%
filter(iv %in% c("soc_med_read", "soc_med_like_share", "soc_med_post")) %>%
mutate(
iv = factor(.$iv,
levels = c("soc_med_post", "soc_med_like_share", "soc_med_read"),
labels = c("Posting", "Liking & Sharing", "Reading"))
)
} else if(type == "Channels"){
dat_fig_results %<>%
filter(iv %in% c("soc_med_fb", "soc_med_ig", "soc_med_wa", "soc_med_yt", "soc_med_tw")) %>%
mutate(iv = factor(.$iv, levels = c("soc_med_yt", "soc_med_wa", "soc_med_ig", "soc_med_tw", "soc_med_fb"), labels = c("YouTube", "WhatsApp", "Instagram", "Twitter", "Facebook")))
} else if(type == "Controls"){
dat_fig_results %<>%
filter(iv %in% c("male", "health", "loc_cntrl_int_m",
"act_spo", "sat_dem", "corona_pos")) %>%
mutate(iv = factor(.$iv, levels = c("male", "health", "loc_cntrl_int_m",
"act_spo", "sat_dem", "corona_pos"),
labels = c("Male", "Health", "Internal locus of control",
"Sport", "Satisfaction democracy", "Corona positive")))
} else if(type == "Living\nconditions"){
dat_fig_results %<>%
filter(iv %in% c("health", "corona_pos", "work_h", "work_homeoff", "hh_income")) %>%
mutate(iv = factor(.$iv, levels = c("health", "corona_pos", "work_h", "work_homeoff", "hh_income"),
labels = c("Health", "Corona positive", "Work hours", "Homeoffice", "Household income")))
} else if(type == "News\nuse"){
dat_fig_results %<>%
filter(iv %in% c("med_txt_kro", "med_txt_sta", "med_txt_pre", "med_txt_oes", "med_txt_kur", "med_txt_slz", "med_txt_son", "med_vid_orf", "med_vid_pri")) %>%
mutate(iv = factor(.$iv, levels = c("med_txt_kro", "med_txt_sta", "med_txt_pre", "med_txt_oes", "med_txt_kur", "med_txt_slz", "med_txt_son", "med_vid_orf", "med_vid_pri"),
labels = c("Krone", "Der Standard", "Die Presse", "Österreich", "Kurier", "Salzb. Nachrichten", "Other", "ORF", "Private news")))
} else if(type == "Outdoor\nactivities"){
dat_fig_results %<>%
filter(iv %in% c("act_wrk", "act_spo", "act_frn", "act_sho", "act_pet")) %>%
mutate(iv = factor(.$iv, levels = c("act_wrk", "act_spo", "act_frn", "act_sho", "act_pet"),
labels = c("Working", "Sports", "Friends", "Shopping", "With pets")))
} else if(type == "Psycho-\nlogy"){
dat_fig_results %<>%
filter(iv %in% c("risk_prop", "loc_cntrl_int_m", "sat_dem")) %>%
mutate(iv = factor(.$iv, levels = c("risk_prop", "loc_cntrl_int_m", "sat_dem"),
labels = c("Risk taking", "Internal locus of control", "Satisfaction democracy")))
}
return(dat_fig_results)
}
Make graph of effects
# Forest-style plot of estimated effects with 95% CIs per outcome.
#
# @param data Result data frame as produced by get_dat_res().
# @param sesoi Draw smallest-effect-size-of-interest lines: "est" uses
#   outcome-specific bounds (+/- .3 for life satisfaction, +/- .15 for
#   the affect outcomes), "std" uses +/- .1 for all outcomes.
# @param legend Show the Analysis color legend?
# @param facet Facet by predictor panel ("type") or by variance
#   component ("analysis").
# @param title Optional plot title.
# @return A ggplot object.
make_graph_res <- function(data, sesoi = NULL, legend = TRUE, facet = "type", title = NULL){
ggplot(data, aes(x = estimate, y = iv)) +
scale_color_manual(values = c("black", "grey75", "darkcyan", "deepskyblue", "cornflowerblue", "darkcyan", "aquamarine")) +
# zero line as reference
geom_vline(xintercept = 0, lwd = .75, colour = "darkgrey") +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, color = Analysis),
lwd = .75, height = .2, position = position_dodge(-.7)) +
geom_point(aes(color = Analysis), size = 1.5, position = position_dodge(-.7)) +
# SESOI lines for unstandardized estimates (per outcome)
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv=="Life satisfaction"), aes(xintercept=-.3), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv=="Life satisfaction"), aes(xintercept=.3), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv!="Life satisfaction"), aes(xintercept=-.15), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "est")) geom_vline(data=filter(data, dv!="Life satisfaction"), aes(xintercept=.15), colour="darkgrey", linetype = "dashed")} +
# SESOI lines for standardized estimates (same for all outcomes)
{if(isTRUE(sesoi == "std")) geom_vline(aes(xintercept=.1), colour="darkgrey", linetype = "dashed")} +
{if(isTRUE(sesoi == "std")) geom_vline(aes(xintercept=-.1), colour="darkgrey", linetype = "dashed")} +
theme(
axis.title.y = element_blank(),
plot.title = element_text(hjust = .5),
panel.spacing = unit(.9, "lines"),
text = element_text(size = 12),
axis.text.y = element_text(hjust = 1)
) +
{if(!is.null(title)) ggtitle(title)} +
{if(!isTRUE(legend)) theme(legend.position = "none")} +
# guides(colour = guide_legend(reverse=T)) +
{if(facet == "type")
facet_grid(rows = vars(Type),
cols = vars(dv),
scales = "free",
space = "free_y")
} +
{if(facet == "analysis")
facet_grid(rows = vars(Variance),
cols = vars(dv),
scales = "free",
space = "free_y")
}
}
Extract factor scores.
# Extract factor scores on the original scaling.
#
# lavPredict() with type = "ov" returns model-implied values for the
# observed items; averaging the first three item columns yields a
# factor score on the original response scale.
get_fs <- function(model) {
  dat_ov <- lavaan::lavPredict(model, type = "ov", assemble = TRUE)
  rowMeans(dat_ov[1:3])
}
# Variable documentation (labels and wave-specific variable names).
doc <- read_csv2("data/Variable_List_w1-34.csv")
# Raw panel data, waves 1-32.
d_raw <- read_csv("data/W1-32_ACPP_V20220601.csv")
Identify the names of each item for each wave.
# identify variable names of items, using custom function
# Each call returns the wave-specific variable names for one item,
# named by wave prefix (see find_var()).
health <- find_var("Gesundheitszustand") # wasn't collected at wave 21
aff_neg_1 <- find_var("Depressivitaet: einsam")
aff_neg_2 <- find_var("Depressivitaet: aergerlich")
aff_neg_3 <- find_var("Depressivitaet: so niedergeschlagen")
aff_neg_4 <- find_var("Depressivitaet: sehr nervoes")
aff_neg_5 <- find_var("Depressivitaet: aengstlich")
aff_neg_6 <- find_var("Depressivitaet: bedrueckt und traurig")
aff_pos_1 <- find_var("Depressivitaet: ruhig und gelassen")
aff_pos_2 <- find_var("Depressivitaet: gluecklich")
aff_pos_3 <- find_var("Depressivitaet: voller Energie")
act_wrk <- find_var("Zuhause verlassen: Arbeit")
act_spo <- find_var("Zuhause verlassen: Sport")
act_frn <- find_var("Zuhause verlassen: Freunde oder Verwandte treffen")
act_sho <- find_var("Zuhause verlassen: Essen einkaufen")
act_pet <- find_var("Zuhause verlassen: Haustier ausfuehren")
sat_dem <- find_var("Demokratiezufriedenheit: Oesterreich")
work_h <- find_var("Arbeitsstunden: Jetzt pro Woche")
work_homeoff <- find_var("Aenderung berufliche Situation: Home-Office")
hh_income <- find_var("Aktuelles Haushaltseinkommen")
med_txt_kro <- find_var("Mediennutzung: Kronen Zeitung oder www.krone.at")
med_txt_sta <- find_var("Mediennutzung: Der Standard oder derstandard.at")
med_txt_pre <- find_var("Mediennutzung: Die Presse oder diepresse.com")
med_txt_oes <- find_var("Mediennutzung: Oesterreich oder oe24.at")
med_txt_kur <- find_var("Mediennutzung: Kurier oder kurier.at")
med_txt_slz <- find_var("Mediennutzung: Salzburger Nachrichten oder salzburg.at")
med_txt_son <- find_var("Mediennutzung: Sonstige oesterreichische Tageszeitungen")
med_vid_orf <- find_var("Mediennutzung: ORF (Nachrichten)")
med_vid_pri <- find_var("Mediennutzung: Privatfernsehen (Nachrichten)")
soc_med_fb <- find_var("Soziale Medien: Facebook")
soc_med_tw <- find_var("Soziale Medien: Twitter")
soc_med_ig <- find_var("Soziale Medien: Instagram")
soc_med_yt <- find_var("Soziale Medien: Youtube")
soc_med_wa <- find_var("Soziale Medien: WhatsApp")
soc_med_read <- find_var("Soziale Medien Aktivitaet: Postings zu Corona anderer lesen")
soc_med_like_share <- find_var("Soziale Medien Aktivitaet: Postings liken, teilen oder retweeten")
soc_med_post <- find_var("Soziale Medien Aktivitaet: selber Postings zu Corona verfassen")
life_sat <- find_var("Lebenszufriedenheit")
risk_prop <- find_var("Risikobereitschaft")
loc_cntrl_int_1 <- find_var("Psychologie: habe Leben selbst in der Hand")
loc_cntrl_int_2 <- find_var("Psychologie: Belohnung durch Anstrengung")
loc_cntrl_int_3 <- find_var("Psychologie: Fremdbestimmung")
loc_cntrl_int_4 <- find_var("Psychologie: Schicksal")
trst_media <- find_var("Vertrauen: ORF")
trst_police <- find_var("Vertrauen: Polizei")
# NOTE(review): this overwrites the "Vertrauen: ORF" lookup above --
# trst_media ends up holding the Parlament items and the ORF items are
# lost. One of the two assignments likely needs its own name (e.g.
# trst_parl) -- confirm against the analysis plan.
trst_media <- find_var("Vertrauen: Parlament")
trst_hlthsec <- find_var("Vertrauen: Gesundheitswesen")
trst_gov <- find_var("Vertrauen: Bundesregierung")
trst_army <- find_var("Vertrauen: Bundesheer")
# Corona diagnosis combines two items (respondent diagnosis and month).
corona_pos <- c(
find_var("Corona-Diagnose: Respondent"),
find_var("Corona-Diagnose: Monat")
)
# Select and rename the analysis variables from the raw data: stable
# sociodemographics first, then the wave-specific item columns (via the
# name vectors looked up above, expanded with all_of()).
d_wide <- d_raw %>%
select(
id = RESPID,
gender = SD_GENDER,
acc_bal = SD_ACCESS_BALCONY,
acc_gar = SD_ACCESS_GARDEN,
year_birth = SD_BIRTHYEAR,
born_aus = SD_BORN_AUSTRIA,
born_aus_prnts = SD_MIGR_BACKGR,
county = SD_BULA,
edu = SD_EDU,
employment = SD_EMPLSTATUS_FEB2020,
hh_adults = SD_HH_ADULTS,
hh_child18 = SD_HH_CHILD18,
hh_child17 = SD_HH_TEENS,
hh_child14 = SD_HH_CHILD14,
hh_child5 = SD_HH_CHILD5,
hh_child2 = SD_HH_CHILD2,
hh_oldfam = SD_HH_OLDERFAM,
hh_outfam = SD_HH_OUTERFAM,
hh_partner = SD_HH_PARTNER,
# hh_income = SD_HHINCOME_FEB2020,
home_sqm = SD_HOME_SQM,
home_owner = SD_HOMEOWNER,
# work_h = SD_WORKHOURS_FEB2020,
health = all_of(health),
life_sat = all_of(life_sat),
aff_neg_1 = all_of(aff_neg_1),
aff_neg_2 = all_of(aff_neg_2),
aff_neg_3 = all_of(aff_neg_3),
aff_neg_4 = all_of(aff_neg_4),
aff_neg_5 = all_of(aff_neg_5),
aff_neg_6 = all_of(aff_neg_6),
aff_pos_1 = all_of(aff_pos_1),
aff_pos_2 = all_of(aff_pos_2),
aff_pos_3 = all_of(aff_pos_3),
act_wrk = all_of(act_wrk),
act_spo = all_of(act_spo),
act_frn = all_of(act_frn),
act_sho = all_of(act_sho),
act_pet = all_of(act_pet),
sat_dem = all_of(sat_dem),
# NOTE(review): sat_dem is selected twice (duplicate of the previous
# line) -- likely a copy-paste slip; verify it has no side effects.
sat_dem = all_of(sat_dem),
work_h = all_of(work_h),
work_homeoff = all_of(work_homeoff),
hh_income = all_of(hh_income),
med_txt_kro = all_of(med_txt_kro),
med_txt_sta = all_of(med_txt_sta),
med_txt_pre = all_of(med_txt_pre),
med_txt_oes = all_of(med_txt_oes),
med_txt_kur = all_of(med_txt_kur),
med_txt_slz = all_of(med_txt_slz),
med_txt_son = all_of(med_txt_son),
med_vid_orf = all_of(med_vid_orf),
med_vid_pri = all_of(med_vid_pri),
soc_med_fb = all_of(soc_med_fb),
soc_med_tw = all_of(soc_med_tw),
soc_med_ig = all_of(soc_med_ig),
soc_med_yt = all_of(soc_med_yt),
soc_med_wa = all_of(soc_med_wa),
soc_med_like_share = all_of(soc_med_like_share),
soc_med_read = all_of(soc_med_read),
soc_med_post = all_of(soc_med_post),
risk_prop = all_of(risk_prop),
loc_cntrl_int_1 = all_of(loc_cntrl_int_1),
loc_cntrl_int_2 = all_of(loc_cntrl_int_2),
loc_cntrl_int_3 = all_of(loc_cntrl_int_3),
loc_cntrl_int_4 = all_of(loc_cntrl_int_4),
trst_media = all_of(trst_media),
trst_police = all_of(trst_police),
# NOTE(review): trst_media is selected twice; upstream the Parlament
# lookup overwrote the ORF lookup, so both lines refer to the same
# (Parlament) columns -- confirm which trust measures were intended.
trst_media = all_of(trst_media),
trst_hlthsec = all_of(trst_hlthsec),
trst_gov = all_of(trst_gov),
trst_army = all_of(trst_army),
corona_pos = all_of(corona_pos)
)
Make new documentation with selected variables.
# Reduce the variable documentation to the variables actually used and
# export it; the name vectors expand to their wave-specific variables.
doc_selected <- filter(doc, Variable %in% c(
"RESPID",
"SD_GENDER",
"SD_ACCESS_BALCONY",
"SD_ACCESS_GARDEN",
"SD_BIRTHYEAR",
"SD_BORN_AUSTRIA",
"SD_MIGR_BACKGR",
"SD_BULA",
"SD_EDU",
"SD_EMPLSTATUS_FEB2020",
"SD_HH_ADULTS",
"SD_HH_CHILD18",
"SD_HH_TEENS",
"SD_HH_CHILD14",
"SD_HH_CHILD5",
"SD_HH_CHILD2",
"SD_HH_OLDERFAM",
"SD_HH_OUTERFAM",
"SD_HH_PARTNER",
"SD_HHINCOME_FEB2020",
"SD_HOME_SQM",
"SD_HOMEOWNER",
"SD_WORKHOURS_FEB2020",
health,
life_sat,
aff_neg_1,
aff_neg_2,
aff_neg_3,
aff_neg_4,
aff_neg_5,
aff_neg_6,
aff_pos_1,
aff_pos_2,
aff_pos_3,
act_wrk,
act_spo,
act_frn,
act_sho,
act_pet,
sat_dem,
work_h,
work_homeoff,
hh_income,
med_txt_kro,
med_txt_sta,
med_txt_pre,
med_txt_oes,
med_txt_kur,
med_txt_slz,
med_txt_son,
med_vid_orf,
med_vid_pri,
soc_med_fb,
soc_med_tw,
soc_med_ig,
soc_med_yt,
soc_med_wa,
soc_med_like_share,
soc_med_post,
soc_med_read,
risk_prop,
loc_cntrl_int_1,
loc_cntrl_int_2,
loc_cntrl_int_3,
loc_cntrl_int_4,
trst_media,
trst_police,
# NOTE(review): trst_media appears twice in this vector (harmless for
# %in%, but it mirrors the overwritten ORF/Parlament lookup upstream).
trst_media,
trst_hlthsec,
trst_gov,
trst_army,
corona_pos
))
write.csv(doc_selected, "data/documentation.csv")
# Replace the questionnaire's missing-value codes (88/99) with NA and
# derive sociodemographic variables.
# Fix: across() replaces the deprecated mutate_at()/funs() (deprecated
# since dplyr 0.8; the file already uses across() elsewhere). Also
# removes a stray trailing comma in the recode() call.
d_wide %<>%
  mutate(across(everything(), ~ na_if(., 88))) %>%
  mutate(across(everything(), ~ na_if(., 99))) %>%
  mutate(
    male = 2 - .$gender,            # 1/2 coding -> 1 = male, 0 = female
    age = 2021 - .$year_birth,
    res_vienna = recode(.$county, `8` = 1L, .default = 0L),
    born_aus = 2 - .$born_aus,      # 1 = born in Austria
    home_owner = 2 - .$home_owner,  # 1 = home owner
    employment_fac = factor(.$employment,
                            labels = c("Unemployed",
                                       "Industrie",
                                       "Public service",
                                       "Self-employed",
                                       "Retired",
                                       "Housekeeping",
                                       "Student",
                                       "Incapacitated",
                                       "Parental Leave"),
                            levels = c(4, 1:3, 5:8, 10) # make unemployment reference cat
    ),
    edu_fac = factor(.$edu,
                     labels = c("No degree",
                                "Middle school",
                                "Vocational school",
                                "Technical school",
                                "High school",
                                "Applied high school",
                                "State college",
                                "Bachelor",
                                "Master",
                                "PhD")
    ))
# Reshape from wide (one row per person) to long (one row per
# person-wave). Repeated-measure columns are named like "health...W1";
# split off the "...W<n>" suffix as the wave identifier.
d_long <-
d_wide %>%
pivot_longer(
cols = health...W1:corona_pos...W32,
names_to = "item",
values_to = "value"
) %>%
separate(item, c("item", "wave"), sep = "\\.\\.\\.", extra = "merge") %>%
pivot_wider(names_from = "item", values_from = "value")
# recode such that higher values imply more strength / align with wording
# Fix: across() replaces the deprecated mutate_at()/funs() calls
# (deprecated since dplyr 0.8; the file already uses across() elsewhere).
d_long %<>%
  mutate(across(c(med_txt_kro:med_vid_pri, health, sat_dem,
                  soc_med_fb:soc_med_post),
                ~ recode(., `1` = 5L, `2` = 4L, `3` = 3L, `4` = 2L, `5` = 1L))) %>%
  mutate(across(loc_cntrl_int_1:loc_cntrl_int_4,
                ~ recode(., `1` = 4L, `2` = 3L, `3` = 2L, `4` = 1L))) %>%
  mutate(across(born_aus_prnts,
                ~ recode(., `3` = 0L, `2` = 2L, `1` = 1L)))
# recode inverted items
d_long %<>%
  mutate(across(c(loc_cntrl_int_3, loc_cntrl_int_4),
                ~ recode(., `1` = 4L, `2` = 3L, `3` = 2L, `4` = 1L)))
# recode other: numeric wave index (strip the "W" prefix), integer id,
# then sort by respondent and wave.
d_long %<>%
  mutate(
    wave = as.integer(gsub("W", "", wave)),
    id = as.integer(id)
  ) %>%
  arrange(id, wave)
Next we impute the data. Note that the imputation code below is deactivated by default; to save time, the imputed data are instead loaded from file further down.
Determine amount of missingness per respondent per wave.
# NOTE(review): "trst_media" appears twice in this vector (mirroring the
# overwritten ORF/Parlament lookup upstream); select(all_of(.)) tolerates
# the duplicate, but the intended trust measures should be confirmed.
vars_used <- c("life_sat", "aff_pos_1", "aff_pos_2", "aff_pos_3", "aff_neg_1", "aff_neg_2", "aff_neg_3", "aff_neg_4", "aff_neg_5", "aff_neg_6", "soc_med_read", "soc_med_like_share", "soc_med_post", "soc_med_fb", "soc_med_ig", "soc_med_wa", "soc_med_yt", "soc_med_tw", "age", "male", "born_aus", "born_aus_prnts", "edu_fac", "employment_fac", "health", "res_vienna", "acc_bal", "acc_gar", "home_sqm", "med_txt_kro", "med_txt_sta", "med_txt_pre", "med_txt_oes", "med_txt_kur", "med_txt_slz", "med_txt_son", "med_vid_orf", "med_vid_pri", "risk_prop", "loc_cntrl_int_1", "loc_cntrl_int_2", "loc_cntrl_int_3", "loc_cntrl_int_4", "act_wrk", "act_spo", "act_frn", "act_sho", "act_pet", "sat_dem", "trst_media", "trst_police", "trst_media", "trst_hlthsec", "trst_gov", "trst_army", "corona_pos") # only include vars measured at _all_ points
Filter respondents with more than 50% missing data – only needed for analyses as originally preregistered (see additional analyses).
# Share of missing values per respondent-wave across the variables
# measured at every wave, then keep rows with less than 50% missing.
# Fix: the original computed select() twice and placed the division
# inside rowSums() -- rowSums(is.na(x) / ncol(x)) -- which is
# numerically identical to the clearer rowMeans(is.na(x)).
d_long_50 <-
  d_long %>%
  mutate(na_perc = rowMeans(is.na(select(., all_of(vars_used))))) %>%
  filter(na_perc < .5)
write_csv(d_long_50, "data/data_50.csv")
Exclude social media use data, because they were measured only on selected waves.
# Social media items were measured only in selected waves; never flag
# them for imputation.
vars_excl <- c("soc_med_read", "soc_med_like_share", "soc_med_post",
               "soc_med_fb", "soc_med_ig", "soc_med_wa", "soc_med_yt",
               "soc_med_tw")
# Logical mask of cells to impute (TRUE where a value is missing).
incl_ma <- mutate(d_long, across(everything(), is.na))
incl_ma[vars_excl] <- FALSE
# Same mask for the data set excluding respondents with > 50% missingness.
incl_ma_50 <- mutate(d_long_50, across(everything(), is.na))
incl_ma_50[vars_excl] <- FALSE
Imputation for the analyses as originally preregistered (see additional analyses), hence including only participants who provided more than 50% of all data.
# Single imputation for the preregistered >50%-data sample; fixed seed
# for reproducibility.
d_long_50_imp <- mice(d_long_50,
method = "pmm", # use predictive mean matching
m = 1, maxit = 30, # only 1 imputation
# where = incl_ma_50,
seed = 180719, print = FALSE)
# Extract the completed (imputed) data set and export it.
d_long_50_imp <- mice::complete(d_long_50_imp)
write_csv(d_long_50_imp, "data/data_50_imputed.csv")
Because CFAs aren’t evaluated with multiple imputation, impute single data-set. Missing data was imputed for everyone.
# Single imputation for the full sample (used for the CFAs).
d_long_100_imp <- mice(d_long,
method = "pmm", # use predictive mean matching
m = 1, maxit = 30, # only 1 imputation
# where = incl_ma,
seed = 180719, print = FALSE)
# Extract the completed (imputed) data set and export it.
d_long_100_imp <- mice::complete(d_long_100_imp)
write_csv(d_long_100_imp, "data/data_100_imputed.csv")
Now, multiple imputation of data-sets (5 data sets, 5 iterations), used for the final analyses.
# impute missing data with multiple imputation
d_long_100_mim_mice <- mice(d_long,
method = "pmm", # use predictive mean matching
m = 5, maxit = 5, # 5 imputations, 5 iterations
# where = incl_ma,
seed = 180719, print = FALSE)
d_long_100_mim <- mice::complete(d_long_100_mim_mice, action = "long", include = TRUE)
write_csv(d_long_100_mim, "data/data_100_mim.csv")
To increase speed, you can also load the imputed data from the directory
# Reload previously imputed data to skip the costly imputation above.
d_long_50_imp <- read_csv("data/data_50_imputed.csv") # data where people with >50% missing data were filtered
d_long_100_imp <- read_csv("data/data_100_imputed.csv") # data where all people were included
d_long_100_mim <- read_csv("data/data_100_mim.csv") # data where all people were included
First, create means for scales. Then, create mean values for each person (between), and unique changes per wave (within).
# Scale means per row, then person means (between, suffix _b) and
# wave-level deviations from the person mean (within, suffix _w).
d_long_100_mim <-
  d_long_100_mim %>%
  mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
         aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
         loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
# Fix: across() replaces the deprecated mutate_at()/funs(); the list
# names reproduce the _b/_w suffixes via the default "{.col}_{.fn}".
d_long_100_mim %<>%
  group_by(id) %>%
  mutate(across(health:loc_cntrl_int_m,
                list(b = ~ mean(., na.rm = TRUE),
                     w = ~ . - mean(., na.rm = TRUE)))) %>%
  ungroup()
# Convert back to a mids object for pooled analyses.
d_long_100_mim_mice <- as.mids(d_long_100_mim)
# Same for the single-imputation data set: scale means, then between
# (_b) and within (_w) components per person.
d_long_100_imp <-
  d_long_100_imp %>%
  mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
         aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
         loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
# Fix: across() replaces the deprecated mutate_at()/funs().
d_long_100_imp %<>%
  group_by(id) %>%
  mutate(across(c(health:corona_pos, loc_cntrl_int_m),
                list(b = ~ mean(., na.rm = TRUE),
                     w = ~ . - mean(., na.rm = TRUE)))) %>%
  ungroup()
And because later we want to rerun the results as originally preregistered, we’ll now do the same for analyses without filtered data.
# Same for the preregistered (>50% data) imputed sample.
d_long_50_imp <-
  d_long_50_imp %>%
  mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
         aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
         loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
# Fix: across() replaces the deprecated mutate_at()/funs().
d_long_50_imp %<>%
  group_by(id) %>%
  mutate(across(c(health:corona_pos, loc_cntrl_int_m),
                list(b = ~ mean(., na.rm = TRUE),
                     w = ~ . - mean(., na.rm = TRUE)))) %>%
  ungroup()
And because later we want to rerun the results without imputed data, now the same for data without imputation.
# Same for the non-imputed (>50% data) sample.
d_long_50 %<>%
  mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
         aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
         loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
# Fix: across() replaces the deprecated mutate_at()/funs().
d_long_50 %<>%
  group_by(id) %>%
  mutate(across(c(health:corona_pos, loc_cntrl_int_m),
                list(b = ~ mean(., na.rm = TRUE),
                     w = ~ . - mean(., na.rm = TRUE)))) %>%
  ungroup()
And now the same for the standardized data set.
# Standardize continuous variables within each imputation (z-scores),
# then rebuild scale means and between/within components, and convert
# back to a mids object.
# Fix: across() replaces the superseded mutate_at() and the deprecated
# funs(); identifiers and categorical variables are excluded from
# standardization as before.
d_long_100_mim_std <-
  d_long_100_mim %>%
  group_by(.imp) %>%
  mutate(across(c(gender:corona_pos, -id, -wave, -gender, -male,
                  -born_aus, -born_aus_prnts, -edu_fac, -employment_fac,
                  -res_vienna, -acc_bal, -acc_gar),
                ~ c(scale(.)))) %>%
  ungroup()
d_long_100_mim_std %<>%
  mutate(aff_pos_m = rowMeans(select(., aff_pos_1 : aff_pos_3)),
         aff_neg_m = rowMeans(select(., aff_neg_1 : aff_neg_6)),
         loc_cntrl_int_m = rowMeans(select(., loc_cntrl_int_1 : loc_cntrl_int_4)))
d_long_100_mim_std %<>%
  group_by(id) %>%
  mutate(across(c(health:corona_pos, loc_cntrl_int_m),
                list(b = ~ mean(., na.rm = TRUE),
                     w = ~ . - mean(., na.rm = TRUE)))) %>%
  ungroup()
d_long_100_mim_mice_std <- as.mids(d_long_100_mim_std)
# Save the full workspace so later scripts can resume from here.
save.image("data/workspace_1.RData")