# load tidyverse
pacman::p_load(tidyverse, here)

# load data
df_lifetime <- readr::read_csv(
  here::here("data/tidy_data_lifetime_pilot.csv"),
  # for special characters
  locale = readr::locale(encoding = "latin1")
) |>
  # all character variables as factor
  # (across() + where() replaces the superseded mutate_if())
  mutate(across(where(is.character), as.factor)) |>
  filter(
    type == "critical", # only critical trials
    px != "px3" # this participant had lots of 0's for some reason
  )
Data dictionary
we haven’t really discussed what exactly our data are, though
data dictionaries (a.k.a. code books)
describe each variable in a dataset
ideally also provide information regarding possible values
Variable names
we can list the names of all variables in a dataset using names()
but we need to be able to put these names into a single column
where each row contains one variable name
and other columns contain information like description and data class
Names to rows
# From day 2 of Lisa DeBruine's [Coding Club: Creating an R Package](https://psyteachr.github.io/intro-r-pkgs/02-data.html#documentation)
# build a character vector holding one empty string per column of df_lifetime,
# with each element named after its column
coldesc <- setNames(character(ncol(df_lifetime)), names(df_lifetime))

# print the R code that would recreate coldesc (ready to copy and fill in)
dput(coldesc)
then copy the printed code and fill in the empty quotation marks with a description of each variable
# Data dictionary for df_lifetime: a one-row tibble with one column per
# variable, each holding that variable's description (and class/possible
# values where given).
# Fixes relative to the previous version: removed a stray line break inside
# the KeyPress description, corrected misspellings, and listed the lifetime
# level as "living" to match the levels shown in the summary table.
dict_lifetime <- tibble(
  px = "participant ID (factor)",
  trial = "trial number (ordered factor)",
  region = "sentence region (ordered factor)",
  region_n = "numerical representation of sentence region (ordered factor)",
  region_text = "text presented in the region (string)",
  eye = "which eye was tracking: right or left (binomial)",
  ff = "first-fixation times in milliseconds (continuous, values can be 0<)",
  fp = "first-pass reading times in milliseconds (numeric, values can be 0<)",
  rpd = "regression-path duration in milliseconds (numeric, values can be 0<)",
  tt = "total reading time in milliseconds (numeric, values can be 0<)",
  fix_count = "number of total fixations in the region (count)",
  reg_in = "whether a regression was made into the region (binomial: 0 = no, 1 = yes)",
  reg_in_count = "number of fixations into the region (count)",
  reg_out = "whether a regression was made out of the region (binomial: 0 = no, 1 = yes)",
  reg_out_count = "number of fixations out of the region (count)",
  rt = "reaction time from critical sentence presentation to button press (continuous, values can be 0<)",
  bio = "lifetime biography context sentence (string)",
  critical = "critical sentence (string)",
  gender = "gender of stimulus subject (binomial: male, female)",
  item_id = "item identification number (critical items: 1-80)",
  list = "experimental list version: base list version (1-4) and whether the yes-button was coded as 4 or 5 (factor: 14, 15, 24, 25, 34, 35, 44, 45)",
  match = "whether the referent-lifetime was congruent with tense",
  condition = "condition: lifetime (dead, living) + tense (PP, SF) (factor)",
  name = "name of stimulus subject (string)",
  lifetime = "lifetime status of stimulus subject at time of experiment (binomial: dead, living)",
  tense = "tense used in critical sentence (binomial: PP = present perfect, SF = simple future)",
  type = "sentence type (factor with one level: critical)",
  yes_press = "corresponding coding for the yes-button on Cedrus response box (4 = left button, 5 = right button)",
  KeyPress = "key that was pressed (4 = left button, 5 = right button)",
  accept = "whether the item was accepted, i.e., whether KeyPress equalled yes_press",
  accuracy = "whether the acceptance was accurate (reject for a mismatch, accept for a match)",
  px_accuracy = "participant's overall accuracy score"
)
# compute summary: first-fixation duration (ff) at the verb region,
# one row per condition / lifetime / tense combination
summary_ff <- df_lifetime |>
  filter(region == "verb") |>
  group_by(condition, lifetime, tense) |>
  summarise(
    N = n(), # counts every row in the group, including any with missing ff
    mean.ff = mean(ff, na.rm = TRUE),
    sd = sd(ff, na.rm = TRUE),
    # return an ungrouped tibble instead of one still grouped by
    # condition and lifetime
    .groups = "drop"
  ) |>
  # compute standard error, confidence intervals, and lower/upper ci bounds
  mutate(
    se = sd / sqrt(N),
    # half-width of the 95% confidence interval (t distribution, N - 1 df)
    ci = qt(1 - (0.05 / 2), N - 1) * se,
    lower.ci = mean.ff - ci,
    upper.ci = mean.ff + ci
  )
and print the output with the kable() function from the knitr package
for extra customisation you can also use the kableExtra package (e.g., with the kable_styling() function)
# install.packages("knitr") # if not yet installed
# print summary_ff as a formatted table, rounded to one decimal place
knitr::kable(
  summary_ff,
  digits = 1,
  caption = "Table with summary statistics for first-fixation duration at the verb region"
)
Table with summary statistics for first-fixation duration at the verb region
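| condition | lifetime | tense | N | mean.ff | sd | se | ci | lower.ci | upper.ci |
|:----------|:---------|:------|----:|--------:|-----:|----:|-----:|---------:|---------:|
| deadPP | dead | PP | 140 | 198.9 | 57.9 | 4.9 | 9.7 | 189.2 | 208.6 |
| deadSF | dead | SF | 139 | 194.6 | 67.9 | 5.8 | 11.4 | 183.2 | 205.9 |
| livingPP | living | PP | 140 | 194.2 | 77.3 | 6.5 | 12.9 | 181.3 | 207.1 |
| livingSF | living | SF | 140 | 186.0 | 57.6 | 4.9 | 9.6 | 176.4 | 195.6 |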
Saving summary tables
we could also save this table using write_csv()
but it’s relatively simple to re-produce, so I wouldn’t bother
instead, when writing up my results I would load in the data and print the summary directly
sometimes summary tables are more code-intensive
in this case I would save the summary as a csv, and simply load and print it when writing in R markdown or Quarto
Additional packages
There are many other packages for including tables that are publication-ready. Some that I would suggest you look into:
kableExtra, which provides additional formatting options for knitr::kable() tables via the kable_styling() function and others
tables must first pass through knitr::kable(), e.g., my_table |> knitr::kable() |> kableExtra::kable_styling()