ggplot2
Communicating your data
Humboldt-Universität zu Berlin
2024-06-03
rds
or as figures
# load data
# Read the pilot data, turn every character column into a factor, and keep
# only the critical trials from the retained participants.
df_lifetime <- readr::read_csv(
  here::here("data/tidy_data_lifetime_pilot.csv"),
  # for special characters
  locale = readr::locale(encoding = "latin1")
) |>
  # all character variables as factor
  # (across() + where() replaces the superseded mutate_if())
  mutate(across(where(is.character), as.factor)) |>
  filter(
    type == "critical", # only critical trials
    px != "px3" # this participant had lots of 0's for some reason
  )
# compute summary
# Per-cell summary of first-fixation times (ff) in the verb region:
# one row per condition x lifetime x tense combination.
summary_ff <- df_lifetime |>
  filter(region == "verb") |>
  group_by(condition, lifetime, tense) |>
  summarise(
    # count only non-missing observations so that N matches the values that
    # actually enter mean() and sd() below (both drop NAs)
    N = sum(!is.na(ff)),
    mean.ff = mean(ff, na.rm = TRUE),
    sd = sd(ff, na.rm = TRUE)
  ) |>
  # compute standard error, confidence intervals, and lower/upper ci bounds
  mutate(
    se = sd / sqrt(N),
    # half-width of the two-sided 95% CI based on the t distribution
    ci = qt(1 - (0.05 / 2), N - 1) * se,
    lower.ci = mean.ff - ci,
    upper.ci = mean.ff + ci
  ) |>
  ungroup()
ggplot2
ggplot2
is part of the tidyverse (like dplyr
)
alpha =
takes a value between 0 and 1
theme()
to customise font sizes, legend placement, etc.
theme_bw()
and theme_minimal()
geom_histogram()
with geom_density()
ff
above 0
aes(fill = )
facet_grid()
facet_grid(x~y)
verb-1
should be before verb
condition
, but:
aes()
accept
as a factor!
dodge
) with:
tense
lifetime
on the x-axis
accept
reg_in
at the verb1
region
reg_out
for all sentence regions
facet_grid()
distinct()
verb to have a single observation per participant and per trial)
stat_summary()
geom_point()
, geom_errorbar()
, and geom_line()
condition | lifetime | tense | N | mean.ff | sd | se | ci | lower.ci | upper.ci |
---|---|---|---|---|---|---|---|---|---|
deadPP | dead | PP | 140 | 198.9 | 57.9 | 4.9 | 9.7 | 189.2 | 208.6 |
deadSF | dead | SF | 139 | 194.6 | 67.9 | 5.8 | 11.4 | 183.2 | 205.9 |
livingPP | living | PP | 140 | 194.2 | 77.3 | 6.5 | 12.9 | 181.3 | 207.1 |
livingSF | living | SF | 140 | 186.0 | 57.6 | 4.9 | 9.6 | 176.4 | 195.6 |
library(patchwork)
# Two interaction plots of first-fixation times at the verb, side by side.
# The first panel computes means and error bars from the trial-level data via
# stat_summary(); the second draws the values already stored in summary_ff
# with plain geoms. Each panel sits in its own parentheses so the `+` that
# joins them into a patchwork is visible at a glance.
(
  ggplot(
    filter(df_lifetime, region == "verb"),
    aes(x = lifetime, y = ff, shape = tense, group = tense, color = tense)
  ) +
    labs(
      title = "Interaction plot (`stat_summary()`)",
      x = "Lifetime",
      y = "First fix (ms)",
      shape = "Tense", group = "Tense", color = "Tense", linetype = "Tense"
    ) +
    stat_summary(
      fun = "mean", geom = "point",
      size = 3, position = position_dodge(0.2)
    ) +
    stat_summary(
      mapping = aes(linetype = tense),
      fun = "mean", geom = "line",
      position = position_dodge(0.2)
    ) +
    stat_summary(
      fun.data = "mean_cl_normal", geom = "errorbar",
      width = .2, position = position_dodge(0.2)
    ) +
    theme_bw()
) +
  (
    ggplot(
      summary_ff,
      aes(x = lifetime, y = mean.ff, shape = tense, group = tense, color = tense)
    ) +
      labs(
        title = "Interaction plot (geoms)",
        x = "Lifetime",
        y = "First fix (ms)",
        shape = "Tense", group = "Tense", color = "Tense", linetype = "Tense"
      ) +
      geom_point(size = 3, position = position_dodge(0.2)) +
      geom_line(aes(linetype = tense), position = position_dodge(0.2)) +
      geom_errorbar(
        aes(ymin = mean.ff - ci, ymax = mean.ff + ci),
        width = .2,
        position = position_dodge(0.2)
      ) +
      theme_bw()
  ) +
  # tag the panels A/B and share a single legend
  plot_annotation(tag_levels = "A") +
  plot_layout(guides = "collect") &
  # `&` applies the theme tweak to every panel of the patchwork
  theme(legend.position = "bottom")
ggsave()
ggsave()
function is useful for saving ggplot objects
fig_
(short for figure)
figures
# Interaction plot of the pre-computed verb-region means with 95% CI error
# bars, stored as an object so it can be saved afterwards.
fig_lifetime_ff <- ggplot(
  summary_ff,
  aes(x = lifetime, y = mean.ff, shape = tense, group = tense, color = tense)
) +
  labs(
    title = "Mean first-fixation times (verb region) with 95% CIs",
    x = "Lifetime",
    y = "First fix (ms)",
    shape = "Tense", group = "Tense", color = "Tense", linetype = "Tense"
  ) +
  # every layer gets the same dodge so points, lines and bars stay aligned
  geom_point(size = 3, position = position_dodge(0.2)) +
  geom_line(aes(linetype = tense), position = position_dodge(0.2)) +
  geom_errorbar(
    aes(ymin = mean.ff - ci, ymax = mean.ff + ci),
    width = .2,
    position = position_dodge(0.2)
  ) +
  theme_bw()
ggsave()
has lots of arguments to control width, height, resolution, etc.
?ggsave
in the Console
saveRDS()
readRDS()
Naming files and saving code
You’ll notice I saved the PNG and RDS files using the same name that I used for the figure in my script. This is an important point: I want to be able to trace my figures back to the code so I can easily track them. It also helps encourage informative object and file names.
Of course, saving the code used to save the files in our scripts is also useful because we can easily adjust the saved files (e.g., change figure width or height).
Data Visualisation