03-eyetracking-data.Rmd

---
title: "Plot eyetracking data"
author: "Tristan Mahr"
date: "`r Sys.Date()`"
output: 
  github_document:
    toc: true
    toc_depth: 4
---

```{r setup, include = FALSE, message = FALSE, warning = FALSE, results = 'hide'}
library("knitr")
# Evaluate every chunk from the RStudio project root and widen printed output.
wd <- rprojroot::find_rstudio_root_file()
opts_knit$set(root.dir = wd)
options(width = 100)

# Chunk defaults for the rest of the document.
opts_chunk$set(
  # Cache and figure files for this report share the "03-" prefix
  cache.path = "assets/cache/03-",
  fig.path = "assets/figure/03-",
  # Printed output: merge code and results, prefix results with "#>", and
  # keep messages and warnings out of the rendered document
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE,
  # Figures: 8 in. wide, height derived from the aspect ratio
  fig.width = 8,
  fig.asp = 0.618,
  dpi = 300,
  out.width = "80%"
)
```

## Set up 

```{r, results = "hide"}
library(dplyr)
library(littlelisteners)
library(ggplot2)
library(hrbrthemes)

# Resolve the data files from the RStudio project root so the paths do not
# depend on the current working directory
wd <- rprojroot::find_rstudio_root_file()

# Child-level variables (data/scores.csv)
df_child_vars <- file.path(wd, "data", "scores.csv") %>%
  readr::read_csv()

# Screened eyetracking data (data/screened.csv.gz)
df_looks <- file.path(wd, "data", "screened.csv.gz") %>%
  readr::read_csv()
```


## Head counts and other stats

Boys/girls by native dialect.

```{r}
# Number of rows of `df_child_vars` in each Dialect x Gender cell
count(df_child_vars, Dialect, Gender) %>%
  ungroup() %>%
  rename(`N Children` = n) %>%
  knitr::kable()
```

Maternal education by native dialect.

```{r}
# Counts per Dialect x Maternal_Education_Group cell, reused by the first two
# tables below
n_by_dialect_medu <- df_child_vars %>%
  count(Dialect, Maternal_Education_Group) %>%
  ungroup()

# Long format: one row per cell
n_by_dialect_medu %>%
  rename(`N Children` = n) %>%
  knitr::kable()

# Wide format: one column of counts per dialect
n_by_dialect_medu %>%
  tidyr::spread(Dialect, n) %>%
  knitr::kable()

# Same layout, broken down further by Maternal_Education. Combinations that
# do not occur for a dialect are shown as 0 instead of NA.
df_child_vars %>%
  count(Dialect, Maternal_Education_Group, Maternal_Education) %>%
  ungroup() %>%
  tidyr::spread(Dialect, n) %>%
  tidyr::replace_na(list(AAE = 0, MAE = 0)) %>%
  knitr::kable()
```

Child-level measures by dialect group, by maternal education group, and by both.

```{r}
# Summary helpers that ignore missing values: mean, standard deviation, and
# the number of non-missing observations.
narm_mean <- function(...) mean(..., na.rm = TRUE)
narm_sd <- function(...) sd(..., na.rm = TRUE)
narm_n <- function(...) sum(!is.na(...))

# Age and vocabulary scores by dialect group
df_child_vars %>%
  group_by(Dialect) %>%
  summarise(
    `N Children` = n(),
    # Named explicitly so the table header is not the raw expression
    # `sum(Female)`. Assumes `Female` is a 0/1 or logical indicator -- confirm.
    `N Female` = sum(Female),
    `Mean Age (months)` = narm_mean(EVT_Age),
    `SD Age (months)` = narm_sd(EVT_Age),
    `N EVT` = narm_n(EVT_Raw),
    `Mean EVT Standard` = narm_mean(EVT_Standard),
    `SD EVT Standard` = narm_sd(EVT_Standard),
    `N PPVT` = narm_n(PPVT_Raw),
    `Mean PPVT Standard` = narm_mean(PPVT_Standard),
    `SD PPVT Standard` = narm_sd(PPVT_Standard)) %>%
  knitr::kable(digits = 1)

# Same measures by maternal education group
df_child_vars %>%
  group_by(Maternal_Education_Group) %>%
  summarise(
    `N Children` = n(),
    `Mean Age (months)` = narm_mean(EVT_Age),
    `SD Age (months)` = narm_sd(EVT_Age),
    `N EVT` = narm_n(EVT_Raw),
    `Mean EVT Standard` = narm_mean(EVT_Standard),
    `SD EVT Standard` = narm_sd(EVT_Standard),
    `N PPVT` = narm_n(PPVT_Raw),
    `Mean PPVT Standard` = narm_mean(PPVT_Standard),
    `SD PPVT Standard` = narm_sd(PPVT_Standard)) %>%
  knitr::kable(digits = 1)

# Same measures within each Dialect x Maternal_Education_Group cell
df_child_vars %>%
  group_by(Dialect, Maternal_Education_Group) %>%
  summarise(
    `N Children` = n(),
    `Mean Age (months)` = narm_mean(EVT_Age),
    `SD Age (months)` = narm_sd(EVT_Age),
    `N EVT` = narm_n(EVT_Raw),
    `Mean EVT Standard` = narm_mean(EVT_Standard),
    `SD EVT Standard` = narm_sd(EVT_Standard),
    `N PPVT` = narm_n(PPVT_Raw),
    `Mean PPVT Standard` = narm_mean(PPVT_Standard),
    `SD PPVT Standard` = narm_sd(PPVT_Standard)) %>%
  # summarise() only peels off the last grouping variable; drop the rest
  ungroup() %>%
  knitr::kable(digits = 1)
```

Home dialect predicts maternal ed. This is a recruitment artifact.

```{r}
# Cross-tabulate dialect and maternal education. The education group becomes
# a factor so rows sort Low < Mid < High rather than alphabetically.
df_child_vars %>%
  count(Dialect, Maternal_Education_Group) %>%
  transmute(
    Dialect,
    `Mat. ed.` = factor(
      Maternal_Education_Group,
      levels = c("Low", "Mid", "High")
    ),
    n
  ) %>%
  arrange(Dialect, `Mat. ed.`) %>%
  knitr::kable()
```

Maternal ed., in turn, predicts vocabulary tertiles (EVT standard scores split into thirds).

```{r}
# Cross-tabulate maternal education and EVT_Standard split into three
# equal-sized rank groups (1 = lowest scores). Children without an
# EVT_Standard score fall into an NA vocabulary level.
df_child_vars %>%
  count(
    Maternal_Education_Group,
    `Vocab. level` = ntile(EVT_Standard, 3)
  ) %>%
  transmute(
    `Mat. ed.` = factor(
      Maternal_Education_Group,
      levels = c("Low", "Mid", "High")
    ),
    `Vocab. level`,
    n
  ) %>%
  arrange(`Mat. ed.`, `Vocab. level`) %>%
  knitr::kable()
```


## Aggregate looking data

Create the response definition used by the `aggregate_looks2()` function.

```{r}
# Response definition for the looking data: "Target" is the response of
# interest and the three foil images are the competing responses.
# NOTE(review): "tracked" presumably marks gaze samples that were tracked but
# did not land on any image, and NA marks lost samples -- confirm against the
# coding of GazeByImageAOI.
resp_def <- create_response_def(
  primary = "Target",
  others = c(
    "PhonologicalFoil",
    "SemanticFoil",
    "Unrelated"
  ),
  elsewhere = "tracked",
  missing = NA
)
```

Assign the individual frames into 50-ms bins.

```{r}
df_bin_times <- df_looks %>%
  # Attach the child-level variables (joins on the columns the tables share)
  left_join(df_child_vars) %>%
  # Collapse the block dialect to AAE vs. everything else (labelled MAE)
  mutate(BlockDialect = ifelse(BlockDialect == "AAE", "AAE", "MAE")) %>%
  # Does the dialect of the block match the child's own dialect?
  mutate(HearsNativeDialect = Dialect == BlockDialect) %>%
  # Keep the analysis window
  filter(-520 <= Time, Time <= 1970) %>%
  # Number the frames into bins of 3 within each Basename x TrialNo
  assign_bins(3, Time, Basename, TrialNo) %>%
  # Label each bin with the median frame time, rounded to the nearest 10 ms
  group_by(.bin) %>%
  mutate(BinTime = round(median(Time), -1)) %>%
  ungroup()
```


Aggregate the looking data at each time bin, across trials, within
Dialect x BlockDialect within child.

```{r}
df_looks <- df_bin_times %>%
  # Count looks to each response type within every combination of the
  # grouping variables listed after `resp_var`
  aggregate_looks2(
    resp_def = resp_def,
    resp_var = GazeByImageAOI,
    Maternal_Education_Group, Study, Dialect, BlockDialect,
    HearsNativeDialect, ResearchID, BinTime
  ) %>%
  rename(Time = BinTime) %>%
  # Denominator: looks to the target plus the `Others` count
  mutate(Looks_Images = Target + Others) %>%
  # Share of those looks going to each image type
  mutate(
    Prop_Target = Target / Looks_Images,
    Prop_PhonologicalFoil = PhonologicalFoil / Looks_Images,
    Prop_SemanticFoil = SemanticFoil / Looks_Images,
    Prop_Unrelated = Unrelated / Looks_Images
  )
```

Add some grouping variables to the data.

```{r}
# Append the number of observations to each level label of a factor, so that
# a level "a" observed 3 times is relabelled "a (3)".
#
# f: a factor.
# Returns `f` with its levels relabelled; the values themselves are unchanged.
fct_add_counts <- function(f) {
  tally <- forcats::fct_count(f)
  labelled <- sprintf("%s (%s)", tally[["f"]], tally[["n"]])
  # Lookup table from the old level name to the new "level (n)" label
  lookup <- setNames(labelled, tally[["f"]])
  forcats::fct_relabel(f, function(level) lookup[level])
}

tertile_labels <- c("Lower third", "Middle third", "Upper third")

# Child-level grouping factors for faceting. The counts added to the labels
# are numbers of rows in `df_child_vars`, computed before the join below.
df_evt <- df_child_vars %>%
  mutate(
    Vocab3tile = ntile(EVT_Standard, 3),
    `Exp. vocab.` = fct_add_counts(
      factor(Vocab3tile, 1:3, tertile_labels)
    ),
    `Maternal edu.` = fct_add_counts(
      factor(Maternal_Education_Group, c("Low", "Mid", "High"))
    ),
    `Native dialect` = fct_add_counts(factor(Dialect))
  ) %>%
  select(
    Study, ResearchID, Vocab3tile,
    `Exp. vocab.`, `Maternal edu.`, `Native dialect`,
    EVT_Standard, Maternal_Education_Group
  )

# Label the listening condition, then attach the grouping factors (joins on
# the columns the two tables share)
df_looks <- df_looks %>%
  mutate(
    `Child hears` = ifelse(
      HearsNativeDialect,
      "Native dialect",
      "Non-native dialect"
    )
  ) %>%
  left_join(df_evt)
```


## Finally, plots of fixation patterns over time

Set up plotting constants and helpers.

```{r}
# Reference lines drawn underneath the data: a horizontal line at .25
# (presumably chance with four images) and a vertical line at time 0
hline_chance <- geom_hline(yintercept = .25, size = 1.25, color = "#cccccc")
vline_onset <- geom_vline(xintercept = 0, size = 1.25, color = "#cccccc")

# Discrete viridis palette, stopping at .7 of the scale
colors <- viridis::scale_color_viridis(end = .7, discrete = TRUE)

# Legend below the plot, left-aligned
legend_position <- theme(
  legend.justification = "left",
  legend.position = "bottom",
  legend.text = element_text(size = 10)
)

# Axis titles and captions reused across plots
plot_text <- list(
  x_time = "Time (ms) after target noun onset",
  y_target = "Proportion of looks to named image",
  y_image = "Proportion of looks to image",
  caption_mean_se = "Mean ± SE"
)
```


```{r gca-plots}
# Each plot summarises Prop at every time bin with stat_summary()'s defaults.

# Overall looks to the target, full time window
ggplot(df_looks, aes(x = Time, y = Prop)) +
  hline_chance +
  vline_onset +
  stat_summary() +
  labs(
    x = plot_text$x_time,
    y = plot_text$y_target,
    caption = plot_text$caption_mean_se
  ) +
  theme_ipsum_rc(axis_title_size = 11)

# Native vs. non-native dialect blocks, full time window
ggplot(df_looks, aes(x = Time, y = Prop, color = `Child hears`)) +
  hline_chance +
  vline_onset +
  stat_summary() +
  colors +
  labs(
    x = plot_text$x_time,
    y = plot_text$y_target
  ) +
  theme_ipsum_rc(axis_title_size = 11) +
  legend_position

# From here on only the window after target onset is shown.

# ... by the child's native dialect
df_looks %>%
  filter(between(Time, 0, 2000)) %>%
  ggplot(aes(x = Time, y = Prop, color = `Child hears`)) +
    hline_chance +
    stat_summary() +
    facet_wrap("`Native dialect`", labeller = label_both) +
    colors +
    labs(
      x = plot_text$x_time,
      y = plot_text$y_target
    ) +
    theme_ipsum_rc(axis_title_size = 11) +
    legend_position

# ... by expressive vocabulary tertile (children without a tertile dropped)
df_looks %>%
  filter(!is.na(`Exp. vocab.`), between(Time, 0, 2000)) %>%
  ggplot(aes(x = Time, y = Prop, color = `Child hears`)) +
    hline_chance +
    stat_summary() +
    facet_wrap("`Exp. vocab.`") +
    colors +
    labs(
      x = plot_text$x_time,
      y = plot_text$y_target
    ) +
    theme_ipsum_rc(axis_title_size = 11) +
    legend_position

# ... by maternal education group (children without a group dropped)
df_looks %>%
  filter(!is.na(Maternal_Education_Group), between(Time, 0, 2000)) %>%
  ggplot(aes(x = Time, y = Prop, color = `Child hears`)) +
    hline_chance +
    stat_summary() +
    facet_wrap("`Maternal edu.`") +
    colors +
    labs(
      x = plot_text$x_time,
      y = plot_text$y_target
    ) +
    theme_ipsum_rc(axis_title_size = 11) +
    legend_position
```

## Spaghetti

Double check the individual growth curves.

```{r everyone, fig.width = 8, fig.height = 10, fig.asp = NULL, out.width = "100%"}
# One panel per child, one line per listening condition; the x-axis is
# relabelled from milliseconds to seconds.

# Children whose Dialect is AAE
df_looks %>%
  filter(Dialect == "AAE", between(Time, 0, 2000)) %>%
  ggplot(aes(x = Time, y = Prop, color = `Child hears`)) +
    hline_chance +
    geom_line(aes(group = interaction(ResearchID, `Child hears`))) +
    facet_wrap("ResearchID") +
    colors +
    scale_x_continuous(labels = function(x) x / 1000) +
    labs(
      x = "Time (s)",
      y = plot_text$y_target
    ) +
    theme_ipsum_rc(axis_title_size = 11) +
    theme(
      panel.spacing.x = grid::unit(1, "lines"),
      panel.spacing.y = grid::unit(1, "lines"),
      panel.grid.minor = element_blank()
    ) +
    legend_position

# Children whose Dialect is MAE
df_looks %>%
  filter(Dialect == "MAE", between(Time, 0, 2000)) %>%
  ggplot(aes(x = Time, y = Prop, color = `Child hears`)) +
    hline_chance +
    geom_line(aes(group = interaction(ResearchID, `Child hears`))) +
    facet_wrap("ResearchID") +
    colors +
    scale_x_continuous(labels = function(x) x / 1000) +
    labs(
      x = "Time (s)",
      y = plot_text$y_target
    ) +
    theme_ipsum_rc(axis_title_size = 11) +
    theme(
      panel.spacing.x = grid::unit(1, "lines"),
      panel.spacing.y = grid::unit(1, "lines"),
      panel.grid.minor = element_blank()
    ) +
    legend_position
```


## Save data-set for modeling

```{r}
# Write the aggregated looking data for the modeling scripts.
df_looks %>%
  # Drop bookkeeping columns and the per-foil counts and proportions
  select(-.response_def, -Study, -PhonologicalFoil,
         -SemanticFoil, -Unrelated, -Elsewhere, -Looks_Images,
         -(Prop_PhonologicalFoil:Prop_Unrelated), -Vocab3tile) %>%
  # Put the identifying columns first, followed by everything else
  select(ResearchID, Dialect, BlockDialect,
         HearsNativeDialect, Time, everything()) %>%
  # Anchor the output on the project root (`wd`), like the input files, so
  # the file lands in the same place when the chunk is run outside of knitr
  readr::write_csv(file.path(wd, "data", "modeling.csv"))
```

***

```{r}
# Print the session information for the R session that built this report
sessioninfo::session_info()
```