GazeGenie / popEye_funcs.py
hugpv's picture
initial commit
da572bf
"""
Mostly adapted from: https://github.com/sascha2schroeder/popEye
"""
import numpy as np
import pandas as pd
from icecream import ic
from scipy import stats
import pathlib as pl
RESULTS_FOLDER = pl.Path("results")
def compute_velocity(xy):
samp = 1000
N = xy.shape[0]
v = pd.DataFrame(data=np.zeros((N, 3)), columns=["time", "vx", "vy"])
v["time"] = xy["time"]
v.iloc[2 : (N - 2), 1:3] = (
samp
/ 6
* (
xy.iloc[4:N, 1:3].values
+ xy.iloc[3 : (N - 1), 1:3].values
- xy.iloc[1 : (N - 3), 1:3].values
- xy.iloc[0 : (N - 4), 1:3].values
)
)
v.iloc[1, 1:3] = samp / 2 * (xy.iloc[2, 1:3].values - xy.iloc[0, 1:3].values)
v.iloc[(N - 2), 1:3] = samp / 2 * (xy.iloc[N - 1, 1:3].values - xy.iloc[N - 4, 1:3].values)
xy = pd.concat([xy.set_index("time"), v.set_index("time")], axis=1).reset_index()
return xy
def event_long(events_df):
events_df["duration"] = events_df["stop"] - events_df["start"]
events_df = events_df[events_df["duration"] > 0]
events_df = events_df.drop(columns=["duration"])
events_df.reset_index(drop=True, inplace=True)
tmplong_cols = list(events_df.columns)
tmplong_cols.remove("msg")
events_df["del"] = 0
for i in events_df.index:
if events_df.loc[i, "msg"] == "BLINK":
if i == 0:
continue
for col in tmplong_cols:
events_df.loc[i, col] = events_df.loc[i - 1, col]
events_df.loc[i - 1, "del"] = 1
events_df = events_df[events_df["del"] == 0]
events_df = events_df.drop(columns=["del"])
events_df.reset_index(drop=True, inplace=True)
events_df["num"] = range(len(events_df))
# compute blinks
# ---------------
events_df["blink_before"] = 0
events_df["blink_after"] = 0
for i in events_df.index:
if events_df.loc[i, "msg"] == "BLINK":
events_df.loc[i - 1, "blink_after"] = 1
if i < len(events_df) - 1:
events_df.loc[i + 1, "blink_before"] = 1
# combine
events_df["blink"] = (events_df["blink_before"] == 1) | (events_df["blink_after"] == 1)
return events_df.copy()
def compute_non_line_dependent_saccade_measures(saccade_df, trial_dict):
saccade_df["trial_id"] = trial_dict["trial_id"]
gaze_df = trial_dict["gaze_df"]
for s in range(len(saccade_df)):
is_directional_deviation = False
a = saccade_df["start_time"][s]
b = saccade_df["end_time"][s]
if not gaze_df["x"][[True if (a <= x <= b) else False for x in gaze_df["time"]]].any():
gaze_df.loc[a:b, "x"] = np.nan
bool_vec = (gaze_df["time"] >= a) & (gaze_df["time"] <= b)
if (not gaze_df["x"][bool_vec].isna().any()) and bool_vec.any():
# saccade amplitude (dX, dY)
minx = min(gaze_df.loc[bool_vec, "x"])
maxx = max(gaze_df.loc[bool_vec, "x"])
if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
miny = min(gaze_df.loc[bool_vec, "y"])
maxy = max(gaze_df.loc[bool_vec, "y"])
ix1 = gaze_df.loc[bool_vec, "x"].index[np.argmin(gaze_df.loc[bool_vec, "x"])]
ix2 = gaze_df.loc[bool_vec, "x"].index[np.argmax(gaze_df.loc[bool_vec, "x"])]
if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
iy1 = gaze_df.loc[bool_vec, "y"].index[np.argmin(gaze_df.loc[bool_vec, "y"])]
iy2 = gaze_df.loc[bool_vec, "y"].index[np.argmax(gaze_df.loc[bool_vec, "y"])]
saccade_df.loc[s, "dX"] = round(np.sign(ix2 - ix1) * (maxx - minx))
if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
saccade_df.loc[s, "dY"] = round(np.sign(iy2 - iy1) * (maxy - miny))
# saccade amplitude/angle
if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
saccade_df.loc[s, "amp_px"] = round(
np.sqrt(saccade_df.loc[s, "dX"] ** 2 + saccade_df.loc[s, "dY"] ** 2)
)
saccade_df.loc[s, "amp_angle"] = round(np.arctan2(saccade_df.loc[s, "dY"], saccade_df.loc[s, "dX"]), 2)
saccade_df.loc[s, "amp_angle_deg"] = round(
np.arctan2(saccade_df.loc[s, "dY"], saccade_df.loc[s, "dX"]) * (180 / np.pi), 2
)
else:
saccade_df.loc[s, "amp_px"] = np.nan
saccade_df.loc[s, "amp_angle"] = np.nan
saccade_df.loc[s, "amp_angle_deg"] = np.nan
if 35 <= abs(saccade_df.loc[s, "angle"]) <= 145:
if saccade_df.loc[s, "xe"] - saccade_df.loc[s, "xs"] > 0 and not (
"blink_before" in saccade_df.columns
and (saccade_df.loc[s, "blink_before"] or saccade_df.loc[s, "blink_after"])
):
is_directional_deviation = True
saccade_df.loc[s, "is_directional_deviation"] = is_directional_deviation
return saccade_df
def compute_saccade_measures(saccade_df, trial_dict, algo_choice):
if algo_choice is not None:
algo_str = f"_{algo_choice}"
else:
algo_str = ""
gaze_df = trial_dict["gaze_df"]
saccade_df.reset_index(drop=True, inplace=True)
saccade_df.loc[:, f"has_line_change{algo_str}"] = (
saccade_df.loc[:, f"lines{algo_str}"] != saccade_df.loc[:, f"linee{algo_str}"]
)
saccade_df.loc[:, f"goes_to_next_line{algo_str}"] = saccade_df.loc[:, f"linee{algo_str}"] == (
saccade_df.loc[:, f"lines{algo_str}"] + 1
)
saccade_df.loc[:, f"is_directional_deviation{algo_str}"] = False
saccade_df.loc[:, f"is_return_sweep{algo_str}"] = False
for sidx, subdf in saccade_df.groupby(f"lines{algo_str}"):
if subdf.iloc[-1][f"goes_to_next_line{algo_str}"]:
saccade_df.loc[subdf.index[-1], f"is_return_sweep{algo_str}"] = True
for s in range(len(saccade_df)):
is_directional_deviation = False
a = saccade_df["start_time"][s]
b = saccade_df["end_time"][s]
if not gaze_df["x"][[True if (a <= x <= b) else False for x in gaze_df["time"]]].any():
gaze_df.loc[a:b, "x"] = np.nan
# saccade distance in letters
if saccade_df.loc[s, f"lete{algo_str}"] is None or saccade_df.loc[s, f"lets{algo_str}"] is None:
ic(
f"None found for compute_saccade_measures at index {s} for subj {trial_dict['subject']} and trial {trial_dict['trial_id']}"
)
else:
saccade_df.loc[s, f"dist_let{algo_str}"] = (
saccade_df.loc[s, f"lete{algo_str}"] - saccade_df.loc[s, f"lets{algo_str}"]
)
bool_vec = (gaze_df["time"] >= a) & (gaze_df["time"] <= b)
if (not gaze_df["x"][bool_vec].isna().any()) and bool_vec.any():
# saccade peak velocity (vpeak)
if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
vx = gaze_df.vx[bool_vec]
vy = gaze_df.vy[bool_vec]
if not vx.empty and not vy.empty:
saccade_df.loc[s, f"peak_vel{algo_str}"] = round(np.nanmax(np.sqrt(vx**2 + vy**2)))
else:
saccade_df.loc[s, f"peak_vel{algo_str}"] = round(np.nanmax(np.sqrt(gaze_df.vx[bool_vec] ** 2)))
if 35 <= abs(saccade_df.loc[s, f"angle{algo_str}"]) <= 145:
if saccade_df.loc[s, "xe"] - saccade_df.loc[s, "xs"] > 0 and not (
"blink_before" in saccade_df.columns
and (saccade_df.loc[s, "blink_before"] or saccade_df.loc[s, "blink_after"])
):
is_directional_deviation = True
saccade_df.loc[s, f"is_directional_deviation{algo_str}"] = is_directional_deviation
return saccade_df.copy()
def get_angle_and_eucl_dist(saccade_df, algo_choice=None):
if algo_choice is not None:
algo_str = f"_{algo_choice}"
else:
algo_str = ""
saccade_df["xe_minus_xs"] = saccade_df["xe"] - saccade_df["xs"]
saccade_df[f"ye_minus_ys{algo_str}"] = saccade_df[f"ye{algo_str}"] - saccade_df[f"ys{algo_str}"]
saccade_df["eucledian_distance"] = (
saccade_df["xe_minus_xs"].map(np.square) + saccade_df[f"ye_minus_ys{algo_str}"].map(np.square)
).map(np.sqrt)
saccade_df[f"angle{algo_str}"] = np.arctan2(
saccade_df.loc[:, f"ye_minus_ys{algo_str}"], saccade_df.loc[:, "xe_minus_xs"]
) * (180 / np.pi)
return saccade_df
def compute_saccade_length(dffix, stimulus_df, algo_choice):
for j in dffix.index:
if (
j == 0
or pd.isna(dffix.at[j, f"line_num_{algo_choice}"])
or pd.isna(dffix.at[j - 1, f"line_num_{algo_choice}"])
or dffix.at[j, f"letternum_{algo_choice}"] is None
or dffix.at[j - 1, f"letternum_{algo_choice}"] is None
):
continue
# Same line, calculate saccade length as difference in letter numbers
if dffix.at[j - 1, f"line_num_{algo_choice}"] == dffix.at[j, f"line_num_{algo_choice}"]:
dffix.at[j, f"sac_in_{algo_choice}"] = (
dffix.at[j, f"letternum_{algo_choice}"] - dffix.at[j - 1, f"letternum_{algo_choice}"]
)
# Go to line ahead, calculate saccade length as difference in minimum letter numbers in target and previous lines, respectively
elif dffix.at[j - 1, f"line_num_{algo_choice}"] < dffix.at[j, f"line_num_{algo_choice}"]:
min_stim_j = np.min(
stimulus_df[stimulus_df["assigned_line"] == dffix.at[j, f"line_num_{algo_choice}"]]["letternum"]
)
min_stim_j_1 = np.min(
stimulus_df[stimulus_df["assigned_line"] == dffix.at[j - 1, f"line_num_{algo_choice}"]]["letternum"]
)
dffix.at[j, f"sac_in_{algo_choice}"] = (dffix.at[j, f"letternum_{algo_choice}"] - min_stim_j) - (
dffix.at[j - 1, f"letternum_{algo_choice}"] - min_stim_j_1
)
# Return to line visited before, calculate saccade length as difference in minimum letter numbers in target and next lines, respectively
elif dffix.at[j - 1, f"line_num_{algo_choice}"] > dffix.at[j, f"line_num_{algo_choice}"]:
min_stim_j_1 = np.min(
stimulus_df[stimulus_df["assigned_line"] == dffix.at[j - 1, f"line_num_{algo_choice}"]]["letternum"]
)
min_stim_j = np.min(
stimulus_df[stimulus_df["assigned_line"] == dffix.at[j, f"line_num_{algo_choice}"]]["letternum"]
)
dffix.at[j, f"sac_in_{algo_choice}"] = (dffix.at[j - 1, f"letternum_{algo_choice}"] - min_stim_j_1) - (
dffix.at[j, f"letternum_{algo_choice}"] - min_stim_j
)
for j in range(len(dffix) - 1):
if (
pd.isna(dffix.at[j, f"line_num_{algo_choice}"])
or pd.isna(dffix.at[j + 1, f"line_num_{algo_choice}"])
or dffix.at[j + 1, f"letternum_{algo_choice}"] is None
or dffix.at[j, f"letternum_{algo_choice}"] is None
):
continue
# Same line, calculate saccade length as difference in letter numbers
if dffix.at[j + 1, f"line_num_{algo_choice}"] == dffix.at[j, f"line_num_{algo_choice}"]:
dffix.at[j, f"sac_out_{algo_choice}"] = (
dffix.at[j + 1, f"letternum_{algo_choice}"] - dffix.at[j, f"letternum_{algo_choice}"]
)
elif dffix.at[j + 1, f"line_num_{algo_choice}"] > dffix.at[j, f"line_num_{algo_choice}"]:
min_stim_j_1 = np.min(
stimulus_df[stimulus_df["assigned_line"] == dffix.at[j + 1, f"line_num_{algo_choice}"]]["letternum"]
)
min_stim_j = np.min(
stimulus_df[stimulus_df["assigned_line"] == dffix.at[j, f"line_num_{algo_choice}"]]["letternum"]
)
dffix.at[j, f"sac_out_{algo_choice}"] = (dffix.at[j + 1, f"letternum_{algo_choice}"] - min_stim_j_1) - (
dffix.at[j, f"letternum_{algo_choice}"] - min_stim_j
)
elif dffix.at[j + 1, f"line_num_{algo_choice}"] < dffix.at[j, f"line_num_{algo_choice}"]:
min_stim_j_1 = np.min(
stimulus_df[stimulus_df["assigned_line"] == dffix.at[j, f"line_num_{algo_choice}"]]["letternum"]
)
min_stim_j = np.min(
stimulus_df[stimulus_df["assigned_line"] == dffix.at[j + 1, f"line_num_{algo_choice}"]]["letternum"]
)
dffix.at[j, f"sac_out_{algo_choice}"] = (dffix.at[j, f"letternum_{algo_choice}"] - min_stim_j) - (
dffix.at[j + 1, f"letternum_{algo_choice}"] - min_stim_j_1
)
return dffix
def compute_launch_distance(dffix, algo_choice):
for i in range(1, dffix.shape[0]):
if pd.isna(dffix.loc[i, f"sac_in_{algo_choice}"]):
continue
if dffix.loc[i, f"sac_in_{algo_choice}"] >= 0:
dffix.loc[i, f"word_launch_{algo_choice}"] = (
dffix.loc[i, f"sac_in_{algo_choice}"] - dffix.loc[i, f"word_land_{algo_choice}"]
)
else:
dffix.loc[i, f"word_launch_{algo_choice}"] = (
dffix.loc[i, f"sac_in_{algo_choice}"] + dffix.loc[i - 1, f"word_land_{algo_choice}"]
)
return dffix
def compute_refixation(dffix, algo_choice):
dffix.loc[:, f"word_refix_{algo_choice}"] = False
dffix.loc[:, f"sentence_refix_{algo_choice}"] = False
for j in dffix.index:
if (
j == 0
or pd.isna(dffix.loc[j, f"on_word_number_{algo_choice}"])
or pd.isna(dffix.loc[j - 1, f"on_word_number_{algo_choice}"])
):
continue
dffix.loc[j, f"word_refix_{algo_choice}"] = (
dffix.loc[j, f"on_word_number_{algo_choice}"] == dffix.loc[j - 1, f"on_word_number_{algo_choice}"]
)
dffix.loc[j, f"sentence_refix_{algo_choice}"] = (
dffix.loc[j, f"on_sentence_num_{algo_choice}"] == dffix.loc[j - 1, f"on_sentence_num_{algo_choice}"]
)
return dffix
def compute_regression(dffix, algo_choice):
tmp = dffix.copy()
tmp.reset_index(drop=True, inplace=True)
tmp.loc[:, f"word_reg_out_{algo_choice}"] = False
tmp.loc[:, f"word_reg_in_{algo_choice}"] = False
tmp.loc[:, f"word_reg_out_to_{algo_choice}"] = float("nan")
tmp.loc[:, f"word_reg_in_from_{algo_choice}"] = float("nan")
tmp.loc[:, f"sentence_reg_out_{algo_choice}"] = False
tmp.loc[:, f"sentence_reg_in_{algo_choice}"] = False
tmp.loc[:, f"sentence_reg_out_to_{algo_choice}"] = float("nan")
tmp.loc[:, f"sentence_reg_in_from_{algo_choice}"] = float("nan")
if len(tmp) > 1:
for j in range(1, len(tmp)):
# Skip outliers
if pd.isnull(tmp.iloc[j][f"on_word_number_{algo_choice}"]) or pd.isnull(
tmp.iloc[j - 1][f"on_word_number_{algo_choice}"]
):
continue
# Word
if tmp.iloc[j][f"on_word_number_{algo_choice}"] < tmp.iloc[j - 1][f"on_word_number_{algo_choice}"]:
tmp.loc[j, f"word_reg_in_{algo_choice}"] = True
tmp.loc[j - 1, f"word_reg_out_{algo_choice}"] = True
tmp.loc[j, f"word_reg_in_from_{algo_choice}"] = tmp.iloc[j - 1][f"on_word_number_{algo_choice}"]
tmp.loc[j - 1, f"word_reg_out_to_{algo_choice}"] = tmp.iloc[j][f"on_word_number_{algo_choice}"]
# Sentence
if tmp.iloc[j][f"on_sentence_num_{algo_choice}"] < tmp.iloc[j - 1][f"on_sentence_num_{algo_choice}"]:
tmp.loc[j, f"sentence_reg_in_{algo_choice}"] = True
tmp.loc[j - 1, f"sentence_reg_out_{algo_choice}"] = True
tmp.loc[j, f"sentence_reg_in_from_{algo_choice}"] = tmp.iloc[j - 1][f"on_sentence_num_{algo_choice}"]
tmp.loc[j - 1, f"sentence_reg_out_to_{algo_choice}"] = tmp.iloc[j][f"on_sentence_num_{algo_choice}"]
extra_cols = list(set(tmp.columns) - set(dffix.columns))
# select these columns from tmp and add the 'fixation_number'
cols_to_add = ["fixation_number"] + extra_cols
# merge selected columns to dffix with 'outer' how and 'fixation_number' as common key
dffix = pd.merge(dffix, tmp[cols_to_add], on="fixation_number", how="outer")
return dffix
def compute_firstskip(dffix, algo_choice):
dffix[f"word_firstskip_{algo_choice}"] = 0
word_mem = []
dffix[f"sentence_firstskip_{algo_choice}"] = 0
sentence_mem = []
dffix.reset_index(inplace=True)
for j in range(dffix.shape[0]):
# word
if (
dffix.loc[j, f"on_word_number_{algo_choice}"] < np.max(word_mem, initial=0)
and dffix.loc[j, f"on_word_number_{algo_choice}"] not in word_mem
):
dffix.loc[j, f"word_firstskip_{algo_choice}"] = 1
# sent
if (
dffix.loc[j, f"on_sentence_num_{algo_choice}"] < np.max(sentence_mem, initial=0)
and dffix.loc[j, f"on_sentence_num_{algo_choice}"] not in sentence_mem
):
dffix.loc[j, f"sentence_firstskip_{algo_choice}"] = 1
word_mem.append(dffix.loc[j, f"on_word_number_{algo_choice}"])
sentence_mem.append(dffix.loc[j, f"on_sentence_num_{algo_choice}"])
# set NA values for missing line numbers
dffix.loc[dffix[f"line_num_{algo_choice}"].isna(), f"word_firstskip_{algo_choice}"] = np.nan
dffix.loc[dffix[f"line_num_{algo_choice}"].isna(), f"sentence_firstskip_{algo_choice}"] = np.nan
dffix.set_index("index", inplace=True)
return dffix
def compute_run(dffix, algo_choice):
if "fixation_number" not in dffix.columns and "num" in dffix.columns:
dffix["fixation_number"] = dffix["num"]
tmp = dffix.copy()
tmp.reset_index(inplace=True, drop=True)
# initialize
tmp.loc[~tmp[f"on_word_{algo_choice}"].isna(), f"word_runid_{algo_choice}"] = 0
tmp[f"sentence_runid_{algo_choice}"] = 0
# fixation loop
if len(tmp) > 1:
for j in range(1, len(tmp)):
# word
if tmp[f"word_reg_in_{algo_choice}"][j] == 1 and tmp[f"word_reg_in_{algo_choice}"][j - 1] != 1:
tmp.loc[j, f"word_runid_{algo_choice}"] = tmp[f"word_runid_{algo_choice}"][j - 1] + 1
else:
tmp.loc[j, f"word_runid_{algo_choice}"] = tmp.loc[j - 1, f"word_runid_{algo_choice}"]
# sentence
if tmp[f"sentence_reg_in_{algo_choice}"][j] == 1 and tmp[f"sentence_reg_in_{algo_choice}"][j - 1] != 1:
tmp.loc[j, f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"][j - 1] + 1
else:
tmp.loc[j, f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"][j - 1]
tmp[f"word_runid_{algo_choice}"] = tmp[f"word_runid_{algo_choice}"] - 1
tmp[f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"] - 1
# fixid in word
tmp[f"word_fix_{algo_choice}"] = tmp.groupby(f"on_word_number_{algo_choice}")["fixation_number"].transform(
lambda x: stats.rankdata(x, method="min")
)
# fixid in sent
tmp[f"sentence_fix_{algo_choice}"] = tmp.groupby(f"on_sentence_num_{algo_choice}")["fixation_number"].transform(
lambda x: stats.rankdata(x, method="min")
)
# runid in word
tmp["id"] = tmp[f"on_word_number_{algo_choice}"].astype(str) + ":" + tmp[f"word_runid_{algo_choice}"].astype(str)
fix_tmp = tmp.copy().drop_duplicates(subset="id")
fix_tmp[f"word_run_{algo_choice}"] = fix_tmp.groupby(f"on_word_number_{algo_choice}")[
f"word_runid_{algo_choice}"
].transform(lambda x: stats.rankdata(x, method="min"))
if f"word_run_{algo_choice}" in tmp.columns:
tmp = tmp.drop(columns=[f"word_run_{algo_choice}"])
tmp = pd.merge(tmp, fix_tmp[["id", f"word_run_{algo_choice}"]], on="id")
del tmp["id"]
tmp = tmp.sort_values("fixation_number")
# runid in sentence
tmp["id"] = (
tmp[f"on_sentence_num_{algo_choice}"].astype(str) + ":" + tmp[f"sentence_runid_{algo_choice}"].astype(str)
)
fix_tmp = tmp.copy().drop_duplicates(subset="id")
fix_tmp[f"sentence_run_{algo_choice}"] = fix_tmp.groupby(f"on_sentence_num_{algo_choice}")["id"].transform(
lambda x: stats.rankdata(x, method="min")
)
if f"sentence_run_{algo_choice}" in tmp.columns:
tmp = tmp.drop(columns=[f"sentence_run_{algo_choice}"])
tmp = pd.merge(tmp, fix_tmp[["id", f"sentence_run_{algo_choice}"]], on="id")
del tmp["id"]
tmp = tmp.sort_values("fixation_number")
# fixnum in word_run
tmp["id"] = tmp[f"on_word_number_{algo_choice}"].astype(str) + ":" + tmp[f"word_run_{algo_choice}"].astype(str)
tmp[f"word_run_fix_{algo_choice}"] = tmp.groupby(["id"])["fixation_number"].rank("first").values
del tmp["id"]
tmp = tmp.sort_values("fixation_number")
# fixnum in sentence_run
tmp["id"] = tmp[f"on_sentence_num_{algo_choice}"].astype(str) + ":" + tmp[f"sentence_run_{algo_choice}"].astype(str)
tmp[f"sentence_run_fix_{algo_choice}"] = tmp.groupby(["id"])["fixation_number"].rank("first").values
del tmp["id"]
tmp = tmp.sort_values("fixation_number")
names = [
"fixation_number",
f"word_runid_{algo_choice}",
f"sentence_runid_{algo_choice}",
f"word_fix_{algo_choice}",
f"sentence_fix_{algo_choice}",
f"word_run_{algo_choice}",
f"sentence_run_{algo_choice}",
f"word_run_fix_{algo_choice}",
f"sentence_run_fix_{algo_choice}",
]
dffix = pd.merge(dffix, tmp[names], on="fixation_number", how="left")
return dffix.copy()
def compute_landing_position(dffix, algo_choice):
dffix[f"word_cland_{algo_choice}"] = (
dffix[f"word_land_{algo_choice}"] - (dffix[f"on_word_{algo_choice}"].str.len() + 1) / 2
)
return dffix
def aggregate_words_firstrun(
fix,
algo_choice,
measures_to_calculate=[
"firstrun_blink",
"firstrun_skip",
"firstrun_refix",
"firstrun_reg_in",
"firstrun_reg_out",
"firstrun_dur",
"firstrun_gopast",
"firstrun_gopast_sel",
],
):
firstruntmp = fix.loc[fix[f"word_run_{algo_choice}"] == 1].copy()
firstrun = firstruntmp.drop_duplicates(subset=f"on_word_number_{algo_choice}", keep="first").copy()
names = [
"subject",
"trial_id",
"item",
"condition",
f"on_word_number_{algo_choice}",
f"on_word_{algo_choice}",
"fixation_number",
]
firstrun = firstrun[names].sort_values(f"on_word_number_{algo_choice}")
# compute measures
firstrun[f"firstrun_nfix_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
"fixation_number"
].transform(
"count"
) # Required for many other measures
firstrun[f"firstrun_nfix_{algo_choice}"] = firstrun[f"firstrun_nfix_{algo_choice}"].fillna(0)
if "firstrun_blink" in measures_to_calculate:
if "blink" in firstruntmp:
firstrun[f"firstrun_blink_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
"blink"
].transform("max")
else:
firstrun[f"firstrun_blink_{algo_choice}"] = 0
if "firstrun_skip" in measures_to_calculate:
firstrun[f"firstrun_skip_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
f"word_firstskip_{algo_choice}"
].transform("max")
if "firstrun_refix" in measures_to_calculate:
firstrun[f"firstrun_refix_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
f"word_refix_{algo_choice}"
].transform("max")
if "firstrun_reg_in" in measures_to_calculate:
firstrun[f"firstrun_reg_in_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
f"word_reg_out_{algo_choice}"
].transform("max")
if "firstrun_reg_out" in measures_to_calculate:
firstrun[f"firstrun_reg_out_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
f"word_reg_in_{algo_choice}"
].transform("max")
if "firstrun_dur" in measures_to_calculate:
firstrun[f"firstrun_dur_{algo_choice}"] = firstruntmp.groupby(f"on_word_number_{algo_choice}")[
"duration"
].transform("sum")
firstrun = firstrun.sort_values(["trial_id", f"on_word_number_{algo_choice}"]).copy()
return firstrun
def compute_gopast_word(fixations_dataframe, algo_choice):
ias = np.unique(fixations_dataframe.loc[:, f"on_word_number_{algo_choice}"])
for j in range(len(ias) - 1):
fixations_dataframe.loc[
(fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), f"gopast_{algo_choice}"
] = np.nansum(
fixations_dataframe.loc[
(
fixations_dataframe["fixation_number"]
>= np.min(
fixations_dataframe.loc[
(fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), "fixation_number"
]
)
)
& (
fixations_dataframe["fixation_number"]
< np.min(
fixations_dataframe.loc[
(fixations_dataframe[f"on_word_number_{algo_choice}"] > ias[j]), "fixation_number"
]
)
)
& (~fixations_dataframe[f"on_word_number_{algo_choice}"].isna())
]["duration"]
)
fixations_dataframe.loc[
(fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), f"selgopast_{algo_choice}"
] = np.nansum(
fixations_dataframe.loc[
(
fixations_dataframe["fixation_number"]
>= np.min(
fixations_dataframe.loc[
(fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), "fixation_number"
]
)
)
& (
fixations_dataframe["fixation_number"]
< np.min(
fixations_dataframe.loc[
(fixations_dataframe[f"on_word_number_{algo_choice}"] > ias[j]), "fixation_number"
]
)
)
& (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j])
& (~fixations_dataframe[f"on_word_number_{algo_choice}"].isna())
]["duration"]
)
return fixations_dataframe
def aggregate_words(
fix,
word_item,
algo_choice,
measures_to_calculate=[
"blink",
],
):
wordtmp = fix.copy()
word = wordtmp.drop_duplicates(subset=f"on_word_number_{algo_choice}", keep="first").copy()
names = [
f"on_sentence_num_{algo_choice}",
f"on_word_number_{algo_choice}",
f"on_word_{algo_choice}",
]
word = word.loc[:, names].sort_values(by=f"on_word_number_{algo_choice}")
wordtmp = compute_gopast_word(wordtmp, algo_choice)
if "blink" in measures_to_calculate:
if "blink" in wordtmp:
word[f"blink_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")["blink"].transform("max")
else:
word[f"blink_{algo_choice}"] = 0
if "nrun" in measures_to_calculate or "reread" in measures_to_calculate:
word[f"nrun_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
f"word_run_{algo_choice}"
].transform("max")
if "reread" in measures_to_calculate:
word[f"reread_{algo_choice}"] = word[f"nrun_{algo_choice}"] > 1
word[f"number_of_fixations_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
"fixation_number"
].transform("count")
if "refix" in measures_to_calculate:
word[f"refix_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
f"word_refix_{algo_choice}"
].transform("max")
if "reg_in" in measures_to_calculate:
word[f"reg_in_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
f"word_reg_in_{algo_choice}"
].transform("max")
if "reg_out" in measures_to_calculate:
word[f"reg_out_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
f"word_reg_out_{algo_choice}"
].transform("max")
if "total_fixation_duration" in measures_to_calculate:
word[f"total_fixation_duration_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
"duration"
].transform("sum")
if "gopast" in measures_to_calculate and f"gopast_{algo_choice}" in wordtmp.columns:
word[f"gopast_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
f"gopast_{algo_choice}"
].transform("max")
word[f"gopast_{algo_choice}"] = word[f"gopast_{algo_choice}"].fillna(0)
if "gopast_sel" in measures_to_calculate and f"selgopast_{algo_choice}" in wordtmp.columns:
word[f"gopast_sel_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
f"selgopast_{algo_choice}"
].transform("max")
word[f"gopast_sel_{algo_choice}"] = word[f"gopast_sel_{algo_choice}"].fillna(0)
word.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1, inplace=True)
word = pd.merge(
word.reset_index(drop=True), word_item.reset_index(drop=True), on="word_number", how="right", validate="1:1"
)
word[f"number_of_fixations_{algo_choice}"] = word[f"number_of_fixations_{algo_choice}"].fillna(0)
if "total_fixation_duration" in measures_to_calculate:
word[f"total_fixation_duration_{algo_choice}"] = word[f"total_fixation_duration_{algo_choice}"].fillna(0)
word[f"skip_{algo_choice}"] = 0
if "blink" in measures_to_calculate:
word.loc[word[f"blink_{algo_choice}"].isna(), f"skip_{algo_choice}"] = 1
word.loc[word[f"number_of_fixations_{algo_choice}"] == 0, f"skip_{algo_choice}"] = 1
word[f"skip_{algo_choice}"] = word[f"skip_{algo_choice}"].astype("boolean")
if "number_of_fixations" not in measures_to_calculate:
word = word.drop(columns=f"number_of_fixations_{algo_choice}")
if "blink" in measures_to_calculate:
word[f"blink_{algo_choice}"] = word[f"blink_{algo_choice}"].astype("boolean")
word = word.sort_values(by=["word_number"])
if "condition" in wordtmp.columns and "condition" not in word.columns:
word.insert(loc=0, column="condition", value=wordtmp["condition"].iloc[0])
if "item" in wordtmp.columns and "item" not in word.columns:
word.insert(loc=0, column="item", value=wordtmp["item"].iloc[0])
if "trial_id" in wordtmp.columns and "trial_id" not in word.columns:
word.insert(loc=0, column="trial_id", value=wordtmp["trial_id"].iloc[0])
if "subject" in wordtmp.columns and "subject" not in word.columns:
word.insert(loc=0, column="subject", value=wordtmp["subject"].iloc[0])
return word
def combine_words(fix, wordfirst, wordtmp, algo_choice, measures_to_calculate):
subject = wordtmp["subject"].values[0]
trial_id = wordtmp["trial_id"].values[0]
item = wordtmp["item"].values[0]
condition = wordtmp["condition"].values[0]
wordtmp = wordtmp.loc[
:,
[
c
for c in [
"word_number",
"word",
f"blink_{algo_choice}",
f"skip_{algo_choice}",
f"nrun_{algo_choice}",
f"reread_{algo_choice}",
f"number_of_fixations_{algo_choice}",
f"refix_{algo_choice}",
f"reg_in_{algo_choice}",
f"reg_out_{algo_choice}",
f"total_fixation_duration_{algo_choice}",
f"gopast_{algo_choice}",
f"gopast_sel_{algo_choice}",
]
if c in wordtmp.columns
],
]
wordfirsttmp = wordfirst.loc[
:,
[
c
for c in [
f"on_word_number_{algo_choice}",
f"firstrun_skip_{algo_choice}",
f"firstrun_nfix_{algo_choice}",
f"firstrun_refix_{algo_choice}",
f"firstrun_reg_in_{algo_choice}",
f"firstrun_reg_out_{algo_choice}",
f"firstrun_dur_{algo_choice}",
f"firstrun_gopast_{algo_choice}",
f"firstrun_gopast_sel_{algo_choice}",
]
if c in wordfirst.columns
],
]
fixtmp = fix[(fix[f"word_run_{algo_choice}"] == 1) & (fix[f"word_run_fix_{algo_choice}"] == 1)].copy()
names = [
c
for c in [
f"on_word_number_{algo_choice}",
f"sac_in_{algo_choice}",
f"sac_out_{algo_choice}",
f"word_launch_{algo_choice}",
f"word_land_{algo_choice}",
f"word_cland_{algo_choice}",
f"duration",
]
if c in fixtmp.columns
]
fixtmp = fixtmp[names].copy()
fixtmp.rename(
{
f"sac_in_{algo_choice}": f"firstfix_sac_in_{algo_choice}",
f"sac_out_{algo_choice}": f"firstfix_sac_out_{algo_choice}",
f"word_launch_{algo_choice}": f"firstfix_launch_{algo_choice}",
f"word_land_{algo_choice}": f"firstfix_land_{algo_choice}",
f"word_cland_{algo_choice}": f"firstfix_cland_{algo_choice}",
f"duration": f"firstfix_dur_{algo_choice}",
},
axis=1,
inplace=True,
)
comb = pd.merge(
pd.merge(
wordtmp,
wordfirsttmp.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1),
on="word_number",
how="left",
),
fixtmp.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1),
on="word_number",
how="left",
)
dropcols = [
c
for c in [
f"firstrun_skip_{algo_choice}",
f"firstrun_refix_{algo_choice}",
f"firstrun_reg_in_{algo_choice}",
f"firstrun_reg_out_{algo_choice}",
f"firstrun_dur_{algo_choice}",
f"firstrun_gopast_{algo_choice}",
f"firstrun_gopast_sel_{algo_choice}",
f"firstfix_sac_in_{algo_choice}",
f"firstfix_sac_out_{algo_choice}",
f"firstfix_launch_{algo_choice}",
f"firstfix_land_{algo_choice}",
f"firstfix_cland_{algo_choice}",
f"firstfix_dur_{algo_choice}",
]
if ((c.replace(f"_{algo_choice}", "") not in measures_to_calculate) & (c in comb.columns))
]
comb = comb.drop(columns=dropcols).copy()
comb.sort_values(by="word_number", inplace=True)
# recompute firstrun skip (skips are also firstkips)
if f"skip_{algo_choice}" in comb.columns and f"firstrun_skip_{algo_choice}" in comb.columns:
comb.loc[comb[f"skip_{algo_choice}"] == 1, f"firstrun_skip_{algo_choice}"] = 1
# gopast time in firstrun
if f"gopast_{algo_choice}" in comb.columns and "firstrun_gopast" in measures_to_calculate:
comb[f"firstrun_gopast_{algo_choice}"] = comb[f"gopast_{algo_choice}"]
if f"gopast_sel_{algo_choice}" in comb.columns and "firstrun_gopast_sel" in measures_to_calculate:
comb[f"firstrun_gopast_sel_{algo_choice}"] = comb[f"gopast_sel_{algo_choice}"]
if f"gopast_{algo_choice}" in comb.columns:
comb.drop(columns=[f"gopast_{algo_choice}"], inplace=True)
if f"gopast_sel_{algo_choice}" in comb.columns:
comb.drop(columns=[f"gopast_sel_{algo_choice}"], inplace=True)
if f"firstrun_nfix_{algo_choice}" in comb.columns and "singlefix" in measures_to_calculate:
comb[f"singlefix_{algo_choice}"] = 0
comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_{algo_choice}"] = 1
if f"firstfix_sac_in_{algo_choice}" in comb.columns and "singlefix_sac_in" in measures_to_calculate:
comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_sac_in_{algo_choice}"] = comb[
f"firstfix_sac_in_{algo_choice}"
][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]
if f"firstfix_sac_out_{algo_choice}" in comb.columns and "singlefix_sac_out" in measures_to_calculate:
comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_sac_out_{algo_choice}"] = comb[
f"firstfix_sac_out_{algo_choice}"
][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]
if f"firstfix_launch_{algo_choice}" in comb.columns and "singlefix_launch" in measures_to_calculate:
comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_launch_{algo_choice}"] = comb[
f"firstfix_launch_{algo_choice}"
][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]
if f"firstfix_land_{algo_choice}" in comb.columns and "singlefix_land" in measures_to_calculate:
comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_land_{algo_choice}"] = comb[
f"firstfix_land_{algo_choice}"
][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]
if f"firstfix_cland_{algo_choice}" in comb.columns and "singlefix_cland" in measures_to_calculate:
comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_cland_{algo_choice}"] = comb[
f"firstfix_cland_{algo_choice}"
][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]
if f"firstfix_dur_{algo_choice}" in comb.columns and "singlefix_dur" in measures_to_calculate:
comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_dur_{algo_choice}"] = comb[
f"firstfix_dur_{algo_choice}"
][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]
if "condition" not in comb.columns:
comb.insert(loc=0, column="condition", value=condition)
if "item" not in comb.columns:
comb.insert(loc=0, column="item", value=item)
if "trial_id" not in comb.columns:
comb.insert(loc=0, column="trial_id", value=trial_id)
if "subject" not in comb.columns:
comb.insert(loc=0, column="subject", value=subject)
return comb.copy()
def compute_sentence_measures(fix, stimmat, algo_choice, measures_to_calc, save_to_csv=False):
sentitem = stimmat.drop_duplicates(
subset="in_sentence_number", keep="first"
) # TODO check why there are rows with sent number None
fixin = fix.copy().reset_index(drop=True)
fixin["on_sentence_num2"] = fixin[f"on_sentence_num_{algo_choice}"].copy()
# Recompute sentence number (two fixation exception rule)
for j in range(1, len(fixin) - 1):
if fixin.loc[j, "on_sentence_num2"] != fixin.loc[j - 1, "on_sentence_num2"]:
if j + 1 in fixin.index and fixin.loc[j + 1, "on_sentence_num2"] == fixin.loc[j - 1, "on_sentence_num2"]:
fixin.loc[j, "on_sentence_num2"] = fixin.loc[j - 1, "on_sentence_num2"]
elif j + 2 in fixin.index and fixin.loc[j + 2, "on_sentence_num2"] == fixin.loc[j - 1, "on_sentence_num2"]:
fixin.loc[j, "on_sentence_num2"] = fixin.loc[j - 1, "on_sentence_num2"]
fixin["id"] = fixin.apply(lambda row: f"{row['on_sentence_num2']}", axis=1)
fixin[f"sent_reg_in2_{algo_choice}"] = 0
fixin[f"sent_reg_out2_{algo_choice}"] = 0
fixin[f"sent_runid2_{algo_choice}"] = 1
fixin.loc[0, "last"] = fixin.loc[0, "id"]
fixin.loc[0, f"firstpass_{algo_choice}"] = 1
mem = [fixin.loc[0, "on_sentence_num2"]]
wordmem = [fixin.loc[0, f"on_word_number_{algo_choice}"]]
fixin.loc[0, f"forward_{algo_choice}"] = 1
for j in range(1, len(fixin)):
fixin.loc[j, "last"] = fixin.loc[j - 1, "id"]
if fixin.loc[j, "on_sentence_num2"] != fixin.loc[j - 1, "on_sentence_num2"]:
fixin.loc[j, f"sent_reg_in2_{algo_choice}"] = 1
fixin.loc[j - 1, f"sent_reg_out2_{algo_choice}"] = 1
fixin.loc[j, f"sent_reg_in_from2_{algo_choice}"] = fixin.loc[j - 1, "on_sentence_num2"]
fixin.loc[j - 1, f"sent_reg_out_to2_{algo_choice}"] = fixin.loc[j, "on_sentence_num2"]
if fixin.loc[j, f"sent_reg_in2_{algo_choice}"] == 1 and fixin.loc[j - 1, f"sent_reg_in2_{algo_choice}"] != 1:
fixin.loc[j, f"sent_runid2_{algo_choice}"] = fixin.loc[j - 1, f"sent_runid2_{algo_choice}"] + 1
else:
fixin.loc[j, f"sent_runid2_{algo_choice}"] = fixin.loc[j - 1, f"sent_runid2_{algo_choice}"]
if fixin.loc[j, "on_sentence_num2"] >= fixin.loc[j - 1, "on_sentence_num2"]:
if fixin.loc[j, "on_sentence_num2"] in mem:
if fixin.loc[j, "on_sentence_num2"] == max(mem):
fixin.loc[j, f"firstpass_{algo_choice}"] = 1
else:
fixin.loc[j, f"firstpass_{algo_choice}"] = 0
else:
mem.append(fixin.loc[j, "on_sentence_num2"])
fixin.loc[j, f"firstpass_{algo_choice}"] = 1
else:
fixin.loc[j, f"firstpass_{algo_choice}"] = 0
if fixin.loc[j, f"on_word_number_{algo_choice}"] > max(wordmem):
wordmem.append(fixin.loc[j, f"on_word_number_{algo_choice}"])
fixin.loc[j, f"forward_{algo_choice}"] = 1
elif fixin.loc[j, f"on_word_number_{algo_choice}"] < max(wordmem):
fixin.loc[j, f"forward_{algo_choice}"] = 0
for i in range(len(fixin) - 3):
if fixin.loc[i, f"line_change_{algo_choice}"] > 0:
fixin.loc[i, "on_word_number"] = 0
fixin.loc[i + 1, f"forward_{algo_choice}"] = 1
fixin.loc[i + 2, f"forward_{algo_choice}"] = 1
fixin.loc[i + 3, f"forward_{algo_choice}"] = 1
for i in range(1, len(fixin) - 3):
if fixin.loc[i, "on_sentence_num2"] > fixin.loc[i - 1, "on_sentence_num2"]:
fixin.loc[i + 1, f"forward_{algo_choice}"] = 1
fixin.loc[i + 2, f"forward_{algo_choice}"] = 1
fixin["id2"] = fixin["id"] + ":" + fixin[f"sent_runid2_{algo_choice}"].astype(str)
fixin = fixin.sort_values(["trial_id", "fixation_number"])
sent = fixin.copy().drop_duplicates(subset="id", keep="first")
names = [
"id",
"subject",
"trial_id",
"item",
"condition",
"on_sentence_num2",
f"on_sentence_num_{algo_choice}",
f"on_sentence_{algo_choice}",
"num_words_in_sentence",
]
sent = sent[names].reset_index(drop=True)
sent[f"firstrun_skip_{algo_choice}"] = 0
mem = []
for j in range(len(sent)):
if not pd.isna(sent.loc[j, f"on_sentence_num_{algo_choice}"]):
if len(mem) > 0 and sent.loc[j, f"on_sentence_num_{algo_choice}"] < max(mem) and not pd.isna(max(mem)):
sent.loc[j, f"firstrun_skip_{algo_choice}"] = 1
if (
not pd.isna(sent.loc[j, f"on_sentence_num_{algo_choice}"])
and sent.loc[j, f"on_sentence_num_{algo_choice}"] not in mem
):
mem.append(sent.loc[j, f"on_sentence_num_{algo_choice}"])
if "total_n_fixations" in measures_to_calc:
tmp = fixin.groupby("id")["duration"].count().reset_index()
tmp.columns = ["id", f"total_n_fixations_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"total_n_fixations_{algo_choice}": 0}, inplace=True)
tmp = fixin.groupby("id")["duration"].sum().reset_index()
tmp.columns = ["id", f"total_dur_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"total_dur_{algo_choice}": 0}, inplace=True)
if "firstpass_n_fixations" in measures_to_calc:
tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 1].groupby("id")["duration"].count().reset_index()
tmp.columns = ["id", f"firstpass_n_fixations_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"firstpass_n_fixations_{algo_choice}": 0}, inplace=True)
if "firstpass_dur" in measures_to_calc:
tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 1].groupby("id")["duration"].sum().reset_index()
tmp.columns = ["id", f"firstpass_dur_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"firstpass_dur_{algo_choice}": 0}, inplace=True)
if "firstpass_forward_n_fixations" in measures_to_calc:
tmp = (
fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 1)]
.groupby("id")["duration"]
.count()
.reset_index()
)
tmp.columns = ["id", f"firstpass_forward_n_fixations_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"firstpass_forward_n_fixations_{algo_choice}": 0}, inplace=True)
if "firstpass_forward_dur" in measures_to_calc:
tmp = (
fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 1)]
.groupby("id")["duration"]
.sum()
.reset_index()
)
tmp.columns = ["id", f"firstpass_forward_dur_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"firstpass_forward_dur_{algo_choice}": 0}, inplace=True)
if "firstpass_reread_n_fixations" in measures_to_calc:
tmp = (
fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 0)]
.groupby("id")["duration"]
.count()
.reset_index()
)
tmp.columns = ["id", f"firstpass_reread_n_fixations_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"firstpass_reread_n_fixations_{algo_choice}": 0}, inplace=True)
if "firstpass_reread_dur" in measures_to_calc:
tmp = (
fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 0)]
.groupby("id")["duration"]
.sum()
.reset_index()
)
tmp.columns = ["id", f"firstpass_reread_dur_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"firstpass_reread_dur_{algo_choice}": 0}, inplace=True)
if sum(fixin[f"firstpass_{algo_choice}"] == 0) != 0:
if "lookback_n_fixations" in measures_to_calc:
tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 0].groupby("id")["duration"].count().reset_index()
tmp.columns = ["id", f"lookback_n_fixations_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"lookback_n_fixations_{algo_choice}": 0}, inplace=True)
if "lookback_dur" in measures_to_calc:
tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 0].groupby("id")["duration"].sum().reset_index()
tmp.columns = ["id", f"lookback_dur_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"lookback_dur_{algo_choice}": 0}, inplace=True)
fixin["id2"] = fixin.apply(lambda row: f"{row['id']}:{row[f'sent_runid2_{algo_choice}']}", axis=1)
sent2 = fixin.drop_duplicates(subset="id2", keep="first")
sent3 = sent2[(sent2[f"firstpass_{algo_choice}"] == 0) & (~pd.isna(sent2[f"sent_reg_in_from2_{algo_choice}"]))]
tmp = fixin[fixin["id2"].isin(sent3["id2"])].groupby("id")["duration"].count().reset_index()
tmp.columns = ["id", f"lookfrom_n_fixations_{algo_choice}"]
tmp2 = pd.merge(tmp, sent3)
tmp3 = tmp2.groupby("last")[f"lookfrom_n_fixations_{algo_choice}"].sum().reset_index()
tmp3.columns = ["last", f"lookfrom_n_fixations_{algo_choice}"]
sent = pd.merge(sent, tmp3, left_on="id", right_on="last", how="left")
sent.fillna({f"lookfrom_n_fixations_{algo_choice}": 0}, inplace=True)
if "lookfrom_dur" in measures_to_calc:
tmp = fixin[fixin["id2"].isin(sent3["id2"])].groupby("id")["duration"].sum().reset_index()
tmp.columns = ["id", f"lookfrom_dur_{algo_choice}"]
tmp2 = pd.merge(tmp, sent3)
tmp3 = tmp2.groupby("last")[f"lookfrom_dur_{algo_choice}"].sum().reset_index()
tmp3.columns = ["last", f"lookfrom_dur_{algo_choice}"]
sent = pd.merge(sent, tmp3, left_on="id", right_on="last", how="left")
sent.fillna({f"lookfrom_dur_{algo_choice}": 0}, inplace=True)
# Firstrun
firstruntmp = fixin[fixin[f"sentence_run_{algo_choice}"] == 1]
if "firstrun_reg_in" in measures_to_calc:
tmp = firstruntmp.groupby("id")[f"sent_reg_in2_{algo_choice}"].max().reset_index()
tmp.columns = ["id", f"firstrun_reg_in_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"firstrun_reg_in_{algo_choice}": 0}, inplace=True)
if "firstrun_reg_out" in measures_to_calc:
tmp = firstruntmp.groupby("id")[f"sent_reg_out2_{algo_choice}"].max().reset_index()
tmp.columns = ["id", f"firstrun_reg_out_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent.fillna({f"firstrun_reg_out_{algo_choice}": 0}, inplace=True)
# Complete sentence
gopasttmp = fixin.copy()
gopasttmp[f"on_sentence_num_{algo_choice}"] = gopasttmp["on_sentence_num2"]
tmp = compute_gopast_sentence(gopasttmp, algo_choice)
names = ["id", f"gopast_{algo_choice}", f"selgopast_{algo_choice}"]
tmp = tmp[names]
tmp = tmp.drop_duplicates(subset="id", keep="first")
tmp.columns = ["id", f"gopast_{algo_choice}", f"gopast_sel_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
# Nrun
tmp = fixin.groupby("id")[f"sentence_run_{algo_choice}"].max().reset_index()
tmp.columns = ["id", f"nrun_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
# Reread
sent[f"reread_{algo_choice}"] = sent.apply(lambda row: 1 if row[f"nrun_{algo_choice}"] > 1 else 0, axis=1)
# Reg_in
tmp = fixin.groupby("id")[f"sent_reg_in2_{algo_choice}"].max().reset_index()
tmp.columns = ["id", f"reg_in_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
# Reg_out
tmp = fixin.groupby("id")[f"sent_reg_out2_{algo_choice}"].max().reset_index()
tmp.columns = ["id", f"reg_out_{algo_choice}"]
sent = pd.merge(sent, tmp, on="id", how="left")
sent = sent.sort_values(by=f"on_sentence_num_{algo_choice}").reset_index(drop=True)
# Rate
sent[f"rate_{algo_choice}"] = round(60000 / (sent[f"total_dur_{algo_choice}"] / sent["num_words_in_sentence"]))
# Write out
item = sentitem.copy()
sent = pd.merge(
sent,
item.rename({"in_sentence_number": f"on_sentence_num_{algo_choice}"}, axis=1),
on=f"on_sentence_num_{algo_choice}",
how="left",
)
sent[f"skip_{algo_choice}"] = 0
sent.loc[pd.isna(sent[f"nrun_{algo_choice}"]), f"skip_{algo_choice}"] = 1
names = [
"subject",
"trial_id",
"item",
"condition",
] + [
c
for c in [
f"on_sentence_num_{algo_choice}",
f"on_sentence_{algo_choice}",
"num_words_in_sentence",
f"skip_{algo_choice}",
f"nrun_{algo_choice}",
f"reread_{algo_choice}",
f"reg_in_{algo_choice}",
f"reg_out_{algo_choice}",
f"total_n_fixations_{algo_choice}",
f"total_dur_{algo_choice}",
f"rate_{algo_choice}",
f"gopast_{algo_choice}",
f"gopast_sel_{algo_choice}",
f"firstrun_skip_{algo_choice}",
f"firstrun_reg_in_{algo_choice}",
f"firstrun_reg_out_{algo_choice}",
f"firstpass_n_fixations_{algo_choice}",
f"firstpass_dur_{algo_choice}",
f"firstpass_forward_n_fixations_{algo_choice}",
f"firstpass_forward_dur_{algo_choice}",
f"firstpass_reread_n_fixations_{algo_choice}",
f"firstpass_reread_dur_{algo_choice}",
f"lookback_n_fixations_{algo_choice}",
f"lookback_dur_{algo_choice}",
f"lookfrom_n_fixations_{algo_choice}",
f"lookfrom_dur_{algo_choice}",
]
if (c in sent.columns and c.replace(f"_{algo_choice}", "") in measures_to_calc)
]
sent = sent[names].copy()
sent.rename(
{
f"on_sentence_num_{algo_choice}": "sentence_number",
f"on_sentence_{algo_choice}": "sentence",
"num_words_in_sentence": "number_of_words",
},
axis=1,
inplace=True,
)
if save_to_csv:
subj = fix["subject"].iloc[0]
trial_id = fix["trial_id"].iloc[0]
sent.to_csv(RESULTS_FOLDER / f"{subj}_{trial_id}_{algo_choice}_sentence_measures.csv")
return sent.copy()
def compute_gopast_sentence(fixin, algo_choice):
# create response vectors
fixin[f"gopast_{algo_choice}"] = np.nan
fixin[f"selgopast_{algo_choice}"] = np.nan
# compute trialid within person
ias = fixin[f"on_sentence_num_{algo_choice}"].unique()
# compute measures
for j in ias:
min_fixation_number_j = fixin.loc[fixin[f"on_sentence_num_{algo_choice}"] == j, "fixation_number"].min(
skipna=True
)
next_min_fixation_number = (
fixin.loc[fixin[f"on_sentence_num_{algo_choice}"] > j, "fixation_number"].min(skipna=True)
if j != ias[-1]
else float("inf")
)
mask = (
(fixin["fixation_number"] >= min_fixation_number_j)
& (fixin["fixation_number"] < next_min_fixation_number)
& (~fixin[f"on_sentence_num_{algo_choice}"].isna())
)
fixin.loc[fixin[f"on_sentence_num_{algo_choice}"] == j, f"gopast_{algo_choice}"] = fixin.loc[
mask, "duration"
].sum(skipna=True)
mask_j = (
(fixin["fixation_number"] >= min_fixation_number_j)
& (fixin["fixation_number"] < next_min_fixation_number)
& (~fixin[f"on_sentence_num_{algo_choice}"].isna())
& (fixin[f"on_sentence_num_{algo_choice}"] == j)
)
fixin.loc[fixin[f"on_sentence_num_{algo_choice}"] == j, f"selgopast_{algo_choice}"] = fixin.loc[
mask_j, "duration"
].sum(skipna=True)
return fixin
def aggregate_trials(dffix_combined, wordcomb, all_trials_by_subj, algo_choices):
tmp = dffix_combined.copy()
trial = tmp.drop_duplicates(subset="subject_trialID", keep="first")
names = ["subject_trialID", "subject", "trial_id", "item", "condition"]
trial = trial[names].copy()
for index, row in trial.iterrows():
selected_trial = all_trials_by_subj[row["subject"]][row["trial_id"]]
info_keys = [
k for k in selected_trial.keys() if k in ["trial_start_time", "trial_end_time", "question_correct"]
]
if row["subject"] in all_trials_by_subj and row["trial_id"] in all_trials_by_subj[row["subject"]]:
if selected_trial["Fixation Cleaning Stats"]["Discard fixation before or after blinks"]:
trial.at[index, "blink"] = selected_trial["Fixation Cleaning Stats"][
"Number of discarded fixations due to blinks"
]
for key, value in selected_trial.items():
if key in info_keys:
trial.at[index, key] = value
subdf = wordcomb.copy().loc[:, ["subject_trialID"]].drop_duplicates(subset=["subject_trialID"], keep="first")
trial = pd.merge(trial, subdf, on="subject_trialID", how="left")
for sub, subdf in wordcomb.groupby("subject"):
for trialid, trialdf in subdf.groupby("trial_id"):
trial.loc[((trial["subject"] == sub) & (trial["trial_id"] == trialid)), "number_of_words_in_trial"] = (
trialdf["word"].count()
)
trial.sort_values(by="subject_trialID", inplace=True)
if "blink" in tmp.columns:
blink = tmp.groupby("subject_trialID")["blink"].sum() / 2
blink = blink.round().reset_index()
trial = pd.merge(trial, blink, on="subject_trialID", how="left")
trial["nfix"] = tmp.groupby("subject_trialID")["fixation_number"].agg("count").values
new_col_dfs = []
new_col_dfs.append(tmp.groupby("subject_trialID")["duration"].agg("mean").reset_index(name="mean_fix_duration"))
new_col_dfs.append(tmp.groupby("subject_trialID")["duration"].agg("sum").reset_index(name="total_fix_duration"))
for algo_choice in algo_choices:
new_col_dfs.append(
tmp.groupby("subject_trialID")[f"word_runid_{algo_choice}"]
.agg("max")
.reset_index(name=f"nrun_{algo_choice}")
)
tmp[f"saccade_length_{algo_choice}"] = tmp[f"word_land_{algo_choice}"] + tmp[f"word_launch_{algo_choice}"]
new_col_dfs.append(
tmp[(tmp[f"saccade_length_{algo_choice}"] >= 0) & tmp[f"saccade_length_{algo_choice}"].notna()]
.groupby("subject_trialID")[f"saccade_length_{algo_choice}"]
.agg("mean")
.reset_index(name=f"saccade_length_{algo_choice}")
)
word = wordcomb.copy()
if f"firstrun_skip_{algo_choice}" in wordcomb.columns:
new_col_dfs.append(
word.groupby("subject_trialID")[f"firstrun_skip_{algo_choice}"]
.agg("mean")
.reset_index(name=f"skip_{algo_choice}")
)
if f"refix_{algo_choice}" in wordcomb.columns:
new_col_dfs.append(
word.groupby("subject_trialID")[f"refix_{algo_choice}"]
.agg("mean")
.reset_index(name=f"refix_{algo_choice}")
)
if f"reg_in_{algo_choice}" in wordcomb.columns:
new_col_dfs.append(
word.groupby("subject_trialID")[f"reg_in_{algo_choice}"]
.agg("mean")
.reset_index(name=f"reg_{algo_choice}")
)
if f"firstrun_dur_{algo_choice}" in wordcomb.columns:
new_col_dfs.append(
word.groupby("subject_trialID")[f"firstrun_dur_{algo_choice}"]
.agg("sum")
.reset_index(name=f"firstpass_{algo_choice}")
)
if f"total_fixation_duration_{algo_choice}" in wordcomb.columns:
new_col_dfs.append(
(word[f"total_fixation_duration_{algo_choice}"] - word[f"firstrun_dur_{algo_choice}"])
.groupby(word["subject_trialID"])
.agg("sum")
.reset_index(name=f"rereading_{algo_choice}")
)
trial = pd.concat(
[trial.set_index("subject_trialID")] + [df.set_index("subject_trialID") for df in new_col_dfs], axis=1
).reset_index()
trial[f"reading_rate_{algo_choice}"] = (
60000 / (trial["total_fix_duration"] / trial["number_of_words_in_trial"])
).round()
return trial.copy()
def aggregate_subjects(trials, algo_choices):
trial_aggregates = trials.groupby("subject")[["nfix", "blink"]].mean().round(3).reset_index()
trial_aggregates = trial_aggregates.merge(
trials.groupby("subject")["question_correct"].sum().reset_index(name="n_question_correct"), on="subject"
)
trial_aggregates = trial_aggregates.merge(
trials.groupby("subject")["trial_id"].count().reset_index(name="ntrial"), on="subject"
)
for algo_choice in algo_choices:
cols_to_do = [
c
for c in [
f"saccade_length_{algo_choice}",
f"reg_{algo_choice}",
f"mean_fix_duration_{algo_choice}",
f"total_fix_duration_{algo_choice}",
f"reading_rate_{algo_choice}",
f"refix_{algo_choice}",
f"nrun_{algo_choice}",
f"skip_{algo_choice}",
]
if c in trials.columns
]
trial_aggregates_temp = trials.groupby("subject")[cols_to_do].mean().round(3).reset_index()
trial_aggregates = pd.merge(trial_aggregates, trial_aggregates_temp, how="left", on="subject")
return trial_aggregates