|
""" |
|
Mostly adapted from: https://github.com/sascha2schroeder/popEye |
|
""" |
|
|
|
import numpy as np |
|
import pandas as pd |
|
from icecream import ic |
|
from scipy import stats |
|
import pathlib as pl |
|
|
|
RESULTS_FOLDER = pl.Path("results") |
|
|
|
|
|
def compute_velocity(xy, samp=1000):
    """Differentiate gaze samples into velocity and append vx/vy columns.

    Uses a 5-point central difference for interior samples (popEye /
    Engbert & Kliegl style) and a 2-point central difference for the
    second and second-to-last samples; the first and last samples keep
    velocity 0.

    Args:
        xy: DataFrame whose column 0 is "time" and columns 1:3 are the
            horizontal/vertical gaze positions (e.g. x, y).
        samp: sampling rate in Hz (default 1000, previously hard-coded).

    Returns:
        A new DataFrame: the input columns joined (on "time") with
        "vx" and "vy" velocity columns.
    """
    N = xy.shape[0]
    v = pd.DataFrame(data=np.zeros((N, 3)), columns=["time", "vx", "vy"])
    v["time"] = xy["time"]

    # Interior samples: v[i] = samp/6 * (p[i+2] + p[i+1] - p[i-1] - p[i-2]).
    v.iloc[2 : (N - 2), 1:3] = (
        samp
        / 6
        * (
            xy.iloc[4:N, 1:3].values
            + xy.iloc[3 : (N - 1), 1:3].values
            - xy.iloc[1 : (N - 3), 1:3].values
            - xy.iloc[0 : (N - 4), 1:3].values
        )
    )
    # Edge samples: 2-point central difference over neighbours two apart.
    v.iloc[1, 1:3] = samp / 2 * (xy.iloc[2, 1:3].values - xy.iloc[0, 1:3].values)
    # BUGFIX: was xy.iloc[N - 4]; the symmetric counterpart of the leading
    # edge (iloc[2] - iloc[0]) is iloc[N - 1] - iloc[N - 3].
    v.iloc[(N - 2), 1:3] = samp / 2 * (xy.iloc[N - 1, 1:3].values - xy.iloc[N - 3, 1:3].values)

    # NOTE: requires N >= 5 distinct samples; assumes "time" values are unique.
    xy = pd.concat([xy.set_index("time"), v.set_index("time")], axis=1).reset_index()
    return xy
|
|
|
|
|
def event_long(events_df):
    """Fold BLINK events into their preceding event and flag blink neighbours.

    Steps:
      1. Drop zero/negative-duration events.
      2. Each BLINK row inherits the data columns of the event before it,
         and that preceding event is deleted (the blink "absorbs" it).
      3. Events immediately before/after a BLINK are flagged with
         blink_after / blink_before, and a combined boolean "blink" column.

    Args:
        events_df: DataFrame with at least "start", "stop", "msg" columns.

    Returns:
        A cleaned copy with "num", "blink_before", "blink_after", "blink".
    """
    events_df["duration"] = events_df["stop"] - events_df["start"]
    events_df = events_df[events_df["duration"] > 0]
    events_df = events_df.drop(columns=["duration"])
    events_df.reset_index(drop=True, inplace=True)

    # Columns copied from the preceding event onto each blink row (all but "msg").
    tmplong_cols = list(events_df.columns)
    tmplong_cols.remove("msg")
    events_df["del"] = 0
    for i in events_df.index:
        if events_df.loc[i, "msg"] == "BLINK":
            if i == 0:
                # No preceding event to absorb.
                continue
            for col in tmplong_cols:
                events_df.loc[i, col] = events_df.loc[i - 1, col]
            events_df.loc[i - 1, "del"] = 1

    events_df = events_df[events_df["del"] == 0]
    events_df = events_df.drop(columns=["del"])
    events_df.reset_index(drop=True, inplace=True)
    events_df["num"] = range(len(events_df))

    events_df["blink_before"] = 0
    events_df["blink_after"] = 0

    for i in events_df.index:
        if events_df.loc[i, "msg"] == "BLINK":
            # BUGFIX: guard i > 0. Previously a BLINK at index 0 executed
            # .loc[-1, "blink_after"] = 1, which silently created a phantom
            # row labelled -1 (loc setting-with-enlargement).
            if i > 0:
                events_df.loc[i - 1, "blink_after"] = 1
            if i < len(events_df) - 1:
                events_df.loc[i + 1, "blink_before"] = 1

    events_df["blink"] = (events_df["blink_before"] == 1) | (events_df["blink_after"] == 1)
    return events_df.copy()
|
|
|
|
|
def compute_non_line_dependent_saccade_measures(saccade_df, trial_dict):
    """Add per-saccade amplitude/angle measures that do not need line assignment.

    For each saccade (row of saccade_df) the gaze samples between its
    start_time and end_time are inspected to compute dX/dY extents,
    pixel amplitude and direction. Calibration method "H3" is treated as
    horizontal-only: the y-based measures are skipped and the amplitude
    fields are set to NaN. Mutates both saccade_df and trial_dict["gaze_df"]
    in place and returns saccade_df.

    Args:
        saccade_df: DataFrame with start_time, end_time, angle, xs, xe
            (and optionally blink_before/blink_after) per saccade.
        trial_dict: trial metadata dict providing "trial_id", "gaze_df"
            (with "time", "x", "y" columns) and optionally "calibration_method".
    """
    saccade_df["trial_id"] = trial_dict["trial_id"]
    gaze_df = trial_dict["gaze_df"]
    for s in range(len(saccade_df)):
        is_directional_deviation = False
        a = saccade_df["start_time"][s]
        b = saccade_df["end_time"][s]

        # If no valid x sample falls inside [a, b], blank the span.
        # NOTE(review): .loc[a:b] slices by index *label* with time values —
        # assumes gaze_df's index corresponds to time; confirm against caller.
        if not gaze_df["x"][[True if (a <= x <= b) else False for x in gaze_df["time"]]].any():
            gaze_df.loc[a:b, "x"] = np.nan

        bool_vec = (gaze_df["time"] >= a) & (gaze_df["time"] <= b)
        # Only compute extents when the saccade's samples are complete (no NaNs).
        if (not gaze_df["x"][bool_vec].isna().any()) and bool_vec.any():
            minx = min(gaze_df.loc[bool_vec, "x"])
            maxx = max(gaze_df.loc[bool_vec, "x"])
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                miny = min(gaze_df.loc[bool_vec, "y"])
                maxy = max(gaze_df.loc[bool_vec, "y"])
            # Index positions of the x extrema give the movement direction sign.
            ix1 = gaze_df.loc[bool_vec, "x"].index[np.argmin(gaze_df.loc[bool_vec, "x"])]
            ix2 = gaze_df.loc[bool_vec, "x"].index[np.argmax(gaze_df.loc[bool_vec, "x"])]
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                iy1 = gaze_df.loc[bool_vec, "y"].index[np.argmin(gaze_df.loc[bool_vec, "y"])]
                iy2 = gaze_df.loc[bool_vec, "y"].index[np.argmax(gaze_df.loc[bool_vec, "y"])]
            # Signed horizontal/vertical extent in pixels.
            saccade_df.loc[s, "dX"] = round(np.sign(ix2 - ix1) * (maxx - minx))
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                saccade_df.loc[s, "dY"] = round(np.sign(iy2 - iy1) * (maxy - miny))

            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                # Amplitude in pixels plus direction in radians and degrees.
                saccade_df.loc[s, "amp_px"] = round(
                    np.sqrt(saccade_df.loc[s, "dX"] ** 2 + saccade_df.loc[s, "dY"] ** 2)
                )
                saccade_df.loc[s, "amp_angle"] = round(np.arctan2(saccade_df.loc[s, "dY"], saccade_df.loc[s, "dX"]), 2)
                saccade_df.loc[s, "amp_angle_deg"] = round(
                    np.arctan2(saccade_df.loc[s, "dY"], saccade_df.loc[s, "dX"]) * (180 / np.pi), 2
                )
            else:
                # Horizontal-only (H3) calibration: amplitude is undefined.
                saccade_df.loc[s, "amp_px"] = np.nan
                saccade_df.loc[s, "amp_angle"] = np.nan
                saccade_df.loc[s, "amp_angle_deg"] = np.nan

        # Directional deviation: a mostly-vertical saccade (35°..145°) that
        # still moves rightwards, and is not adjacent to a blink.
        if 35 <= abs(saccade_df.loc[s, "angle"]) <= 145:
            if saccade_df.loc[s, "xe"] - saccade_df.loc[s, "xs"] > 0 and not (
                "blink_before" in saccade_df.columns
                and (saccade_df.loc[s, "blink_before"] or saccade_df.loc[s, "blink_after"])
            ):
                is_directional_deviation = True

        saccade_df.loc[s, "is_directional_deviation"] = is_directional_deviation

    return saccade_df
|
|
|
|
|
def compute_saccade_measures(saccade_df, trial_dict, algo_choice):
    """Add line-dependent saccade measures for one line-assignment algorithm.

    Computes per-saccade: line change / next-line flags, return-sweep flag
    (last saccade of each start line that goes to the next line), letter
    distance, peak velocity, and directional deviation. Column names are
    suffixed with the algorithm name when algo_choice is given. Mutates
    saccade_df (and trial_dict["gaze_df"]) in place; returns a copy.

    Args:
        saccade_df: per-saccade DataFrame with lines/linee (start/end line),
            lets/lete (start/end letter), start_time, end_time, angle, xs, xe.
        trial_dict: trial metadata with "gaze_df" (time, x, vx, vy columns),
            "subject", "trial_id", optionally "calibration_method".
        algo_choice: algorithm name used as column suffix, or None.
    """
    if algo_choice is not None:
        algo_str = f"_{algo_choice}"
    else:
        algo_str = ""
    gaze_df = trial_dict["gaze_df"]
    saccade_df.reset_index(drop=True, inplace=True)
    saccade_df.loc[:, f"has_line_change{algo_str}"] = (
        saccade_df.loc[:, f"lines{algo_str}"] != saccade_df.loc[:, f"linee{algo_str}"]
    )
    saccade_df.loc[:, f"goes_to_next_line{algo_str}"] = saccade_df.loc[:, f"linee{algo_str}"] == (
        saccade_df.loc[:, f"lines{algo_str}"] + 1
    )
    saccade_df.loc[:, f"is_directional_deviation{algo_str}"] = False
    saccade_df.loc[:, f"is_return_sweep{algo_str}"] = False

    # Return sweep: the last saccade starting on a line, if it lands on the next line.
    for sidx, subdf in saccade_df.groupby(f"lines{algo_str}"):
        if subdf.iloc[-1][f"goes_to_next_line{algo_str}"]:
            saccade_df.loc[subdf.index[-1], f"is_return_sweep{algo_str}"] = True

    for s in range(len(saccade_df)):
        is_directional_deviation = False
        a = saccade_df["start_time"][s]
        b = saccade_df["end_time"][s]

        # If no valid x sample falls inside [a, b], blank the span.
        # NOTE(review): .loc[a:b] slices by index *label* with time values —
        # assumes gaze_df's index corresponds to time; confirm against caller.
        if not gaze_df["x"][[True if (a <= x <= b) else False for x in gaze_df["time"]]].any():
            gaze_df.loc[a:b, "x"] = np.nan

        # Letter distance covered by the saccade (end letter - start letter).
        if saccade_df.loc[s, f"lete{algo_str}"] is None or saccade_df.loc[s, f"lets{algo_str}"] is None:
            ic(
                f"None found for compute_saccade_measures at index {s} for subj {trial_dict['subject']} and trial {trial_dict['trial_id']}"
            )
        else:
            saccade_df.loc[s, f"dist_let{algo_str}"] = (
                saccade_df.loc[s, f"lete{algo_str}"] - saccade_df.loc[s, f"lets{algo_str}"]
            )

        bool_vec = (gaze_df["time"] >= a) & (gaze_df["time"] <= b)
        # Peak velocity only when the saccade's samples are complete (no NaNs).
        if (not gaze_df["x"][bool_vec].isna().any()) and bool_vec.any():
            if "calibration_method" not in trial_dict or trial_dict["calibration_method"] != "H3":
                vx = gaze_df.vx[bool_vec]
                vy = gaze_df.vy[bool_vec]
                if not vx.empty and not vy.empty:
                    saccade_df.loc[s, f"peak_vel{algo_str}"] = round(np.nanmax(np.sqrt(vx**2 + vy**2)))
            else:
                # Horizontal-only (H3) calibration: use vx alone.
                saccade_df.loc[s, f"peak_vel{algo_str}"] = round(np.nanmax(np.sqrt(gaze_df.vx[bool_vec] ** 2)))

        # Directional deviation: a mostly-vertical saccade (35°..145°) that
        # still moves rightwards, and is not adjacent to a blink.
        if 35 <= abs(saccade_df.loc[s, f"angle{algo_str}"]) <= 145:
            if saccade_df.loc[s, "xe"] - saccade_df.loc[s, "xs"] > 0 and not (
                "blink_before" in saccade_df.columns
                and (saccade_df.loc[s, "blink_before"] or saccade_df.loc[s, "blink_after"])
            ):
                is_directional_deviation = True

        saccade_df.loc[s, f"is_directional_deviation{algo_str}"] = is_directional_deviation
    return saccade_df.copy()
|
|
|
|
|
def get_angle_and_eucl_dist(saccade_df, algo_choice=None):
    """Add saccade vector components, Euclidean length and direction.

    Writes "xe_minus_xs", "ye_minus_ys{suffix}", "eucledian_distance"
    (length in pixels) and "angle{suffix}" (degrees, via arctan2) onto
    saccade_df and returns it. The y/angle columns are suffixed with the
    algorithm name when algo_choice is given.
    """
    algo_str = "" if algo_choice is None else f"_{algo_choice}"

    dx = saccade_df["xe"] - saccade_df["xs"]
    dy = saccade_df[f"ye{algo_str}"] - saccade_df[f"ys{algo_str}"]

    saccade_df["xe_minus_xs"] = dx
    saccade_df[f"ye_minus_ys{algo_str}"] = dy
    saccade_df["eucledian_distance"] = np.sqrt(dx**2 + dy**2)
    saccade_df[f"angle{algo_str}"] = np.arctan2(dy, dx) * (180 / np.pi)
    return saccade_df
|
|
|
|
|
def compute_saccade_length(dffix, stimulus_df, algo_choice):
    """Compute incoming/outgoing saccade length in letters per fixation.

    Writes "sac_in_{algo_choice}" (distance from the previous fixation)
    and "sac_out_{algo_choice}" (distance to the next fixation) onto
    dffix. Within a line it is a plain letter-index difference; across
    lines each fixation's letter index is first made line-relative by
    subtracting the first letter index of its assigned line, and the
    sign is flipped for regressions to an earlier line. Fixations with a
    missing line number or letter number are skipped. Mutates and
    returns dffix.
    """
    line_col = f"line_num_{algo_choice}"
    let_col = f"letternum_{algo_choice}"

    def _line_start(line_number):
        # Letter index of the first letter on the given stimulus line.
        return np.min(stimulus_df[stimulus_df["assigned_line"] == line_number]["letternum"])

    def _usable(idx_a, idx_b):
        # Both fixations need a line assignment and a letter number.
        return not (
            pd.isna(dffix.at[idx_a, line_col])
            or pd.isna(dffix.at[idx_b, line_col])
            or dffix.at[idx_a, let_col] is None
            or dffix.at[idx_b, let_col] is None
        )

    # Incoming saccade: compare each fixation with its predecessor.
    for j in dffix.index:
        if j == 0 or not _usable(j, j - 1):
            continue
        prev_line = dffix.at[j - 1, line_col]
        cur_line = dffix.at[j, line_col]
        prev_let = dffix.at[j - 1, let_col]
        cur_let = dffix.at[j, let_col]
        if prev_line == cur_line:
            dffix.at[j, f"sac_in_{algo_choice}"] = cur_let - prev_let
        elif prev_line < cur_line:
            # Forward line change: compare line-relative positions.
            dffix.at[j, f"sac_in_{algo_choice}"] = (cur_let - _line_start(cur_line)) - (
                prev_let - _line_start(prev_line)
            )
        else:
            # Regression to an earlier line: sign is flipped.
            dffix.at[j, f"sac_in_{algo_choice}"] = (prev_let - _line_start(prev_line)) - (
                cur_let - _line_start(cur_line)
            )

    # Outgoing saccade: compare each fixation with its successor.
    for j in range(len(dffix) - 1):
        if not _usable(j, j + 1):
            continue
        cur_line = dffix.at[j, line_col]
        next_line = dffix.at[j + 1, line_col]
        cur_let = dffix.at[j, let_col]
        next_let = dffix.at[j + 1, let_col]
        if next_line == cur_line:
            dffix.at[j, f"sac_out_{algo_choice}"] = next_let - cur_let
        elif next_line > cur_line:
            dffix.at[j, f"sac_out_{algo_choice}"] = (next_let - _line_start(next_line)) - (
                cur_let - _line_start(cur_line)
            )
        else:
            dffix.at[j, f"sac_out_{algo_choice}"] = (cur_let - _line_start(cur_line)) - (
                next_let - _line_start(next_line)
            )

    return dffix
|
|
|
|
|
def compute_launch_distance(dffix, algo_choice):
    """Compute the word launch distance for each fixation.

    Derives "word_launch_{algo_choice}" from the incoming saccade length
    and the landing position: for forward saccades (sac_in >= 0) the
    current fixation's landing position is subtracted; for regressive
    saccades the previous fixation's landing position is added.
    Fixations without an incoming saccade length are skipped. Mutates
    and returns dffix.
    """
    sac_col = f"sac_in_{algo_choice}"
    land_col = f"word_land_{algo_choice}"
    launch_col = f"word_launch_{algo_choice}"

    for row in range(1, dffix.shape[0]):
        incoming = dffix.loc[row, sac_col]
        if pd.isna(incoming):
            continue
        if incoming >= 0:
            # Forward saccade: launch site relative to this word's landing position.
            dffix.loc[row, launch_col] = incoming - dffix.loc[row, land_col]
        else:
            # Regression: launch site relative to the previous landing position.
            dffix.loc[row, launch_col] = incoming + dffix.loc[row - 1, land_col]

    return dffix
|
|
|
|
|
def compute_refixation(dffix, algo_choice):
    """Flag fixations that stay on the same word / sentence as the previous one.

    Writes boolean "word_refix_{algo_choice}" and
    "sentence_refix_{algo_choice}" columns. The first fixation and any
    pair with a missing word number are left False. Mutates and returns
    dffix.
    """
    word_col = f"on_word_number_{algo_choice}"
    sent_col = f"on_sentence_num_{algo_choice}"
    word_flag = f"word_refix_{algo_choice}"
    sent_flag = f"sentence_refix_{algo_choice}"

    dffix.loc[:, word_flag] = False
    dffix.loc[:, sent_flag] = False

    for idx in dffix.index:
        # Skip the first fixation and pairs without word assignments.
        if idx == 0:
            continue
        if pd.isna(dffix.loc[idx, word_col]) or pd.isna(dffix.loc[idx - 1, word_col]):
            continue
        dffix.loc[idx, word_flag] = dffix.loc[idx, word_col] == dffix.loc[idx - 1, word_col]
        dffix.loc[idx, sent_flag] = dffix.loc[idx, sent_col] == dffix.loc[idx - 1, sent_col]

    return dffix
|
|
|
|
|
def compute_regression(dffix, algo_choice):
    """Flag word- and sentence-level regressions between consecutive fixations.

    A regression occurs when a fixation lands on an earlier word
    (sentence) than its predecessor. For each regression, reg_in is set
    on the later fixation and reg_out on the earlier one, together with
    the "from"/"to" word (sentence) numbers. Results are merged back
    onto dffix via "fixation_number" and the merged frame is returned.
    """
    work = dffix.copy()
    work.reset_index(drop=True, inplace=True)
    word_col = f"on_word_number_{algo_choice}"
    sent_col = f"on_sentence_num_{algo_choice}"

    # Initialise all regression flags / targets.
    work.loc[:, f"word_reg_out_{algo_choice}"] = False
    work.loc[:, f"word_reg_in_{algo_choice}"] = False
    work.loc[:, f"word_reg_out_to_{algo_choice}"] = float("nan")
    work.loc[:, f"word_reg_in_from_{algo_choice}"] = float("nan")
    work.loc[:, f"sentence_reg_out_{algo_choice}"] = False
    work.loc[:, f"sentence_reg_in_{algo_choice}"] = False
    work.loc[:, f"sentence_reg_out_to_{algo_choice}"] = float("nan")
    work.loc[:, f"sentence_reg_in_from_{algo_choice}"] = float("nan")

    for j in range(1, len(work)):
        prev_word = work.iloc[j - 1][word_col]
        cur_word = work.iloc[j][word_col]
        # Pairs without word assignments are skipped entirely.
        if pd.isnull(cur_word) or pd.isnull(prev_word):
            continue

        if cur_word < prev_word:
            work.loc[j, f"word_reg_in_{algo_choice}"] = True
            work.loc[j - 1, f"word_reg_out_{algo_choice}"] = True
            work.loc[j, f"word_reg_in_from_{algo_choice}"] = prev_word
            work.loc[j - 1, f"word_reg_out_to_{algo_choice}"] = cur_word

        prev_sent = work.iloc[j - 1][sent_col]
        cur_sent = work.iloc[j][sent_col]
        if cur_sent < prev_sent:
            work.loc[j, f"sentence_reg_in_{algo_choice}"] = True
            work.loc[j - 1, f"sentence_reg_out_{algo_choice}"] = True
            work.loc[j, f"sentence_reg_in_from_{algo_choice}"] = prev_sent
            work.loc[j - 1, f"sentence_reg_out_to_{algo_choice}"] = cur_sent

    # Only bring back the newly created columns, keyed by fixation_number.
    extra_cols = list(set(work.columns) - set(dffix.columns))
    cols_to_add = ["fixation_number"] + extra_cols
    dffix = pd.merge(dffix, work[cols_to_add], on="fixation_number", how="outer")
    return dffix
|
|
|
|
|
def compute_firstskip(dffix, algo_choice):
    """Flag words/sentences that were skipped during first-pass reading.

    A word (sentence) is "first-skipped" when a fixation lands on it for
    the first time only after a later word (sentence) has already been
    fixated. Rows without a line assignment get NaN flags. Mutates and
    returns dffix (the index is temporarily reset and restored).
    """
    word_col = f"on_word_number_{algo_choice}"
    sent_col = f"on_sentence_num_{algo_choice}"
    word_flag = f"word_firstskip_{algo_choice}"
    sent_flag = f"sentence_firstskip_{algo_choice}"

    dffix[word_flag] = 0
    seen_words = []

    dffix[sent_flag] = 0
    seen_sents = []

    dffix.reset_index(inplace=True)
    for row in range(dffix.shape[0]):
        word = dffix.loc[row, word_col]
        sent = dffix.loc[row, sent_col]

        # Skipped: a later word was already fixated, and this word never was.
        if word < np.max(seen_words, initial=0) and word not in seen_words:
            dffix.loc[row, word_flag] = 1

        if sent < np.max(seen_sents, initial=0) and sent not in seen_sents:
            dffix.loc[row, sent_flag] = 1

        seen_words.append(word)
        seen_sents.append(sent)

    # No line assignment -> skip status undefined.
    na_lines = dffix[f"line_num_{algo_choice}"].isna()
    dffix.loc[na_lines, word_flag] = np.nan
    dffix.loc[na_lines, sent_flag] = np.nan
    dffix.set_index("index", inplace=True)
    return dffix
|
|
|
|
|
def compute_run(dffix, algo_choice):
    """Number reading "runs" and per-run fixation ranks for words and sentences.

    A run is a contiguous stretch of fixations on the same word
    (sentence); a new run id starts whenever a regression-in begins.
    Adds, per algorithm suffix: word/sentence_runid (running id over the
    trial), word/sentence_fix (fixation rank within the word/sentence),
    word/sentence_run (rank of the run within the word/sentence), and
    word/sentence_run_fix (fixation rank within each run). Results are
    merged back onto dffix by "fixation_number" and a copy is returned.

    Requires compute_regression's word/sentence_reg_in columns.
    """
    # Some upstream frames label the fixation counter "num".
    if "fixation_number" not in dffix.columns and "num" in dffix.columns:
        dffix["fixation_number"] = dffix["num"]
    tmp = dffix.copy()
    tmp.reset_index(inplace=True, drop=True)

    # Run ids start at 0 (word_runid only where a word was assigned).
    tmp.loc[~tmp[f"on_word_{algo_choice}"].isna(), f"word_runid_{algo_choice}"] = 0
    tmp[f"sentence_runid_{algo_choice}"] = 0

    if len(tmp) > 1:
        for j in range(1, len(tmp)):
            # A new word run begins at the first fixation of a regression-in.
            if tmp[f"word_reg_in_{algo_choice}"][j] == 1 and tmp[f"word_reg_in_{algo_choice}"][j - 1] != 1:
                tmp.loc[j, f"word_runid_{algo_choice}"] = tmp[f"word_runid_{algo_choice}"][j - 1] + 1
            else:
                tmp.loc[j, f"word_runid_{algo_choice}"] = tmp.loc[j - 1, f"word_runid_{algo_choice}"]

            # Same logic at the sentence level.
            if tmp[f"sentence_reg_in_{algo_choice}"][j] == 1 and tmp[f"sentence_reg_in_{algo_choice}"][j - 1] != 1:
                tmp.loc[j, f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"][j - 1] + 1
            else:
                tmp.loc[j, f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"][j - 1]
    # Shift ids down by one (mirrors the original popEye bookkeeping).
    tmp[f"word_runid_{algo_choice}"] = tmp[f"word_runid_{algo_choice}"] - 1
    tmp[f"sentence_runid_{algo_choice}"] = tmp[f"sentence_runid_{algo_choice}"] - 1

    # Rank of each fixation within its word / sentence (1 = earliest).
    tmp[f"word_fix_{algo_choice}"] = tmp.groupby(f"on_word_number_{algo_choice}")["fixation_number"].transform(
        lambda x: stats.rankdata(x, method="min")
    )
    tmp[f"sentence_fix_{algo_choice}"] = tmp.groupby(f"on_sentence_num_{algo_choice}")["fixation_number"].transform(
        lambda x: stats.rankdata(x, method="min")
    )

    # Rank each word run within its word: dedupe (word, runid) pairs, rank,
    # and merge the rank back onto every fixation of the run.
    tmp["id"] = tmp[f"on_word_number_{algo_choice}"].astype(str) + ":" + tmp[f"word_runid_{algo_choice}"].astype(str)
    fix_tmp = tmp.copy().drop_duplicates(subset="id")
    fix_tmp[f"word_run_{algo_choice}"] = fix_tmp.groupby(f"on_word_number_{algo_choice}")[
        f"word_runid_{algo_choice}"
    ].transform(lambda x: stats.rankdata(x, method="min"))

    if f"word_run_{algo_choice}" in tmp.columns:
        tmp = tmp.drop(columns=[f"word_run_{algo_choice}"])
    tmp = pd.merge(tmp, fix_tmp[["id", f"word_run_{algo_choice}"]], on="id")
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")

    # Same for sentence runs.
    tmp["id"] = (
        tmp[f"on_sentence_num_{algo_choice}"].astype(str) + ":" + tmp[f"sentence_runid_{algo_choice}"].astype(str)
    )
    fix_tmp = tmp.copy().drop_duplicates(subset="id")
    fix_tmp[f"sentence_run_{algo_choice}"] = fix_tmp.groupby(f"on_sentence_num_{algo_choice}")["id"].transform(
        lambda x: stats.rankdata(x, method="min")
    )
    if f"sentence_run_{algo_choice}" in tmp.columns:
        tmp = tmp.drop(columns=[f"sentence_run_{algo_choice}"])
    tmp = pd.merge(tmp, fix_tmp[["id", f"sentence_run_{algo_choice}"]], on="id")
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")

    # Rank of each fixation within its word run.
    tmp["id"] = tmp[f"on_word_number_{algo_choice}"].astype(str) + ":" + tmp[f"word_run_{algo_choice}"].astype(str)
    tmp[f"word_run_fix_{algo_choice}"] = tmp.groupby(["id"])["fixation_number"].rank("first").values
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")

    # Rank of each fixation within its sentence run.
    tmp["id"] = tmp[f"on_sentence_num_{algo_choice}"].astype(str) + ":" + tmp[f"sentence_run_{algo_choice}"].astype(str)
    tmp[f"sentence_run_fix_{algo_choice}"] = tmp.groupby(["id"])["fixation_number"].rank("first").values
    del tmp["id"]
    tmp = tmp.sort_values("fixation_number")
    names = [
        "fixation_number",
        f"word_runid_{algo_choice}",
        f"sentence_runid_{algo_choice}",
        f"word_fix_{algo_choice}",
        f"sentence_fix_{algo_choice}",
        f"word_run_{algo_choice}",
        f"sentence_run_{algo_choice}",
        f"word_run_fix_{algo_choice}",
        f"sentence_run_fix_{algo_choice}",
    ]
    dffix = pd.merge(dffix, tmp[names], on="fixation_number", how="left")
    return dffix.copy()
|
|
|
|
|
def compute_landing_position(dffix, algo_choice):
    """Add the word-centre-based landing position for each fixation.

    "word_cland_{algo_choice}" is the landing letter position minus the
    word's centre, (len(word) + 1) / 2, so 0 means the fixation landed
    exactly on the word centre. Mutates and returns dffix.
    """
    word_length = dffix[f"on_word_{algo_choice}"].str.len()
    dffix[f"word_cland_{algo_choice}"] = dffix[f"word_land_{algo_choice}"] - (word_length + 1) / 2
    return dffix
|
|
|
|
|
def aggregate_words_firstrun(
    fix,
    algo_choice,
    measures_to_calculate=(
        "firstrun_blink",
        "firstrun_skip",
        "firstrun_refix",
        "firstrun_reg_in",
        "firstrun_reg_out",
        "firstrun_dur",
        "firstrun_gopast",
        "firstrun_gopast_sel",
    ),
):
    """Aggregate first-run fixations into one row per word.

    Only fixations belonging to the first run on each word
    (word_run == 1) are aggregated. Which measures are computed is
    controlled by membership tests on measures_to_calculate; the gopast
    measures are merged in elsewhere and are only listed here for the
    caller's convenience.

    Args:
        fix: fixation-level DataFrame with run/skip/refix/regression
            columns for the given algorithm plus subject/trial metadata.
        algo_choice: line-assignment algorithm name used as column suffix.
        measures_to_calculate: names of measures to compute.
            NOTE: changed from a mutable list default to a tuple
            (mutable default argument anti-pattern); membership tests
            are unaffected.

    Returns:
        One row per word, sorted by trial_id and word number.
    """
    firstruntmp = fix.loc[fix[f"word_run_{algo_choice}"] == 1].copy()

    # One representative row per word (index kept for aligned assignment below).
    firstrun = firstruntmp.drop_duplicates(subset=f"on_word_number_{algo_choice}", keep="first").copy()

    names = [
        "subject",
        "trial_id",
        "item",
        "condition",
        f"on_word_number_{algo_choice}",
        f"on_word_{algo_choice}",
        "fixation_number",
    ]
    firstrun = firstrun[names].sort_values(f"on_word_number_{algo_choice}")

    # groupby transforms align on firstruntmp's index; assigning onto the
    # deduplicated firstrun keeps only each word's representative value.
    grouped = firstruntmp.groupby(f"on_word_number_{algo_choice}")

    firstrun[f"firstrun_nfix_{algo_choice}"] = grouped["fixation_number"].transform("count")
    firstrun[f"firstrun_nfix_{algo_choice}"] = firstrun[f"firstrun_nfix_{algo_choice}"].fillna(0)
    if "firstrun_blink" in measures_to_calculate:
        if "blink" in firstruntmp:
            firstrun[f"firstrun_blink_{algo_choice}"] = grouped["blink"].transform("max")
        else:
            # No blink information available for this trial.
            firstrun[f"firstrun_blink_{algo_choice}"] = 0

    if "firstrun_skip" in measures_to_calculate:
        firstrun[f"firstrun_skip_{algo_choice}"] = grouped[f"word_firstskip_{algo_choice}"].transform("max")
    if "firstrun_refix" in measures_to_calculate:
        firstrun[f"firstrun_refix_{algo_choice}"] = grouped[f"word_refix_{algo_choice}"].transform("max")
    if "firstrun_reg_in" in measures_to_calculate:
        # NOTE(review): mirrors the original mapping — reg_in aggregates
        # word_reg_out and vice versa; verify against popEye semantics.
        firstrun[f"firstrun_reg_in_{algo_choice}"] = grouped[f"word_reg_out_{algo_choice}"].transform("max")
    if "firstrun_reg_out" in measures_to_calculate:
        firstrun[f"firstrun_reg_out_{algo_choice}"] = grouped[f"word_reg_in_{algo_choice}"].transform("max")
    if "firstrun_dur" in measures_to_calculate:
        firstrun[f"firstrun_dur_{algo_choice}"] = grouped["duration"].transform("sum")
    firstrun = firstrun.sort_values(["trial_id", f"on_word_number_{algo_choice}"]).copy()

    return firstrun
|
|
|
|
|
def compute_gopast_word(fixations_dataframe, algo_choice):
    """Compute go-past and selective go-past time per word interest area.

    For each word (except the last assigned word — the "first fixation on a
    later word" boundary would be undefined for it):
      * gopast: total duration of all fixations from the first fixation on
        the word until a later word is fixated for the first time
        (includes regressions back to earlier words in between).
      * selgopast: same window, but summing only fixations on the word itself.

    Mutates fixations_dataframe in place and returns it. Requires
    "fixation_number", "duration" and the algorithm's on_word_number column.
    """
    ias = np.unique(fixations_dataframe.loc[:, f"on_word_number_{algo_choice}"])

    for j in range(len(ias) - 1):
        # Go-past window: fixation_number >= first fixation on word j and
        # < first fixation on any later word; NaN-word fixations excluded.
        fixations_dataframe.loc[
            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), f"gopast_{algo_choice}"
        ] = np.nansum(
            fixations_dataframe.loc[
                (
                    fixations_dataframe["fixation_number"]
                    >= np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), "fixation_number"
                        ]
                    )
                )
                & (
                    fixations_dataframe["fixation_number"]
                    < np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] > ias[j]), "fixation_number"
                        ]
                    )
                )
                & (~fixations_dataframe[f"on_word_number_{algo_choice}"].isna())
            ]["duration"]
        )

        # Selective go-past: same window, restricted to fixations on word j.
        fixations_dataframe.loc[
            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), f"selgopast_{algo_choice}"
        ] = np.nansum(
            fixations_dataframe.loc[
                (
                    fixations_dataframe["fixation_number"]
                    >= np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j]), "fixation_number"
                        ]
                    )
                )
                & (
                    fixations_dataframe["fixation_number"]
                    < np.min(
                        fixations_dataframe.loc[
                            (fixations_dataframe[f"on_word_number_{algo_choice}"] > ias[j]), "fixation_number"
                        ]
                    )
                )
                & (fixations_dataframe[f"on_word_number_{algo_choice}"] == ias[j])
                & (~fixations_dataframe[f"on_word_number_{algo_choice}"].isna())
            ]["duration"]
        )
    return fixations_dataframe
|
|
|
|
|
def aggregate_words(
    fix,
    word_item,
    algo_choice,
    measures_to_calculate=("blink",),
):
    """Aggregate fixation-level measures to one row per stimulus word.

    Computes go-past measures via compute_gopast_word, then aggregates the
    measures named in measures_to_calculate (blink, nrun, reread, refix,
    reg_in, reg_out, total_fixation_duration, gopast, gopast_sel, ...)
    with per-word groupby transforms, and right-merges onto word_item so
    that words never fixated still appear (flagged via skip).

    Args:
        fix: fixation-level DataFrame with run/refix/regression columns
            for the given algorithm.
        word_item: stimulus word table with a "word_number" column.
        algo_choice: line-assignment algorithm name used as column suffix.
        measures_to_calculate: names of measures to compute.
            NOTE: changed from a mutable list default to a tuple
            (mutable default argument anti-pattern); membership tests
            are unaffected.

    Returns:
        One row per word, sorted by word_number, with trial metadata
        (subject/trial_id/item/condition) prepended when available.
    """
    wordtmp = fix.copy()

    # One representative row per word (index kept for aligned assignment).
    word = wordtmp.drop_duplicates(subset=f"on_word_number_{algo_choice}", keep="first").copy()
    names = [
        f"on_sentence_num_{algo_choice}",
        f"on_word_number_{algo_choice}",
        f"on_word_{algo_choice}",
    ]
    word = word.loc[:, names].sort_values(by=f"on_word_number_{algo_choice}")

    wordtmp = compute_gopast_word(wordtmp, algo_choice)

    if "blink" in measures_to_calculate:
        if "blink" in wordtmp:
            word[f"blink_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")["blink"].transform("max")
        else:
            # No blink information available for this trial.
            word[f"blink_{algo_choice}"] = 0
    if "nrun" in measures_to_calculate or "reread" in measures_to_calculate:
        # Number of reading runs on the word (max run rank).
        word[f"nrun_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_run_{algo_choice}"
        ].transform("max")
        if "reread" in measures_to_calculate:
            word[f"reread_{algo_choice}"] = word[f"nrun_{algo_choice}"] > 1
    # Always counted; dropped at the end unless requested.
    word[f"number_of_fixations_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
        "fixation_number"
    ].transform("count")
    if "refix" in measures_to_calculate:
        word[f"refix_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_refix_{algo_choice}"
        ].transform("max")
    if "reg_in" in measures_to_calculate:
        word[f"reg_in_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_reg_in_{algo_choice}"
        ].transform("max")
    if "reg_out" in measures_to_calculate:
        word[f"reg_out_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"word_reg_out_{algo_choice}"
        ].transform("max")
    if "total_fixation_duration" in measures_to_calculate:
        word[f"total_fixation_duration_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            "duration"
        ].transform("sum")
    if "gopast" in measures_to_calculate and f"gopast_{algo_choice}" in wordtmp.columns:
        word[f"gopast_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"gopast_{algo_choice}"
        ].transform("max")
        word[f"gopast_{algo_choice}"] = word[f"gopast_{algo_choice}"].fillna(0)

    if "gopast_sel" in measures_to_calculate and f"selgopast_{algo_choice}" in wordtmp.columns:
        word[f"gopast_sel_{algo_choice}"] = wordtmp.groupby(f"on_word_number_{algo_choice}")[
            f"selgopast_{algo_choice}"
        ].transform("max")
        word[f"gopast_sel_{algo_choice}"] = word[f"gopast_sel_{algo_choice}"].fillna(0)

    # Right-merge so never-fixated words from the stimulus are kept.
    word.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1, inplace=True)
    word = pd.merge(
        word.reset_index(drop=True), word_item.reset_index(drop=True), on="word_number", how="right", validate="1:1"
    )
    word[f"number_of_fixations_{algo_choice}"] = word[f"number_of_fixations_{algo_choice}"].fillna(0)
    if "total_fixation_duration" in measures_to_calculate:
        word[f"total_fixation_duration_{algo_choice}"] = word[f"total_fixation_duration_{algo_choice}"].fillna(0)

    # Skipped words: never fixated at all.
    word[f"skip_{algo_choice}"] = 0
    if "blink" in measures_to_calculate:
        word.loc[word[f"blink_{algo_choice}"].isna(), f"skip_{algo_choice}"] = 1
    word.loc[word[f"number_of_fixations_{algo_choice}"] == 0, f"skip_{algo_choice}"] = 1
    word[f"skip_{algo_choice}"] = word[f"skip_{algo_choice}"].astype("boolean")

    if "number_of_fixations" not in measures_to_calculate:
        word = word.drop(columns=f"number_of_fixations_{algo_choice}")
    if "blink" in measures_to_calculate:
        word[f"blink_{algo_choice}"] = word[f"blink_{algo_choice}"].astype("boolean")

    word = word.sort_values(by=["word_number"])

    # Prepend trial metadata (constant within a trial) when available.
    if "condition" in wordtmp.columns and "condition" not in word.columns:
        word.insert(loc=0, column="condition", value=wordtmp["condition"].iloc[0])
    if "item" in wordtmp.columns and "item" not in word.columns:
        word.insert(loc=0, column="item", value=wordtmp["item"].iloc[0])
    if "trial_id" in wordtmp.columns and "trial_id" not in word.columns:
        word.insert(loc=0, column="trial_id", value=wordtmp["trial_id"].iloc[0])
    if "subject" in wordtmp.columns and "subject" not in word.columns:
        word.insert(loc=0, column="subject", value=wordtmp["subject"].iloc[0])

    return word
|
|
|
|
|
def combine_words(fix, wordfirst, wordtmp, algo_choice, measures_to_calculate):
    """Merge word-level, first-run and first-fixation measures into one table.

    Joins three sources on "word_number": aggregate word measures
    (wordtmp), first-run aggregates (wordfirst), and the single first
    fixation of each word's first run (from fix). Also derives the
    singlefix_* measures for words whose first run had exactly one
    fixation, and drops any columns not requested in
    measures_to_calculate. Returns a copy sorted by word_number with
    trial metadata prepended.
    """
    # Trial identifiers, constant within a trial — taken from the first row.
    subject = wordtmp["subject"].values[0]
    trial_id = wordtmp["trial_id"].values[0]
    item = wordtmp["item"].values[0]
    condition = wordtmp["condition"].values[0]
    # Keep only the word-level measure columns that actually exist.
    wordtmp = wordtmp.loc[
        :,
        [
            c
            for c in [
                "word_number",
                "word",
                f"blink_{algo_choice}",
                f"skip_{algo_choice}",
                f"nrun_{algo_choice}",
                f"reread_{algo_choice}",
                f"number_of_fixations_{algo_choice}",
                f"refix_{algo_choice}",
                f"reg_in_{algo_choice}",
                f"reg_out_{algo_choice}",
                f"total_fixation_duration_{algo_choice}",
                f"gopast_{algo_choice}",
                f"gopast_sel_{algo_choice}",
            ]
            if c in wordtmp.columns
        ],
    ]

    # First-run aggregates that exist for this run.
    wordfirsttmp = wordfirst.loc[
        :,
        [
            c
            for c in [
                f"on_word_number_{algo_choice}",
                f"firstrun_skip_{algo_choice}",
                f"firstrun_nfix_{algo_choice}",
                f"firstrun_refix_{algo_choice}",
                f"firstrun_reg_in_{algo_choice}",
                f"firstrun_reg_out_{algo_choice}",
                f"firstrun_dur_{algo_choice}",
                f"firstrun_gopast_{algo_choice}",
                f"firstrun_gopast_sel_{algo_choice}",
            ]
            if c in wordfirst.columns
        ],
    ]

    # The first fixation of the first run on each word.
    fixtmp = fix[(fix[f"word_run_{algo_choice}"] == 1) & (fix[f"word_run_fix_{algo_choice}"] == 1)].copy()
    names = [
        c
        for c in [
            f"on_word_number_{algo_choice}",
            f"sac_in_{algo_choice}",
            f"sac_out_{algo_choice}",
            f"word_launch_{algo_choice}",
            f"word_land_{algo_choice}",
            f"word_cland_{algo_choice}",
            f"duration",
        ]
        if c in fixtmp.columns
    ]
    fixtmp = fixtmp[names].copy()
    # Rename fixation-level columns to their firstfix_* counterparts.
    fixtmp.rename(
        {
            f"sac_in_{algo_choice}": f"firstfix_sac_in_{algo_choice}",
            f"sac_out_{algo_choice}": f"firstfix_sac_out_{algo_choice}",
            f"word_launch_{algo_choice}": f"firstfix_launch_{algo_choice}",
            f"word_land_{algo_choice}": f"firstfix_land_{algo_choice}",
            f"word_cland_{algo_choice}": f"firstfix_cland_{algo_choice}",
            f"duration": f"firstfix_dur_{algo_choice}",
        },
        axis=1,
        inplace=True,
    )
    # Left-join first-run and first-fixation measures onto the word table.
    comb = pd.merge(
        pd.merge(
            wordtmp,
            wordfirsttmp.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1),
            on="word_number",
            how="left",
        ),
        fixtmp.rename({f"on_word_number_{algo_choice}": "word_number"}, axis=1),
        on="word_number",
        how="left",
    )

    # Drop any merged measure the caller did not ask for.
    dropcols = [
        c
        for c in [
            f"firstrun_skip_{algo_choice}",
            f"firstrun_refix_{algo_choice}",
            f"firstrun_reg_in_{algo_choice}",
            f"firstrun_reg_out_{algo_choice}",
            f"firstrun_dur_{algo_choice}",
            f"firstrun_gopast_{algo_choice}",
            f"firstrun_gopast_sel_{algo_choice}",
            f"firstfix_sac_in_{algo_choice}",
            f"firstfix_sac_out_{algo_choice}",
            f"firstfix_launch_{algo_choice}",
            f"firstfix_land_{algo_choice}",
            f"firstfix_cland_{algo_choice}",
            f"firstfix_dur_{algo_choice}",
        ]
        if ((c.replace(f"_{algo_choice}", "") not in measures_to_calculate) & (c in comb.columns))
    ]
    comb = comb.drop(columns=dropcols).copy()
    comb.sort_values(by="word_number", inplace=True)

    # A word that was skipped entirely was also skipped on the first run.
    if f"skip_{algo_choice}" in comb.columns and f"firstrun_skip_{algo_choice}" in comb.columns:
        comb.loc[comb[f"skip_{algo_choice}"] == 1, f"firstrun_skip_{algo_choice}"] = 1

    # The word-level gopast values double as the first-run gopast measures.
    if f"gopast_{algo_choice}" in comb.columns and "firstrun_gopast" in measures_to_calculate:
        comb[f"firstrun_gopast_{algo_choice}"] = comb[f"gopast_{algo_choice}"]
    if f"gopast_sel_{algo_choice}" in comb.columns and "firstrun_gopast_sel" in measures_to_calculate:
        comb[f"firstrun_gopast_sel_{algo_choice}"] = comb[f"gopast_sel_{algo_choice}"]
    if f"gopast_{algo_choice}" in comb.columns:
        comb.drop(columns=[f"gopast_{algo_choice}"], inplace=True)

    if f"gopast_sel_{algo_choice}" in comb.columns:
        comb.drop(columns=[f"gopast_sel_{algo_choice}"], inplace=True)

    # singlefix_*: measures defined only when the first run had one fixation.
    if f"firstrun_nfix_{algo_choice}" in comb.columns and "singlefix" in measures_to_calculate:
        comb[f"singlefix_{algo_choice}"] = 0
        comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_{algo_choice}"] = 1

    if f"firstfix_sac_in_{algo_choice}" in comb.columns and "singlefix_sac_in" in measures_to_calculate:
        comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_sac_in_{algo_choice}"] = comb[
            f"firstfix_sac_in_{algo_choice}"
        ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]

    if f"firstfix_sac_out_{algo_choice}" in comb.columns and "singlefix_sac_out" in measures_to_calculate:
        comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_sac_out_{algo_choice}"] = comb[
            f"firstfix_sac_out_{algo_choice}"
        ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]

    if f"firstfix_launch_{algo_choice}" in comb.columns and "singlefix_launch" in measures_to_calculate:
        comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_launch_{algo_choice}"] = comb[
            f"firstfix_launch_{algo_choice}"
        ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]

    if f"firstfix_land_{algo_choice}" in comb.columns and "singlefix_land" in measures_to_calculate:
        comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_land_{algo_choice}"] = comb[
            f"firstfix_land_{algo_choice}"
        ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]

    if f"firstfix_cland_{algo_choice}" in comb.columns and "singlefix_cland" in measures_to_calculate:
        comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_cland_{algo_choice}"] = comb[
            f"firstfix_cland_{algo_choice}"
        ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]

    if f"firstfix_dur_{algo_choice}" in comb.columns and "singlefix_dur" in measures_to_calculate:
        comb.loc[(comb[f"firstrun_nfix_{algo_choice}"] == 1), f"singlefix_dur_{algo_choice}"] = comb[
            f"firstfix_dur_{algo_choice}"
        ][(comb[f"firstrun_nfix_{algo_choice}"] == 1)]

    # Prepend trial metadata when missing.
    if "condition" not in comb.columns:
        comb.insert(loc=0, column="condition", value=condition)
    if "item" not in comb.columns:
        comb.insert(loc=0, column="item", value=item)
    if "trial_id" not in comb.columns:
        comb.insert(loc=0, column="trial_id", value=trial_id)
    if "subject" not in comb.columns:
        comb.insert(loc=0, column="subject", value=subject)
    return comb.copy()
|
|
|
|
|
def compute_sentence_measures(fix, stimmat, algo_choice, measures_to_calc, save_to_csv=False):
    """Compute sentence-level reading measures for a single trial.

    Builds a per-fixation working table (``fixin``) with a locally smoothed
    sentence assignment, regression in/out flags, run ids, first-pass and
    forward-saccade flags, then aggregates fixation counts and durations into
    one row per sentence (``sent``).

    Args:
        fix: fixation-level DataFrame. Must contain ``duration``,
            ``fixation_number``, ``trial_id``, ``subject``, ``item``,
            ``condition``, ``num_words_in_sentence`` and the
            algorithm-suffixed columns read below (``on_sentence_num_<algo>``,
            ``on_sentence_<algo>``, ``on_word_number_<algo>``,
            ``line_change_<algo>``, ``sentence_run_<algo>``).
        stimmat: stimulus matrix, one row per word; sentence metadata is taken
            from the first row of each ``in_sentence_number``.
        algo_choice: suffix selecting which line-assignment algorithm's
            columns to use.
        measures_to_calc: collection of measure names; controls which
            aggregate columns are computed and which are kept in the output.
        save_to_csv: if True, additionally write the result to
            ``RESULTS_FOLDER`` as ``<subject>_<trial>_<algo>_sentence_measures.csv``.

    Returns:
        A copy of the sentence-level measures DataFrame, one row per sentence.
    """
    # One metadata row per sentence from the stimulus matrix.
    sentitem = stimmat.drop_duplicates(
        subset="in_sentence_number", keep="first"
    )
    fixin = fix.copy().reset_index(drop=True)

    # Working copy of the sentence assignment that may be locally smoothed.
    fixin["on_sentence_num2"] = fixin[f"on_sentence_num_{algo_choice}"].copy()

    # Smooth isolated excursions: if fixation j lands on a different sentence
    # than j-1 but one of the next two fixations returns to j-1's sentence,
    # re-assign j to that sentence.
    for j in range(1, len(fixin) - 1):
        if fixin.loc[j, "on_sentence_num2"] != fixin.loc[j - 1, "on_sentence_num2"]:
            if j + 1 in fixin.index and fixin.loc[j + 1, "on_sentence_num2"] == fixin.loc[j - 1, "on_sentence_num2"]:
                fixin.loc[j, "on_sentence_num2"] = fixin.loc[j - 1, "on_sentence_num2"]
            elif j + 2 in fixin.index and fixin.loc[j + 2, "on_sentence_num2"] == fixin.loc[j - 1, "on_sentence_num2"]:
                fixin.loc[j, "on_sentence_num2"] = fixin.loc[j - 1, "on_sentence_num2"]

    # String key identifying each fixation's (smoothed) sentence.
    fixin["id"] = fixin.apply(lambda row: f"{row['on_sentence_num2']}", axis=1)

    # Regression-in/out flags based on the smoothed assignment.
    fixin[f"sent_reg_in2_{algo_choice}"] = 0
    fixin[f"sent_reg_out2_{algo_choice}"] = 0

    # Run id: incremented each time a new entry into a sentence starts.
    fixin[f"sent_runid2_{algo_choice}"] = 1

    # Seed the sequential pass with the first fixation.
    fixin.loc[0, "last"] = fixin.loc[0, "id"]
    fixin.loc[0, f"firstpass_{algo_choice}"] = 1
    mem = [fixin.loc[0, "on_sentence_num2"]]  # sentences visited so far
    wordmem = [fixin.loc[0, f"on_word_number_{algo_choice}"]]  # word numbers reached so far
    fixin.loc[0, f"forward_{algo_choice}"] = 1

    for j in range(1, len(fixin)):
        # "last" holds the sentence id of the previous fixation.
        fixin.loc[j, "last"] = fixin.loc[j - 1, "id"]

        # A sentence change is a regression into j's sentence and out of j-1's;
        # record the source/target sentence numbers on both rows.
        if fixin.loc[j, "on_sentence_num2"] != fixin.loc[j - 1, "on_sentence_num2"]:
            fixin.loc[j, f"sent_reg_in2_{algo_choice}"] = 1
            fixin.loc[j - 1, f"sent_reg_out2_{algo_choice}"] = 1
            fixin.loc[j, f"sent_reg_in_from2_{algo_choice}"] = fixin.loc[j - 1, "on_sentence_num2"]
            fixin.loc[j - 1, f"sent_reg_out_to2_{algo_choice}"] = fixin.loc[j, "on_sentence_num2"]

        # Start a new run only at the first fixation of an entry (reg_in flag
        # rises from 0 to 1); otherwise carry the previous run id forward.
        if fixin.loc[j, f"sent_reg_in2_{algo_choice}"] == 1 and fixin.loc[j - 1, f"sent_reg_in2_{algo_choice}"] != 1:
            fixin.loc[j, f"sent_runid2_{algo_choice}"] = fixin.loc[j - 1, f"sent_runid2_{algo_choice}"] + 1
        else:
            fixin.loc[j, f"sent_runid2_{algo_choice}"] = fixin.loc[j - 1, f"sent_runid2_{algo_choice}"]

        # First-pass flag: a fixation is first-pass if it sits on the furthest
        # sentence reached so far (or on a newly reached sentence) and does
        # not follow a backward sentence jump.
        if fixin.loc[j, "on_sentence_num2"] >= fixin.loc[j - 1, "on_sentence_num2"]:
            if fixin.loc[j, "on_sentence_num2"] in mem:
                if fixin.loc[j, "on_sentence_num2"] == max(mem):
                    fixin.loc[j, f"firstpass_{algo_choice}"] = 1
                else:
                    fixin.loc[j, f"firstpass_{algo_choice}"] = 0
            else:
                mem.append(fixin.loc[j, "on_sentence_num2"])
                fixin.loc[j, f"firstpass_{algo_choice}"] = 1
        else:
            fixin.loc[j, f"firstpass_{algo_choice}"] = 0

        # Forward flag: 1 when the fixation advances past the furthest word
        # seen so far, 0 when it falls behind it; a fixation exactly on the
        # current maximum leaves the flag untouched.
        if fixin.loc[j, f"on_word_number_{algo_choice}"] > max(wordmem):
            wordmem.append(fixin.loc[j, f"on_word_number_{algo_choice}"])
            fixin.loc[j, f"forward_{algo_choice}"] = 1
        elif fixin.loc[j, f"on_word_number_{algo_choice}"] < max(wordmem):
            fixin.loc[j, f"forward_{algo_choice}"] = 0

    # After a line change, treat the next three fixations as forward reading.
    for i in range(len(fixin) - 3):
        if fixin.loc[i, f"line_change_{algo_choice}"] > 0:
            # NOTE(review): this writes a plain "on_word_number" column while
            # every other access uses the algo-suffixed column — presumably
            # f"on_word_number_{algo_choice}" was intended; confirm.
            fixin.loc[i, "on_word_number"] = 0
            fixin.loc[i + 1, f"forward_{algo_choice}"] = 1
            fixin.loc[i + 2, f"forward_{algo_choice}"] = 1
            fixin.loc[i + 3, f"forward_{algo_choice}"] = 1

    # After advancing to a later sentence, mark the next two fixations forward.
    for i in range(1, len(fixin) - 3):
        if fixin.loc[i, "on_sentence_num2"] > fixin.loc[i - 1, "on_sentence_num2"]:
            fixin.loc[i + 1, f"forward_{algo_choice}"] = 1
            fixin.loc[i + 2, f"forward_{algo_choice}"] = 1

    # Composite key: sentence id plus run id ("<sentence>:<run>").
    fixin["id2"] = fixin["id"] + ":" + fixin[f"sent_runid2_{algo_choice}"].astype(str)

    fixin = fixin.sort_values(["trial_id", "fixation_number"])

    # One row per sentence (first fixation on it), keeping identifiers only.
    sent = fixin.copy().drop_duplicates(subset="id", keep="first")
    names = [
        "id",
        "subject",
        "trial_id",
        "item",
        "condition",
        "on_sentence_num2",
        f"on_sentence_num_{algo_choice}",
        f"on_sentence_{algo_choice}",
        "num_words_in_sentence",
    ]
    sent = sent[names].reset_index(drop=True)

    # First-run skip: the sentence was first entered only after a later
    # sentence had already been visited.
    sent[f"firstrun_skip_{algo_choice}"] = 0

    mem = []
    for j in range(len(sent)):
        if not pd.isna(sent.loc[j, f"on_sentence_num_{algo_choice}"]):
            if len(mem) > 0 and sent.loc[j, f"on_sentence_num_{algo_choice}"] < max(mem) and not pd.isna(max(mem)):
                sent.loc[j, f"firstrun_skip_{algo_choice}"] = 1
            if (
                not pd.isna(sent.loc[j, f"on_sentence_num_{algo_choice}"])
                and sent.loc[j, f"on_sentence_num_{algo_choice}"] not in mem
            ):
                mem.append(sent.loc[j, f"on_sentence_num_{algo_choice}"])

    # --- Aggregate fixation counts/durations per sentence -------------------

    if "total_n_fixations" in measures_to_calc:
        tmp = fixin.groupby("id")["duration"].count().reset_index()
        tmp.columns = ["id", f"total_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"total_n_fixations_{algo_choice}": 0}, inplace=True)

    # Total duration is always computed: the reading rate below depends on it.
    tmp = fixin.groupby("id")["duration"].sum().reset_index()
    tmp.columns = ["id", f"total_dur_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")
    sent.fillna({f"total_dur_{algo_choice}": 0}, inplace=True)

    if "firstpass_n_fixations" in measures_to_calc:
        tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 1].groupby("id")["duration"].count().reset_index()
        tmp.columns = ["id", f"firstpass_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_n_fixations_{algo_choice}": 0}, inplace=True)

    if "firstpass_dur" in measures_to_calc:
        tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 1].groupby("id")["duration"].sum().reset_index()
        tmp.columns = ["id", f"firstpass_dur_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_dur_{algo_choice}": 0}, inplace=True)

    if "firstpass_forward_n_fixations" in measures_to_calc:
        # First-pass fixations that advance through the text.
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 1)]
            .groupby("id")["duration"]
            .count()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_forward_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_forward_n_fixations_{algo_choice}": 0}, inplace=True)

    if "firstpass_forward_dur" in measures_to_calc:
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 1)]
            .groupby("id")["duration"]
            .sum()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_forward_dur_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_forward_dur_{algo_choice}": 0}, inplace=True)

    if "firstpass_reread_n_fixations" in measures_to_calc:
        # First-pass fixations that move backwards (re-reading within pass).
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 0)]
            .groupby("id")["duration"]
            .count()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_reread_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_reread_n_fixations_{algo_choice}": 0}, inplace=True)

    if "firstpass_reread_dur" in measures_to_calc:
        tmp = (
            fixin[(fixin[f"firstpass_{algo_choice}"] == 1) & (fixin[f"forward_{algo_choice}"] == 0)]
            .groupby("id")["duration"]
            .sum()
            .reset_index()
        )
        tmp.columns = ["id", f"firstpass_reread_dur_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstpass_reread_dur_{algo_choice}": 0}, inplace=True)

    # Look-back / look-from measures only exist when there are any
    # non-first-pass fixations.
    if sum(fixin[f"firstpass_{algo_choice}"] == 0) != 0:
        if "lookback_n_fixations" in measures_to_calc:
            tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 0].groupby("id")["duration"].count().reset_index()
            tmp.columns = ["id", f"lookback_n_fixations_{algo_choice}"]
            sent = pd.merge(sent, tmp, on="id", how="left")
            sent.fillna({f"lookback_n_fixations_{algo_choice}": 0}, inplace=True)

        if "lookback_dur" in measures_to_calc:
            tmp = fixin[fixin[f"firstpass_{algo_choice}"] == 0].groupby("id")["duration"].sum().reset_index()
            tmp.columns = ["id", f"lookback_dur_{algo_choice}"]
            sent = pd.merge(sent, tmp, on="id", how="left")
            sent.fillna({f"lookback_dur_{algo_choice}": 0}, inplace=True)

        # Rebuild the sentence:run key and find non-first-pass runs that were
        # entered via a regression (these are "look-from" episodes; they get
        # attributed to the sentence the regression launched from, via "last").
        fixin["id2"] = fixin.apply(lambda row: f"{row['id']}:{row[f'sent_runid2_{algo_choice}']}", axis=1)
        sent2 = fixin.drop_duplicates(subset="id2", keep="first")
        sent3 = sent2[(sent2[f"firstpass_{algo_choice}"] == 0) & (~pd.isna(sent2[f"sent_reg_in_from2_{algo_choice}"]))]

        # NOTE(review): unlike lookfrom_dur below, this count block is not
        # guarded by "lookfrom_n_fixations" in measures_to_calc — confirm
        # whether the guard was dropped intentionally.
        tmp = fixin[fixin["id2"].isin(sent3["id2"])].groupby("id")["duration"].count().reset_index()
        tmp.columns = ["id", f"lookfrom_n_fixations_{algo_choice}"]
        tmp2 = pd.merge(tmp, sent3)
        tmp3 = tmp2.groupby("last")[f"lookfrom_n_fixations_{algo_choice}"].sum().reset_index()
        tmp3.columns = ["last", f"lookfrom_n_fixations_{algo_choice}"]
        sent = pd.merge(sent, tmp3, left_on="id", right_on="last", how="left")
        sent.fillna({f"lookfrom_n_fixations_{algo_choice}": 0}, inplace=True)

        if "lookfrom_dur" in measures_to_calc:
            tmp = fixin[fixin["id2"].isin(sent3["id2"])].groupby("id")["duration"].sum().reset_index()
            tmp.columns = ["id", f"lookfrom_dur_{algo_choice}"]
            tmp2 = pd.merge(tmp, sent3)
            tmp3 = tmp2.groupby("last")[f"lookfrom_dur_{algo_choice}"].sum().reset_index()
            tmp3.columns = ["last", f"lookfrom_dur_{algo_choice}"]
            sent = pd.merge(sent, tmp3, left_on="id", right_on="last", how="left")
            sent.fillna({f"lookfrom_dur_{algo_choice}": 0}, inplace=True)

    # First-run regressions: restrict to each sentence's first reading run
    # (sentence_run column is produced upstream — not in this function).
    firstruntmp = fixin[fixin[f"sentence_run_{algo_choice}"] == 1]

    if "firstrun_reg_in" in measures_to_calc:
        tmp = firstruntmp.groupby("id")[f"sent_reg_in2_{algo_choice}"].max().reset_index()
        tmp.columns = ["id", f"firstrun_reg_in_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstrun_reg_in_{algo_choice}": 0}, inplace=True)

    if "firstrun_reg_out" in measures_to_calc:
        tmp = firstruntmp.groupby("id")[f"sent_reg_out2_{algo_choice}"].max().reset_index()
        tmp.columns = ["id", f"firstrun_reg_out_{algo_choice}"]
        sent = pd.merge(sent, tmp, on="id", how="left")
        sent.fillna({f"firstrun_reg_out_{algo_choice}": 0}, inplace=True)

    # Go-past / selective go-past times computed on the smoothed assignment.
    gopasttmp = fixin.copy()
    gopasttmp[f"on_sentence_num_{algo_choice}"] = gopasttmp["on_sentence_num2"]
    tmp = compute_gopast_sentence(gopasttmp, algo_choice)
    names = ["id", f"gopast_{algo_choice}", f"selgopast_{algo_choice}"]
    tmp = tmp[names]
    tmp = tmp.drop_duplicates(subset="id", keep="first")
    tmp.columns = ["id", f"gopast_{algo_choice}", f"gopast_sel_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    # Number of runs per sentence (upstream run counter).
    tmp = fixin.groupby("id")[f"sentence_run_{algo_choice}"].max().reset_index()
    tmp.columns = ["id", f"nrun_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    # Re-read flag: more than one run on the sentence.
    sent[f"reread_{algo_choice}"] = sent.apply(lambda row: 1 if row[f"nrun_{algo_choice}"] > 1 else 0, axis=1)

    # Any regression into the sentence.
    tmp = fixin.groupby("id")[f"sent_reg_in2_{algo_choice}"].max().reset_index()
    tmp.columns = ["id", f"reg_in_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    # Any regression out of the sentence.
    tmp = fixin.groupby("id")[f"sent_reg_out2_{algo_choice}"].max().reset_index()
    tmp.columns = ["id", f"reg_out_{algo_choice}"]
    sent = pd.merge(sent, tmp, on="id", how="left")

    sent = sent.sort_values(by=f"on_sentence_num_{algo_choice}").reset_index(drop=True)

    # Reading rate in words per minute: 60000 ms / (ms per word).
    sent[f"rate_{algo_choice}"] = round(60000 / (sent[f"total_dur_{algo_choice}"] / sent["num_words_in_sentence"]))

    item = sentitem.copy()

    # Attach sentence metadata; sentences with no run were never fixated.
    sent = pd.merge(
        sent,
        item.rename({"in_sentence_number": f"on_sentence_num_{algo_choice}"}, axis=1),
        on=f"on_sentence_num_{algo_choice}",
        how="left",
    )
    sent[f"skip_{algo_choice}"] = 0
    sent.loc[pd.isna(sent[f"nrun_{algo_choice}"]), f"skip_{algo_choice}"] = 1

    # Keep identifiers plus only the requested, present measure columns.
    names = [
        "subject",
        "trial_id",
        "item",
        "condition",
    ] + [
        c
        for c in [
            f"on_sentence_num_{algo_choice}",
            f"on_sentence_{algo_choice}",
            "num_words_in_sentence",
            f"skip_{algo_choice}",
            f"nrun_{algo_choice}",
            f"reread_{algo_choice}",
            f"reg_in_{algo_choice}",
            f"reg_out_{algo_choice}",
            f"total_n_fixations_{algo_choice}",
            f"total_dur_{algo_choice}",
            f"rate_{algo_choice}",
            f"gopast_{algo_choice}",
            f"gopast_sel_{algo_choice}",
            f"firstrun_skip_{algo_choice}",
            f"firstrun_reg_in_{algo_choice}",
            f"firstrun_reg_out_{algo_choice}",
            f"firstpass_n_fixations_{algo_choice}",
            f"firstpass_dur_{algo_choice}",
            f"firstpass_forward_n_fixations_{algo_choice}",
            f"firstpass_forward_dur_{algo_choice}",
            f"firstpass_reread_n_fixations_{algo_choice}",
            f"firstpass_reread_dur_{algo_choice}",
            f"lookback_n_fixations_{algo_choice}",
            f"lookback_dur_{algo_choice}",
            f"lookfrom_n_fixations_{algo_choice}",
            f"lookfrom_dur_{algo_choice}",
        ]
        if (c in sent.columns and c.replace(f"_{algo_choice}", "") in measures_to_calc)
    ]
    sent = sent[names].copy()
    sent.rename(
        {
            f"on_sentence_num_{algo_choice}": "sentence_number",
            f"on_sentence_{algo_choice}": "sentence",
            "num_words_in_sentence": "number_of_words",
        },
        axis=1,
        inplace=True,
    )

    if save_to_csv:
        subj = fix["subject"].iloc[0]
        trial_id = fix["trial_id"].iloc[0]
        sent.to_csv(RESULTS_FOLDER / f"{subj}_{trial_id}_{algo_choice}_sentence_measures.csv")
    return sent.copy()
|
|
|
|
|
def compute_gopast_sentence(fixin, algo_choice):
    """Add go-past and selective go-past reading times per sentence.

    For every sentence (in order of first appearance), the go-past window
    runs from the first fixation on that sentence up to — but excluding —
    the first fixation on any later sentence. ``gopast`` sums all fixation
    durations inside that window (including regressions to earlier
    sentences); ``selgopast`` sums only the fixations that are on the
    sentence itself. Both values are written onto every row belonging to
    the sentence. ``fixin`` is modified in place and also returned.

    Args:
        fixin: fixation-level DataFrame with ``fixation_number``,
            ``duration`` and ``on_sentence_num_<algo_choice>`` columns.
        algo_choice: suffix selecting the sentence-assignment column.

    Returns:
        The same DataFrame with ``gopast_<algo>`` and ``selgopast_<algo>``
        columns filled in.
    """
    sent_col = f"on_sentence_num_{algo_choice}"
    gopast_col = f"gopast_{algo_choice}"
    selgopast_col = f"selgopast_{algo_choice}"

    fixin[gopast_col] = np.nan
    fixin[selgopast_col] = np.nan

    # Sentences in order of first appearance (pandas preserves this order).
    sentence_ids = fixin[sent_col].unique()
    final_id = sentence_ids[-1]

    for sent_num in sentence_ids:
        on_this_sentence = fixin[sent_col] == sent_num
        window_start = fixin.loc[on_this_sentence, "fixation_number"].min(skipna=True)

        # The window closes at the first fixation on any later sentence;
        # for the last sentence it stays open to the end of the trial.
        if sent_num != final_id:
            window_end = fixin.loc[fixin[sent_col] > sent_num, "fixation_number"].min(skipna=True)
        else:
            window_end = float("inf")

        in_window = (
            (fixin["fixation_number"] >= window_start)
            & (fixin["fixation_number"] < window_end)
            & fixin[sent_col].notna()
        )

        # Go-past: everything in the window; selective go-past: only the
        # window fixations that land on this sentence.
        fixin.loc[on_this_sentence, gopast_col] = fixin.loc[in_window, "duration"].sum(skipna=True)
        fixin.loc[on_this_sentence, selgopast_col] = fixin.loc[
            in_window & on_this_sentence, "duration"
        ].sum(skipna=True)

    return fixin
|
|
|
|
|
def aggregate_trials(dffix_combined, wordcomb, all_trials_by_subj, algo_choices):
    """Aggregate fixation- and word-level measures into one row per trial.

    Args:
        dffix_combined: fixation-level DataFrame across trials with
            ``subject_trialID``, ``subject``, ``trial_id``, ``item``,
            ``condition``, ``fixation_number``, ``duration`` and the
            algorithm-suffixed word columns used below.
        wordcomb: word-level DataFrame with ``subject_trialID``,
            ``subject``, ``trial_id``, ``word`` and (optionally)
            per-algorithm word measures.
        all_trials_by_subj: nested mapping subject -> trial_id -> trial-info
            dict (containing "Fixation Cleaning Stats" and, optionally,
            "trial_start_time"/"trial_end_time"/"question_correct").
        algo_choices: algorithm suffixes to aggregate for.

    Returns:
        A copy of the trial-level DataFrame.
    """
    tmp = dffix_combined.copy()

    # One row per trial, keeping identifying columns only.
    trial = tmp.drop_duplicates(subset="subject_trialID", keep="first")
    names = ["subject_trialID", "subject", "trial_id", "item", "condition"]
    trial = trial[names].copy()

    # Pull per-trial metadata (timings, question accuracy, blink counts)
    # from the trial-info dicts.
    for index, row in trial.iterrows():
        # NOTE(review): this lookup happens before the membership check a few
        # lines below, so a missing subject/trial raises KeyError here; the
        # guard presumably ought to precede the access — confirm intent.
        selected_trial = all_trials_by_subj[row["subject"]][row["trial_id"]]
        info_keys = [
            k for k in selected_trial.keys() if k in ["trial_start_time", "trial_end_time", "question_correct"]
        ]
        if row["subject"] in all_trials_by_subj and row["trial_id"] in all_trials_by_subj[row["subject"]]:
            if selected_trial["Fixation Cleaning Stats"]["Discard fixation before or after blinks"]:
                trial.at[index, "blink"] = selected_trial["Fixation Cleaning Stats"][
                    "Number of discarded fixations due to blinks"
                ]
            for key, value in selected_trial.items():
                if key in info_keys:
                    trial.at[index, key] = value

    # Align the trial table with the trials present in the word-level data.
    subdf = wordcomb.copy().loc[:, ["subject_trialID"]].drop_duplicates(subset=["subject_trialID"], keep="first")
    trial = pd.merge(trial, subdf, on="subject_trialID", how="left")
    # Word count per trial from the word-level table.
    for sub, subdf in wordcomb.groupby("subject"):
        for trialid, trialdf in subdf.groupby("trial_id"):
            trial.loc[((trial["subject"] == sub) & (trial["trial_id"] == trialid)), "number_of_words_in_trial"] = (
                trialdf["word"].count()
            )
    trial.sort_values(by="subject_trialID", inplace=True)

    # Blink count per trial from the fixation-level data, halved (each blink
    # presumably marks two fixation rows — verify against upstream tagging).
    # NOTE(review): if "blink" was already set from the trial-info dicts
    # above, this merge produces blink_x/blink_y columns — confirm which
    # source should win.
    if "blink" in tmp.columns:
        blink = tmp.groupby("subject_trialID")["blink"].sum() / 2
        blink = blink.round().reset_index()
        trial = pd.merge(trial, blink, on="subject_trialID", how="left")

    # Positional assignment: relies on `trial` being sorted by
    # subject_trialID so rows align with groupby's sorted group order.
    trial["nfix"] = tmp.groupby("subject_trialID")["fixation_number"].agg("count").values
    new_col_dfs = []
    new_col_dfs.append(tmp.groupby("subject_trialID")["duration"].agg("mean").reset_index(name="mean_fix_duration"))

    new_col_dfs.append(tmp.groupby("subject_trialID")["duration"].agg("sum").reset_index(name="total_fix_duration"))
    for algo_choice in algo_choices:
        # Number of word-reading runs per trial (max run id).
        new_col_dfs.append(
            tmp.groupby("subject_trialID")[f"word_runid_{algo_choice}"]
            .agg("max")
            .reset_index(name=f"nrun_{algo_choice}")
        )
        # Incoming saccade length = landing position + launch distance;
        # negative/missing values are excluded from the mean.
        tmp[f"saccade_length_{algo_choice}"] = tmp[f"word_land_{algo_choice}"] + tmp[f"word_launch_{algo_choice}"]
        new_col_dfs.append(
            tmp[(tmp[f"saccade_length_{algo_choice}"] >= 0) & tmp[f"saccade_length_{algo_choice}"].notna()]
            .groupby("subject_trialID")[f"saccade_length_{algo_choice}"]
            .agg("mean")
            .reset_index(name=f"saccade_length_{algo_choice}")
        )

        word = wordcomb.copy()
        # Word-level measures averaged/summed per trial, only when present.
        if f"firstrun_skip_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"firstrun_skip_{algo_choice}"]
                .agg("mean")
                .reset_index(name=f"skip_{algo_choice}")
            )
        if f"refix_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"refix_{algo_choice}"]
                .agg("mean")
                .reset_index(name=f"refix_{algo_choice}")
            )
        if f"reg_in_{algo_choice}" in wordcomb.columns:
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"reg_in_{algo_choice}"]
                .agg("mean")
                .reset_index(name=f"reg_{algo_choice}")
            )

        if f"firstrun_dur_{algo_choice}" in wordcomb.columns:
            # First-pass time per trial = summed first-run word durations.
            new_col_dfs.append(
                word.groupby("subject_trialID")[f"firstrun_dur_{algo_choice}"]
                .agg("sum")
                .reset_index(name=f"firstpass_{algo_choice}")
            )

        if f"total_fixation_duration_{algo_choice}" in wordcomb.columns:
            # Re-reading time = total fixation time minus first-run time.
            new_col_dfs.append(
                (word[f"total_fixation_duration_{algo_choice}"] - word[f"firstrun_dur_{algo_choice}"])
                .groupby(word["subject_trialID"])
                .agg("sum")
                .reset_index(name=f"rereading_{algo_choice}")
            )
    # Join all aggregates onto the trial table in one concat.
    trial = pd.concat(
        [trial.set_index("subject_trialID")] + [df.set_index("subject_trialID") for df in new_col_dfs], axis=1
    ).reset_index()
    # NOTE(review): algo_choice here is the leaked loop variable, so with
    # multiple algorithms only the last one gets a reading_rate column —
    # confirm whether a per-algorithm loop was intended.
    trial[f"reading_rate_{algo_choice}"] = (
        60000 / (trial["total_fix_duration"] / trial["number_of_words_in_trial"])
    ).round()

    return trial.copy()
|
|
|
|
|
def aggregate_subjects(trials, algo_choices):
    """Aggregate trial-level measures into one row per subject.

    Produces per-subject means of ``nfix`` and ``blink``, the count of
    correct questions, the number of trials, and the per-subject mean of
    every per-algorithm trial measure that is present in ``trials``.

    Fix: ``blink`` and ``question_correct`` are only added conditionally by
    ``aggregate_trials`` (blink discarding and question info are optional),
    so this function now guards on column presence instead of raising
    KeyError when they are absent. Behavior is unchanged when all columns
    exist.

    Args:
        trials: trial-level DataFrame (output of ``aggregate_trials``); must
            contain ``subject``, ``nfix`` and ``trial_id``.
        algo_choices: algorithm suffixes whose measures should be averaged.

    Returns:
        A per-subject aggregate DataFrame.
    """
    # Mean of the base measures that are actually present.
    base_cols = [c for c in ["nfix", "blink"] if c in trials.columns]
    trial_aggregates = trials.groupby("subject")[base_cols].mean().round(3).reset_index()

    # Question accuracy is optional upstream; aggregate only when present.
    if "question_correct" in trials.columns:
        trial_aggregates = trial_aggregates.merge(
            trials.groupby("subject")["question_correct"].sum().reset_index(name="n_question_correct"),
            on="subject",
        )
    trial_aggregates = trial_aggregates.merge(
        trials.groupby("subject")["trial_id"].count().reset_index(name="ntrial"), on="subject"
    )
    for algo_choice in algo_choices:
        # Average every per-algorithm trial measure that exists.
        cols_to_do = [
            c
            for c in [
                f"saccade_length_{algo_choice}",
                f"reg_{algo_choice}",
                f"mean_fix_duration_{algo_choice}",
                f"total_fix_duration_{algo_choice}",
                f"reading_rate_{algo_choice}",
                f"refix_{algo_choice}",
                f"nrun_{algo_choice}",
                f"skip_{algo_choice}",
            ]
            if c in trials.columns
        ]
        trial_aggregates_temp = trials.groupby("subject")[cols_to_do].mean().round(3).reset_index()
        trial_aggregates = pd.merge(trial_aggregates, trial_aggregates_temp, how="left", on="subject")

    return trial_aggregates
|
|