from icecream import ic
from matplotlib import pyplot as plt
import pathlib as pl
import json
from PIL import Image
from torch.utils.data.dataloader import DataLoader as dl
import matplotlib.patches as patches
from torch.utils.data import Dataset as torch_dset
import torchvision.transforms.functional as tvfunc
import einops as eo
from collections.abc import Iterable
import numpy as np
import pandas as pd
from matplotlib import font_manager
from matplotlib.font_manager import FontProperties
from matplotlib.patches import Rectangle
from tqdm.auto import tqdm
import torch as t
import plotly.express as px
import copy
import yaml
import classic_correction_algos as calgo
import analysis_funcs as anf
import models
import popEye_funcs as pf
from loss_functions import corn_label_from_logits
import torch.multiprocessing

torch.multiprocessing.set_sharing_strategy("file_system")  # Needed to make multi proc not fail on linux
ic.configureOutput(includeContext=True)

event_strs = [
    "EFIX",
    "EFIX R",
    "EFIX L",
    "SSACC",
    "ESACC",
    "SFIX",
    "MSG",
    "SBLINK",
    "EBLINK",
    "BUTTON",
    "INPUT",
    "END",
    "START",
    "DISPLAY ON",
]
AVAILABLE_FONTS = [x.name for x in font_manager.fontManager.ttflist]
COLORS = px.colors.qualitative.Alphabet
RESULTS_FOLDER = pl.Path("results")
PLOTS_FOLDER = pl.Path("plots")
DIST_MODELS_FOLDER = pl.Path("models")
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
DEFAULT_FIX_MEASURES = [
    "letternum", "letter", "on_word_number", "on_word", "on_sentence",
    "num_words_in_sentence", "on_sentence_num", "word_land", "line_let",
    "line_word", "sac_in", "sac_out", "word_launch", "word_refix",
    "word_reg_in", "word_reg_out", "sentence_reg_in", "word_firstskip",
    "word_run", "sentence_run", "word_run_fix", "word_cland",
]
ALL_FIX_MEASURES = DEFAULT_FIX_MEASURES + [
    "angle_incoming", "angle_outgoing", "line_let_from_last_letter",
    "sentence_word", "line_let_previous", "line_let_next", "sentence_refix",
    "word_reg_out_to", "word_reg_in_from", "sentence_reg_out",
    "sentence_reg_in_from", "sentence_reg_out_to", "sentence_firstskip",
    "word_runid", "sentence_runid", "word_fix", "sentence_fix",
    "sentence_run_fix",
]


class DSet(torch_dset):
    def __init__(
        self,
        in_sequence: t.Tensor,
        chars_center_coords_padded: t.Tensor,
        out_categories: t.Tensor,
        trialslist: list,
        padding_list: list = None,
        padding_at_end: bool = False,
        return_images_for_conv: bool = False,
        im_partial_string: str = "fixations_chars_channel_sep",
        input_im_shape=[224, 224],
    ) -> None:
        super().__init__()
        self.in_sequence = in_sequence
        self.chars_center_coords_padded = chars_center_coords_padded
        self.out_categories = out_categories
        self.padding_list = padding_list
        self.padding_at_end = padding_at_end
        self.trialslist = trialslist
        self.return_images_for_conv = return_images_for_conv
        self.input_im_shape = input_im_shape
        if return_images_for_conv:
            self.im_partial_string = im_partial_string
            self.plot_files = [
                str(x["plot_file"]).replace("fixations_words", im_partial_string) for x in self.trialslist
            ]

    def __getitem__(self, index):
        if self.return_images_for_conv:
            im = Image.open(self.plot_files[index])
            if [im.size[1], im.size[0]] != self.input_im_shape:
                im = tvfunc.resize(im, self.input_im_shape)
            im = tvfunc.normalize(tvfunc.to_tensor(im), IMAGENET_MEAN, IMAGENET_STD)
        if self.chars_center_coords_padded is not None:
            if self.padding_list is not None:
                attention_mask = t.ones(self.in_sequence[index].shape[:-1], dtype=t.long)
                if self.padding_at_end:
                    if self.padding_list[index] > 0:
                        attention_mask[-self.padding_list[index] :] = 0
                else:
                    attention_mask[: self.padding_list[index]] = 0
                if self.return_images_for_conv:
                    return (
                        self.in_sequence[index],
                        self.chars_center_coords_padded[index],
                        im,
                        attention_mask,
                        self.out_categories[index],
                    )
                return (
                    self.in_sequence[index],
                    self.chars_center_coords_padded[index],
                    attention_mask,
                    self.out_categories[index],
                )
            else:
                if self.return_images_for_conv:
                    return (
                        self.in_sequence[index],
                        self.chars_center_coords_padded[index],
                        im,
                        self.out_categories[index],
                    )
                else:
                    return (
                        self.in_sequence[index],
                        self.chars_center_coords_padded[index],
                        self.out_categories[index],
                    )
        if self.padding_list is not None:
            attention_mask = t.ones(self.in_sequence[index].shape[:-1], dtype=t.long)
            if self.padding_at_end:
                if self.padding_list[index] > 0:
                    attention_mask[-self.padding_list[index] :] = 0
            else:
                attention_mask[: self.padding_list[index]] = 0
            if self.return_images_for_conv:
                return (self.in_sequence[index], im, attention_mask, self.out_categories[index])
            else:
                return (self.in_sequence[index], attention_mask, self.out_categories[index])
        if self.return_images_for_conv:
            return (self.in_sequence[index], im, self.out_categories[index])
        else:
            return (self.in_sequence[index], self.out_categories[index])

    def __len__(self):
        if isinstance(self.in_sequence, t.Tensor):
            return self.in_sequence.shape[0]
        else:
            return len(self.in_sequence)
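# Usage sketch (illustrative only, not part of the pipeline): shows the tensor
# shapes DSet is built around and the attention mask it returns when no image
# channel is requested. The shapes and padding values below are assumptions
# made for demonstration.
def _example_dset_usage():
    seqs = t.zeros((4, 500, 3))  # (trials, fixations, features)
    targets = t.zeros((4, 500), dtype=t.long)  # line index per fixation
    trials = [{"plot_file": "unused.png"} for _ in range(4)]
    dset = DSet(seqs, None, targets, trials, padding_list=[0, 10, 3, 0])
    loader = dl(dset, batch_size=2, shuffle=False)
    in_seq, attention_mask, out_cats = next(iter(loader))
    return in_seq.shape, attention_mask.shape, out_cats.shape  # (2, 500, 3), (2, 500), (2, 500)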
def remove_compile_from_model(model):
    if hasattr(model.project, "_orig_mod"):
        model.project = model.project._orig_mod
        model.chars_conv = model.chars_conv._orig_mod
        model.chars_classifier = model.chars_classifier._orig_mod
        model.layer_norm_in = model.layer_norm_in._orig_mod
        model.bert_model = model.bert_model._orig_mod
        model.linear = model.linear._orig_mod
    return model


def remove_compile_from_dict(state_dict):
    for key in list(state_dict.keys()):
        newkey = key.replace("._orig_mod.", ".")
        state_dict[newkey] = state_dict.pop(key)
    return state_dict
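# Sketch of what remove_compile_from_dict does to checkpoint keys:
# torch.compile wraps submodules so saved state-dict keys gain an "_orig_mod"
# segment, which has to be stripped before load_state_dict on an uncompiled
# model. The key below is a made-up example.
def _example_remove_compile_from_dict():
    state = {"bert_model._orig_mod.embeddings.weight": t.zeros(2)}
    cleaned = remove_compile_from_dict(state)
    return list(cleaned)  # ["bert_model.embeddings.weight"]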
def load_model(model_file, cfg):
    try:
        model_loaded = t.load(model_file, map_location="cpu", weights_only=True)
        if "hyper_parameters" in model_loaded.keys():
            model_cfg_temp = model_loaded["hyper_parameters"]["cfg"]
        else:
            model_cfg_temp = cfg
        model_state_dict = model_loaded["state_dict"]
    except Exception as e:
        ic(e)
        ic(f"Failed to load {model_file}")
        return None
    model = models.LitModel(
        [1, 500, 3],
        model_cfg_temp["hidden_dim_bert"],
        model_cfg_temp["num_attention_heads"],
        model_cfg_temp["n_layers_BERT"],
        model_cfg_temp["loss_function"],
        1e-4,
        model_cfg_temp["weight_decay"],
        model_cfg_temp,
        model_cfg_temp["use_lr_warmup"],
        model_cfg_temp["use_reduce_on_plateau"],
        track_gradient_histogram=model_cfg_temp["track_gradient_histogram"],
        register_forw_hook=model_cfg_temp["track_activations_via_hook"],
        char_dims=model_cfg_temp["char_dims"],
    )
    model = remove_compile_from_model(model)
    model_state_dict = remove_compile_from_dict(model_state_dict)
    with t.no_grad():
        model.load_state_dict(model_state_dict, strict=False)
    model.eval()
    model.freeze()
    return model


def find_and_load_model(model_date: str):
    model_cfg_file = list(DIST_MODELS_FOLDER.glob(f"*{model_date}*.yaml"))
    if len(model_cfg_file) == 0:
        ic(f"No model cfg yaml found for {model_date}")
        return None, None
    model_cfg_file = model_cfg_file[0]
    with open(model_cfg_file) as f:
        model_cfg = yaml.safe_load(f)
    model_file = list(pl.Path("models").glob(f"*{model_date}*.ckpt"))[0]
    model = load_model(model_file, model_cfg)
    return model, model_cfg


def set_up_models(dist_models_folder):
    out_dict = {}
    dist_models_with_norm = list(dist_models_folder.glob("*normalize_by_line_height_and_width_True*.ckpt"))
    dist_models_without_norm = list(dist_models_folder.glob("*normalize_by_line_height_and_width_False*.ckpt"))
    DIST_MODEL_DATE_WITH_NORM = dist_models_with_norm[0].stem.split("_")[1]

    models_without_norm_df = [find_and_load_model(m_file.stem.split("_")[1]) for m_file in dist_models_without_norm]
    models_with_norm_df = [find_and_load_model(m_file.stem.split("_")[1]) for m_file in dist_models_with_norm]

    model_cfg_without_norm_df = [x[1] for x in models_without_norm_df if x[1] is not None][0]
    model_cfg_with_norm_df = [x[1] for x in models_with_norm_df if x[1] is not None][0]
    models_without_norm_df = [x[0] for x in models_without_norm_df if x[0] is not None]
    models_with_norm_df = [x[0] for x in models_with_norm_df if x[0] is not None]

    ensemble_model_avg = models.EnsembleModel(
        models_without_norm_df, models_with_norm_df, learning_rate=0.0058, use_simple_average=True
    )
    out_dict["ensemble_model_avg"] = ensemble_model_avg
    out_dict["model_cfg_without_norm_df"] = model_cfg_without_norm_df
    out_dict["model_cfg_with_norm_df"] = model_cfg_with_norm_df

    single_DIST_model, single_DIST_model_cfg = find_and_load_model(model_date=DIST_MODEL_DATE_WITH_NORM)
    out_dict["single_DIST_model"] = single_DIST_model
    out_dict["single_DIST_model_cfg"] = single_DIST_model_cfg
    return out_dict


def reorder_columns(
    df,
    cols=[
        "subject",
        "trial_id",
        "item",
        "condition",
        "fixation_number",
        "num",
        "word_number",
        "sentence_number",
        "duration",
        "start_uncorrected",
        "stop_uncorrected",
        "start_time",
        "end_time",
        "corrected_start_time",
        "corrected_end_time",
        "dX",
        "dY",
    ],
):
    existing_cols = [col for col in cols if col in df.columns]
    other_cols = [col for col in df.columns if col not in cols]
    return df[existing_cols + other_cols]


def nan_or_int_minus_one(x):
    if not pd.isna(x):
        return int(x - 1.0)
    else:
        return pd.NA
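# Sketch of the dense-rank trick used by add_popEye_cols_to_chars_df below:
# pandas' dense rank is 1-based, and nan_or_int_minus_one shifts it to a
# 0-based within-group index while passing missing values through as pd.NA.
# The values are made-up examples.
def _example_dense_rank_to_zero_based():
    s = pd.Series([10, 10, 12, np.nan, 15])
    return s.rank(method="dense").map(nan_or_int_minus_one)  # 0, 0, 1, <NA>, 2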
def add_popEye_cols_to_chars_df(chars_df):
    if "letternum" not in chars_df.columns or "letline" not in chars_df.columns:
        chars_df.reset_index(drop=False, inplace=True)
        chars_df.rename({"index": "letternum"}, axis=1, inplace=True)
        chars_df.loc[:, "letline"] = -1
        chars_df["wordline"] = (
            chars_df.groupby("assigned_line")["in_word_number"].rank(method="dense").map(nan_or_int_minus_one)
        )
        chars_df["wordsent"] = (
            chars_df.groupby("in_sentence_number")["in_word_number"].rank(method="dense").map(nan_or_int_minus_one)
        )
        chars_df["letword"] = (
            chars_df.groupby("in_word_number")["letternum"].rank(method="dense").map(nan_or_int_minus_one)
        )
        for line_idx in chars_df.assigned_line.unique():
            chars_df.loc[chars_df.assigned_line == line_idx, "letline"] = (
                chars_df.loc[chars_df.assigned_line == line_idx, "char"].reset_index().index
            )
    return chars_df


def add_boxes_to_ax(
    chars_list,
    ax,
    font_to_use="DejaVu Sans Mono",
    fontsize=21,
    prefix="char",
    box_annotations: list = None,
    edgecolor="grey",
    linewidth=0.8,
):
    if box_annotations is None:
        enum = chars_list
    else:
        enum = zip(chars_list, box_annotations)
    for v in enum:
        if box_annotations is not None:
            v, annot_text = v
        x0, y0 = v[f"{prefix}_xmin"], v[f"{prefix}_ymin"]
        xdiff, ydiff = v[f"{prefix}_xmax"] - v[f"{prefix}_xmin"], v[f"{prefix}_ymax"] - v[f"{prefix}_ymin"]
        ax.add_patch(Rectangle((x0, y0), xdiff, ydiff, edgecolor=edgecolor, facecolor="none", lw=linewidth, alpha=0.4))
        if box_annotations is not None:
            ax.annotate(
                str(annot_text),
                (x0 + xdiff / 2, y0),
                horizontalalignment="center",
                verticalalignment="center",
                fontproperties=FontProperties(family=font_to_use, style="normal", size=fontsize / 1.5),
            )


def add_text_to_ax(
    chars_list,
    ax,
    font_to_use="DejaVu Sans Mono",
    fontsize=21,
    prefix="char",
):
    font_props = FontProperties(family=font_to_use, style="normal", size=fontsize)
    enum = chars_list
    for v in enum:
        ax.text(
            v[f"{prefix}_x_center"],
            v[f"{prefix}_y_center"],
            v[prefix],
            horizontalalignment="center",
            verticalalignment="center",
            fontproperties=font_props,
        )


def set_font_from_chars_list(trial):
    if "chars_list" in trial:
        chars_df = pd.DataFrame(trial["chars_list"])
        line_diffs = np.diff(chars_df.char_y_center.unique())
        y_diffs = np.unique(line_diffs)
        if len(y_diffs) == 1:
            y_diff = y_diffs[0]
        else:
            y_diff = np.min(y_diffs)
        y_diff = round(y_diff * 2) / 2
    else:
        y_diff = 1 / 0.333 * 18
    font_size = y_diff * 0.333  # pixel to point conversion
    return round((font_size) * 4, ndigits=0) / 4


def get_plot_props(trial, available_fonts):
    if "font" in trial.keys():
        font = trial["font"]
        font_size = trial["font_size"]
        if font not in available_fonts:
            font = "DejaVu Sans Mono"
    else:
        font = "DejaVu Sans Mono"
        font_size = 21
    dpi = 96
    if "display_coords" in trial.keys() and trial["display_coords"] is not None:
        screen_res = (trial["display_coords"][2], trial["display_coords"][3])
    else:
        screen_res = (1920, 1080)
    return font, font_size, dpi, screen_res


def get_font_and_font_size_from_trial(trial):
    font_face, font_size, dpi, screen_res = get_plot_props(trial, AVAILABLE_FONTS)
    if font_size is None and "font_size" in trial:
        font_size = trial["font_size"]
    elif font_size is None:
        font_size = set_font_from_chars_list(trial)
    return font_face, font_size


def sigmoid(x):
    return 1 / (1 + np.exp(-1 * x))
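# Worked numeric sketch of the arrow-size mapping used by matplotlib_plot_df
# below: durations are min-max scaled to roughly [-1.5, 1.5], squashed with
# sigmoid, then scaled into a usable annotation-size range. The durations here
# are made-up example values in ms.
def _example_duration_arrow_scaling():
    durations = pd.Series([80.0, 200.0, 560.0])
    duration_scaled = durations - durations.min()
    duration_scaled = (((duration_scaled / duration_scaled.max()) - 0.5) * 3).values
    return sigmoid(duration_scaled) * 50 * 0.5  # monotone in duration, bounded (0, 25)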
def matplotlib_plot_df(
    dffix,
    trial,
    algo_choice,
    dffix_no_clean=None,
    desired_dpi=300,
    fix_to_plot=[],
    stim_info_to_plot=["Characters", "Word boxes"],
    box_annotations: list = None,
    font=None,
    use_duration_arrow_sizes=True,
):
    chars_df = pd.DataFrame(trial["chars_list"]) if "chars_list" in trial else None
    if chars_df is not None:
        font_face, font_size = get_font_and_font_size_from_trial(trial)
        font_size = font_size * 0.65
    else:
        ic("No character or word information available to plot")

    if "display_coords" in trial:
        desired_width_in_pixels = trial["display_coords"][2] + 1
        desired_height_in_pixels = trial["display_coords"][3] + 1
    else:
        desired_width_in_pixels = 1920
        desired_height_in_pixels = 1080

    figure_width = desired_width_in_pixels / desired_dpi
    figure_height = desired_height_in_pixels / desired_dpi

    fig = plt.figure(figsize=(figure_width, figure_height), dpi=desired_dpi)
    ax = fig.add_subplot(1, 1, 1)
    fig.subplots_adjust(bottom=0)
    fig.subplots_adjust(top=1)
    fig.subplots_adjust(right=1)
    fig.subplots_adjust(left=0)

    if font is None:
        if "font" in trial and trial["font"] in AVAILABLE_FONTS:
            font_to_use = trial["font"]
        else:
            font_to_use = "DejaVu Sans Mono"
    else:
        font_to_use = font
    if "font_size" in trial:
        font_size = trial["font_size"]
    else:
        font_size = 20

    if "Words" in stim_info_to_plot and "words_list" in trial:
        add_text_to_ax(
            trial["words_list"],
            ax,
            font_to_use,
            prefix="word",
            fontsize=font_size / 3.89,
        )
    if "Word boxes" in stim_info_to_plot and "words_list" in trial:
        add_boxes_to_ax(
            trial["words_list"],
            ax,
            font_to_use,
            prefix="word",
            fontsize=font_size / 3.89,
            box_annotations=box_annotations,
            edgecolor="black",
            linewidth=0.9,
        )
    if "Characters" in stim_info_to_plot and "chars_list" in trial:
        add_text_to_ax(
            trial["chars_list"],
            ax,
            font_to_use,
            prefix="char",
            fontsize=font_size / 3.89,
        )
    if "Character boxes" in stim_info_to_plot and "chars_list" in trial:
        add_boxes_to_ax(
            trial["chars_list"],
            ax,
            font_to_use,
            prefix="char",
            fontsize=font_size / 3.89,
            box_annotations=box_annotations,
        )

    if "Uncorrected Fixations" in fix_to_plot and dffix_no_clean is None:
        if use_duration_arrow_sizes and "duration" in dffix.columns:
            duration_scaled = dffix.duration - dffix.duration.min()
            duration_scaled = (((duration_scaled / duration_scaled.max()) - 0.5) * 3).values
            durations = sigmoid(duration_scaled) * 50 * 0.5
        if use_duration_arrow_sizes:
            ax.plot(
                dffix.x,
                dffix.y,
                label="Raw fixations",
                color="blue",
                alpha=0.5,
            )
            add_arrow_annotations(dffix, "y", ax, "blue", durations[:-1])
        else:
            ax.plot(
                dffix.x,
                dffix.y,
                label="Remaining fixations",
                color="blue",
                alpha=0.5,
            )
            add_arrow_annotations(dffix, "y", ax, "blue", 4)
    if dffix_no_clean is not None and "Uncorrected Fixations" in fix_to_plot:
        ax.plot(
            dffix_no_clean.x,
            dffix_no_clean.y,
            # marker='.',
            label="All fixations",
            color="k",
            alpha=0.5,
            lw=1,
        )
        add_arrow_annotations(dffix_no_clean, "y", ax, "k", 4)
        if "was_discarded_due_blinks" in dffix_no_clean.columns and dffix_no_clean["was_discarded_due_blinks"].any():
            discarded_blink_fix = dffix_no_clean.loc[dffix_no_clean["was_discarded_due_blinks"], :].copy()
            ax.scatter(
                discarded_blink_fix.x,
                discarded_blink_fix.y,
                s=12,
                label="Discarded due to blinks",
                lw=1.5,
                edgecolors="orange",
                facecolors="none",
            )
        if (
            "was_discarded_due_to_long_duration" in dffix_no_clean.columns
            and dffix_no_clean["was_discarded_due_to_long_duration"].any()
        ):
            discarded_long_fix = dffix_no_clean.loc[dffix_no_clean["was_discarded_due_to_long_duration"], :].copy()
            ax.scatter(
                discarded_long_fix.x,
                discarded_long_fix.y,
                s=18,
                label="Overly long fixations",
                lw=0.8,
                edgecolors="purple",
                facecolors="none",
            )
        if "was_merged" in dffix_no_clean.columns:
            merged_fix = dffix_no_clean.loc[dffix_no_clean["was_merged"], :].copy()
            if not merged_fix.empty:
                ax.scatter(
                    merged_fix.x,
                    merged_fix.y,
                    s=7,
                    label="Merged short fixations",
                    lw=1,
                    edgecolors="red",
                    facecolors="none",
                )
        if "was_discarded_outside_text" in dffix_no_clean.columns:
            was_discarded_outside_text_fix = dffix_no_clean.loc[dffix_no_clean["was_discarded_outside_text"], :].copy()
            if not was_discarded_outside_text_fix.empty:
                ax.scatter(
                    was_discarded_outside_text_fix.x,
                    was_discarded_outside_text_fix.y,
                    s=8,
                    label="Outside text fixations",
                    lw=1.2,
                    edgecolors="blue",
                    facecolors="none",
                )
        if "was_discarded_short_fix" in dffix_no_clean.columns:
            was_discarded_short_fix_fix = dffix_no_clean.loc[dffix_no_clean["was_discarded_short_fix"], :].copy()
            if not was_discarded_short_fix_fix.empty:
                ax.scatter(
                    was_discarded_short_fix_fix.x,
                    was_discarded_short_fix_fix.y,
                    label="Discarded short fixations",
                    s=9,
                    lw=1.5,
                    edgecolors="green",
                    facecolors="none",
                )
    if "Corrected Fixations" in fix_to_plot:
        if isinstance(algo_choice, list):
            algo_choices = algo_choice
            repeats = range(len(algo_choice))
        else:
            algo_choices = [algo_choice]
            repeats = range(1)
        for algoIdx in repeats:
            algo_choice = algo_choices[algoIdx]
            if f"y_{algo_choice}" in dffix.columns:
                ax.plot(
                    dffix.x,
                    dffix.loc[:, f"y_{algo_choice}"],
                    label=algo_choice,
                    color=COLORS[algoIdx],
                    alpha=0.6,
                    linewidth=0.6,
                )
                add_arrow_annotations(dffix, f"y_{algo_choice}", ax, COLORS[algoIdx], 6)

    ax.set_xlim((0, desired_width_in_pixels))
    ax.set_ylim((0, desired_height_in_pixels))
    ax.invert_yaxis()
    if "Corrected Fixations" in fix_to_plot or "Uncorrected Fixations" in fix_to_plot:
        ax.legend(prop={"size": 5})
    return fig, desired_width_in_pixels, desired_height_in_pixels


def add_arrow_annotations(dffix, y_col, ax, color, size):
    x = dffix.x.values
    y = dffix.loc[:, y_col].values
    x = x[:-1]
    y = y[:-1]
    dX = -(x[1:] - x[:-1])
    dY = -(y[1:] - y[:-1])
    xpos = x[1:]
    ypos = y[1:]
    if isinstance(size, Iterable):
        use_size_idx = True
    else:
        use_size_idx = False
        s = size
    for fidx, (X, Y, dx, dy) in enumerate(zip(xpos, ypos, dX, dY)):
        if use_size_idx:
            s = size[fidx]
        ax.annotate(
            "",
            xytext=(X + 0.001 * dx, Y + 0.001 * dy),
            xy=(X, Y),
            arrowprops=dict(arrowstyle="fancy", color=color),
            size=s,
            alpha=0.3,
        )


def plot_saccade_df(fix_df, sac_df, trial, show_numbers=False, add_lines_to_fix_df=False):
    stim_only_fig, _, _ = matplotlib_plot_df(
        fix_df,
        trial,
        None,
        dffix_no_clean=None,
        desired_dpi=300,
        fix_to_plot=[],
        stim_info_to_plot=["Characters", "Word boxes"],
        box_annotations=None,
        font=None,
    )
    if stim_only_fig is None:
        fig, ax = plt.subplots(1, 1, figsize=(8, 5), dpi=150)
        invert_ax_needed = True
    else:
        fig = stim_only_fig
        ax = fig.axes[0]
        invert_ax_needed = False

    def plot_arrow(x1, y1, x2, y2, scale_factor):
        """Plot an arrow from (x1,y1) to (x2,y2) with adjustable size"""
        ax.arrow(
            x1,
            y1,
            (x2 - x1),
            (y2 - y1),
            color="k",
            alpha=0.7,
            length_includes_head=True,
            width=3 * scale_factor,
            head_width=15 * scale_factor,
            head_length=15 * scale_factor,
        )

    xs = sac_df["xs"].values
    ys = sac_df["ys"].values
    xe = sac_df["xe"].values
    ye = sac_df["ye"].values
    extent = np.sqrt((xs.min() - xe.max()) ** 2 + (ys.min() - ye.max()) ** 2)
    scale_factor = 0.0005 * extent
    for i in range(len(xs)):
        plot_arrow(xs[i], ys[i], xe[i], ye[i], scale_factor=scale_factor)
    if add_lines_to_fix_df:
        plotfunc = ax.plot
    else:
        plotfunc = ax.scatter
    if "x" in fix_df.columns:
        plotfunc(fix_df["x"], fix_df["y"], marker=".")
    else:
        plotfunc(fix_df["xs"], fix_df["ys"], marker=".")
    if invert_ax_needed:
        ax.invert_yaxis()
    if show_numbers:
        size = 8 * scale_factor
        xytext = (1, -1)
        for index, row in fix_df.iterrows():
            ax.annotate(
                index,
                xy=(row["x"], row["y"]),
                textcoords="offset points",
                ha="center",
                xytext=xytext,
                va="bottom",
                color="k",
                size=size,
            )
        for index, row in sac_df.iterrows():
            ax.annotate(
                index,
                xy=(row["xs"], row["ys"]),
                textcoords="offset points",
                ha="center",
                xytext=xytext,
                va="top",
                color="r",
                size=size,
            )
    return fig


def get_events_df_from_lines_and_trial_selection(trial, trial_lines, discard_fixations_without_sfix):
    line_dicts = []
    fixations_dicts = []
    events_dicts = []
    blink_started = False
    fixation_started = False
    esac_count = 0
    efix_count = 0
    sfix_count = 0
    sblink_count = 0
    eblink_times = []
    eye_to_use = "R"
    for l in trial_lines:  # determine which eye was recorded from the first EFIX event
        if "EFIX R" in l:
            eye_to_use = "R"
            break
        elif "EFIX L" in l:
            eye_to_use = "L"
            break
    for l in trial_lines:
        parts = [x.strip() for x in l.split("\t")]
        if f"EFIX {eye_to_use}" in l:
            efix_count += 1
            if fixation_started:
                had_SFIX_before_it = True
                if parts[1] == "." and parts[2] == ".":
                    continue
                fixation_started = False
            else:
                had_SFIX_before_it = False
            fix_dict = {
                "fixation_number": efix_count,
                "start_time": float(pd.to_numeric(parts[0].split()[-1].strip(), errors="coerce")),
                "end_time": float(pd.to_numeric(parts[1].strip(), errors="coerce")),
                "duration": float(pd.to_numeric(parts[2].strip(), errors="coerce")),
                "x": float(pd.to_numeric(parts[3].strip(), errors="coerce")),
                "y": float(pd.to_numeric(parts[4].strip(), errors="coerce")),
                "pupil_size": float(pd.to_numeric(parts[5].strip(), errors="coerce")),
                "had_SFIX_before_it": had_SFIX_before_it,
                "msg": "FIX",
            }
            if not discard_fixations_without_sfix or had_SFIX_before_it:
                fixations_dicts.append(fix_dict)
                events_dicts.append(
                    {
                        "num": efix_count - 1,
                        "start": float(pd.to_numeric(parts[0].split()[-1].strip(), errors="coerce")),
                        "stop": float(pd.to_numeric(parts[1].strip(), errors="coerce")),
                        "duration": float(pd.to_numeric(parts[2].strip(), errors="coerce")),
                        "xs": float(pd.to_numeric(parts[3].strip(), errors="coerce")),
                        "xe": None,
                        "ys": float(pd.to_numeric(parts[4].strip(), errors="coerce")),
                        "ye": None,
                        "ampl": None,
                        "pv": None,
                        "pupil_size": float(pd.to_numeric(parts[5].strip(), errors="coerce")),
                        "msg": "FIX",
                    }
                )
            if len(fixations_dicts) >= 2:
                assert fixations_dicts[-1]["start_time"] > fixations_dicts[-2]["start_time"], "start times not in order"
        elif f"SFIX {eye_to_use}" in l:
            sfix_count += 1
            fixation_started = True
        elif f"SBLINK {eye_to_use}" in l:
            sblink_count += 1
            blink_started = True
        elif f"EBLINK {eye_to_use}" in l:
            blink_started = False
            blink_dict = {
                "num": len(eblink_times),
                "start": float(pd.to_numeric(parts[0].split()[-1].strip(), errors="coerce")),
                "stop": float(pd.to_numeric(parts[1].strip(), errors="coerce")),
                "duration": float(pd.to_numeric(parts[2].strip(), errors="coerce")),
                "xs": None,
                "xe": None,
                "ys": None,
                "ye": None,
                "ampl": None,
                "pv": None,
                "pupil_size": None,
                "msg": "BLINK",
            }
            events_dicts.append(blink_dict)
            eblink_times.append(float(pd.to_numeric(parts[-1], errors="coerce")))
        elif "ESACC" in l:
            sac_dict = {
                "num": esac_count,
                "start": float(pd.to_numeric(parts[0].split()[-1].strip(), errors="coerce")),
                "stop": float(pd.to_numeric(parts[1].strip(), errors="coerce")),
                "duration": float(pd.to_numeric(parts[2].strip(), errors="coerce")),
                "xs": float(pd.to_numeric(parts[3].strip(), errors="coerce")),
                "ys": float(pd.to_numeric(parts[4].strip(), errors="coerce")),
                "xe": float(pd.to_numeric(parts[5].strip(), errors="coerce")),
                "ye": float(pd.to_numeric(parts[6].strip(), errors="coerce")),
                "ampl": float(pd.to_numeric(parts[7].strip(), errors="coerce")),
                "pv": float(pd.to_numeric(parts[8].strip(), errors="coerce")),
                "pupil_size": None,
                "msg": "SAC",
            }
            events_dicts.append(sac_dict)
            esac_count += 1
        if not blink_started and not any([True for x in event_strs if x in l]):
            if len(parts) < 3 or (parts[1] == "." and parts[2] == "."):
                continue
            line_dicts.append(
                {
                    "idx": float(pd.to_numeric(parts[0].strip(), errors="coerce")),
                    "x": float(pd.to_numeric(parts[1].strip(), errors="coerce")),
                    "y": float(pd.to_numeric(parts[2].strip(), errors="coerce")),
                    "p": float(pd.to_numeric(parts[3].strip(), errors="coerce")),
                    "part_of_fixation": fixation_started,
                    "fixation_number": sfix_count,
                    "part_of_blink": blink_started,
                    "blink_number": sblink_count,
                }
            )
    trial["eblink_times"] = eblink_times
    df = pd.DataFrame(line_dicts)
    df["x_smoothed"] = np.convolve(df.x, np.ones((5,)) / 5, mode="same")  # popEye smoothes this way
    df["y_smoothed"] = np.convolve(df.y, np.ones((5,)) / 5, mode="same")
    df["time"] = df["idx"] - df["idx"].iloc[0]
    df = pf.compute_velocity(df)
    events_df = pd.DataFrame(events_dicts)
    events_df["start_uncorrected"] = events_df.start
    events_df["stop_uncorrected"] = events_df.stop
    events_df["start"] = events_df.start - trial["trial_start_time"]
    events_df["stop"] = events_df.stop - trial["trial_start_time"]
    events_df["start"] = events_df["start"].clip(0, events_df["start"].max())
    events_df.sort_values(by="start", inplace=True)  # Needed because blinks can happen during other events, I think
    events_df.reset_index(drop=True, inplace=True)
    events_df = pf.event_long(events_df)
    events_df["duration"] = events_df["stop"] - events_df["start"]
    trial["efix_count"] = efix_count
    trial["eye_to_use"] = eye_to_use
    trial["sfix_count"] = sfix_count
    trial["sblink_count"] = sblink_count
    return trial, df, events_df
and parts[2] == "."): continue line_dicts.append( { "idx": float(pd.to_numeric(parts[0].strip(), errors="coerce")), "x": float(pd.to_numeric(parts[1].strip(), errors="coerce")), "y": float(pd.to_numeric(parts[2].strip(), errors="coerce")), "p": float(pd.to_numeric(parts[3].strip(), errors="coerce")), "part_of_fixation": fixation_started, "fixation_number": sfix_count, "part_of_blink": blink_started, "blink_number": sblink_count, } ) trial["eblink_times"] = eblink_times df = pd.DataFrame(line_dicts) df["x_smoothed"] = np.convolve(df.x, np.ones((5,)) / 5, mode="same") # popEye smoothes this way df["y_smoothed"] = np.convolve(df.y, np.ones((5,)) / 5, mode="same") df["time"] = df["idx"] - df["idx"].iloc[0] df = pf.compute_velocity(df) events_df = pd.DataFrame(events_dicts) events_df["start_uncorrected"] = events_df.start events_df["stop_uncorrected"] = events_df.stop events_df["start"] = events_df.start - trial["trial_start_time"] events_df["stop"] = events_df.stop - trial["trial_start_time"] events_df["start"] = events_df["start"].clip(0, events_df["start"].max()) events_df.sort_values(by="start", inplace=True) # Needed because blinks can happen during other events, I think events_df.reset_index(drop=True, inplace=True) events_df = pf.event_long(events_df) events_df["duration"] = events_df["stop"] - events_df["start"] trial["efix_count"] = efix_count trial["eye_to_use"] = eye_to_use trial["sfix_count"] = sfix_count trial["sblink_count"] = sblink_count return trial, df, events_df def add_default_font_and_character_props_to_state(trial): chars_list = trial["chars_list"] chars_df = pd.DataFrame(trial["chars_list"]) line_diffs = np.diff(chars_df.char_y_center.unique()) y_diffs = np.unique(line_diffs) if len(y_diffs) > 1: y_diff = np.min(y_diffs) else: y_diff = y_diffs[0] y_diff = round(y_diff * 2) / 2 x_txt_start = chars_list[0]["char_xmin"] y_txt_start = chars_list[0]["char_y_center"] font_face, font_size = get_font_and_font_size_from_trial(trial) line_height = y_diff return y_diff, x_txt_start, y_txt_start, font_face, font_size, line_height def get_raw_events_df_and_trial(trial, discard_fixations_without_sfix): fname = pl.Path(trial["filename"]).stem trial_id = trial["trial_id"] trial_lines = trial.pop("trial_lines") trial["plot_file"] = str(PLOTS_FOLDER.joinpath(f"{fname}_{trial_id}_2ndInput_chars_channel_sep.png")) trial, df, events_df = get_events_df_from_lines_and_trial_selection( trial, trial_lines, discard_fixations_without_sfix ) trial["gaze_df"] = df font, font_size, dpi, screen_res = get_plot_props(trial, AVAILABLE_FONTS) trial["font"] = font trial["font_size"] = font_size trial["dpi"] = dpi trial["screen_res"] = screen_res if "chars_list" in trial: chars_df = pd.DataFrame(trial["chars_list"]) chars_df = add_popEye_cols_to_chars_df(chars_df) if "index" not in chars_df.columns: chars_df.reset_index(inplace=True) trial["chars_df"] = chars_df.to_dict() trial["y_char_unique"] = list(chars_df.char_y_center.sort_values().unique()) return reorder_columns(events_df), trial def get_outlier_indeces( dffix, chars_df, x_thres_in_chars, y_thresh_in_heights, xcol, ycol, letter_width_avg, line_heights_avg ): indeces_out = [] for linenum, line_chars_subdf in chars_df.groupby("assigned_line"): left = line_chars_subdf["char_xmin"].min() right = line_chars_subdf["char_xmax"].max() top = line_chars_subdf["char_ymin"].min() bottom = line_chars_subdf["char_ymax"].max() left_min = left - (x_thres_in_chars * letter_width_avg) right_max = right + (x_thres_in_chars * letter_width_avg) top_max = top - 
(line_heights_avg * y_thresh_in_heights) bottom_min = bottom + (line_heights_avg * y_thresh_in_heights) indeces_out_line = [] indeces_out_line.extend(list(dffix.loc[dffix[xcol] < left_min, :].index)) indeces_out_line.extend(list(dffix.loc[dffix[xcol] > right_max, :].index)) indeces_out_line.extend(list(dffix.loc[dffix[ycol] < top_max, :].index)) indeces_out_line.extend(list(dffix.loc[dffix[ycol] > bottom_min, :].index)) indeces_out_line_set = set(indeces_out_line) indeces_out.append(indeces_out_line_set) return list(set.intersection(*indeces_out)) def get_distance_between_fixations_in_characters_and_recalc_duration( fix, letter_width_avg, start_colname="start", stop_colname="stop", xcol="xs" ): fix.reset_index(drop=True, inplace=True) fix.loc[:, "duration"] = fix[stop_colname] - fix[start_colname] fix.loc[:, "distance_in_char_widths"] = 0.0 for i in range(1, len(fix)): fix.loc[i, "distance_in_char_widths"] = np.round( np.abs(fix.loc[i, xcol] - fix.loc[i - 1, xcol]) / letter_width_avg, decimals=3 ) return fix def clean_fixations_popeye_no_sacc(fix, trial, duration_threshold, distance_threshold): if "letter_width_avg" in trial: letter_width_avg = trial["letter_width_avg"] else: letter_width_avg = 12 stop_time_col, start_time_col = get_time_cols(fix) if "xs" in fix.columns: x_colname = "xs" y_colname = "ys" else: x_colname = "x" y_colname = "y" if "blink" not in fix.columns: fix["blink"] = 0 fix.dropna(subset=[x_colname, y_colname], how="any", axis=0, inplace=True) fix.reset_index(drop=True, inplace=True) fix = get_distance_between_fixations_in_characters_and_recalc_duration( fix, letter_width_avg, start_time_col, stop_time_col, x_colname ) fix["num"] = np.arange(len(fix), dtype=int) i = 0 while i <= len(fix) - 1: merge_before = False merge_after = False if fix["duration"].iloc[i] <= duration_threshold: # check fixation n - 1 if i > 1: if ( fix["duration"].iloc[i - 1] > duration_threshold and fix["blink"].iloc[i - 1] == 0 and fix["distance_in_char_widths"].iloc[i] <= distance_threshold ): merge_before = True # check fixation n + 1 if i < len(fix) - 1: if ( fix["duration"].iloc[i + 1] > duration_threshold and fix["blink"].iloc[i + 1] == 0 and fix["distance_in_char_widths"].iloc[i + 1] <= distance_threshold ): merge_after = True # check merge.status if merge_before and not merge_after: merge = -1 elif not merge_before and merge_after: merge = 1 elif not merge_before and not merge_after: merge = 0 elif merge_before and merge_after: if fix["duration"].iloc[i - 1] >= fix["duration"].iloc[i + 1]: merge = -1 else: merge = 1 # close if above duration threshold else: merge = 0 if merge == 0: i += 1 elif merge == -1: fix.loc[i - 1, stop_time_col] = fix.loc[i, stop_time_col] fix.loc[i - 1, x_colname] = round((fix.loc[i - 1, x_colname] + fix.loc[i, x_colname]) / 2) fix.loc[i - 1, y_colname] = round((fix.loc[i - 1, y_colname] + fix.loc[i, y_colname]) / 2) fix = fix.drop(i, axis=0) fix.reset_index(drop=True, inplace=True) start = fix[start_time_col].iloc[i - 1] stop = fix[stop_time_col].iloc[i - 1] fix = get_distance_between_fixations_in_characters_and_recalc_duration( fix, letter_width_avg, start_time_col, stop_time_col, x_colname ) elif merge == 1: fix.loc[i + 1, start_time_col] = fix.loc[i, start_time_col] fix.loc[i + 1, x_colname] = round((fix.loc[i, x_colname] + fix.loc[i + 1, x_colname]) / 2) fix.loc[i + 1, y_colname] = round((fix.loc[i, y_colname] + fix.loc[i + 1, y_colname]) / 2) fix.drop(index=i, inplace=True) fix.reset_index(drop=True, inplace=True) start = fix.loc[i, start_time_col] stop = 
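# Minimal sketch of the set-intersection step in get_outlier_indeces: the
# per-line "outside" index sets below are fabricated; only indices outside
# every line's expanded bounding box survive the intersection.
def _example_outlier_intersection():
    per_line_outsiders = [{3, 7, 9}, {3, 9, 11}, {3, 9}]
    return set.intersection(*per_line_outsiders)  # {3, 9}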
def clean_fixations_popeye_no_sacc(fix, trial, duration_threshold, distance_threshold):
    if "letter_width_avg" in trial:
        letter_width_avg = trial["letter_width_avg"]
    else:
        letter_width_avg = 12
    stop_time_col, start_time_col = get_time_cols(fix)
    if "xs" in fix.columns:
        x_colname = "xs"
        y_colname = "ys"
    else:
        x_colname = "x"
        y_colname = "y"
    if "blink" not in fix.columns:
        fix["blink"] = 0
    fix.dropna(subset=[x_colname, y_colname], how="any", axis=0, inplace=True)
    fix.reset_index(drop=True, inplace=True)
    fix = get_distance_between_fixations_in_characters_and_recalc_duration(
        fix, letter_width_avg, start_time_col, stop_time_col, x_colname
    )
    fix["num"] = np.arange(len(fix), dtype=int)
    i = 0
    while i <= len(fix) - 1:
        merge_before = False
        merge_after = False
        if fix["duration"].iloc[i] <= duration_threshold:
            # check fixation n - 1
            if i > 1:
                if (
                    fix["duration"].iloc[i - 1] > duration_threshold
                    and fix["blink"].iloc[i - 1] == 0
                    and fix["distance_in_char_widths"].iloc[i] <= distance_threshold
                ):
                    merge_before = True
            # check fixation n + 1
            if i < len(fix) - 1:
                if (
                    fix["duration"].iloc[i + 1] > duration_threshold
                    and fix["blink"].iloc[i + 1] == 0
                    and fix["distance_in_char_widths"].iloc[i + 1] <= distance_threshold
                ):
                    merge_after = True
            # check merge.status
            if merge_before and not merge_after:
                merge = -1
            elif not merge_before and merge_after:
                merge = 1
            elif not merge_before and not merge_after:
                merge = 0
            elif merge_before and merge_after:
                if fix["duration"].iloc[i - 1] >= fix["duration"].iloc[i + 1]:
                    merge = -1
                else:
                    merge = 1
        # close if above duration threshold
        else:
            merge = 0
        if merge == 0:
            i += 1
        elif merge == -1:
            fix.loc[i - 1, stop_time_col] = fix.loc[i, stop_time_col]
            fix.loc[i - 1, x_colname] = round((fix.loc[i - 1, x_colname] + fix.loc[i, x_colname]) / 2)
            fix.loc[i - 1, y_colname] = round((fix.loc[i - 1, y_colname] + fix.loc[i, y_colname]) / 2)
            fix = fix.drop(i, axis=0)
            fix.reset_index(drop=True, inplace=True)
            start = fix[start_time_col].iloc[i - 1]
            stop = fix[stop_time_col].iloc[i - 1]
            fix = get_distance_between_fixations_in_characters_and_recalc_duration(
                fix, letter_width_avg, start_time_col, stop_time_col, x_colname
            )
        elif merge == 1:
            fix.loc[i + 1, start_time_col] = fix.loc[i, start_time_col]
            fix.loc[i + 1, x_colname] = round((fix.loc[i, x_colname] + fix.loc[i + 1, x_colname]) / 2)
            fix.loc[i + 1, y_colname] = round((fix.loc[i, y_colname] + fix.loc[i + 1, y_colname]) / 2)
            fix.drop(index=i, inplace=True)
            fix.reset_index(drop=True, inplace=True)
            start = fix.loc[i, start_time_col]
            stop = fix.loc[i, stop_time_col]
            fix = get_distance_between_fixations_in_characters_and_recalc_duration(
                fix, letter_width_avg, start_time_col, stop_time_col, x_colname
            )
    fix.loc[:, "num"] = np.arange(len(fix), dtype=int)
    # delete last fixation
    if fix.iloc[-1]["duration"] < duration_threshold:
        fix = fix.iloc[:-1]
        trial["last_fixation_was_discarded_because_too_short"] = True
    else:
        trial["last_fixation_was_discarded_because_too_short"] = False
    fix.reset_index(drop=True, inplace=True)
    return fix.copy()
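# Usage sketch for clean_fixations_popeye_no_sacc: a 20 ms fixation sitting
# between two long fixations and within the distance threshold gets merged
# into its longer neighbour. All times, coordinates, and thresholds below
# are fabricated for the demonstration.
def _example_short_fixation_merge():
    fix = pd.DataFrame(
        {
            "start": [0, 210, 240],
            "stop": [200, 230, 500],
            "x": [100.0, 110.0, 112.0],
            "y": [50.0, 50.0, 50.0],
        }
    )
    trial = {"letter_width_avg": 12}
    cleaned = clean_fixations_popeye_no_sacc(fix, trial, duration_threshold=80, distance_threshold=2)
    return cleaned  # two fixations remain; the middle one was merged forward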
def clean_dffix_own(
    trial: dict,
    choice_handle_short_and_close_fix: str,
    discard_far_out_of_text_fix,
    x_thres_in_chars,
    y_thresh_in_heights,
    short_fix_threshold,
    merge_distance_threshold: float,
    discard_long_fix: bool,
    discard_long_fix_threshold: int,
    discard_blinks: bool,
    dffix: pd.DataFrame,
):
    dffix = dffix.dropna(how="all", axis=1).copy()
    if dffix.empty:
        return dffix, trial
    dffix = dffix.rename(
        {
            k: v
            for k, v in {
                "xs": "x",
                "ys": "y",
                "num": "fixation_number",
            }.items()
            if v not in dffix.columns
        },
        axis=1,
    )
    stop_time_col, start_time_col = get_time_cols(dffix)
    add_time_cols(dffix, stop_time_col, start_time_col)
    if "dffix_no_clean" not in trial:
        trial["dffix_no_clean"] = (
            dffix.copy()
        )  # TODO check if cleaning can be dialed in or if dffix get overwritten every time
        add_time_cols(trial["dffix_no_clean"], stop_time_col, start_time_col)
    trial["dffix_no_clean"]["was_merged"] = False
    trial["dffix_no_clean"]["was_discarded_short_fix"] = False
    trial["dffix_no_clean"]["was_discarded_outside_text"] = False
    num_fix_before_clean = trial["dffix_no_clean"].shape[0]
    trial["Fixation Cleaning Stats"] = {}
    trial["Fixation Cleaning Stats"]["Number of fixations before cleaning"] = num_fix_before_clean
    trial["Fixation Cleaning Stats"]["Discard fixation before or after blinks"] = discard_blinks
    if discard_blinks and "blink" in dffix.columns:
        trial["dffix_no_clean"]["was_discarded_due_blinks"] = False
        dffix = dffix[dffix["blink"] == False].copy()
        trial["dffix_no_clean"].loc[
            ~trial["dffix_no_clean"]["start_time"].isin(dffix["start_time"]), "was_discarded_due_blinks"
        ] = True
        trial["Fixation Cleaning Stats"]["Number of discarded fixations due to blinks"] = (
            num_fix_before_clean - dffix.shape[0]
        )
        trial["Fixation Cleaning Stats"]["Number of discarded fixations due to blinks (%)"] = round(
            100
            * (trial["Fixation Cleaning Stats"]["Number of discarded fixations due to blinks"] / num_fix_before_clean),
            2,
        )
    trial["Fixation Cleaning Stats"]["Discard long fixations"] = discard_long_fix
    if discard_long_fix and not dffix.empty:
        dffix_before_long_fix_removal = dffix.copy()
        trial["dffix_no_clean"]["was_discarded_due_to_long_duration"] = False
        dffix = dffix[dffix["duration"] < discard_long_fix_threshold].copy()
        dffix_after_long_fix_removal = dffix.copy()
        trial["dffix_no_clean"].loc[
            (
                ~trial["dffix_no_clean"]["start_time"].isin(dffix_after_long_fix_removal["start_time"])
                & (trial["dffix_no_clean"]["start_time"].isin(dffix_before_long_fix_removal["start_time"]))
            ),
            "was_discarded_due_to_long_duration",
        ] = True
        trial["Fixation Cleaning Stats"]["Number of discarded long fixations"] = num_fix_before_clean - dffix.shape[0]
        trial["Fixation Cleaning Stats"]["Number of discarded long fixations (%)"] = round(
            100 * (trial["Fixation Cleaning Stats"]["Number of discarded long fixations"] / num_fix_before_clean), 2
        )
    num_fix_before_merge = dffix.shape[0]
    trial["Fixation Cleaning Stats"]["How short and close fixations were handled"] = choice_handle_short_and_close_fix
    if (
        choice_handle_short_and_close_fix == "Merge" or choice_handle_short_and_close_fix == "Merge then discard"
    ) and not dffix.empty:
        dffix_before_merge = dffix.copy()
        dffix = clean_fixations_popeye_no_sacc(dffix, trial, short_fix_threshold, merge_distance_threshold)
        dffix_after_merge = dffix.copy()
        trial["dffix_no_clean"].loc[
            (~trial["dffix_no_clean"]["start_time"].isin(dffix_after_merge["start_time"]))
            & (trial["dffix_no_clean"]["start_time"].isin(dffix_before_merge["start_time"])),
            "was_merged",
        ] = True
        if trial["last_fixation_was_discarded_because_too_short"]:
            trial["dffix_no_clean"].iloc[-1, trial["dffix_no_clean"].columns.get_loc("was_merged")] = False
            trial["dffix_no_clean"].iloc[-1, trial["dffix_no_clean"].columns.get_loc("was_discarded_short_fix")] = True
        trial["Fixation Cleaning Stats"]["Number of merged fixations"] = (
            num_fix_before_merge - dffix_after_merge.shape[0]
        )
        trial["Fixation Cleaning Stats"]["Number of merged fixations (%)"] = round(
            100 * (trial["Fixation Cleaning Stats"]["Number of merged fixations"] / num_fix_before_merge), 2
        )
    if not dffix.empty:
        dffix.reset_index(drop=True, inplace=True)
        dffix.loc[:, "fixation_number"] = np.arange(dffix.shape[0])
    trial["x_thres_in_chars"], trial["y_thresh_in_heights"] = x_thres_in_chars, y_thresh_in_heights
    if "chars_list" in trial and not dffix.empty:
        indeces_out = get_outlier_indeces(
            dffix,
            pd.DataFrame(trial["chars_list"]),
            x_thres_in_chars,
            y_thresh_in_heights,
            "x",
            "y",
            trial["letter_width_avg"],
            np.mean(trial["line_heights"]),
        )
    else:
        indeces_out = []
    dffix["is_far_out_of_text_uncorrected"] = "in"
    if len(indeces_out) > 0:
        times_out = dffix.loc[indeces_out, "start_time"].copy()
        dffix.loc[indeces_out, "is_far_out_of_text_uncorrected"] = "out"
    trial["Fixation Cleaning Stats"]["Far out of text fixations were discarded"] = discard_far_out_of_text_fix
    if discard_far_out_of_text_fix and len(indeces_out) > 0:
        num_fix_before_clean_via_discard_far_out_of_text_fix = dffix.shape[0]
        trial["dffix_no_clean"].loc[
            trial["dffix_no_clean"]["start_time"].isin(times_out), "was_discarded_outside_text"
        ] = True
        dffix = dffix.loc[dffix["is_far_out_of_text_uncorrected"] == "in", :].reset_index(drop=True).copy()
        trial["Fixation Cleaning Stats"]["Number of discarded far-out-of-text fixations"] = (
            num_fix_before_clean_via_discard_far_out_of_text_fix - dffix.shape[0]
        )
        trial["Fixation Cleaning Stats"]["Number of discarded far-out-of-text fixations (%)"] = round(
            100
            * (
                trial["Fixation Cleaning Stats"]["Number of discarded far-out-of-text fixations"]
                / num_fix_before_clean_via_discard_far_out_of_text_fix
            ),
            2,
        )
    dffix = dffix.drop(columns="is_far_out_of_text_uncorrected")
    if (
        choice_handle_short_and_close_fix == "Discard" or choice_handle_short_and_close_fix == "Merge then discard"
    ) and not dffix.empty:
        num_fix_before_clean_via_discard_short = dffix.shape[0]
        times_out = dffix.loc[(dffix["duration"] < short_fix_threshold), "start_time"].copy()
        if len(times_out) > 0:
            trial["dffix_no_clean"].loc[
                trial["dffix_no_clean"]["start_time"].isin(times_out), "was_discarded_short_fix"
            ] = True
            dffix = dffix[(dffix["duration"] >= short_fix_threshold)].reset_index(drop=True).copy()
        trial["Fixation Cleaning Stats"]["Number of discarded short fixations"] = (
            num_fix_before_clean_via_discard_short - dffix.shape[0]
        )
        trial["Fixation Cleaning Stats"]["Number of discarded short fixations (%)"] = round(
            100
            * (trial["Fixation Cleaning Stats"]["Number of discarded short fixations"])
            / num_fix_before_clean_via_discard_short,
            2,
        )
    trial["Fixation Cleaning Stats"]["Total number of discarded and merged fixations"] = (
        num_fix_before_clean - dffix.shape[0]
    )
    trial["Fixation Cleaning Stats"]["Total number of discarded and merged fixations (%)"] = round(
        100 * trial["Fixation Cleaning Stats"]["Total number of discarded and merged fixations"] / num_fix_before_clean,
        2,
    )
    if not dffix.empty:
        droplist = ["num", "msg"]
        if discard_blinks:
            droplist += ["blink", "blink_before", "blink_after"]
        for col in droplist:
            if col in dffix.columns:
                dffix = dffix.drop(col, axis=1)
        if "start" in dffix.columns:
            dffix = dffix.drop(axis=1, labels=["start", "stop"])
        if "corrected_start_time" not in dffix.columns:
            min_start_time = min(dffix["start_uncorrected"])
            dffix["corrected_start_time"] = dffix["start_uncorrected"] - min_start_time
            dffix["corrected_end_time"] = dffix["stop_uncorrected"] - min_start_time
        assert all(np.diff(dffix["corrected_start_time"]) > 0), "start times not in order"
        dffix_no_clean_fig, _, _ = matplotlib_plot_df(
            dffix,
            trial,
            None,
            trial["dffix_no_clean"],
            box_annotations=None,
            fix_to_plot=["Uncorrected Fixations"],
            stim_info_to_plot=["Characters", "Word boxes"],
        )
        savename = f"{trial['subject']}_{trial['trial_id']}_clean_compare.png"
        dffix_no_clean_fig.savefig(RESULTS_FOLDER.joinpath(savename), dpi=300, bbox_inches="tight")
        plt.close(dffix_no_clean_fig)
        dffix_clean_fig, _, _ = matplotlib_plot_df(
            dffix,
            trial,
            None,
            None,
            box_annotations=None,
            fix_to_plot=["Uncorrected Fixations"],
            stim_info_to_plot=["Characters", "Word boxes"],
            use_duration_arrow_sizes=False,
        )
        savename = f"{trial['subject']}_{trial['trial_id']}_after_clean.png"
        dffix_clean_fig.savefig(RESULTS_FOLDER.joinpath(savename), dpi=300, bbox_inches="tight")
        plt.close(dffix_clean_fig)
    if "item" not in dffix.columns and "item" in trial:
        dffix.insert(loc=0, column="item", value=trial["item"])
    if "condition" not in dffix.columns and "condition" in trial:
        dffix.insert(loc=0, column="condition", value=trial["condition"])
    if "subject" not in dffix.columns and "subject" in trial:
        dffix.insert(loc=0, column="subject", value=trial["subject"])
    if "trial_id" not in dffix.columns and "trial_id" in trial:
        dffix.insert(loc=0, column="trial_id", value=trial["trial_id"])
    dffix = reorder_columns(dffix)
    return dffix, trial


def add_time_cols(dffix, stop_time_col, start_time_col):
    if "start_time" not in dffix.columns:
        dffix["start_time"] = dffix[start_time_col]
    if "end_time" not in dffix.columns:
        dffix["end_time"] = dffix[stop_time_col]
    if "duration" not in dffix.columns:
        dffix["duration"] = dffix["end_time"] - dffix["start_time"]


def get_time_cols(dffix):
    if "stop" in dffix.columns:
        stop_time_col = "stop"
    elif "end_time" in dffix.columns:
        stop_time_col = "end_time"
    elif "corrected_end_time" in dffix.columns:
        stop_time_col = "corrected_end_time"
    if "start" in dffix.columns:
        start_time_col = "start"
    elif "start_time" in dffix.columns:
        start_time_col = "start_time"
    elif "corrected_start_time" in dffix.columns:
        start_time_col = "corrected_start_time"
    return stop_time_col, start_time_col
def trial_to_dfs(
    trial: dict,
    discard_fixations_without_sfix,
    choice_handle_short_and_close_fix,
    discard_far_out_of_text_fix,
    x_thres_in_chars,
    y_thresh_in_heights,
    short_fix_threshold,
    merge_distance_threshold,
    discard_long_fix,
    discard_long_fix_threshold,
    discard_blinks,
):
    events_df, trial = get_raw_events_df_and_trial(trial, discard_fixations_without_sfix)
    dffix, trial = clean_dffix_own(
        trial,
        choice_handle_short_and_close_fix,
        discard_far_out_of_text_fix,
        x_thres_in_chars,
        y_thresh_in_heights,
        short_fix_threshold,
        merge_distance_threshold,
        discard_long_fix,
        discard_long_fix_threshold,
        discard_blinks,
        events_df[events_df["msg"] == "FIX"].copy(),
    )
    dffix = dffix.dropna(how="all", axis=1).copy()
    trial["dffix"] = dffix
    trial["events_df"] = events_df
    return dffix, trial


def get_all_measures(
    trial,
    dffix,
    prefix,
    use_corrected_fixations=True,
    correction_algo="Wisdom_of_Crowds",
    measures_to_calculate=["initial_landing_position"],
    include_coords=False,
    save_to_csv=False,
):
    stim_df = pd.DataFrame(trial[f"{prefix}s_list"])
    if f"{prefix}_number" not in stim_df.columns:
        stim_df[f"{prefix}_number"] = np.arange(stim_df.shape[0])
    if use_corrected_fixations:
        dffix_copy = copy.deepcopy(dffix)
        dffix_copy["y"] = dffix_copy[f"y_{correction_algo}"]
    else:
        dffix_copy = dffix
        correction_algo = "uncorrected"
    res_dfs = []
    for measure in measures_to_calculate:
        if hasattr(anf, f"{measure}_own"):
            function = getattr(anf, f"{measure}_own")
            result = function(trial, dffix_copy, prefix, correction_algo)
            res_dfs.append(result)
    dfs_list = [df for df in [stim_df] + res_dfs if not df.empty]
    own_measure_df = stim_df
    if len(dfs_list) > 1:
        for df in dfs_list[1:]:
            droplist = [col for col in df.columns if (col != f"{prefix}_number" and col in stim_df.columns)]
            own_measure_df = own_measure_df.merge(df.drop(columns=droplist), how="left", on=[f"{prefix}_number"])
    first_column = own_measure_df.pop(prefix)
    own_measure_df.insert(0, prefix, first_column)
    wordfirst = pf.aggregate_words_firstrun(dffix_copy, correction_algo, measures_to_calculate)
    wordtmp = pf.aggregate_words(dffix_copy, pd.DataFrame(trial["words_list"]), correction_algo, measures_to_calculate)
    out = pf.combine_words(
        dffix_copy,
        wordfirst=wordfirst,
        wordtmp=wordtmp,
        algo_choice=correction_algo,
        measures_to_calculate=measures_to_calculate,
    )
    extra_cols = list(set(out.columns) - set(own_measure_df.columns))
    cols_to_add = ["word_number"] + extra_cols
    own_measure_df = pd.merge(own_measure_df, out.loc[:, cols_to_add], on="word_number", how="left")
    first_cols = [
        "subject",
        "trial_id",
        "item",
        "condition",
        "question_correct",
        "word_number",
        "word",
    ]
    for col in first_cols:
        if col in trial and col not in own_measure_df.columns:
            own_measure_df.insert(loc=0, column=col, value=trial[col])
    own_measure_df = own_measure_df.dropna(how="all", axis=1).copy()
    if not include_coords:
        word_cols = ["word_xmin", "word_xmax", "word_ymin", "word_ymax", "word_x_center", "word_y_center"]
        own_measure_df = own_measure_df.drop(columns=word_cols)
    own_measure_df = reorder_columns(own_measure_df)
    if "question_correct" in own_measure_df.columns:
        own_measure_df = own_measure_df.drop(columns=["question_correct"])
    if save_to_csv:
        own_measure_df.to_csv(
            RESULTS_FOLDER / f"{trial['subject']}_{trial['trial_id']}_{correction_algo}_word_measures.csv"
        )
    return own_measure_df


def add_line_overlaps_to_sample(trial, sample):
    char_df = pd.DataFrame(trial["chars_list"])
    line_overlaps = []
    for arr in sample:
        y_val = arr[1]
        line_overlap = t.tensor(-1, dtype=t.float32)
        for idx, (y1, y2) in enumerate(zip(char_df.char_ymin.unique(), char_df.char_ymax.unique())):
            if y1 <= y_val <= y2:
                line_overlap = t.tensor(idx, dtype=t.float32)
                break
        line_overlaps.append(line_overlap)
    line_olaps_tensor = t.stack(line_overlaps, dim=0)
    sample = t.cat([sample, line_olaps_tensor.unsqueeze(1)], dim=1)
    return sample
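# Usage sketch for add_line_overlaps_to_sample with two fabricated text
# lines; the sample columns are assumed to be (x, y, duration). The matched
# line index is appended as an extra feature column (-1 if no line matches).
def _example_line_overlap_feature():
    trial = {
        "chars_list": [
            {"char_ymin": 100, "char_ymax": 120},
            {"char_ymin": 160, "char_ymax": 180},
        ]
    }
    sample = t.tensor([[50.0, 110.0, 200.0], [55.0, 170.0, 150.0]])
    out = add_line_overlaps_to_sample(trial, sample)
    return out  # last column is the matched line index: [0., 1.]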
def norm_coords_by_letter_min_x_y(
    sample_idx: int,
    trialslist: list,
    samplelist: list,
    chars_center_coords_list: list = None,
):
    chars_df = pd.DataFrame(trialslist[sample_idx]["chars_list"])
    trialslist[sample_idx]["x_char_unique"] = list(chars_df.char_xmin.unique())
    min_x_chars = chars_df.char_xmin.min()
    min_y_chars = chars_df.char_ymin.min()

    norm_vector_substract = t.zeros(
        (1, samplelist[sample_idx].shape[1]), dtype=samplelist[sample_idx].dtype, device=samplelist[sample_idx].device
    )
    norm_vector_substract[0, 0] = norm_vector_substract[0, 0] + 1 * min_x_chars
    norm_vector_substract[0, 1] = norm_vector_substract[0, 1] + 1 * min_y_chars
    samplelist[sample_idx] = samplelist[sample_idx] - norm_vector_substract

    if chars_center_coords_list is not None:
        norm_vector_substract = norm_vector_substract.squeeze(0)[:2]
        if chars_center_coords_list[sample_idx].shape[-1] == norm_vector_substract.shape[-1] * 2:
            chars_center_coords_list[sample_idx][:, :2] -= norm_vector_substract
            chars_center_coords_list[sample_idx][:, 2:] -= norm_vector_substract
        else:
            chars_center_coords_list[sample_idx] -= norm_vector_substract
    return trialslist, samplelist, chars_center_coords_list


def norm_coords_by_letter_positions(
    sample_idx: int,
    trialslist: list,
    samplelist: list,
    meanlist: list = None,
    stdlist: list = None,
    return_mean_std_lists=False,
    norm_by_char_averages=False,
    chars_center_coords_list: list = None,
    add_normalised_values_as_features=False,
):
    chars_df = pd.DataFrame(trialslist[sample_idx]["chars_list"])
    trialslist[sample_idx]["x_char_unique"] = list(chars_df.char_xmin.unique())
    min_x_chars = chars_df.char_xmin.min()
    max_x_chars = chars_df.char_xmax.max()

    norm_vector_multi = t.ones(
        (1, samplelist[sample_idx].shape[1]), dtype=samplelist[sample_idx].dtype, device=samplelist[sample_idx].device
    )
    if norm_by_char_averages:
        chars_list = trialslist[sample_idx]["chars_list"]
        char_widths = np.asarray([x["char_xmax"] - x["char_xmin"] for x in chars_list])
        char_heights = np.asarray([x["char_ymax"] - x["char_ymin"] for x in chars_list])
        char_widths_average = np.mean(char_widths[char_widths > 0])
        char_heights_average = np.mean(char_heights[char_heights > 0])
        norm_vector_multi[0, 0] = norm_vector_multi[0, 0] * char_widths_average
        norm_vector_multi[0, 1] = norm_vector_multi[0, 1] * char_heights_average
    else:
        line_height = min(np.unique(trialslist[sample_idx]["line_heights"]))
        line_width = max_x_chars - min_x_chars
        norm_vector_multi[0, 0] = norm_vector_multi[0, 0] * line_width
        norm_vector_multi[0, 1] = norm_vector_multi[0, 1] * line_height

    assert ~t.any(t.isnan(norm_vector_multi)), "Nan found in char norming vector"
    norm_vector_multi = norm_vector_multi.squeeze(0)
    if add_normalised_values_as_features:
        norm_vector_multi = norm_vector_multi[norm_vector_multi != 1]
        normed_features = samplelist[sample_idx][:, : norm_vector_multi.shape[0]] / norm_vector_multi
        samplelist[sample_idx] = t.cat([samplelist[sample_idx], normed_features], dim=1)
    else:
        samplelist[sample_idx] = samplelist[sample_idx] / norm_vector_multi  # in case time or pupil size is included
    if chars_center_coords_list is not None:
        norm_vector_multi = norm_vector_multi[:2]
        if chars_center_coords_list[sample_idx].shape[-1] == norm_vector_multi.shape[-1] * 2:
            chars_center_coords_list[sample_idx][:, :2] /= norm_vector_multi
            chars_center_coords_list[sample_idx][:, 2:] /= norm_vector_multi
        else:
            chars_center_coords_list[sample_idx] /= norm_vector_multi
    if return_mean_std_lists:
        mean_val = samplelist[sample_idx].mean(axis=0).cpu().numpy()
        meanlist.append(mean_val)
        std_val = samplelist[sample_idx].std(axis=0).cpu().numpy()
        stdlist.append(std_val)
        assert ~any(pd.isna(mean_val)), "Nan found in mean_val"
        assert ~any(pd.isna(std_val)), "Nan found in std_val"
        return trialslist, samplelist, meanlist, stdlist, chars_center_coords_list
    return trialslist, samplelist, chars_center_coords_list
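# Worked sketch of the two normalisation steps applied above before DIST
# inference: shift so the text's top-left letter corner is the origin, then
# divide x by line width and y by line height. All coordinates and stimulus
# dimensions below are made-up values.
def _example_coordinate_norm():
    xy = t.tensor([[300.0, 250.0], [420.0, 310.0]])
    min_x, min_y = 280.0, 240.0  # char_xmin.min(), char_ymin.min()
    line_width, line_height = 800.0, 60.0
    shifted = xy - t.tensor([min_x, min_y])
    return shifted / t.tensor([line_width, line_height])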
def get_fig_ax(screen_res, dpi, words_df, x_margin, y_margin, dffix=None, prefix="word"):
    fig = plt.figure(figsize=(screen_res[0] / dpi, screen_res[1] / dpi), dpi=dpi)
    ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0])
    ax.set_axis_off()
    if dffix is not None:
        ax.set_ylim((dffix.y.min(), dffix.y.max()))
        ax.set_xlim((dffix.x.min(), dffix.x.max()))
    else:
        ax.set_ylim((words_df[f"{prefix}_y_center"].min() - y_margin, words_df[f"{prefix}_y_center"].max() + y_margin))
        ax.set_xlim((words_df[f"{prefix}_x_center"].min() - x_margin, words_df[f"{prefix}_x_center"].max() + x_margin))
    ax.invert_yaxis()
    fig.add_axes(ax)
    return fig, ax


def get_save_path(fpath, fname_ending):
    save_path = PLOTS_FOLDER.joinpath(f"{fpath.stem}_{fname_ending}.png")
    return save_path


def save_im_load_convert(fpath, fig, fname_ending, mode):
    save_path = get_save_path(fpath, fname_ending)
    fig.savefig(save_path)
    im = Image.open(save_path).convert(mode)
    im.save(save_path)
    return im


def plot_text_boxes_fixations(
    fpath,
    dpi,
    screen_res,
    set_font_size: bool,
    font_size: int,
    dffix=None,
    trial=None,
):
    if isinstance(fpath, str):
        fpath = pl.Path(fpath)
    prefix = "char"
    if dffix is None:
        dffix = pd.read_csv(fpath)
    if trial is None:
        json_fpath = str(fpath).replace("_fixations.csv", "_trial.json")
        with open(json_fpath, "r") as f:
            trial = json.load(f)
    words_df = pd.DataFrame(trial[f"{prefix}s_list"])
    x_left = words_df[f"{prefix}_xmin"]
    x_right = words_df[f"{prefix}_xmax"]
    y_top = words_df[f"{prefix}_ymax"]
    y_bottom = words_df[f"{prefix}_ymin"]
    if f"{prefix}_x_center" not in words_df.columns:
        words_df[f"{prefix}_x_center"] = (words_df[f"{prefix}_xmax"] - words_df[f"{prefix}_xmin"]) / 2 + words_df[
            f"{prefix}_xmin"
        ]
        words_df[f"{prefix}_y_center"] = (words_df[f"{prefix}_ymax"] - words_df[f"{prefix}_ymin"]) / 2 + words_df[
            f"{prefix}_ymin"
        ]
    x_margin = words_df[f"{prefix}_x_center"].mean() / 8
    y_margin = words_df[f"{prefix}_y_center"].mean() / 4

    times = dffix.corrected_start_time - dffix.corrected_start_time.min()
    times = times / times.max()
    times = np.linspace(0.25, 1, len(times))
    font = "monospace"
    if not set_font_size:
        font_size = trial["font_size"] * 27 // dpi
    font_props = FontProperties(family=font, style="normal", size=font_size)

    fig, ax = get_fig_ax(screen_res, dpi, words_df, x_margin, y_margin, prefix=prefix)
    ax.scatter(words_df[f"{prefix}_x_center"], words_df[f"{prefix}_y_center"], s=1, facecolor="k", alpha=0.01)
    for idx in range(len(x_left)):
        ax.text(
            words_df[f"{prefix}_x_center"][idx],
            words_df[f"{prefix}_y_center"][idx],
            words_df[prefix][idx],
            horizontalalignment="center",
            verticalalignment="center",
            fontproperties=font_props,
        )
    fname_ending = f"{prefix}s_grey"
    words_grey_im = save_im_load_convert(fpath, fig, fname_ending, "L")
    plt.close("all")

    fig, ax = get_fig_ax(screen_res, dpi, words_df, x_margin, y_margin, prefix=prefix)
    ax.scatter(words_df[f"{prefix}_x_center"], words_df[f"{prefix}_y_center"], s=1, facecolor="k", alpha=0.1)
    for idx in range(len(x_left)):
        xdiff = x_right[idx] - x_left[idx]
        ydiff = y_top[idx] - y_bottom[idx]
        rect = patches.Rectangle(
            (x_left[idx] - 1, y_bottom[idx] - 1), xdiff, ydiff, alpha=0.9, linewidth=1, edgecolor="k", facecolor="grey"
        )  # seems to need one pixel offset
        ax.add_patch(rect)
    fname_ending = f"{prefix}_boxes_grey"
    word_boxes_grey_im = save_im_load_convert(fpath, fig, fname_ending, "L")
    plt.close("all")

    fig, ax = get_fig_ax(screen_res, dpi, words_df, x_margin, y_margin, prefix=prefix)
    ax.scatter(dffix.x, dffix.y, facecolor="k", alpha=times)
    fname_ending = "fix_scatter_grey"
    fix_scatter_grey_im = save_im_load_convert(fpath, fig, fname_ending, "L")
    plt.close("all")

    arr_combo = np.stack(
        [
            np.asarray(words_grey_im),
            np.asarray(word_boxes_grey_im),
            np.asarray(fix_scatter_grey_im),
        ],
        axis=2,
    )
    im_combo = Image.fromarray(arr_combo)
    fname_ending = f"{prefix}s_channel_sep"
    im_combo.save(fpath)
    return im_combo
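# Minimal sketch of the channel-separation idea used by
# plot_text_boxes_fixations above: three greyscale renderings (characters,
# character boxes, fixation scatter) are stacked as the R/G/B channels of a
# single image. The arrays here are blank placeholders.
def _example_channel_sep_combo():
    h, w = 224, 224
    chars = np.zeros((h, w), dtype=np.uint8)
    boxes = np.zeros((h, w), dtype=np.uint8)
    fixations = np.zeros((h, w), dtype=np.uint8)
    return Image.fromarray(np.stack([chars, boxes, fixations], axis=2))  # PIL RGB image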
"fix_scatter_grey" fix_scatter_grey_im = save_im_load_convert(fpath, fig, fname_ending, "L") plt.close("all") arr_combo = np.stack( [ np.asarray(words_grey_im), np.asarray(word_boxes_grey_im), np.asarray(fix_scatter_grey_im), ], axis=2, ) im_combo = Image.fromarray(arr_combo) fname_ending = f"{prefix}s_channel_sep" im_combo.save(fpath) return im_combo def prep_data_for_dist(model_cfg, dffix, trial): if isinstance(dffix, dict): dffix = dffix["value"] sample_tensor = t.tensor(dffix.loc[:, model_cfg["sample_cols"]].to_numpy(), dtype=t.float32) if model_cfg["add_line_overlap_feature"]: sample_tensor = add_line_overlaps_to_sample(trial, sample_tensor) has_nans = t.any(t.isnan(sample_tensor)) assert not has_nans, "NaNs found in sample tensor" samplelist_eval = [sample_tensor] trialslist_eval = [trial] chars_center_coords_list_eval = None if model_cfg["norm_coords_by_letter_min_x_y"]: for sample_idx, _ in enumerate(samplelist_eval): trialslist_eval, samplelist_eval, chars_center_coords_list_eval = norm_coords_by_letter_min_x_y( sample_idx, trialslist_eval, samplelist_eval, chars_center_coords_list=chars_center_coords_list_eval, ) if model_cfg["normalize_by_line_height_and_width"]: meanlist_eval, stdlist_eval = [], [] for sample_idx, _ in enumerate(samplelist_eval): ( trialslist_eval, samplelist_eval, meanlist_eval, stdlist_eval, chars_center_coords_list_eval, ) = norm_coords_by_letter_positions( sample_idx, trialslist_eval, samplelist_eval, meanlist_eval, stdlist_eval, return_mean_std_lists=True, norm_by_char_averages=model_cfg["norm_by_char_averages"], chars_center_coords_list=chars_center_coords_list_eval, add_normalised_values_as_features=model_cfg["add_normalised_values_as_features"], ) sample_tensor = samplelist_eval[0] sample_means = t.tensor(model_cfg["sample_means"], dtype=t.float32) sample_std = t.tensor(model_cfg["sample_std"], dtype=t.float32) sample_tensor = (sample_tensor - sample_means) / sample_std sample_tensor = sample_tensor.unsqueeze(0) if not pl.Path(trial["plot_file"]).exists(): plot_text_boxes_fixations( fpath=trial["plot_file"], dpi=250, screen_res=(1024, 768), set_font_size=True, font_size=4, dffix=dffix, trial=trial, ) val_set = DSet( sample_tensor, None, t.zeros((1, sample_tensor.shape[1])), trialslist_eval, padding_list=[0], padding_at_end=model_cfg["padding_at_end"], return_images_for_conv=True, im_partial_string=model_cfg["im_partial_string"], input_im_shape=model_cfg["char_plot_shape"], ) val_loader = dl(val_set, batch_size=1, shuffle=False, num_workers=0) return val_loader, val_set def fold_in_seq_dim(out, y=None): batch_size, seq_len, num_classes = out.shape out = eo.rearrange(out, "b s c -> (b s) c", s=seq_len) if y is None: return out, None if len(y.shape) > 2: y = eo.rearrange(y, "b s c -> (b s) c", s=seq_len) else: y = eo.rearrange(y, "b s -> (b s)", s=seq_len) return out, y def logits_to_pred(out, y=None): seq_len = out.shape[1] out, y = fold_in_seq_dim(out, y) preds = corn_label_from_logits(out) preds = eo.rearrange(preds, "(b s) -> b s", s=seq_len) if y is not None: y = eo.rearrange(y.squeeze(), "(b s) -> b s", s=seq_len) y = y return preds, y def get_DIST_preds(dffix, trial, models_dict): algo_choice = "DIST" model = models_dict["single_DIST_model"] loader, dset = prep_data_for_dist(models_dict["single_DIST_model_cfg"], dffix, trial) batch = next(iter(loader)) if "cpu" not in str(model.device): batch = [x.cuda() for x in batch] try: out = model(batch) preds, y = logits_to_pred(out, y=None) if len(trial["y_char_unique"]) < 1: y_char_unique = 
pd.DataFrame(trial["chars_list"]).char_y_center.sort_values().unique() else: y_char_unique = trial["y_char_unique"] num_lines = trial["num_char_lines"] - 1 preds = t.clamp(preds, 0, num_lines).squeeze().cpu().numpy() y_pred_DIST = [y_char_unique[idx] for idx in preds] dffix[f"line_num_{algo_choice}"] = preds dffix[f"y_{algo_choice}"] = np.round(y_pred_DIST, decimals=2) dffix[f"y_{algo_choice}_correction"] = (dffix.loc[:, f"y_{algo_choice}"] - dffix.loc[:, "y"]).round(2) except Exception as e: ic(f"Exception on model(batch) for DIST \n{e}") return dffix def get_DIST_ensemble_preds( dffix, trial, model_cfg_without_norm_df, model_cfg_with_norm_df, ensemble_model_avg, ): algo_choice = "DIST-Ensemble" loader_without_norm, dset_without_norm = prep_data_for_dist(model_cfg_without_norm_df, dffix, trial) loader_with_norm, dset_with_norm = prep_data_for_dist(model_cfg_with_norm_df, dffix, trial) batch_without_norm = next(iter(loader_without_norm)) batch_with_norm = next(iter(loader_with_norm)) out = ensemble_model_avg((batch_without_norm, batch_with_norm)) preds, y = logits_to_pred(out[0]["out_avg"], y=None) if len(trial["y_char_unique"]) < 1: y_char_unique = pd.DataFrame(trial["chars_list"]).char_y_center.sort_values().unique() else: y_char_unique = trial["y_char_unique"] num_lines = trial["num_char_lines"] - 1 preds = t.clamp(preds, 0, num_lines).squeeze().cpu().numpy() y_pred_DIST = [y_char_unique[idx] for idx in preds] dffix[f"line_num_{algo_choice}"] = preds dffix[f"y_{algo_choice}"] = np.round(y_pred_DIST, decimals=1) dffix[f"y_{algo_choice}_correction"] = (dffix.loc[:, f"y_{algo_choice}"] - dffix.loc[:, "y"]).round(1) return dffix def get_EDIST_preds_with_model_check(dffix, trial, models_dict): dffix = get_DIST_ensemble_preds( dffix, trial, models_dict["model_cfg_without_norm_df"], models_dict["model_cfg_with_norm_df"], models_dict["ensemble_model_avg"], ) return dffix def get_all_classic_preds(dffix, trial, classic_algos_cfg): corrections = [] for algo, classic_params in copy.deepcopy(classic_algos_cfg).items(): dffix = calgo.apply_classic_algo(dffix, trial, algo, classic_params) corrections.append(np.asarray(dffix.loc[:, f"y_{algo}"])) return dffix, corrections def apply_woc(dffix, trial, corrections, algo_choice): corrected_Y = calgo.wisdom_of_the_crowd(corrections) dffix.loc[:, f"y_{algo_choice}"] = corrected_Y dffix[f"y_{algo_choice}_correction"] = (dffix.loc[:, f"y_{algo_choice}"] - dffix.loc[:, "y"]).round(1) corrected_line_nums = [trial["y_char_unique"].index(y) for y in corrected_Y] dffix.loc[:, f"line_num_y_{algo_choice}"] = corrected_line_nums dffix.loc[:, f"line_num_{algo_choice}"] = corrected_line_nums return dffix def apply_correction_algo(dffix, algo_choice, trial, models_dict, classic_algos_cfg): if algo_choice == "DIST": dffix = get_DIST_preds(dffix, trial, models_dict=models_dict) elif algo_choice == "DIST-Ensemble": dffix = get_EDIST_preds_with_model_check(dffix, trial, models_dict=models_dict) elif algo_choice == "Wisdom_of_Crowds_with_DIST": dffix, corrections = get_all_classic_preds(dffix, trial, classic_algos_cfg) dffix = get_DIST_preds(dffix, trial, models_dict=models_dict) for _ in range(3): corrections.append(np.asarray(dffix.loc[:, "y_DIST"])) dffix = apply_woc(dffix, trial, corrections, algo_choice) elif algo_choice == "Wisdom_of_Crowds_with_DIST_Ensemble": dffix, corrections = get_all_classic_preds(dffix, trial, classic_algos_cfg) dffix = get_EDIST_preds_with_model_check(dffix, trial, models_dict=models_dict) for _ in range(3): 
def apply_correction_algo(dffix, algo_choice, trial, models_dict, classic_algos_cfg):
    if algo_choice == "DIST":
        dffix = get_DIST_preds(dffix, trial, models_dict=models_dict)
    elif algo_choice == "DIST-Ensemble":
        dffix = get_EDIST_preds_with_model_check(dffix, trial, models_dict=models_dict)
    elif algo_choice == "Wisdom_of_Crowds_with_DIST":
        dffix, corrections = get_all_classic_preds(dffix, trial, classic_algos_cfg)
        dffix = get_DIST_preds(dffix, trial, models_dict=models_dict)
        # Append the DIST prediction three times so it carries triple weight in the crowd vote.
        for _ in range(3):
            corrections.append(np.asarray(dffix.loc[:, "y_DIST"]))
        dffix = apply_woc(dffix, trial, corrections, algo_choice)
    elif algo_choice == "Wisdom_of_Crowds_with_DIST_Ensemble":
        dffix, corrections = get_all_classic_preds(dffix, trial, classic_algos_cfg)
        dffix = get_EDIST_preds_with_model_check(dffix, trial, models_dict=models_dict)
        for _ in range(3):
            corrections.append(np.asarray(dffix.loc[:, "y_DIST-Ensemble"]))
        dffix = apply_woc(dffix, trial, corrections, algo_choice)
    elif algo_choice == "Wisdom_of_Crowds":
        dffix, corrections = get_all_classic_preds(dffix, trial, classic_algos_cfg)
        dffix = apply_woc(dffix, trial, corrections, algo_choice)
    else:
        algo_cfg = classic_algos_cfg[algo_choice]
        dffix = calgo.apply_classic_algo(dffix, trial, algo_choice, algo_cfg)
        dffix[f"y_{algo_choice}_correction"] = (dffix.loc[:, f"y_{algo_choice}"] - dffix.loc[:, "y"]).round(1)
    dffix = dffix.copy()  # apparently helps with fragmentation
    return dffix
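
# Hedged illustration of the crowd input assembled in apply_correction_algo:
# `corrections` is a list of 1-D candidate-y arrays, one per algorithm, and the
# DIST prediction is appended three times so it carries triple weight in
# calgo.wisdom_of_the_crowd. All numbers are invented.
def _example_woc_corrections():
    classic = [np.array([100.0, 100.0, 140.0]), np.array([100.0, 140.0, 140.0])]
    y_dist = np.array([100.0, 140.0, 140.0])
    corrections = list(classic)
    for _ in range(3):
        corrections.append(y_dist)
    return corrections  # five candidates: two classic plus three copies of DIST
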
dffix.loc[i, f"on_word_{algo_choice}"] = selected_stimmat.loc[word_min_idx, "in_word"] dffix.loc[i, f"word_land_{algo_choice}"] = selected_stimmat.loc[ word_min_idx, "num_letters_from_start_of_word" ] dffix.loc[i, f"line_word_{algo_choice}"] = selected_stimmat.loc[word_min_idx, "wordline"] if "sentence_word" in cols_to_add: dffix.loc[i, f"sentence_word_{algo_choice}"] = selected_stimmat.loc[word_min_idx, "wordsent"] dffix.loc[i, "num_words_in_sentence"] = len(selected_stimmat.loc[word_min_idx, "in_sentence"].split(" ")) dffix.loc[i, f"on_sentence_num_{algo_choice}"] = selected_stimmat.loc[word_min_idx, "in_sentence_number"] dffix.loc[i, f"on_sentence_{algo_choice}"] = selected_stimmat.loc[word_min_idx, "in_sentence"] if "line_let_previous" in cols_to_add: dffix[f"line_let_previous_{algo_choice}"] = dffix[f"line_let_{algo_choice}"].shift(-1) if "line_let_next" in cols_to_add: dffix[f"line_let_next_{algo_choice}"] = dffix[f"line_let_{algo_choice}"].shift(1) dffix = pf.compute_saccade_length(dffix, chars_df, algo_choice) dffix = pf.compute_launch_distance(dffix, algo_choice) dffix = pf.compute_refixation(dffix, algo_choice) dffix = pf.compute_regression(dffix, algo_choice) dffix = pf.compute_firstskip(dffix, algo_choice) dffix = pf.compute_run(dffix, algo_choice) dffix = pf.compute_landing_position(dffix, algo_choice) dffix = dffix.loc[:, ~dffix.columns.duplicated()] return dffix def export_dataframe(df: pd.DataFrame, csv_name: str): if isinstance(df, dict): df = df["value"] df.to_csv(csv_name) return csv_name def _convert_to_json(obj): if isinstance(obj, (int, float, str, bool)): return obj elif isinstance(obj, dict): return {k: _convert_to_json(v) for k, v in obj.items()} elif isinstance(obj, list) or isinstance(obj, tuple): return [_convert_to_json(item) for item in obj] elif isinstance(obj, dict): return {k: _convert_to_json(val) for k, val in obj.items()} elif hasattr(obj, "to_dict"): return _convert_to_json(obj.to_dict()) elif hasattr(obj, "tolist"): return _convert_to_json(obj.tolist()) elif obj is None: return None else: raise TypeError(f"Object of type {type(obj)} is not JSON serializable") def save_trial_to_json(trial, savename): filtered_trial = {} for key, value in trial.items(): try: filtered_trial[key] = _convert_to_json(value) except TypeError as e: ic(f"Warning: Skipping non-serializable value for key '{key}' due to error: {e}") with open(savename, "w", encoding="utf-8") as f: json.dump(filtered_trial, f, ensure_ascii=False, indent=4) def export_trial(trial: dict): trial_id = trial["trial_id"] savename = RESULTS_FOLDER.joinpath(pl.Path(trial["filename"]).stem) trial_name = f"{savename}_{trial_id}_trial_info.json" filtered_trial = copy.deepcopy(trial) _ = [filtered_trial.pop(k) for k in list(filtered_trial.keys()) if isinstance(filtered_trial[k], pd.DataFrame)] _ = [ filtered_trial.pop(k) for k in list(filtered_trial.keys()) if k in [ "words_list", "chars_list", "chars_df_alt", "EMReading_fix", "chars_df", "dffix_sacdf_popEye", "fixdf_popEye", "sacdf_popEye", "saccade_df", "combined_df", "own_sentence_measures_dfs_for_algo", "own_word_measures_dfs_for_algo", ] ] filtered_trial["line_heights"] = list(np.unique(filtered_trial["line_heights"])) save_trial_to_json(filtered_trial, trial_name) return trial_name def add_cols_from_trial(trial, df, cols=["item", "condition", "trial_id", "subject"]): for col in cols: if col not in df.columns: df.insert(loc=0, column=col, value=trial[col]) def correct_df( dffix, algo_choice, trial, for_multi, is_outside_of_streamlit, classic_algos_cfg, 
def correct_df(
    dffix,
    algo_choice,
    trial,
    for_multi,
    is_outside_of_streamlit,
    classic_algos_cfg,
    models_dict,
    measures_to_calculate_multi_asc=[],
    include_coords_multi_asc=False,
    sent_measures_to_calc_multi=[],
    fix_cols_to_add=[],
):
    if is_outside_of_streamlit:
        stqdm = tqdm
    else:
        from stqdm import stqdm
    if isinstance(dffix, dict):
        dffix = dffix["value"]
    if "x" not in dffix.columns or "y" not in dffix.columns:
        ic("x or y not in dffix")
        ic(dffix.columns)
        return dffix
    if isinstance(algo_choice, list):
        algo_choices = algo_choice
        repeats = range(len(algo_choice))
    else:
        algo_choices = [algo_choice]
        repeats = range(1)
    chars_df = pd.DataFrame(trial["chars_df"]) if "chars_df" in trial else pd.DataFrame(trial["chars_list"])
    if for_multi:
        own_word_measures_dfs_for_algo = []
        own_sentence_measures_dfs_for_algo = []
    trial["average_y_corrections"] = []
    for algoIdx in stqdm(repeats, desc="Applying line-assignment algorithms"):
        algo_choice = algo_choices[algoIdx]
        dffix = apply_correction_algo(dffix, algo_choice, trial, models_dict, classic_algos_cfg)
        average_y_correction = (dffix[f"y_{algo_choice}"] - dffix["y"]).mean().round(1)
        trial["average_y_corrections"].append(
            {"Algorithm": algo_choice, "average_y_correction": average_y_correction}
        )
        fig, desired_width_in_pixels, desired_height_in_pixels = matplotlib_plot_df(
            dffix,
            trial,
            algo_choice,
            None,
            box_annotations=None,
            fix_to_plot=["Uncorrected Fixations", "Corrected Fixations"],
            stim_info_to_plot=["Characters", "Word boxes"],
        )
        savename = f"{trial['subject']}_{trial['trial_id']}_corr_{algo_choice}_fix.png"
        fig.savefig(RESULTS_FOLDER.joinpath(savename), dpi=300)
        plt.close(fig)
        dffix = add_popEye_cols_to_dffix(dffix, algo_choice, chars_df, trial, "x", cols_to_add=fix_cols_to_add)
        if for_multi and len(measures_to_calculate_multi_asc) > 0 and dffix.shape[0] > 1:
            own_word_measures = get_all_measures(
                trial,
                dffix,
                prefix="word",
                use_corrected_fixations=True,
                correction_algo=algo_choice,
                measures_to_calculate=measures_to_calculate_multi_asc,
                include_coords=include_coords_multi_asc,
            )
            own_word_measures_dfs_for_algo.append(own_word_measures)
            sent_measures_multi = pf.compute_sentence_measures(
                dffix, pd.DataFrame(trial["chars_df"]), algo_choice, sent_measures_to_calc_multi
            )
            own_sentence_measures_dfs_for_algo.append(sent_measures_multi)
    if for_multi and len(own_word_measures_dfs_for_algo) > 0:
        words_df = (
            pd.DataFrame(trial["chars_df"])
            .drop_duplicates(subset="in_word_number", keep="first")
            .loc[:, ["in_word_number", "in_word"]]
            .rename({"in_word_number": "word_number", "in_word": "word"}, axis=1)
            .reset_index(drop=True)
        )
        add_cols_from_trial(trial, words_df, cols=["item", "condition", "trial_id", "subject"])
        words_df["subject_trialID"] = [
            f"{subj}_{num}" for subj, num in zip(words_df["subject"], words_df["trial_id"])
        ]
        words_df = words_df.merge(
            own_word_measures_dfs_for_algo[0],
            how="left",
            on=["subject", "trial_id", "item", "condition", "word_number", "word"],
        )
        for word_measure_df in own_word_measures_dfs_for_algo[1:]:
            words_df = words_df.merge(
                word_measure_df, how="left", on=["subject", "trial_id", "item", "condition", "word_number", "word"]
            )
        words_df = reorder_columns(words_df, ["subject", "trial_id", "item", "condition", "word_number", "word"])
        sentence_df = (
            pd.DataFrame(trial["chars_df"])
            .drop_duplicates(subset="in_sentence_number", keep="first")
            .loc[:, ["in_sentence_number", "in_sentence"]]
            .rename({"in_sentence_number": "sentence_number", "in_sentence": "sentence"}, axis=1)
            .reset_index(drop=True)
        )
        add_cols_from_trial(trial, sentence_df, cols=["item", "condition", "trial_id", "subject"])
        sentence_df["subject_trialID"] = [
            f"{subj}_{num}" for subj, num in zip(sentence_df["subject"], sentence_df["trial_id"])
        ]
        sentence_df = sentence_df.merge(
            own_sentence_measures_dfs_for_algo[0],
            how="left",
            on=["item", "condition", "trial_id", "subject", "sentence_number", "sentence"],
        )
        for sent_measure_df in own_sentence_measures_dfs_for_algo[1:]:
            sentence_df = sentence_df.merge(
                sent_measure_df,
                how="left",
                on=["subject", "trial_id", "item", "condition", "sentence_number", "sentence", "number_of_words"],
            )
        sentence_df = reorder_columns(
            sentence_df,
            ["subject", "trial_id", "item", "condition", "sentence_number", "sentence", "number_of_words"],
        )
        trial["own_word_measures_dfs_for_algo"] = words_df
        trial["own_sentence_measures_dfs_for_algo"] = sentence_df
    dffix = reorder_columns(dffix)
    if for_multi:
        return dffix
    else:
        # Keep measure columns only if they were requested via fix_cols_to_add;
        # all non-measure columns always survive.
        fix_cols_to_keep = [
            c
            for c in dffix.columns
            if (
                (any([lname in c for lname in ALL_FIX_MEASURES]) and any([lname in c for lname in fix_cols_to_add]))
                or (not any([lname in c for lname in ALL_FIX_MEASURES]))
            )
        ]
        savename = RESULTS_FOLDER.joinpath(pl.Path(trial["filename"]).stem)
        csv_name = f"{savename}_{trial['trial_id']}_corrected_fixations.csv"
        csv_name = export_dataframe(dffix.loc[:, fix_cols_to_keep].copy(), csv_name)
        export_trial(trial)
    return dffix
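
# Hedged illustration of the fix_cols_to_keep filter in correct_df: measure
# columns (matching ALL_FIX_MEASURES) survive only when requested through
# fix_cols_to_add, while non-measure columns always survive. Column names are
# invented.
def _example_fix_col_filter():
    columns = ["x", "y", "word_run_DIST", "sac_in_DIST"]
    fix_cols_to_add = ["sac_in"]
    kept = [
        c
        for c in columns
        if (any(m in c for m in ALL_FIX_MEASURES) and any(m in c for m in fix_cols_to_add))
        or not any(m in c for m in ALL_FIX_MEASURES)
    ]
    return kept  # ['x', 'y', 'sac_in_DIST']
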
def process_trial_choice(
    trial: dict,
    algo_choice: str,
    choice_handle_short_and_close_fix,
    for_multi,
    discard_fixations_without_sfix,
    discard_far_out_of_text_fix,
    x_thres_in_chars,
    y_thresh_in_heights,
    short_fix_threshold,
    merge_distance_threshold,
    discard_long_fix,
    discard_long_fix_threshold,
    discard_blinks,
    measures_to_calculate_multi_asc,
    include_coords_multi_asc,
    sent_measures_to_calculate_multi_asc,
    classic_algos_cfg,
    models_dict,
    fix_cols_to_add,
):
    dffix, trial = trial_to_dfs(
        trial=trial,
        choice_handle_short_and_close_fix=choice_handle_short_and_close_fix,
        discard_fixations_without_sfix=discard_fixations_without_sfix,
        discard_far_out_of_text_fix=discard_far_out_of_text_fix,
        x_thres_in_chars=x_thres_in_chars,
        y_thresh_in_heights=y_thresh_in_heights,
        short_fix_threshold=short_fix_threshold,
        discard_long_fix=discard_long_fix,
        discard_long_fix_threshold=discard_long_fix_threshold,
        merge_distance_threshold=merge_distance_threshold,
        discard_blinks=discard_blinks,
    )
    if "chars_list" in trial:
        chars_df = pd.DataFrame(trial["chars_df"])
        trial["chars_df"] = chars_df.to_dict()
        trial["y_char_unique"] = list(chars_df.char_y_center.sort_values().unique())
    if algo_choice is not None and ("chars_list" in trial or "words_list" in trial):
        if dffix.shape[0] > 1:
            dffix = correct_df(
                dffix,
                algo_choice,
                trial,
                for_multi=for_multi,
                is_outside_of_streamlit=False,
                classic_algos_cfg=classic_algos_cfg,
                models_dict=models_dict,
                measures_to_calculate_multi_asc=measures_to_calculate_multi_asc,
                include_coords_multi_asc=include_coords_multi_asc,
                sent_measures_to_calc_multi=sent_measures_to_calculate_multi_asc,
                fix_cols_to_add=fix_cols_to_add,
            )
            saccade_df = get_saccade_df(dffix, trial, algo_choice, trial.pop("events_df"))
            trial["saccade_df"] = saccade_df.to_dict()
            fig = plot_saccade_df(dffix, saccade_df, trial, True, False)
            fig.savefig(RESULTS_FOLDER / f"{trial['subject']}_{trial['trial_id']}_saccades.png")
            plt.close(fig)
        else:
            ic(
                f"🚨 Only {dffix.shape[0]} fixation(s) left after processing. "
                f"saccade_df not created for trial {trial['trial_id']} 🚨"
            )
    else:
        ic("🚨 Stimulus information needed for fixation line-assignment 🚨")
    for c in ["gaze_df", "dffix"]:
        if c in trial:
            trial.pop(c)
    return dffix, trial
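
# Hedged mini-example of the y_char_unique construction in process_trial_choice:
# the sorted unique character y-centers, one per text line. Numbers are invented.
def _example_y_char_unique():
    chars_df = pd.DataFrame({"char_y_center": [140.0, 100.0, 100.0, 180.0]})
    return list(chars_df.char_y_center.sort_values().unique())  # [100.0, 140.0, 180.0]
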
def get_saccade_df(dffix, trial, algo_choices, events_df):
    if not isinstance(algo_choices, list):
        algo_choices = [algo_choices]
    sac_df_as_detected = events_df[events_df["msg"] == "SAC"].copy()
    last_sacc_stop_time = sac_df_as_detected["stop_uncorrected"].iloc[-1]
    dffix_after_last_sacc = dffix.loc[dffix["start_uncorrected"] > last_sacc_stop_time, :].copy()
    if not dffix_after_last_sacc.empty:
        dffix_before_last_sacc = dffix.loc[dffix["start_uncorrected"] < last_sacc_stop_time, :].copy()
        dffix = pd.concat([dffix_before_last_sacc, dffix_after_last_sacc.iloc[[0], :]], axis=0)
    # Keep only saccades that fall between the first fixation end and the last fixation start.
    sac_df_as_detected = sac_df_as_detected[sac_df_as_detected["start"] >= dffix["end_time"].iloc[0]]
    sac_df_as_detected = sac_df_as_detected[sac_df_as_detected["stop"] <= dffix["start_time"].iloc[-1]]
    sac_index_keep = [
        i
        for i, row in sac_df_as_detected.iterrows()
        if np.abs(row["start"] - dffix["start_time"].values).min() < 100
    ]
    sac_df_as_detected = sac_df_as_detected.loc[sac_index_keep, :]
    starts = pd.Series(dffix["start_time"].values, dffix["start_time"])
    ends = pd.Series(dffix["end_time"].values, dffix["end_time"])
    # Match each saccade stop to the next fixation start (bfill) and each
    # saccade start to the preceding fixation end (ffill).
    starts_reind = starts.reindex(sac_df_as_detected["stop"], method="bfill").dropna()
    ends_reind = ends.reindex(sac_df_as_detected["start"], method="ffill").dropna()
    sac_df_as_detected_start_indexed = sac_df_as_detected.copy().set_index("start")
    saccade_df = (
        sac_df_as_detected_start_indexed.loc[ends_reind.index, :]
        .reset_index(drop=False)
        .rename({"start": "start_time", "stop": "end_time"}, axis=1)
    )
    saccade_df = pf.get_angle_and_eucl_dist(saccade_df)
    # TODO maybe add incoming outgoing angle from sacc_df to dffix
    dffix_start_indexed = dffix.copy().set_index("start_time")
    dffix_end_indexed = dffix.copy().set_index("end_time")
    for algo_choice in algo_choices:
        saccade_df[f"ys_{algo_choice}"] = dffix_end_indexed.loc[ends_reind.values, f"y_{algo_choice}"].values
        saccade_df[f"ye_{algo_choice}"] = dffix_start_indexed.loc[starts_reind.values, f"y_{algo_choice}"].values
        saccade_df = pf.get_angle_and_eucl_dist(saccade_df, algo_choice)
        saccade_df[f"lines_{algo_choice}"] = dffix_end_indexed.loc[
            ends_reind.values, f"line_num_{algo_choice}"
        ].values
        saccade_df[f"linee_{algo_choice}"] = dffix_start_indexed.loc[
            starts_reind.values, f"line_num_{algo_choice}"
        ].values
        saccade_df[f"line_word_s_{algo_choice}"] = dffix_end_indexed.loc[
            ends_reind.values, f"line_word_{algo_choice}"
        ].values
        saccade_df[f"line_word_e_{algo_choice}"] = dffix_start_indexed.loc[
            starts_reind.values, f"line_word_{algo_choice}"
        ].values
        saccade_df[f"lets_{algo_choice}"] = dffix_end_indexed.loc[
            ends_reind.values, f"letternum_{algo_choice}"
        ].values
        saccade_df[f"lete_{algo_choice}"] = dffix_start_indexed.loc[
            starts_reind.values, f"letternum_{algo_choice}"
        ].values
    blink_df = events_df[events_df["msg"] == "BLINK"]
    for i in range(len(saccade_df)):
        # Membership checks on a Series look at the index, so compare against .values.
        if saccade_df.loc[i, "start_time"] in blink_df["start"].values:
            saccade_df.loc[i, "blink"] = True
    saccade_df = pf.compute_non_line_dependent_saccade_measures(saccade_df, trial)
    for algo_choice in algo_choices:
        saccade_df = pf.compute_saccade_measures(saccade_df, trial, algo_choice)
    if "msg" in saccade_df.columns:
        saccade_df = saccade_df.drop(axis=1, labels=["msg"])
    saccade_df = reorder_columns(saccade_df)
    return saccade_df.dropna(how="all", axis=1).copy()
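
# Hedged sketch of the fixation/saccade pairing in get_saccade_df: each saccade
# stop is matched to the first fixation starting at or after it (bfill), and
# each saccade start to the last fixation ending at or before it (ffill).
# Timestamps are invented.
def _example_saccade_fixation_pairing():
    fix_starts = pd.Series([0, 120, 260], index=[0, 120, 260])  # value == start_time
    sacc_stops = [110, 255]
    next_fix_start = fix_starts.reindex(sacc_stops, method="bfill")
    return next_fix_start  # 110 -> 120, 255 -> 260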