File size: 12,147 Bytes
da572bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 |
"""
Partially taken and adapted from: https://github.com/jwcarr/eyekit/blob/1db1913411327b108b87e097a00278b6e50d0751/eyekit/measure.py
Functions for calculating common reading measures, such as gaze duration or initial landing position.
"""
import pandas as pd
from icecream import ic
ic.configureOutput(includeContext=True)
def fix_in_ia(fix_x, fix_y, ia_x_min, ia_x_max, ia_y_min, ia_y_max):
    """
    Return True when the fixation point (fix_x, fix_y) lies inside the
    rectangular interest area; both x and y bounds are inclusive.
    """
    # bool(...) guards against numpy.bool_ leaking out when the coordinates
    # come from pandas/numpy values.
    return bool(ia_x_min <= fix_x <= ia_x_max and ia_y_min <= fix_y <= ia_y_max)
def fix_in_ia_default(fixation, ia_row, prefix):
    """
    Convenience wrapper around fix_in_ia: pull the interest area's bounding
    box out of its row via the standard "{prefix}_(x|y)(min|max)" keys.
    """
    bounds = (
        ia_row[f"{prefix}_xmin"],
        ia_row[f"{prefix}_xmax"],
        ia_row[f"{prefix}_ymin"],
        ia_row[f"{prefix}_ymax"],
    )
    return fix_in_ia(fixation.x, fixation.y, *bounds)
def number_of_fixations_own(trial, dffix, prefix, correction_algo):
    """
    Return the number of fixations on each interest area.

    Parameters
    ----------
    trial : dict holding f"{prefix}s_list", a list of interest-area records
        with {prefix}_xmin/_xmax/_ymin/_ymax bounds and a {prefix} label.
    dffix : DataFrame of fixations with x and y columns.
    prefix : interest-area kind, e.g. "word" or "char".
    correction_algo : name suffix appended to the output measure column.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    counts = []
    for cidx, ia_row in ia_df.iterrows():
        # Reuse the shared hit-test helper instead of unpacking the bounds
        # inline, keeping this measure consistent with the other *_own ones.
        count = sum(1 for _, fixation in dffix.iterrows() if fix_in_ia_default(fixation, ia_row, prefix))
        counts.append(
            {
                f"{prefix}_number": cidx,
                prefix: ia_row[f"{prefix}"],
                f"number_of_fixations_{correction_algo}": count,
            }
        )
    return pd.DataFrame(counts)
def initial_fixation_duration_own(trial, dffix, prefix, correction_algo):
    """
    Duration of the first fixation landing on each interest area
    (0 for areas that are never fixated).
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    rows = []
    for ia_idx, ia in ia_df.iterrows():
        # next() with a default pulls the first matching fixation's duration.
        first_duration = next(
            (fix.duration for _, fix in dffix.iterrows() if fix_in_ia_default(fix, ia, prefix)),
            0,
        )
        rows.append(
            {
                f"{prefix}_number": ia_idx,
                prefix: ia[f"{prefix}"],
                f"initial_fixation_duration_{correction_algo}": first_duration,
            }
        )
    return pd.DataFrame(rows)
def first_of_many_duration_own(trial, dffix, prefix, correction_algo):
    """
    First-fixation duration for interest areas that received more than one
    fixation; None for areas fixated at most once.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    rows = []
    for ia_idx, ia in ia_df.iterrows():
        durs = [fix.duration for _, fix in dffix.iterrows() if fix_in_ia_default(fix, ia, prefix)]
        rows.append(
            {
                f"{prefix}_number": ia_idx,
                prefix: ia[f"{prefix}"],
                # the measure only exists when the area got multiple fixations
                f"first_of_many_duration_{correction_algo}": durs[0] if len(durs) > 1 else None,
            }
        )
    if not rows:
        return pd.DataFrame()
    return pd.DataFrame(rows)
def total_fixation_duration_own(trial, dffix, prefix, correction_algo):
    """
    Summed duration of all fixations on each interest area
    (0 for areas that are never fixated).
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    rows = []
    for ia_idx, ia in ia_df.iterrows():
        total = sum(fix.duration for _, fix in dffix.iterrows() if fix_in_ia_default(fix, ia, prefix))
        rows.append(
            {
                f"{prefix}_number": ia_idx,
                prefix: ia[f"{prefix}"],
                f"total_fixation_duration_{correction_algo}": total,
            }
        )
    return pd.DataFrame(rows)
def gaze_duration_own(trial, dffix, prefix, correction_algo):
    """
    Gaze duration (first-pass reading time): the summed duration of all
    fixations inside an interest area until the area is exited for the
    first time.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    rows = []
    for ia_idx, ia in ia_df.iterrows():
        first_pass_total = 0
        entered = False
        for _, fix in dffix.iterrows():
            if not fix_in_ia_default(fix, ia, prefix):
                if entered:
                    break  # first exit after entry ends the first pass
                continue  # still before the first entry
            entered = True
            first_pass_total += fix.duration
        rows.append(
            {
                f"{prefix}_number": ia_idx,
                prefix: ia[f"{prefix}"],
                f"gaze_duration_{correction_algo}": first_pass_total,
            }
        )
    return pd.DataFrame(rows)
def go_past_duration_own(trial, dffix, prefix, correction_algo):
    """
    Go-past time per interest area: the summed duration of all fixations from
    when the area is first entered until it is first exited past its right
    edge, including any intervening regressions to the left.

    NOTE(review): "exited to the right" is judged purely by fixation.x being
    beyond the area's xmax — a later fixation on another line with a smaller
    x would still be accumulated; confirm this matches the intended measure.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    rows = []
    for ia_idx, ia in ia_df.iterrows():
        right_edge = ia[f"{prefix}_xmax"]
        go_past_time = 0
        visited = False
        for _, fix in dffix.iterrows():
            inside = fix_in_ia_default(fix, ia, prefix)
            if not visited and not inside:
                continue  # the area has not been entered yet
            if not inside and fix.x > right_edge:
                break  # first exit past the right edge ends the go-past period
            visited = True
            go_past_time += fix.duration
        rows.append(
            {f"{prefix}_number": ia_idx, prefix: ia[f"{prefix}"], f"go_past_duration_{correction_algo}": go_past_time}
        )
    return pd.DataFrame(rows)
def second_pass_duration_own(trial, dffix, prefix, correction_algo):
    """
    Given an interest area and fixation sequence, return the second pass
    duration on that interest area for each word.

    A "pass" is a maximal run of consecutive fixations inside the interest
    area; the second pass duration is the summed duration of the second such
    run (0 when there is no second pass).
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    durations = []
    for cidx, ia_row in ia_df.iterrows():
        # current_pass: ordinal of the pass the scan is currently inside
        # (None = currently outside the area).
        # next_pass: ordinal that the next entry into the area will get.
        current_pass = None
        next_pass = 1
        pass_duration = 0
        for idx, fixation in dffix.iterrows():
            if fix_in_ia_default(fixation, ia_row, prefix):
                if current_pass is None:  # first fixation in a new pass
                    current_pass = next_pass
                if current_pass == 2:
                    pass_duration += fixation.duration
            elif current_pass == 1:  # first fixation to exit the first pass
                current_pass = None
                next_pass += 1
            elif current_pass == 2:  # first fixation to exit the second pass
                break
        durations.append(
            {
                f"{prefix}_number": cidx,
                prefix: ia_row[f"{prefix}"],
                f"second_pass_duration_{correction_algo}": pass_duration,
            }
        )
    return pd.DataFrame(durations)
def initial_landing_position_own(trial, dffix, prefix, correction_algo):
    """
    Initial landing position (expressed in character positions) on that
    interest area. Counting is from 1. Returns `None` if no fixation landed
    on the interest area.

    For prefix == "char" the position is always 1; for prefix == "word" the
    fixated character is located among the trial's chars_list entries whose
    bounding boxes fall inside the word's box.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    if prefix == "word":
        chars_df = pd.DataFrame(trial[f"chars_list"])
    else:
        # NOTE(review): only "word" and "char" appear to be handled; any other
        # prefix leaves chars_df as None and would fail below — confirm callers.
        chars_df = None
    results = []
    for cidx, ia_row in ia_df.iterrows():
        landing_position = None
        for idx, fixation in dffix.iterrows():
            if fix_in_ia_default(fixation, ia_row, prefix):
                if prefix == "char":
                    landing_position = 1
                else:
                    prefix_temp = "char"
                    # Characters whose bounding box lies fully inside this
                    # word's box are treated as the word's characters.
                    matched_chars_df = chars_df.loc[
                        (chars_df.char_xmin >= ia_row[f"{prefix}_xmin"])
                        & (chars_df.char_xmax <= ia_row[f"{prefix}_xmax"])
                        & (chars_df.char_ymin >= ia_row[f"{prefix}_ymin"])
                        & (chars_df.char_ymax <= ia_row[f"{prefix}_ymax"]),
                        :,
                    ]  # TODO need to find way to count correct letter number
                    for char_idx, (rowidx, char_row) in enumerate(matched_chars_df.iterrows()):
                        if fix_in_ia_default(fixation, char_row, prefix_temp):
                            landing_position = char_idx + 1  # starts at 1
                            break
                # only the first fixation on the area determines the measure
                break
        results.append(
            {
                f"{prefix}_number": cidx,
                prefix: ia_row[f"{prefix}"],
                f"initial_landing_position_{correction_algo}": landing_position,
            }
        )
    return pd.DataFrame(results)
def initial_landing_distance_own(trial, dffix, prefix, correction_algo):
    """
    Pixel distance between the first fixation to land in each interest area
    and the area's left edge (measured from the text onset, no padding).
    None for areas that no fixation landed on.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    rows = []
    for ia_idx, ia in ia_df.iterrows():
        # first fixation that hits the area, or None when there is none
        first_hit = next(
            (fix for _, fix in dffix.iterrows() if fix_in_ia_default(fix, ia, prefix)),
            None,
        )
        initial_distance = None if first_hit is None else abs(ia[f"{prefix}_xmin"] - first_hit.x)
        rows.append(
            {
                f"{prefix}_number": ia_idx,
                prefix: ia[f"{prefix}"],
                f"initial_landing_distance_{correction_algo}": initial_distance,
            }
        )
    return pd.DataFrame(rows)
def landing_distances_own(trial, dffix, prefix, correction_algo):
    """
    For each interest area, collect the landing distance (|xmin - fixation.x|,
    rounded to two decimals) of every fixation that lands inside it.
    """
    ia_df = pd.DataFrame(trial[f"{prefix}s_list"])
    rows = []
    for ia_idx, ia in ia_df.iterrows():
        left_edge = ia[f"{prefix}_xmin"]
        dists = [
            round(abs(left_edge - fix.x), ndigits=2)
            for _, fix in dffix.iterrows()
            if fix_in_ia_default(fix, ia, prefix)
        ]
        rows.append(
            {
                f"{prefix}_number": ia_idx,
                prefix: ia[f"{prefix}"],
                f"landing_distances_{correction_algo}": dists,
            }
        )
    return pd.DataFrame(rows)
def number_of_regressions_in_own(trial, dffix, prefix, correction_algo):
    """
    Sum the per-fixation regression-in flags for each interest area.

    Relies on dffix already carrying the columns produced by the correction
    step: on_{prefix}_number_{algo}, on_{prefix}_{algo} and
    {prefix}_reg_in_{algo}. `trial` is unused (kept for a uniform signature).
    """
    group_cols = [f"on_{prefix}_number_{correction_algo}", f"on_{prefix}_{correction_algo}"]
    reg_col = f"{prefix}_reg_in_{correction_algo}"
    summed = dffix.groupby(group_cols, as_index=False)[reg_col].sum()
    return summed.rename(
        columns={
            group_cols[0]: f"{prefix}_number",
            group_cols[1]: prefix,
            reg_col: f"number_of_regressions_in_{correction_algo}",
        }
    )
|