{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Timings" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading required package: ggplot2\n", "\n", "-- \u001b[1mAttaching packages\u001b[22m --------------------------------------- tidyverse 1.3.1 --\n", "\n", "\u001b[32mv\u001b[39m \u001b[34mtibble \u001b[39m 3.1.5 \u001b[32mv\u001b[39m \u001b[34mdplyr \u001b[39m 1.0.7\n", "\u001b[32mv\u001b[39m \u001b[34mtidyr \u001b[39m 1.1.4 \u001b[32mv\u001b[39m \u001b[34mstringr\u001b[39m 1.4.0\n", "\u001b[32mv\u001b[39m \u001b[34mpurrr \u001b[39m 0.3.4 \u001b[32mv\u001b[39m \u001b[34mforcats\u001b[39m 0.5.1\n", "\n", "-- \u001b[1mConflicts\u001b[22m ------------------------------------------ tidyverse_conflicts() --\n", "\u001b[31mx\u001b[39m \u001b[34mdplyr\u001b[39m::\u001b[32mfilter()\u001b[39m masks \u001b[34mstats\u001b[39m::filter()\n", "\u001b[31mx\u001b[39m \u001b[34mdplyr\u001b[39m::\u001b[32mlag()\u001b[39m masks \u001b[34mstats\u001b[39m::lag()\n", "\n", "Loading required package: mvtnorm\n", "\n", "Loading required package: survival\n", "\n", "Loading required package: TH.data\n", "\n", "Loading required package: MASS\n", "\n", "\n", "Attaching package: 'MASS'\n", "\n", "\n", "The following object is masked from 'package:dplyr':\n", "\n", " select\n", "\n", "\n", "\n", "Attaching package: 'TH.data'\n", "\n", "\n", "The following object is masked from 'package:MASS':\n", "\n", " geyser\n", "\n", "\n", "Loading required package: carData\n", "\n", "\n", "Attaching package: 'car'\n", "\n", "\n", "The following object is masked from 'package:dplyr':\n", "\n", " recode\n", "\n", "\n", "The following object is masked from 'package:purrr':\n", "\n", " some\n", "\n", "\n", "\n", "Attaching package: 'rstatix'\n", "\n", "\n", "The following object is masked from 'package:MASS':\n", "\n", " select\n", "\n", "\n", "The following object 
is masked from 'package:stats':\n", "\n", " filter\n", "\n", "\n" ] } ], "source": [ "library(\"ggpubr\")\n", "library(readr)\n", "library(ggplot2)\n", "library(tidyverse)\n", "library(ARTool)\n", "library(emmeans)\n", "library(multcomp)\n", "library(car)\n", "library(rstatix)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "New names:\n", "* `` -> ...1\n", "\n", "\u001b[1mRows: \u001b[22m\u001b[34m59\u001b[39m \u001b[1mColumns: \u001b[22m\u001b[34m9\u001b[39m\n", "\u001b[36m--\u001b[39m \u001b[1mColumn specification\u001b[22m \u001b[36m--------------------------------------------------------\u001b[39m\n", "\u001b[1mDelimiter:\u001b[22m \",\"\n", "\u001b[32mdbl\u001b[39m (9): ...1, faiss_dpr.retrieve, faiss_dpr.read, faiss_longformer.retrieve...\n", "\n", "\u001b[36mi\u001b[39m Use `spec()` to retrieve the full column specification for this data.\n", "\u001b[36mi\u001b[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.\n" ] }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A tibble: 6 × 5
questionretrieverreadermethodtime
<dbl><fct><fct><fct><dbl>
0faissdpr retrieve0.30384302
0faissdpr read 4.56640005
0faisslongformerretrieve0.92279482
0faisslongformerread 5.76836824
0es dpr retrieve0.01930094
0es dpr read 2.74536490
\n" ], "text/latex": [ "A tibble: 6 × 5\n", "\\begin{tabular}{lllll}\n", " question & retriever & reader & method & time\\\\\n", " & & & & \\\\\n", "\\hline\n", "\t 0 & faiss & dpr & retrieve & 0.30384302\\\\\n", "\t 0 & faiss & dpr & read & 4.56640005\\\\\n", "\t 0 & faiss & longformer & retrieve & 0.92279482\\\\\n", "\t 0 & faiss & longformer & read & 5.76836824\\\\\n", "\t 0 & es & dpr & retrieve & 0.01930094\\\\\n", "\t 0 & es & dpr & read & 2.74536490\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A tibble: 6 × 5\n", "\n", "| question <dbl> | retriever <fct> | reader <fct> | method <fct> | time <dbl> |\n", "|---|---|---|---|---|\n", "| 0 | faiss | dpr | retrieve | 0.30384302 |\n", "| 0 | faiss | dpr | read | 4.56640005 |\n", "| 0 | faiss | longformer | retrieve | 0.92279482 |\n", "| 0 | faiss | longformer | read | 5.76836824 |\n", "| 0 | es | dpr | retrieve | 0.01930094 |\n", "| 0 | es | dpr | read | 2.74536490 |\n", "\n" ], "text/plain": [ " question retriever reader method time \n", "1 0 faiss dpr retrieve 0.30384302\n", "2 0 faiss dpr read 4.56640005\n", "3 0 faiss longformer retrieve 0.92279482\n", "4 0 faiss longformer read 5.76836824\n", "5 0 es dpr retrieve 0.01930094\n", "6 0 es dpr read 2.74536490" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# The unnamed first CSV column (read as `...1`) identifies the question; every\n", "# other column is named like `faiss_dpr.retrieve`. Reshape to one timing per row\n", "# and split those names on \".\" or \"_\" into retriever, reader and method.\n", "timings <- read_csv(\"timings.csv\") %>%\n", "    rename(question = `...1`) %>%\n", "    pivot_longer(\n", "        !question,\n", "        names_to = c(\"retriever\", \"reader\", \"method\"),\n", "        names_sep = \"[._]\",\n", "        values_to = \"time\"\n", "    ) %>%\n", "    # The three design variables are categorical, so store them as factors.\n", "    mutate(across(c(retriever, reader, method), as.factor))\n", "\n", "head(timings)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "vscode": { "languageId": "r" } }, "outputs": [], "source": [ "# One data set per timed pipeline step; `method` is constant within each, so drop it.\n", "# The verbs are namespace-qualified because MASS::select() masks dplyr::select()\n", "# once the modelling libraries are attached; the bare name only resolves to the\n", "# dplyr verb while rstatix (which re-exports it) happens to be attached last.\n", "timings_read <- timings %>%\n", "    dplyr::filter(method == \"read\") %>%\n", "    dplyr::select(!method)\n", "\n", "timings_retrieve <- timings %>%\n", "    dplyr::filter(method == \"retrieve\") %>%\n", "    dplyr::select(!method)" ] }, { 
"cell_type": "markdown", "metadata": {}, "source": [ "To decide which statistical tests we can use, we first check whether the timings are normally distributed, using a Shapiro-Wilk test of normality. As you can see in the results below, all $p$-values are lower than 0.001, so we reject the null hypothesis of normality and conclude that none of the timing distributions are normally distributed." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\n", "
A tibble: 2 × 3
retrieversw.statsw.p
<fct><dbl><dbl>
es 0.75342611.667341e-18
faiss0.75857272.563192e-18
\n" ], "text/latex": [ "A tibble: 2 × 3\n", "\\begin{tabular}{lll}\n", " retriever & sw.stat & sw.p\\\\\n", " & & \\\\\n", "\\hline\n", "\t es & 0.7534261 & 1.667341e-18\\\\\n", "\t faiss & 0.7585727 & 2.563192e-18\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A tibble: 2 × 3\n", "\n", "| retriever <fct> | sw.stat <dbl> | sw.p <dbl> |\n", "|---|---|---|\n", "| es | 0.7534261 | 1.667341e-18 |\n", "| faiss | 0.7585727 | 2.563192e-18 |\n", "\n" ], "text/plain": [ " retriever sw.stat sw.p \n", "1 es 0.7534261 1.667341e-18\n", "2 faiss 0.7585727 2.563192e-18" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\n", "
A tibble: 2 × 3
readersw.statsw.p
<fct><dbl><dbl>
dpr 0.76390054.029344e-18
longformer0.81163623.381683e-16
\n" ], "text/latex": [ "A tibble: 2 × 3\n", "\\begin{tabular}{lll}\n", " reader & sw.stat & sw.p\\\\\n", " & & \\\\\n", "\\hline\n", "\t dpr & 0.7639005 & 4.029344e-18\\\\\n", "\t longformer & 0.8116362 & 3.381683e-16\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A tibble: 2 × 3\n", "\n", "| reader <fct> | sw.stat <dbl> | sw.p <dbl> |\n", "|---|---|---|\n", "| dpr | 0.7639005 | 4.029344e-18 |\n", "| longformer | 0.8116362 | 3.381683e-16 |\n", "\n" ], "text/plain": [ " reader sw.stat sw.p \n", "1 dpr 0.7639005 4.029344e-18\n", "2 longformer 0.8116362 3.381683e-16" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\n", "
A tibble: 2 × 3
methodsw.statsw.p
<fct><dbl><dbl>
read 0.88381821.779766e-12
retrieve0.62377731.838892e-22
\n" ], "text/latex": [ "A tibble: 2 × 3\n", "\\begin{tabular}{lll}\n", " method & sw.stat & sw.p\\\\\n", " & & \\\\\n", "\\hline\n", "\t read & 0.8838182 & 1.779766e-12\\\\\n", "\t retrieve & 0.6237773 & 1.838892e-22\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A tibble: 2 × 3\n", "\n", "| method <fct> | sw.stat <dbl> | sw.p <dbl> |\n", "|---|---|---|\n", "| read | 0.8838182 | 1.779766e-12 |\n", "| retrieve | 0.6237773 | 1.838892e-22 |\n", "\n" ], "text/plain": [ " method sw.stat sw.p \n", "1 read 0.8838182 1.779766e-12\n", "2 retrieve 0.6237773 1.838892e-22" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Shapiro-Wilk statistic (sw.stat) and p-value (sw.p) of `time` within each level\n", "# of the grouping column `group` (passed unquoted).\n", "# The test is run once per group, and the p-value is read through its full name\n", "# (`p.value`) instead of relying on partial matching of `$p`.\n", "shapiro_by <- function(data, group) {\n", "    data %>%\n", "        group_by({{ group }}) %>%\n", "        summarise({\n", "            sw <- shapiro.test(time)\n", "            # An unnamed one-row tibble is unpacked into separate columns.\n", "            tibble(sw.stat = unname(sw$statistic), sw.p = sw$p.value)\n", "        })\n", "}\n", "\n", "shapiro_by(timings, retriever)\n", "shapiro_by(timings, reader)\n", "shapiro_by(timings, method)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Since our data is not normally distributed, we cannot use an ANOVA to compare our results. Therefore, we use an aligned rank transform (ART) test, which is a non-parametric version of a factorial repeated measures ANOVA." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\n", "
A anova.art: 3 × 7
TermDfDf.resSum SqSum Sq.resF valuePr(>F)
<chr><dbl><dbl><dbl><dbl><dbl><dbl>
retrieverretriever 1232 41088.971037631.8 9.186922.714084e-03
readerreader 1232790427.81 301414.1608.396338.802730e-67
retriever:readerretriever:reader1232101903.46 983331.4 24.042351.771995e-06
\n" ], "text/latex": [ "A anova.art: 3 × 7\n", "\\begin{tabular}{r|lllllll}\n", " & Term & Df & Df.res & Sum Sq & Sum Sq.res & F value & Pr(>F)\\\\\n", " & & & & & & & \\\\\n", "\\hline\n", "\tretriever & retriever & 1 & 232 & 41088.97 & 1037631.8 & 9.18692 & 2.714084e-03\\\\\n", "\treader & reader & 1 & 232 & 790427.81 & 301414.1 & 608.39633 & 8.802730e-67\\\\\n", "\tretriever:reader & retriever:reader & 1 & 232 & 101903.46 & 983331.4 & 24.04235 & 1.771995e-06\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A anova.art: 3 × 7\n", "\n", "| | Term <chr> | Df <dbl> | Df.res <dbl> | Sum Sq <dbl> | Sum Sq.res <dbl> | F value <dbl> | Pr(>F) <dbl> |\n", "|---|---|---|---|---|---|---|---|\n", "| retriever | retriever | 1 | 232 | 41088.97 | 1037631.8 | 9.18692 | 2.714084e-03 |\n", "| reader | reader | 1 | 232 | 790427.81 | 301414.1 | 608.39633 | 8.802730e-67 |\n", "| retriever:reader | retriever:reader | 1 | 232 | 101903.46 | 983331.4 | 24.04235 | 1.771995e-06 |\n", "\n" ], "text/plain": [ " Term Df Df.res Sum Sq Sum Sq.res F value \n", "retriever retriever 1 232 41088.97 1037631.8 9.18692\n", "reader reader 1 232 790427.81 301414.1 608.39633\n", "retriever:reader retriever:reader 1 232 101903.46 983331.4 24.04235\n", " Pr(>F) \n", "retriever 2.714084e-03\n", "reader 8.802730e-67\n", "retriever:reader 1.771995e-06" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "NOTE: Results may be misleading due to involvement in interactions\n", "\n" ] }, { "data": { "text/plain": [ " contrast estimate SE df t.ratio p.value\n", " es - faiss 26.4 8.71 232 3.031 0.0027\n", "\n", "Results are averaged over the levels of: reader " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "NOTE: Results may be misleading due to involvement in interactions\n", "\n" ] }, { "data": { "text/plain": [ " contrast estimate SE df t.ratio p.value\n", " dpr - longformer -116 4.69 
232 -24.666 <.0001\n", "\n", "Results are averaged over the levels of: retriever " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Reading time: aligned rank transform (ART) model with retriever, reader and\n", "# their interaction as factors.\n", "# NOTE(review): every question is timed under each retriever/reader combination,\n", "# but the formula has no per-question term (e.g. `+ (1|question)` with question\n", "# as a factor), so the observations are treated as independent - confirm this\n", "# is intended.\n", "model.read <- art(time ~ retriever * reader, data = timings_read)\n", "anova(model.read)\n", "\n", "# Contrasts for each main effect. art.con() notes that these can be misleading\n", "# when the factor is involved in an interaction, so read them next to the\n", "# retriever:reader row of the ANOVA table.\n", "art.con(model.read, ~ retriever)\n", "art.con(model.read, ~ reader)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\n", "
A anova.art: 3 × 7
TermDfDf.resSum SqSum Sq.resF valuePr(>F)
<chr><dbl><dbl><dbl><dbl><dbl><dbl>
retrieverretriever 1232821516240071.9793.89447.630526e-77
readerreader 1232821516214935.3886.73983.256422e-81
retriever:readerretriever:reader1232821516215501.6884.40964.148583e-81
\n" ], "text/latex": [ "A anova.art: 3 × 7\n", "\\begin{tabular}{r|lllllll}\n", " & Term & Df & Df.res & Sum Sq & Sum Sq.res & F value & Pr(>F)\\\\\n", " & & & & & & & \\\\\n", "\\hline\n", "\tretriever & retriever & 1 & 232 & 821516 & 240071.9 & 793.8944 & 7.630526e-77\\\\\n", "\treader & reader & 1 & 232 & 821516 & 214935.3 & 886.7398 & 3.256422e-81\\\\\n", "\tretriever:reader & retriever:reader & 1 & 232 & 821516 & 215501.6 & 884.4096 & 4.148583e-81\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A anova.art: 3 × 7\n", "\n", "| | Term <chr> | Df <dbl> | Df.res <dbl> | Sum Sq <dbl> | Sum Sq.res <dbl> | F value <dbl> | Pr(>F) <dbl> |\n", "|---|---|---|---|---|---|---|---|\n", "| retriever | retriever | 1 | 232 | 821516 | 240071.9 | 793.8944 | 7.630526e-77 |\n", "| reader | reader | 1 | 232 | 821516 | 214935.3 | 886.7398 | 3.256422e-81 |\n", "| retriever:reader | retriever:reader | 1 | 232 | 821516 | 215501.6 | 884.4096 | 4.148583e-81 |\n", "\n" ], "text/plain": [ " Term Df Df.res Sum Sq Sum Sq.res F value \n", "retriever retriever 1 232 821516 240071.9 793.8944\n", "reader reader 1 232 821516 214935.3 886.7398\n", "retriever:reader retriever:reader 1 232 821516 215501.6 884.4096\n", " Pr(>F) \n", "retriever 7.630526e-77\n", "reader 3.256422e-81\n", "retriever:reader 4.148583e-81" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "NOTE: Results may be misleading due to involvement in interactions\n", "\n" ] }, { "data": { "text/plain": [ " contrast estimate SE df t.ratio p.value\n", " es - faiss -118 4.19 232 -28.176 <.0001\n", "\n", "Results are averaged over the levels of: reader " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "NOTE: Results may be misleading due to involvement in interactions\n", "\n" ] }, { "data": { "text/plain": [ " contrast estimate SE df t.ratio p.value\n", " dpr - longformer -118 3.96 232 -29.778 <.0001\n", 
"\n", "Results are averaged over the levels of: retriever " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Retrieval time: aligned rank transform (ART) model with retriever, reader and\n", "# their interaction as factors.\n", "# NOTE(review): every question is timed under each retriever/reader combination,\n", "# but the formula has no per-question term (e.g. `+ (1|question)` with question\n", "# as a factor), so the observations are treated as independent - confirm this\n", "# is intended.\n", "model.retrieve <- art(time ~ retriever * reader, data = timings_retrieve)\n", "anova(model.retrieve)\n", "\n", "# Contrasts for each main effect. art.con() notes that these can be misleading\n", "# when the factor is involved in an interaction, so read them next to the\n", "# retriever:reader row of the ANOVA table.\n", "art.con(model.retrieve, ~ retriever)\n", "art.con(model.retrieve, ~ reader)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "4.1.2" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }