File size: 4,922 Bytes
5332f93
 
 
 
1dfbccd
5332f93
d4a5294
 
 
1dfbccd
d4a5294
 
 
 
5332f93
 
 
 
 
471ccf2
 
5332f93
 
 
 
 
 
 
 
 
 
 
d4a5294
 
 
5332f93
 
 
471ccf2
d4a5294
 
 
 
 
 
 
 
 
5332f93
1dfbccd
5332f93
 
 
471ccf2
5332f93
 
 
 
 
 
 
 
1dfbccd
5332f93
 
42f96d8
 
 
 
 
 
 
5332f93
42f96d8
 
5332f93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4a5294
5332f93
d4a5294
5332f93
d4a5294
5332f93
d4a5294
5332f93
d4a5294
5332f93
 
 
 
 
1dfbccd
5332f93
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import re

import pandas as pd


class PaperList:
    """Searchable, filterable HTML table of papers.

    The table is loaded from ``papers.json`` and left-joined with
    ``claim_info.csv`` on ``arxiv_id``. One HTML ``<tr>`` snippet is
    pre-rendered per paper so that :meth:`render` only has to filter rows and
    concatenate the cached snippets.
    """

    def __init__(self) -> None:
        """Load the paper and claim data from the working directory."""
        self.organization_name = "ICML2023"
        self.table = pd.read_json("papers.json").fillna("")

        claim_info = pd.read_csv("claim_info.csv", dtype={"arxiv_id": str, "n_authors": int, "n_linked_authors": int})
        self.table = self.table.merge(right=claim_info, on="arxiv_id", how="left")
        # Papers without a row in claim_info.csv come out of the left join as
        # NaN; -1 is used as the "no claim information" sentinel.
        self.table[["n_authors", "n_linked_authors"]] = (
            self.table[["n_authors", "n_linked_authors"]].fillna(-1).astype(int)
        )

        self._preprocess_table()

        self.table_header = """
            <tr>
                <td width="38%">Title</td>
                <td width="20%">Authors</td>
                <td width="5%">Type</td>
                <td width="5%">arXiv</td>
                <td width="5%">GitHub</td>
                <td width="7%">Paper pages</td>
                <td width="5%">Spaces</td>
                <td width="5%">Models</td>
                <td width="5%">Datasets</td>
                <td width="5%">Claimed</td>
            </tr>"""

    @staticmethod
    def _link(url: str, label: str) -> str:
        """Return an anchor opening ``url`` in a new tab, or ``""`` if ``url`` is empty."""
        return f'<a href="{url}" target="_blank">{label}</a>' if url else ""

    @staticmethod
    def _match_titles(titles: pd.Series, query: str) -> pd.Series:
        """Return a boolean mask of the entries in ``titles`` that contain ``query``.

        ``query`` is interpreted as a regular expression when it compiles as
        one. A query that is not a valid pattern (for example a lone ``(``
        typed into a search box) is matched as a literal substring instead of
        raising.
        """
        try:
            re.compile(query)
        except re.error:
            return titles.str.contains(query, regex=False)
        return titles.str.contains(query)

    def _preprocess_table(self) -> None:
        """Add derived columns and the per-row HTML snippet to ``self.table``.

        Adds ``title_lowercase``, ``arxiv``, ``hf_paper`` and
        ``html_table_content``, and replaces the ``authors`` column with a
        comma-separated string.
        """
        self.table["title_lowercase"] = self.table.title.str.lower()
        self.table["arxiv"] = self.table.arxiv_id.apply(lambda x: f"https://arxiv.org/abs/{x}" if x else "")
        self.table["hf_paper"] = self.table.arxiv_id.apply(lambda x: f"https://huggingface.co/papers/{x}" if x else "")
        self.table["authors"] = self.table.authors.apply(lambda x: ", ".join(x))

        # NOTE(review): titles, author names and URLs are interpolated into the
        # markup without HTML escaping; confirm the input data never contains
        # characters such as "<" or "&" that would need it.
        rows = []
        for row in self.table.itertuples():
            # The title is always rendered as a link, even when row.url is empty.
            title = f'<a href="{row.url}" target="_blank">{row.title}</a>'
            arxiv = self._link(row.arxiv, "arXiv")
            github = self._link(row.github, "GitHub")
            hf_paper = self._link(row.hf_paper, "Paper page")
            hf_space = self._link(row.hf_space, "Space")
            hf_model = self._link(row.hf_model, "Model")
            hf_dataset = self._link(row.hf_dataset, "Dataset")

            if row.n_linked_authors == -1:
                # No claim information for this paper: leave the cell empty.
                claimed_paper = ""
            else:
                n_authors = "" if row.n_authors == -1 else row.n_authors
                author_linked = "✅" if row.n_linked_authors > 0 else ""
                claimed_paper = f"{row.n_linked_authors}/{n_authors} {author_linked}"

            new_row = f"""
                <tr>
                    <td>{title}</td>
                    <td>{row.authors}</td>
                    <td>{row.type}</td>
                    <td>{arxiv}</td>
                    <td>{github}</td>
                    <td>{hf_paper}</td>
                    <td>{hf_space}</td>
                    <td>{hf_model}</td>
                    <td>{hf_dataset}</td>
                    <td>{claimed_paper}</td>
                </tr>"""
            rows.append(new_row)
        self.table["html_table_content"] = rows

    def render(
        self,
        search_query: str,
        case_sensitive: bool,
        filter_names: list[str],
        presentation_type: str,
    ) -> tuple[str, str]:
        """Filter the table and render the remaining rows as HTML.

        Args:
            search_query: Text (or regular expression) to look for in paper
                titles. An empty string disables the title filter.
            case_sensitive: If false, the lowercased query is matched against
                the lowercased titles.
            filter_names: Labels of the links a paper must have; the
                recognised labels are ``"arXiv"``, ``"GitHub"``, ``"Space"``,
                ``"Model"`` and ``"Dataset"``.
            presentation_type: ``"(ALL)"`` keeps every paper; any other value
                is lowercased and compared with the ``type`` column.

        Returns:
            A ``(summary, html)`` tuple where ``summary`` looks like
            ``"12 (3 claimed)"`` and ``html`` is the rendered ``<table>``.
        """
        df = self.table
        if presentation_type != "(ALL)":
            df = df[df.type == presentation_type.lower()]
        if search_query:
            if case_sensitive:
                df = df[self._match_titles(df.title, search_query)]
            else:
                # NOTE(review): lowercasing the query also lowercases regex
                # escapes (e.g. "\S" becomes "\s").
                df = df[self._match_titles(df.title_lowercase, search_query.lower())]
        df = self.filter_table(
            df,
            has_arxiv="arXiv" in filter_names,
            has_github="GitHub" in filter_names,
            has_hf_space="Space" in filter_names,
            has_hf_model="Model" in filter_names,
            has_hf_dataset="Dataset" in filter_names,
        )
        n_claimed = int((df.n_linked_authors > 0).sum())
        return f"{len(df)} ({n_claimed} claimed)", self.to_html(df, self.table_header)

    @staticmethod
    def filter_table(
        df: pd.DataFrame,
        has_arxiv: bool,
        has_github: bool,
        has_hf_space: bool,
        has_hf_model: bool,
        has_hf_dataset: bool,
    ) -> pd.DataFrame:
        """Keep only the rows that have a non-empty value for every requested link.

        Each ``has_*`` flag, when true, drops the rows whose corresponding
        column (``arxiv``, ``github``, ``hf_space``, ``hf_model``,
        ``hf_dataset``) is the empty string.
        """
        required_columns = (
            ("arxiv", has_arxiv),
            ("github", has_github),
            ("hf_space", has_hf_space),
            ("hf_model", has_hf_model),
            ("hf_dataset", has_hf_dataset),
        )
        for column, required in required_columns:
            if required:
                df = df[df[column] != ""]
        return df

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Wrap ``table_header`` and the cached row snippets of ``df`` in a ``<table>``."""
        table_data = "".join(df.html_table_content)
        return f"""
        <table>
            {table_header}
            {table_data}
        </table>"""