Spaces:
Running
Running
File size: 4,922 Bytes
5332f93 1dfbccd 5332f93 d4a5294 1dfbccd d4a5294 5332f93 471ccf2 5332f93 d4a5294 5332f93 471ccf2 d4a5294 5332f93 1dfbccd 5332f93 471ccf2 5332f93 1dfbccd 5332f93 42f96d8 5332f93 42f96d8 5332f93 d4a5294 5332f93 d4a5294 5332f93 d4a5294 5332f93 d4a5294 5332f93 d4a5294 5332f93 1dfbccd 5332f93 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import pandas as pd
class PaperList:
    """Searchable, filterable HTML table of papers.

    The table is loaded from ``papers.json`` and left-joined with
    ``claim_info.csv`` on ``arxiv_id``. Each row's HTML is rendered once at
    construction time and stored in the ``html_table_content`` column, so
    :meth:`render` only has to filter rows and concatenate strings.
    """

    # (label, width) of each header cell, in display order.
    _HEADER_COLUMNS = (
        ("Title", "38%"),
        ("Authors", "20%"),
        ("Type", "5%"),
        ("arXiv", "5%"),
        ("GitHub", "5%"),
        ("Paper pages", "7%"),
        ("Spaces", "5%"),
        ("Models", "5%"),
        ("Datasets", "5%"),
        ("Claimed", "5%"),
    )

    # (filter name accepted by ``render``, column that must be non-empty).
    _FILTER_COLUMNS = (
        ("arXiv", "arxiv"),
        ("GitHub", "github"),
        ("Space", "hf_space"),
        ("Model", "hf_model"),
        ("Dataset", "hf_dataset"),
    )

    def __init__(self) -> None:
        """Load the paper and claim data from the working directory."""
        self.organization_name = "ICML2023"
        self.table = pd.read_json("papers.json").fillna("")
        claim_info = pd.read_csv(
            "claim_info.csv",
            dtype={"arxiv_id": str, "n_authors": int, "n_linked_authors": int},
        )
        self.table = self.table.merge(right=claim_info, on="arxiv_id", how="left")
        # Papers without a claim_info row come out of the left join as NaN;
        # -1 is the "no claim information" sentinel used when rendering.
        count_columns = ["n_authors", "n_linked_authors"]
        self.table[count_columns] = self.table[count_columns].fillna(-1).astype(int)
        self._preprocess_table()
        header_cells = "\n".join(
            f'<td width="{width}">{label}</td>' for label, width in self._HEADER_COLUMNS
        )
        self.table_header = f"\n<tr>\n{header_cells}\n</tr>"

    @staticmethod
    def _link(url: str, label: str) -> str:
        """Return an anchor opening ``url`` in a new tab, or "" if ``url`` is empty."""
        return f'<a href="{url}" target="_blank">{label}</a>' if url else ""

    @classmethod
    def _render_row(cls, row) -> str:
        """Render one ``itertuples`` row of the table as an HTML ``<tr>``."""
        # NOTE(review): values are interpolated without HTML escaping; this
        # assumes the data files contain no markup characters -- confirm.
        if row.n_linked_authors == -1:
            claimed_paper = ""
        else:
            author_linked = "✅" if row.n_linked_authors > 0 else ""
            n_authors = "" if row.n_authors == -1 else row.n_authors
            claimed_paper = f"{row.n_linked_authors}/{n_authors} {author_linked}"
        cells = (
            # The title link is emitted even when the URL is empty.
            f'<a href="{row.url}" target="_blank">{row.title}</a>',
            row.authors,
            row.type,
            cls._link(row.arxiv, "arXiv"),
            cls._link(row.github, "GitHub"),
            cls._link(row.hf_paper, "Paper page"),
            cls._link(row.hf_space, "Space"),
            cls._link(row.hf_model, "Model"),
            cls._link(row.hf_dataset, "Dataset"),
            claimed_paper,
        )
        body = "\n".join(f"<td>{cell}</td>" for cell in cells)
        return f"\n<tr>\n{body}\n</tr>"

    def _preprocess_table(self) -> None:
        """Add derived columns and the pre-rendered HTML row to ``self.table``.

        Adds ``title_lowercase``, ``arxiv``, ``hf_paper`` and
        ``html_table_content``, and replaces ``authors`` (an iterable of
        names per row) with a comma-separated string.
        """
        table = self.table
        table["title_lowercase"] = table.title.str.lower()
        table["arxiv"] = table.arxiv_id.apply(
            lambda x: f"https://arxiv.org/abs/{x}" if x else ""
        )
        table["hf_paper"] = table.arxiv_id.apply(
            lambda x: f"https://huggingface.co/papers/{x}" if x else ""
        )
        table["authors"] = table.authors.apply(", ".join)
        table["html_table_content"] = [
            self._render_row(row) for row in table.itertuples()
        ]

    @staticmethod
    def _title_mask(titles: pd.Series, query: str, case_sensitive: bool) -> pd.Series:
        """Return a boolean mask of the titles matching ``query``.

        ``query`` is used as a regular expression when it compiles. If it
        does not compile (e.g. an unbalanced ``(`` or ``[`` typed into a
        search box), it is matched as a literal substring instead of raising
        ``re.error``. Case folding is delegated to ``str.contains`` so that
        regex escapes in the query are never altered by lower-casing.
        """
        import re

        try:
            re.compile(query)
        except re.error:
            is_regex = False
        else:
            is_regex = True
        return titles.str.contains(query, case=case_sensitive, regex=is_regex, na=False)

    def render(
        self,
        search_query: str,
        case_sensitive: bool,
        filter_names: list[str],
        presentation_type: str,
    ) -> tuple[str, str]:
        """Filter the table and render the remaining rows as HTML.

        Args:
            search_query: Text to look for in paper titles; empty disables
                the title filter.
            case_sensitive: Whether the title search respects case.
            filter_names: Any of "arXiv", "GitHub", "Space", "Model",
                "Dataset"; each one keeps only rows with that link present.
            presentation_type: "(ALL)" for no filtering; otherwise compared,
                lower-cased, against the ``type`` column.

        Returns:
            A summary string ``"<rows> (<claimed> claimed)"`` and the HTML
            table for the matching rows.
        """
        df = self.table
        if presentation_type != "(ALL)":
            df = df[df["type"] == presentation_type.lower()]
        if search_query:
            df = df[self._title_mask(df["title"], search_query, case_sensitive)]
        df = self.filter_table(
            df, *(name in filter_names for name, _ in self._FILTER_COLUMNS)
        )
        n_claimed = int((df["n_linked_authors"] > 0).sum())
        return f"{len(df)} ({n_claimed} claimed)", self.to_html(df, self.table_header)

    @staticmethod
    def filter_table(
        df: pd.DataFrame,
        has_arxiv: bool,
        has_github: bool,
        has_hf_space: bool,
        has_hf_model: bool,
        has_hf_dataset: bool,
    ) -> pd.DataFrame:
        """Keep only rows whose column is non-empty for every enabled flag."""
        requirements = (
            (has_arxiv, "arxiv"),
            (has_github, "github"),
            (has_hf_space, "hf_space"),
            (has_hf_model, "hf_model"),
            (has_hf_dataset, "hf_dataset"),
        )
        for required, column in requirements:
            if required:
                df = df[df[column] != ""]
        return df

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Wrap the header and the pre-rendered rows of ``df`` in a ``<table>``."""
        table_data = "".join(df.html_table_content)
        return f"\n<table>\n{table_header}\n{table_data}\n</table>"
|