"""Gradio front end for browsing, reading and publishing "blue-arxiv" papers.

Papers are Markdown files stored in a Hugging Face dataset repository. Each
file starts with a ``---`` delimited header of ``key: value`` lines (title,
author, tags, abstract, date_published, paperid) followed by the paper body.
"""

import datetime
import html
import os
import re

import gradio as gr
import huggingface_hub as hf

fs = hf.HfFileSystem(token=os.environ["HF_TOKEN"])
datasetdir = "datasets/yoinked/blue-arxiv-papers/"

basecss = """
.caaard-container {
    width: 250px;
    padding: 20px;
    border: 3px solid black;
    border-radius: 15px;
    text-align: left;
}
.title {
    font-size: 24px;
    margin-bottom: 10px;
    text-align: center;
}
.caaard-containers {
    display: flex;
    gap: 20px;
    flex-wrap: wrap;
}
.extra-info {
    font-size: 14px;
    line-height: 1.5;
}
.extra-info-paperid {
    font-size: 18px;
    line-height: 1.75;
}"""

# Extensions offered by the "files" tab; shared by the widgets and upload().
UPLOAD_EXTS = ("png", "gif", "webp", "jpg", "wav", "mp3")

# Characters removed from header fields before they are written (primitive
# anti-XSS sanitisation; ":" is dropped so values cannot look like new keys).
_HEADER_STRIP_TABLE = str.maketrans("", "", "<>:|\\")

# Anything outside this set is removed from user-supplied upload names so the
# name cannot contain path separators.
_UNSAFE_NAME_CHARS = re.compile(r"[^A-Za-z0-9._-]")


def _split_front_matter(papertxt):
    """Split raw paper text into ``(metadata, body)``.

    Only the first two ``---`` markers are treated as delimiters, so a
    ``---`` horizontal rule inside the body does not truncate it. Returns
    ``None`` when the text contains no ``---`` marker at all.
    """
    parts = papertxt.split("---", 2)
    if len(parts) < 2:
        return None
    body = parts[2] if len(parts) == 3 else ""
    return parts[1], body


def _front_matter_field(metadata, key):
    """Return the value of the ``key: value`` header line, or ``None``.

    Matching is done per line (at line start) so that a key name occurring
    inside another field's value is not mistaken for the field itself.
    """
    prefix = key + ": "
    for line in metadata.splitlines():
        stripped = line.lstrip()
        if stripped.startswith(prefix):
            return stripped[len(prefix):].strip()
    return None


def _clean_header_value(value):
    """Make a user-supplied string safe to store on one header line."""
    value = (value or "").translate(_HEADER_STRIP_TABLE)
    # Newlines would start a new header line; collapse all whitespace runs.
    value = " ".join(value.split())
    # A run of three or more dashes would be read back as a delimiter.
    return re.sub(r"-{3,}", "--", value)


def get_papers():
    """Return the paths of all ``*.md`` papers in reverse-sorted order."""
    return sorted(fs.glob(datasetdir + "*.md"), reverse=True)


def get_papers_metadata(papiers=None):
    """Read and parse the header of every paper.

    Args:
        papiers: iterable of paper paths; defaults to ``get_papers()``.

    Returns:
        A list of dicts with the keys ``fname``, ``metadata``, ``author``,
        ``title``, ``tags``, ``abstract``, ``date_published`` and
        ``paperid``. Missing text fields become ``"unknown"`` and missing
        tags become ``[]``. Papers without a header or without a
        ``paperid`` are skipped, since they cannot be opened by id.
    """
    if papiers is None:
        papiers = get_papers()

    metadatas = []
    for paper in papiers:
        with fs.open(paper, "r", encoding="utf-8") as f:
            papertxt = f.read()

        split = _split_front_matter(papertxt)
        if split is None:
            continue
        metadata = split[0]

        paperid = _front_matter_field(metadata, "paperid")
        if not paperid:
            continue

        raw_tags = _front_matter_field(metadata, "tags") or ""
        # Split on bare commas and strip, mirroring how search queries are
        # parsed, so "a,b" and "a, b" yield the same tags.
        tags = [tag.strip() for tag in raw_tags.split(",") if tag.strip()]

        metadatas.append({
            "fname": paper,
            "metadata": metadata,
            "author": _front_matter_field(metadata, "author") or "unknown",
            "title": _front_matter_field(metadata, "title") or "unknown",
            "tags": tags,
            "abstract": _front_matter_field(metadata, "abstract") or "unknown",
            "date_published": (
                _front_matter_field(metadata, "date_published") or "unknown"
            ),
            "paperid": paperid,
        })
    return metadatas


def make_paper_card(md):
    """Render one paper's metadata dict as an HTML card.

    All values are HTML-escaped because the files may contain arbitrary
    text. The markup uses the classes defined in ``basecss``; the abstract,
    when present, is exposed as the card's hover tooltip.
    """
    title = html.escape(str(md["title"]))
    author = html.escape(str(md["author"]))
    published = html.escape(str(md["date_published"]))
    paper_id = html.escape(str(md["paperid"]))
    tooltip = html.escape(str(md.get("abstract", "")))
    return f"""
<div class="caaard-container" title="{tooltip}">
<div class="title">{title}</div>
<div class="extra-info">author: {author}</div>
<div class="extra-info">published: {published}</div>
<div class="extra-info-paperid">id: {paper_id}</div>
</div>
"""


def make_paper_cards(tags=""):
    """Return the HTML for all paper cards, optionally filtered by tag.

    Args:
        tags: comma separated tag names. A paper is kept when it carries at
            least one of them. Empty or whitespace-only input shows every
            paper.
    """
    mds = get_papers_metadata()
    # Empty entries (e.g. from a trailing comma) are dropped so they cannot
    # match anything.
    wanted = {tag.strip() for tag in (tags or "").split(",") if tag.strip()}
    if wanted:
        mds = [md for md in mds if wanted.intersection(md["tags"])]
    cards = "\n".join(make_paper_card(md) for md in mds)
    return f'<div class="caaard-containers">\n{cards}\n</div>\n'


def get_paper_markdown(paperid):
    """Return the Markdown body of the paper whose header id is ``paperid``.

    Returns ``"## paper not found"`` when no paper carries that id.
    """
    paperid = (paperid or "").strip()
    fname = None
    for paper in get_papers_metadata():
        if paper["paperid"] == paperid:
            fname = paper["fname"]
            break
    print(fname, paperid)
    if fname is None:
        return "## paper not found"

    with fs.open(fname, "r", encoding="utf-8") as f:
        papertxt = f.read()
    split = _split_front_matter(papertxt)
    return split[1] if split is not None else ""


def publish_paper(title, authors, tags, abst, data):
    """Write a new paper file and return a status message for the UI.

    The id has the form ``YYYY-MMDD.N`` where ``N`` is the first index in
    1..100 that is not already taken for today's date.

    Args:
        title, authors, tags, abst: header fields; sanitised before writing.
        data: the Markdown body, stored verbatim after the header.

    Returns:
        A human-readable status string (success or the reason for failure).
    """
    title = _clean_header_value(title)
    authors = _clean_header_value(authors)
    tags = _clean_header_value(tags)
    abst = _clean_header_value(abst)
    if not title:
        return "title is required"

    # Take the timestamp once so the id and date cannot straddle midnight.
    now = datetime.datetime.now()
    day_stamp = f"{now.year}-{now.month:02d}{now.day:02d}"
    for idx in range(1, 101):
        paperid = f"{day_stamp}.{idx}"
        if not fs.exists(datasetdir + paperid + ".md"):
            break
    else:
        return "could not generate paperid, try again tomorrow"

    header_lines = [
        "---",
        f"title: {title}",
        f"author: {authors}",
        f"tags: {tags}",
        f"abstract: {abst}",
        f"date_published: {now.year}-{now.month:02d}-{now.day:02d}",
        f"paperid: {paperid}",
        "---",
        "",
    ]
    raw = "\n".join(header_lines) + (data or "")
    with fs.open(datasetdir + paperid + ".md", "w", encoding="utf-8") as f:
        f.write(raw)
    return f"published as {paperid}"


def makepreview(x):
    """Echo the editor text so the Markdown preview mirrors it."""
    return x


def upload(prefix, fname, ext, file):
    """Store an uploaded file under ``uploads/`` and return a status message.

    Args:
        prefix, fname: joined as ``prefix-fname`` to form the stored name.
            Characters other than letters, digits, ``.``, ``_`` and ``-``
            are removed so the name cannot escape the uploads directory.
        ext: one of ``UPLOAD_EXTS``.
        file: the file content as bytes.
    """
    if file is None:
        return "no file selected"
    if ext not in UPLOAD_EXTS:
        return "pick a filetype first"
    prefix = _UNSAFE_NAME_CHARS.sub("", prefix or "")
    fname = _UNSAFE_NAME_CHARS.sub("", fname or "")
    if not fname:
        return "file name is required (letters, digits, '.', '_' and '-' only)"

    fname = prefix + "-" + fname
    with fs.open(datasetdir + "uploads/" + fname + "." + ext, "wb") as f:
        f.write(file)
    return f"uploaded, use https://huggingface.co/datasets/yoinked/blue-arxiv-papers/resolve/main/uploads/{fname}.{ext} to include in your paper (so like ![image](https://huggingface.co/datasets/yoinked/blue-arxiv-papers/resolve/main/uploads/{fname}.{ext})) for inline img"


with gr.Blocks(css=basecss, theme='NoCrypt/miku') as demo:
    gr.Image("./blue-arxiv.png", container=False, label=None, interactive=False, show_fullscreen_button=False, show_share_button=False, show_download_button=False)
    with gr.Tab("search"):
        with gr.Row():
            query = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            searchbutton = gr.Button("🔎")
        with gr.Row():
            papercards = gr.HTML("Click the 🔎 to load all papers!")
    with gr.Tab("read"):
        with gr.Row():
            paperid = gr.Textbox(label="paper id", lines=1, interactive=True)
            readbutton = gr.Button("read")
        with gr.Row():
            paper = gr.Markdown()
    with gr.Tab("publish"):
        with gr.Row():
            title = gr.Textbox(label="title", lines=1, interactive=True)
            authors = gr.Textbox(label="author(s)", lines=1, interactive=True)
        with gr.Row():
            tags = gr.Textbox(label="tags (optional, comma seperated)", lines=1, interactive=True)
            abst = gr.Textbox(label="abriged abstract (aka tooltip)", lines=2, interactive=True)
        # max_lines is a line count, so pass an int rather than the float 1e3.
        markd = gr.Textbox(label="markdown", lines=10, interactive=True, max_lines=1000)
        preview = gr.Markdown()
        with gr.Row():
            status = gr.Textbox(label="status", lines=1, interactive=False)
            publishbutton = gr.Button("publish")
    with gr.Tab("files"):
        with gr.Row():
            prefix = gr.Textbox(label="prefix", lines=1, interactive=True)
            file_name = gr.Textbox(label="file name", lines=1, interactive=True)
        with gr.Row():
            file = gr.File(label="file", file_types=["." + e for e in UPLOAD_EXTS], type="binary")
            fileext = gr.Dropdown(label="filetype", choices=list(UPLOAD_EXTS))
        uploadbutton = gr.Button("upload")
        statii = gr.Textbox(label="status", interactive=False)

    uploadbutton.click(fn=upload, inputs=[prefix, file_name, fileext, file], outputs=statii)
    markd.change(fn=makepreview, inputs=markd, outputs=preview)
    publishbutton.click(fn=publish_paper, inputs=[title, authors, tags, abst, markd], outputs=status)
    searchbutton.click(fn=make_paper_cards, inputs=query, outputs=papercards)
    readbutton.click(fn=get_paper_markdown, inputs=paperid, outputs=paper)

demo.launch()