Spaces:

nouamanetazi
/

hf-search

Running

App Files Files Community

nouamanetazi HF staff committed on Feb 8, 2022

Commit

dc1b8a3

0 Parent(s):

initial commit

Browse files

Files changed (8) hide show

.gitignore +145 -0
README.md +6 -0
pages/__init__.py +2 -0
pages/document.py +21 -0
pages/search_engine.py +166 -0
requirements.txt +5 -0
server/api.py +65 -0
streamlit_app.py +49 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,145 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/

README.md ADDED Viewed

	@@ -0,0 +1,6 @@

+# Welcome to Streamlit!
+Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:
+If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
+forums](https://discuss.streamlit.io).

pages/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .search_engine import page as search_engine_page
2	+ from .document import page as document_page

pages/document.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import os
+import re
+import json
+import datetime
+import itertools
+import requests
+from PIL import Image
+import base64
+import streamlit as st
def page():
    """Render the detail view for the currently selected record.

    Reads ``st.session_state["selected_record"]``, uses its ``filename``
    entry (when available) for the browser tab title, and writes the record
    to the page together with a "Back" button that clears the selection.
    """
    record = st.session_state.get("selected_record")

    # The record may be None or lack a "filename" entry; do not let the
    # page title break the whole page in that case.
    try:
        title = f"Record {record['filename']}"
    except (TypeError, KeyError):
        title = "Record"

    st.set_page_config(
        page_title=title,
        page_icon="👨‍⚕️",
        layout="wide",
        initial_sidebar_state="collapsed",
    )

    def _clear_selection():
        # ``set_record`` is defined in streamlit_app.py and is not in scope
        # in this module, so reset the session entry directly.
        st.session_state["selected_record"] = None

    st.button("Back", on_click=_clear_selection)
    st.write(record)

pages/search_engine.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import os
+import re
+import json
+import datetime
+import itertools
+import requests
+from PIL import Image
+import base64
+import streamlit as st
+from huggingface_hub import ModelSearchArguments
+import webbrowser
+from numerize.numerize import numerize
def paginator(label, articles, articles_per_page=10, on_sidebar=True):
    # https://gist.github.com/treuille/2ce0acb6697f205e44e3e0f576e810b7
    """Let the user paginate a set of articles.

    Parameters
    ----------
    label : str
        The label to display over the pagination widget.
    articles : Iterable[Any]
        The articles to display in the paginator.
    articles_per_page : int
        The number of articles to display per page (must be >= 1).
    on_sidebar : bool
        Whether to display the paginator widget on the sidebar.

    Returns
    -------
    Iterator[Tuple[int, Any]]
        An iterator over *only the articles on the selected page*, each
        paired with its zero-based index in the full list. Empty when
        there are no articles.

    Raises
    ------
    ValueError
        If ``articles_per_page`` is smaller than 1.
    """
    if articles_per_page < 1:
        raise ValueError("articles_per_page must be a positive integer")

    articles = list(articles)
    if not articles:
        # Nothing to paginate: skip the widget entirely instead of building
        # a selectbox without options.
        return iter(())

    # Figure out where to display the paginator.
    location = st.sidebar.empty() if on_sidebar else st.empty()

    total = len(articles)
    n_pages = (total - 1) // articles_per_page + 1

    def page_format_func(i):
        # Label each page with the (zero-based) index range it really
        # covers; the last page may be shorter than the others.
        first = i * articles_per_page
        last = min(first + articles_per_page, total) - 1
        return f"Results {first} to {last}"

    # Display a pagination selectbox in the specified location.
    page_number = location.selectbox(label, range(n_pages), format_func=page_format_func)

    # Iterate over the articles in the page to let the user display them.
    min_index = page_number * articles_per_page
    max_index = min_index + articles_per_page
    return itertools.islice(enumerate(articles), min_index, max_index)
def page():
    """Render the search page.

    Draws the sidebar (backend choice, result limit, library/task filters)
    and the search bar, POSTs the query to the local search API and shows
    the returned models through ``paginator``. Request or decoding failures
    are reported with ``st.error`` instead of raising.
    """
    st.set_page_config(
        page_title="HF Search Engine",
        page_icon="🔎",
        layout="wide",
        initial_sidebar_state="auto",
        # menu_items={
        #     "Get Help": "https://www.extremelycoolapp.com/help",
        #     "Report a bug": "https://www.extremelycoolapp.com/bug",
        #     "About": "# This is a header. This is an *extremely* cool app!",
        # },
    )

    ### SIDEBAR
    backend_labels = {"hfapi": "Huggingface API", "custom": "Sentence Bert"}
    search_backend = st.sidebar.selectbox(
        "Search Engine",
        list(backend_labels),
        format_func=backend_labels.get,
    )
    limit_results = st.sidebar.number_input("Limit results", min_value=0, value=10)
    st.sidebar.markdown("# Filters")
    args = ModelSearchArguments()
    # Build the value -> display-name lookups once instead of on every
    # format_func call.
    library_names = {v: k for k, v in args.library.items()}
    task_names = {v: k for k, v in args.pipeline_tag.items()}
    library = st.sidebar.multiselect("Library", args.library.values(), format_func=library_names.get)
    task = st.sidebar.multiselect("Task", args.pipeline_tag.values(), format_func=task_names.get)

    ### MAIN PAGE
    st.markdown(
        "<h1 style='text-align: center; '>🔎🤗 HF Search Engine</h1>",
        unsafe_allow_html=True,
    )
    # Search bar
    search_query = st.text_input(
        "Search for a model in HuggingFace", value="", max_chars=None, key=None, type="default"
    )
    if search_query == "":
        return

    # Search API
    endpoint = "http://localhost:5000"
    headers = {
        "Content-Type": "application/json",
        # NOTE(review): hard-coded placeholder credential — move to config/secrets.
        "api-key": "password",
    }
    search_url = f"{endpoint}/{search_backend}/search"
    filters = {
        "library": library,
        "task": task,
    }
    search_body = {
        "query": search_query,
        "filters": json.dumps(filters, default=str),
        "limit": limit_results,
    }
    try:
        http_response = requests.post(search_url, headers=headers, json=search_body, timeout=60)
        http_response.raise_for_status()
        response = http_response.json()
    except (requests.RequestException, ValueError) as err:
        # Covers connection problems, timeouts, HTTP error statuses and
        # bodies that are not valid JSON.
        st.error(f"Search request failed: {err}")
        return

    record_list = [
        {
            "modelId": record["modelId"],
            "tags": record.get("tags") or [],
            "downloads": record.get("downloads") or 0,
            "likes": record.get("likes") or 0,
        }
        for record in (response.get("value") or [])
    ]
    if not record_list:
        st.write("No Search results, please try again with different keywords")
        return

    count = response.get("count")
    if count is None:
        count = len(record_list)
    st.write(f"Search results ({count}):")
    shown_results = min(count, 100)
    for i, record in paginator(
        f"Select results (showing {shown_results} of {count} results)",
        record_list,
    ):
        col1, col2, col3 = st.columns([5, 1, 1])
        col1.metric("Model", record["modelId"])
        col2.metric("N° downloads", numerize(record["downloads"]))
        col3.metric("N° likes", numerize(record["likes"]))
        model_url = f"https://huggingface.co/{record['modelId']}"
        # NOTE(review): webbrowser.open runs in the Python process serving
        # the app, i.e. it opens a browser on that machine — confirm this is
        # intended for remote deployments.
        st.button(
            "View model",
            on_click=lambda url=model_url: webbrowser.open(url),
            key=record["modelId"],
        )
        st.markdown(f"**Tags:** {'  •  '.join(record['tags'])}")
        # TODO: embed huggingface spaces
        #                 import streamlit.components.v1 as components
        #                 components.html(
        #     f"""
        #     <link rel="stylesheet" href="https://gradio.s3-us-west-2.amazonaws.com/2.6.2/static/bundle.css">
        # <div id="target"></div>
        # <script src="https://gradio.s3-us-west-2.amazonaws.com/2.6.2/static/bundle.js"></script>
        # <script>
        # launchGradioFromSpaces("abidlabs/question-answering", "#target")
        # </script>
        #     """,
        #     height=400,
        # )
        st.markdown("---")

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+altair
+pandas
+streamlit
+huggingface_hub
+numerize

server/api.py ADDED Viewed

	@@ -0,0 +1,65 @@

+from flask import Flask, request
+import json
+from huggingface_hub import HfApi, ModelFilter, DatasetFilter, ModelSearchArguments
+from pprint import pprint
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)


@app.route("/hello")
def hello():
    """Return a fixed HTML greeting."""
    greeting = "<h1 style='color:blue'>Hello There!</h1>"
    return greeting
@app.route("/hfapi/search", methods=["POST"])
def hf_api():
    """Search models through the Hugging Face Hub API.

    Expects a JSON body with ``query`` (str), ``filters`` (a JSON-encoded
    object that may hold ``task`` and ``library``) and an optional ``limit``
    (int, default 5).

    Returns a JSON response ``{"value": [...], "count": n}`` where each
    entry of ``value`` carries ``modelId``, ``tags``, ``downloads`` and
    ``likes``. A malformed request yields a 400 JSON error instead of an
    unhandled exception.
    """
    request_data = request.get_json(silent=True) or {}
    query = request_data.get("query")
    try:
        filters = json.loads(request_data.get("filters") or "{}")
        limit = int(request_data.get("limit", 5))
    except (TypeError, ValueError):
        filters, limit = None, -1
    if not isinstance(filters, dict) or limit < 0:
        return app.response_class(
            json.dumps({"error": "'filters' must be a JSON object and 'limit' a non-negative integer"}),
            status=400,
            mimetype="application/json",
        )
    app.logger.debug("query=%r filters=%r limit=%r", query, filters, limit)

    api = HfApi()
    # Empty selections mean "no filter", so pass None rather than [].
    filt = ModelFilter(
        task=filters.get("task") or None,
        library=filters.get("library") or None,
    )
    models = api.list_models(search=query, filter=filt, limit=limit, full=True)

    res = []
    for model in models:
        info = vars(model)
        res.append(
            {
                "modelId": info.get("modelId"),
                "tags": info.get("tags"),
                "downloads": info.get("downloads"),
                "likes": info.get("likes"),
            }
        )
    # ``count`` is the number of models received, before truncating to ``limit``.
    count = len(res)
    res = res[:limit]
    app.logger.debug("results=%r", res)
    return app.response_class(
        json.dumps({"value": res, "count": count}),
        mimetype="application/json",
    )
@app.route("/custom/search", methods=["POST"])
def main():
    """Placeholder for the custom search backend.

    Reads ``query``, ``filters`` and ``limit`` from the JSON body and logs
    them. The search itself is not implemented yet, so the view answers
    with HTTP 501 and an empty result set in the same ``value``/``count``
    shape as the other search route, rather than returning ``None`` (which
    Flask rejects as an invalid response).
    """
    request_data = request.get_json(silent=True) or {}
    query = request_data.get("query")
    try:
        filters = json.loads(request_data.get("filters") or "{}")
    except (TypeError, ValueError):
        filters = {}
    limit = request_data.get("limit", 5)
    app.logger.debug("query=%r filters=%r limit=%r", query, filters, limit)

    # TODO: implement the custom search, e.g.:
    # records, count_filtered = search_query(query=request_data["query"], filters=filters, top=request_data["top"])
    # assert len(set([record["id"] for record in records])) == len(records), "ids of results are not unique"
    # res = {"value": records, "count": count_filtered}
    # return json.dumps(res, indent=2)
    payload = {
        "value": [],
        "count": 0,
        "error": "custom search backend is not implemented",
    }
    return app.response_class(json.dumps(payload), status=501, mimetype="application/json")
if __name__ == "__main__":
    # Executed as a script: serve the API on the address the Streamlit
    # front end targets (http://localhost:5000).
    app.run(
        host="localhost",
        port=5000,
    )

streamlit_app.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import streamlit as st
+from pages import search_engine_page, document_page
def set_record(record):
    """Store ``record`` as the selected record in the Streamlit session state."""
    st.session_state["selected_record"] = record


# Make sure the key exists the first time the script runs in a session.
if "selected_record" not in st.session_state:
    set_record(None)

# Routing: a truthy selection shows the record page, otherwise the search page.
if st.session_state["selected_record"]:
    document_page()
else:
    search_engine_page()
# Link styling and page footer, injected as raw HTML after the page body.
# The disabled ``position: fixed`` rule uses a real CSS comment; ``#`` is not
# a comment marker in CSS.
_FOOTER_HTML = """<style>
a:link , a:visited{
color: blue;
background-color: transparent;
text-decoration: underline;
}
a:hover,  a:active {
color: red;
background-color: transparent;
text-decoration: underline;
}
.footer {
/* position: fixed; */
left: 0;
bottom: 0;
width: 100%;
background-color: white;
color: black;
text-align: center;
}
</style>
<div class="footer">
<p>Made with ❤️ by <b>Nouamane Tazi</b></p>
</div>
"""

st.markdown(_FOOTER_HTML, unsafe_allow_html=True)