Tuana committed on
Commit
af1cf81
·
1 Parent(s): 3a8e87d

first commit

Browse files
.github/workflows/hf_sync.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Mirrors the `main` branch of this repository to the Hugging Face Space
# on every push (or when triggered manually).
name: Sync to Hugging Face hub
on:
  push:
    branches: [main]

  # to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          # Full history and LFS objects are required for the push to the Space.
          fetch-depth: 0
          lfs: true
      - name: Push to hub
        env:
          # Repository secret holding a Hugging Face access token with write access.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        # The token is expanded by the shell from the environment, so it never
        # appears literally in the workflow file.
        run: git push --force https://Tuana:$HF_TOKEN@huggingface.co/spaces/Tuana/pubmed-qa-mixtral-haystack main
README.md CHANGED
@@ -1 +1,10 @@
1
- # pubmed-qa-mixtral-haystack
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Ask PubMed
3
+ emoji: 👩🏻‍⚕️
4
+ colorFrom: pink
5
+ colorTo: yellow
6
+ sdk: streamlit
7
+ sdk_version: 1.25.0
8
+ app_file: app.py
9
+ pinned: true
10
+ ---
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from json import JSONDecodeError
3
+ import logging
4
+ from markdown import markdown
5
+ import requests
6
+
7
+ import streamlit as st
8
+
9
+ from utils.haystack import query, start_haystack
10
+ from utils.ui import reset_results, set_initial_state, sidebar
11
+
12
# Streamlit entry point: seeds session state, draws the sidebar (where the
# Hugging Face token is entered), then shows the question box and the answer.
set_initial_state()

sidebar()

st.write("# 👩🏻‍⚕️ Ask PubMed")

# Defaults so the query section below never touches an undefined name when
# no token has been provided yet.
hf_token = st.session_state.get("HUGGING_FACE_TOKEN")
pipeline = None
question = ""
run_pressed = False

if hf_token:
    pipeline = start_haystack(hf_token)
    st.session_state["api_key_configured"] = True
    search_bar, button = st.columns(2)
    # Search bar
    with search_bar:
        question = st.text_input("Ask a question", on_change=reset_results)

    with button:
        # Presumably spacer lines so the button lines up with the text input.
        st.write("")
        st.write("")
        run_pressed = st.button("Search PubMed")
else:
    st.write("Please provide your Hugging Face token to start using the application")
    st.write("If you are using a smaller screen, open the sidebar from the top left to provide your Hugging Face token 🙌")

if pipeline is not None:
    # Run when the button is pressed or when the question differs from the
    # one stored in session state by the previous run.
    run_query = run_pressed or question != st.session_state.get("question")

    # Get results for query
    if run_query and question:
        reset_results()
        st.session_state.question = question
        with st.spinner("🔎"):
            try:
                st.session_state.result = query(question, pipeline)
            except JSONDecodeError:
                st.error(
                    "👓 An error occurred reading the results. Is the document store working?"
                )
            except Exception as e:
                logging.exception(e)
                st.error("🐞 An error occurred during the request.")

    result = st.session_state.get("result")
    if result:
        if isinstance(result, dict):
            # Pipeline output is keyed by component name; the final
            # generator is registered as "llm" and returns "replies".
            replies = result.get("llm", {}).get("replies") or []
            answer = replies[0] if replies else None
        else:
            # query() falls back to a one-element list holding a message.
            answer = result[0]

        if answer:
            st.write(answer)
        else:
            st.warning("The model did not return an answer. Please try rephrasing your question.")
57
+
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
haystack-ai==2.0.0b2
streamlit==1.25.0
pymed
markdown
# utils/config.py imports `dotenv`
python-dotenv
utils/__init__.py ADDED
File without changes
utils/config.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
# Load variables from a .env file (if one is found) before reading the environment.
load_dotenv()

# Hugging Face token taken from the environment; None when the variable is unset.
HUGGING_FACE_TOKEN = os.environ.get('HUGGING_FACE_TOKEN')
utils/haystack.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from haystack import Pipeline
3
+ from pubmed_fetcher import PubMedFetcher
4
+ from haystack.components.generators import HuggingFaceTGIGenerator
5
+ from haystack.components.builders.prompt_builder import PromptBuilder
6
+
7
+ # def start_keyword_pipeline(llm):
8
+ # keyword_prompt_template = """
9
+ # Your task is to convert the follwing question into 3 keywords that can be used to find relevant medical research papers on PubMed.
10
+ # Here is an examples:
11
+ # question: "What are the latest treatments for major depressive disorder?"
12
+ # keywords:
13
+ # Antidepressive Agents
14
+ # Depressive Disorder, Major
15
+ # Treatment-Resistant depression
16
+ # ---
17
+ # question: {{ question }}
18
+ # keywords:
19
+ # """
20
+ # keyword_prompt_builder = PromptBuilder(template=keyword_prompt_template)
21
+
22
+ # keyword_pipeline = Pipeline()
23
+ # keyword_pipeline.add_component("keyword_prompt_builder", keyword_prompt_builder)
24
+ # keyword_pipeline.add_component("keyword_llm", llm)
25
+ # return keyword_pipeline
26
+
27
+ # def start_qa_pipeline(llm):
28
+ # return qa_pipeline
29
+
30
@st.cache_resource(show_spinner=False)
def start_haystack(huggingface_token):
    """Build the two-stage PubMed question-answering pipeline.

    Stage one asks the LLM to turn the question into search keywords and
    feeds them to ``PubMedFetcher``; stage two asks the LLM to answer the
    question using the fetched articles.

    The result is cached per token with ``st.cache_resource`` so that the
    generators are not re-created and warmed up on every Streamlit rerun.

    Args:
        huggingface_token: Hugging Face API token passed to the generators.

    Returns:
        The connected ``Pipeline``. It expects a ``question`` input for both
        ``keyword_prompt_builder`` and ``prompt_builder``.
    """
    model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"

    # Each pipeline node gets its own generator instance: registering one
    # instance under two names makes both nodes share the same sockets.
    keyword_llm = HuggingFaceTGIGenerator(model_name, token=huggingface_token)
    keyword_llm.warm_up()
    llm = HuggingFaceTGIGenerator(model_name, token=huggingface_token)
    llm.warm_up()

    keyword_prompt_template = """
Your task is to convert the follwing question into 3 keywords that can be used to find relevant medical research papers on PubMed.
Here is an examples:
question: "What are the latest treatments for major depressive disorder?"
keywords:
Antidepressive Agents
Depressive Disorder, Major
Treatment-Resistant depression
---
question: {{ question }}
keywords:
"""
    prompt_template = """
Answer the question truthfully based on the given documents.
If the documents don't contain an answer, use your existing knowledge base.

q: {{ question }}
Articles:
{% for article in articles %}
{{article.content}}
keywords: {{article.meta['keywords']}}
title: {{article.meta['title']}}
{% endfor %}

"""
    keyword_prompt_builder = PromptBuilder(template=keyword_prompt_template)
    prompt_builder = PromptBuilder(template=prompt_template)
    fetcher = PubMedFetcher()

    pipe = Pipeline()

    pipe.add_component("keyword_prompt_builder", keyword_prompt_builder)
    pipe.add_component("keyword_llm", keyword_llm)
    pipe.add_component("pubmed_fetcher", fetcher)
    pipe.add_component("prompt_builder", prompt_builder)
    pipe.add_component("llm", llm)

    # question -> keywords -> PubMed articles
    pipe.connect("keyword_prompt_builder.prompt", "keyword_llm.prompt")
    pipe.connect("keyword_llm.replies", "pubmed_fetcher.queries")

    # articles + question -> final answer
    pipe.connect("pubmed_fetcher.articles", "prompt_builder.articles")
    pipe.connect("prompt_builder.prompt", "llm.prompt")
    return pipe
79
+
80
+
81
@st.cache_data(show_spinner=True)
def _run_pipeline(question, _pipeline):
    """Run the pipeline for ``question``; successful results are cached per question.

    The leading underscore on ``_pipeline`` tells Streamlit not to hash that
    argument. Exceptions propagate to the caller and are not cached.
    """
    return _pipeline.run(data={"keyword_prompt_builder": {"question": question},
                               "prompt_builder": {"question": question},
                               "llm": {"generation_kwargs": {"max_new_tokens": 500}}})


def query(query, _pipeline):
    """Answer ``query`` with the given pipeline.

    Args:
        query: The user's question.
        _pipeline: Pipeline as returned by ``start_haystack``.

    Returns:
        The raw pipeline output on success. On any failure, a one-element
        list containing a user-facing error message (the failure is logged
        and, unlike successful results, not cached).
    """
    import logging

    try:
        return _run_pipeline(query, _pipeline)
    except Exception:
        logging.getLogger(__name__).exception("Pipeline run failed for question %r", query)
        return ["An error occurred while answering your question. Please check your Hugging Face token and try again."]
utils/pubmed_fetcher.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pymed import PubMed
2
+ from typing import List
3
+ from haystack import component
4
+ from haystack import Document
5
+
6
# Shared PubMed client used by PubMedFetcher.
# NOTE(review): the email value looks obfuscated in this copy of the file —
# confirm it is a real contact address.
pubmed = PubMed(tool="Haystack2.0Prototype", email="[email protected]")


def documentize(article):
    """Convert a pymed article into a Haystack ``Document``.

    The abstract becomes the document content; title and keywords go into
    ``meta`` (both are read by the QA prompt template).
    """
    return Document(
        content=article.abstract,
        meta={
            'title': article.title,
            # Some pymed result types (e.g. book articles) may not define
            # `keywords`; fall back to an empty list instead of raising.
            'keywords': getattr(article, 'keywords', []),
        },
    )
10
+
11
@component
class PubMedFetcher():
    """Haystack component that looks up PubMed articles for LLM-generated keywords."""

    @component.output_types(articles=List[Document])
    def run(self, queries: list[str]):
        """Fetch one article per keyword line.

        Args:
            queries: Replies from the keyword generator. Only the first reply
                is used; it is expected to contain one search term per line.

        Returns:
            ``{'articles': [...]}`` with the documents that could be fetched.
            Failed lookups are reported on stdout and skipped.
        """
        if not queries:
            return {'articles': []}

        # One search term per non-empty line of the first reply.
        cleaned_queries = [line.strip() for line in queries[0].splitlines() if line.strip()]

        articles = []
        for query in cleaned_queries:
            # Guard each lookup separately so one bad term does not discard
            # the results of the others.
            try:
                response = pubmed.query(query, max_results=1)
                articles.extend([documentize(article) for article in response])
            except Exception as e:
                print(e)
                print(f"Couldn't fetch articles for query: {query}")
        return {'articles': articles}
utils/ui.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+
4
def set_state_if_absent(key, value):
    """Store ``value`` under ``key`` in the session state unless the key already exists."""
    if key in st.session_state:
        return
    st.session_state[key] = value
7
+
8
def set_initial_state():
    """Seed the session-state defaults, leaving any existing values untouched."""
    defaults = (
        ("question", "Ask a question"),
        ("result", None),
        ("haystack_started", False),
    )
    for name, initial in defaults:
        set_state_if_absent(name, initial)
12
+
13
def reset_results(*args):
    """Clear the stored result; any positional arguments are accepted and ignored."""
    st.session_state["result"] = None
15
+
16
def set_hf_api_key(api_key: str):
    """Remember ``api_key`` in the session state as ``HUGGING_FACE_TOKEN``."""
    st.session_state.HUGGING_FACE_TOKEN = api_key
18
+
19
def sidebar():
    """Render the sidebar: usage steps, the token input, and credits.

    A non-empty token entered here is stored in the session state via
    ``set_hf_api_key``.
    """
    with st.sidebar:
        image = Image.open('logo/haystack-logo-colored.png')

        st.markdown(
            "## How to use\n"
            "1. Enter your Hugging Face TGI API key below\n"
            "2. Ask a question\n"
            "3. Enjoy 🤗\n"
        )

        api_key_input = st.text_input(
            "Hugging Face TGI API Key",
            type="password",
            placeholder="Paste your Hugging Face TGI token here",
            # Pre-fill with the token already stored for this session, if any.
            value=st.session_state.get("HUGGING_FACE_TOKEN", ""),
        )

        if api_key_input:
            set_hf_api_key(api_key_input)

        st.markdown("---")
        # NOTE(review): this description and its GitHub links mention
        # `PromptNode` and the "should-i-follow" repository, which looks like
        # text carried over from another app — confirm and update.
        st.markdown(
            "## How this works\n"
            "This app was built with [Haystack](https://haystack.deepset.ai) using the"
            " [`PromptNode`](https://docs.haystack.deepset.ai/docs/prompt_node) and custom [`PromptTemplate`](https://docs.haystack.deepset.ai/docs/prompt_node#templates).\n\n"
            " The source code is also on [GitHub](https://github.com/TuanaCelik/should-i-follow)"
            " with instructions to run locally.\n"
            "You can see how the `PromptNode` was set up [here](https://github.com/TuanaCelik/should-i-follow/blob/main/utils/haystack.py)")
        st.markdown("---")
        st.markdown("Made by [tuanacelik](https://twitter.com/tuanacelik)")
        st.markdown("---")
        st.markdown(
            "Thanks to [mmz_001](https://twitter.com/mm_sasmitha)\n"
            "for open sourcing [KnowledgeGPT](https://knowledgegpt.streamlit.app/) which helped me with this sidebar 🙏🏽"
        )
        st.image(image, width=250)