8bitnand committed on
Commit
871255a
1 Parent(s): 87d5c64

Added support for streamlit and rag model

Files changed (7)
  1. .gitignore +1 -0
  2. __init__.py +1 -0
  3. __pycache__/google.cpython-39.pyc +0 -0
  4. app.py +32 -3
  5. google.py +10 -7
  6. model.py +71 -8
  7. rag.configs.yml +3 -3
.gitignore ADDED
@@ -0,0 +1 @@
+ __pycache__/
__init__.py ADDED
@@ -0,0 +1 @@
+ from google import GoogleSearch, Document, SemanticSearch
__pycache__/google.cpython-39.pyc DELETED
Binary file (5.39 kB)
 
app.py CHANGED
@@ -1,10 +1,33 @@
+ import sys
  import streamlit as st
+ from google import SemanticSearch, GoogleSearch, Document
+ from model import RAGModel, load_configs
+
+
+ def run_on_start():
+     global r
+     global configs
+     configs = load_configs(config_file="rag.configs.yml")
+     r = RAGModel(configs)
+
+
+ def search(query):
+     g = GoogleSearch(query)
+     data = g.all_page_data
+     d = Document(data, min_char_len=configs["document"]["min_char_length"])
+     st.session_state.doc = d.doc()[0]
+

  st.title("LLM powred Google search")

  if "messages" not in st.session_state:
+     run_on_start()
      st.session_state.messages = []

+ if "doc" not in st.session_state:
+     st.session_state.doc = None
+
+
  for message in st.session_state.messages:
      with st.chat_message(message["role"]):
          st.markdown(message["content"])
@@ -14,10 +37,16 @@ if prompt := st.chat_input("Search Here insetad of Google"):
      st.chat_message("user").markdown(prompt)
      st.session_state.messages.append({"role": "user", "content": prompt})

-     response = (
-         f"Ans - {prompt}"  # TODO add answer to the prompt by calling the answer method
+     search(prompt)
+     s = SemanticSearch(
+         prompt,
+         st.session_state.doc,
+         configs["model"]["embeding_model"],
+         configs["model"]["device"],
      )
-
+     topk = s.semantic_search(query=prompt, k=32)
+     output = r.answer_query(query=prompt, topk_items=topk)
+     response = output
      with st.chat_message("assistant"):
          st.markdown(response)
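For reference, the chat UI in app.py builds on Streamlit's standard session-state chat pattern. Below is a minimal, self-contained sketch of that pattern (a plain echo bot using only the public Streamlit API; the retrieval and RAG calls from this commit are left out):

import streamlit as st

st.title("Minimal chat")

if "messages" not in st.session_state:
    st.session_state.messages = []  # history survives Streamlit reruns

# replay the stored history on every rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("Ask something"):
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    response = f"Echo: {prompt}"  # app.py swaps this for retrieval + generation
    with st.chat_message("assistant"):
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})

The committed app.py replaces the echo line with search(prompt), SemanticSearch, and RAGModel.answer_query, and is launched the usual way with streamlit run app.py.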
google.py CHANGED
@@ -13,7 +13,7 @@ class GoogleSearch:
          escaped_query = urllib.parse.quote_plus(query)
          self.URL = f"https://www.google.com/search?q={escaped_query}"

-         self.headers = headers = {
+         self.headers = {
              "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3538.102 Safari/537.36"
          }
          self.links = self.get_initial_links()
@@ -46,7 +46,7 @@ class GoogleSearch:
          """
          scrape google for the query with keyword based search
          """
-
+         print("Searching Google...")
          response = requests.get(self.URL, headers=self.headers)
          soup = BeautifulSoup(response.text, "html.parser")
          anchors = soup.find_all("a", href=True)
@@ -95,6 +95,7 @@ class Document:
          return min_len_chunks

      def doc(self) -> tuple[list[str], list[str]]:
+         print("Creating Document...")
          chunked_data: list[str] = []
          urls: list[str] = []
          for url, dataitem in self.data:
@@ -108,16 +109,17 @@

  class SemanticSearch:
      def __init__(
-         self, query: str, d: Document, g: GoogleSearch, model_path: str, device: str
+         self, doc_chunks: tuple[list, list], model_path: str, device: str
      ) -> None:
          query = query
-         self.doc_chunks, self.urls = d.doc()
+         self.doc_chunks, self.urls = doc_chunks
          self.st = SentenceTransformer(
              model_path,
              device,
          )

-     def semanti_search(self, query: str, k: int = 10):
+     def semantic_search(self, query: str, k: int = 10):
+         print("Searching Top k in document...")
          query_embeding = self.get_embeding(query)
          doc_embeding = self.get_embeding(self.doc_chunks)
          scores = util.dot_score(a=query_embeding, b=doc_embeding)[0]
@@ -136,8 +138,9 @@ if __name__ == "__main__":
      g = GoogleSearch(query)
      data = g.all_page_data
      d = Document(data, 333)
-     s = SemanticSearch(query, d, g, "all-mpnet-base-v2", "mps")
-     print(len(s.semanti_search(query, k=64)))
+
+     s = SemanticSearch("all-mpnet-base-v2", "mps")
+     print(len(s.semantic_search(query, k=64)))

      # g = GoogleSearch("what is LLM")
      # d = Document(g.all_page_data)
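The new semantic_search ranks document chunks by embedding dot product. Below is a minimal sketch of that ranking step, calling sentence-transformers directly rather than the class's get_embeding helper; the model name is the embeding_model value from rag.configs.yml and the chunks are made-up examples:

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-mpnet-base-v2")

doc_chunks = [
    "Streamlit is a Python framework for building data apps.",
    "A large language model is a neural network trained on large text corpora.",
    "Retrieval-augmented generation grounds answers in retrieved context.",
]
query = "what is an LLM"

query_emb = model.encode(query, convert_to_tensor=True)
chunk_emb = model.encode(doc_chunks, convert_to_tensor=True)

scores = util.dot_score(a=query_emb, b=chunk_emb)[0]  # one score per chunk
top = scores.topk(k=2)
for score, idx in zip(top.values.tolist(), top.indices.tolist()):
    print(f"{score:.3f}  {doc_chunks[idx]}")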
model.py CHANGED
@@ -1,15 +1,78 @@
- from google import SemanticSearch
- from transformers import AutoTokenizer, AutoModel
+ from google import SemanticSearch, GoogleSearch, Document
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from transformers import BitsAndBytesConfig
+ from transformers.utils import is_flash_attn_2_available
+ import yaml
+ import torch
+
+
+ def load_configs(config_file: str) -> dict:
+     with open(config_file, "r") as f:
+         configs = yaml.safe_load(f)
+
+     return configs


  class RAGModel:
      def __init__(self, configs) -> None:
          self.configs = configs
-         model_url = configs["RAG"]["genration_model"]
-         self.model = AutoModel.from_pretrained(model_url)
-         self.tokenizer = AutoTokenizer.from_pretrained(model_url)
+         self.device = configs["model"]["device"]
+         model_url = configs["model"]["genration_model"]
+         # quantization_config = BitsAndBytesConfig(
+         #     load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
+         # )
+
+         self.model = AutoModelForCausalLM.from_pretrained(
+             model_url,
+             torch_dtype=torch.float16,
+             # quantization_config=quantization_config,
+             low_cpu_mem_usage=False,
+             attn_implementation="sdpa",
+         ).to(self.device)
+         self.tokenizer = AutoTokenizer.from_pretrained(
+             model_url,
+         )
+
+     def create_prompt(self, query, topk_items: list[str]):
+
+         context = "_ " + "\n-".join(c for c in topk_items)
+
+         base_prompt = f"""Based on the following context items, please answer the query.
+         Give time for yourself to read the context and then answer the query.
+         Do not return thinking process, just return the answer.
+         If you do not find the answer, or if the query is offensive or in any other way harmful, just return "I'm not aware of it"
+         Now use the following context items to answer the user query.
+         {context}.
+         user query : {query}
+         """
+
+         dialog_template = [{"role": "user", "content": base_prompt}]
+
+         prompt = self.tokenizer.apply_chat_template(
+             conversation=dialog_template, tokenize=False, add_generation_prompt=True
+         )
+         return prompt
+
+     def answer_query(self, query: str, topk_items: list[str]):
+
+         prompt = self.create_prompt(query, topk_items)
+         print(prompt)
+         input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
+         output = self.model.generate(**input_ids, max_new_tokens=512)
+         text = self.tokenizer.decode(output[0])
+
+         return text
+

-     def create_propmt(self, topk_items: list[str]):
+ if __name__ == "__main__":

-
-     def answer_query(self, query: str, context: list[str]) :
+     configs = load_configs(config_file="rag.configs.yml")
+     query = "what is LLM"
+     # g = GoogleSearch(query)
+     # data = g.all_page_data
+     # d = Document(data, 512)
+     # s = SemanticSearch( "all-mpnet-base-v2", "mps")
+     # topk = s.semantic_search(query=query, k=32)
+     r = RAGModel(configs)
+     output = r.answer_query(query=query, topk_items=[""])
+     print(output)
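answer_query follows the usual transformers path: build a prompt from the retrieved context, render it with the tokenizer's chat template, generate, and decode. A rough standalone sketch of that path, assuming access to the gated google/gemma-2b-it checkpoint named in rag.configs.yml (the context item and query below are made up):

from transformers import AutoModelForCausalLM, AutoTokenizer

model_url = "google/gemma-2b-it"  # from rag.configs.yml; gated repo, needs HF access
device = "cpu"                    # the config uses "mps"; use whatever is available

tokenizer = AutoTokenizer.from_pretrained(model_url)
model = AutoModelForCausalLM.from_pretrained(model_url).to(device)

context_items = ["A large language model is a neural network trained on large text corpora."]
query = "what is an LLM"
base_prompt = (
    "Based on the following context items, please answer the query.\n"
    + "\n".join(f"- {c}" for c in context_items)
    + f"\nuser query: {query}"
)

# render the single-turn conversation with the model's chat template
dialog = [{"role": "user", "content": base_prompt}]
prompt = tokenizer.apply_chat_template(
    conversation=dialog, tokenize=False, add_generation_prompt=True
)

inputs = tokenizer(prompt, return_tensors="pt").to(device)
output = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(output[0], skip_special_tokens=True))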
rag.configs.yml CHANGED
@@ -1,8 +1,8 @@
  document:
    min_char_length: 333

- common:
+ model:
    embeding_model: all-mpnet-base-v2
-   genration_model: meta-llama/Llama-2-7b
-   device: cpu
+   genration_model: google/gemma-2b-it
+   device: mps
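A quick way to confirm that the renamed model: section is what load_configs hands to RAGModel and SemanticSearch (expected values shown as comments):

import yaml

with open("rag.configs.yml", "r") as f:
    configs = yaml.safe_load(f)

print(configs["document"]["min_char_length"])  # 333
print(configs["model"]["embeding_model"])      # all-mpnet-base-v2
print(configs["model"]["genration_model"])     # google/gemma-2b-it
print(configs["model"]["device"])              # mps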