8bitnand commited on
Commit
c575b59
1 Parent(s): 8b6196b

returning the query itself, IDK why

Browse files
Files changed (2) hide show
  1. model.py +12 -13
  2. search.py +2 -20
model.py CHANGED
@@ -37,12 +37,11 @@ class RAGModel:
37
 
38
  context = "_ " + "\n-".join(c for c in topk_items)
39
 
40
- base_prompt = f"""Based on the follwing context items, please answer the query.
41
- Give time for yourself to read the context and then answer the query.
42
  Do not return thinking process, just return the answer.
43
  If you do not find the answer, or if the query is offesnsive or in any other way harmfull just return "I'm not aware of it"
44
  Now use the following context items to answer the user query.
45
- {context}.
46
  user query : {query}
47
  """
48
 
@@ -56,7 +55,6 @@ class RAGModel:
56
  def answer_query(self, query: str, topk_items: list[str]):
57
 
58
  prompt = self.create_prompt(query, topk_items)
59
- print(prompt)
60
  input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
61
  output = self.model.generate(**input_ids, max_new_tokens=512)
62
  text = self.tokenizer.decode(output[0])
@@ -67,12 +65,13 @@ class RAGModel:
67
  if __name__ == "__main__":
68
 
69
  configs = load_configs(config_file="rag.configs.yml")
70
- query = "what is LLM"
71
- # g = GoogleSearch(query)
72
- # data = g.all_page_data
73
- # d = Document(data, 512)
74
- # s, u = SemanticSearch( "all-mpnet-base-v2", "mps")
75
- # topk = s.semantic_search(query=query, k=32)
76
- # r = RAGModel(configs)
77
- # output = r.answer_query(query=query, topk_items=[""])
78
- # print(output)
 
 
37
 
38
  context = "_ " + "\n-".join(c for c in topk_items)
39
 
40
+ base_prompt = f"""Give time for yourself to read the context and then answer the query.
 
41
  Do not return thinking process, just return the answer.
42
  If you do not find the answer, or if the query is offesnsive or in any other way harmfull just return "I'm not aware of it"
43
  Now use the following context items to answer the user query.
44
+ context: {context}.
45
  user query : {query}
46
  """
47
 
 
55
  def answer_query(self, query: str, topk_items: list[str]):
56
 
57
  prompt = self.create_prompt(query, topk_items)
 
58
  input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
59
  output = self.model.generate(**input_ids, max_new_tokens=512)
60
  text = self.tokenizer.decode(output[0])
 
65
  if __name__ == "__main__":
66
 
67
  configs = load_configs(config_file="rag.configs.yml")
68
+ query = "what is computer vision"
69
+ g = GoogleSearch(query)
70
+ data = g.all_page_data
71
+ d = Document(data, 512)
72
+ doc_chunks = d.doc()
73
+ s = SemanticSearch(doc_chunks, "all-mpnet-base-v2", "mps")
74
+ topk, u = s.semantic_search(query=query, k=32)
75
+ r = RAGModel(configs)
76
+ output = r.answer_query(query=query, topk_items=topk)
77
+ print(output)
search.py CHANGED
@@ -34,12 +34,11 @@ class GoogleSearch:
34
  for link in sublist
35
  if len(link) > 0
36
  ]
37
- print(links)
38
  return links
39
 
40
  def read_url_page(self, url: str) -> str:
41
 
42
- print(url)
43
  response = requests.get(url, headers=self.headers)
44
  response.raise_for_status()
45
  soup = BeautifulSoup(response.text, "html.parser")
@@ -136,7 +135,7 @@ class SemanticSearch:
136
  )
137
 
138
  def semantic_search(self, query: str, k: int = 10):
139
- print("Searhing Top k in document...")
140
  query_embeding = self.get_embeding(query)
141
  doc_embeding = self.get_embeding(self.doc_chunks)
142
  scores = util.dot_score(a=query_embeding, b=doc_embeding)[0]
@@ -147,20 +146,3 @@ class SemanticSearch:
147
  def get_embeding(self, text: Union[list[str], str]):
148
  en = self.st.encode(text)
149
  return en
150
-
151
-
152
- if __name__ == "__main__":
153
-
154
- query = "what is LLM"
155
- g = GoogleSearch(query)
156
- data = g.all_page_data
157
- # d = Document(data, 333)
158
- # doc_chunks = d.doc()
159
- # s = SemanticSearch(doc_chunks, "all-mpnet-base-v2", "mps")
160
- # topk, u = s.semantic_search(query, k=64)
161
- # print(len(topk))
162
- # print(topk, u)
163
-
164
- # g = GoogleSearch("what is LLM")
165
- # d = Document(g.all_page_data)
166
- # print(len(d.doc()[0]))
 
34
  for link in sublist
35
  if len(link) > 0
36
  ]
37
+
38
  return links
39
 
40
  def read_url_page(self, url: str) -> str:
41
 
 
42
  response = requests.get(url, headers=self.headers)
43
  response.raise_for_status()
44
  soup = BeautifulSoup(response.text, "html.parser")
 
135
  )
136
 
137
  def semantic_search(self, query: str, k: int = 10):
138
+ print("Searching Top k in document...")
139
  query_embeding = self.get_embeding(query)
140
  doc_embeding = self.get_embeding(self.doc_chunks)
141
  scores = util.dot_score(a=query_embeding, b=doc_embeding)[0]
 
146
  def get_embeding(self, text: Union[list[str], str]):
147
  en = self.st.encode(text)
148
  return en