8bitnand commited on
Commit
c575b59
1 Parent(s): 8b6196b

returning the query itself, IDK why

Browse files
Files changed (2) hide show
  1. model.py +12 -13
  2. search.py +2 -20
model.py CHANGED
@@ -37,12 +37,11 @@ class RAGModel:
37
 
38
  context = "_ " + "\n-".join(c for c in topk_items)
39
 
40
- base_prompt = f"""Based on the follwing context items, please answer the query.
41
- Give time for yourself to read the context and then answer the query.
42
  Do not return thinking process, just return the answer.
43
  If you do not find the answer, or if the query is offesnsive or in any other way harmfull just return "I'm not aware of it"
44
  Now use the following context items to answer the user query.
45
- {context}.
46
  user query : {query}
47
  """
48
 
@@ -56,7 +55,6 @@ class RAGModel:
56
  def answer_query(self, query: str, topk_items: list[str]):
57
 
58
  prompt = self.create_prompt(query, topk_items)
59
- print(prompt)
60
  input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
61
  output = self.model.generate(**input_ids, max_new_tokens=512)
62
  text = self.tokenizer.decode(output[0])
@@ -67,12 +65,13 @@ class RAGModel:
67
  if __name__ == "__main__":
68
 
69
  configs = load_configs(config_file="rag.configs.yml")
70
- query = "what is LLM"
71
- # g = GoogleSearch(query)
72
- # data = g.all_page_data
73
- # d = Document(data, 512)
74
- # s, u = SemanticSearch( "all-mpnet-base-v2", "mps")
75
- # topk = s.semantic_search(query=query, k=32)
76
- # r = RAGModel(configs)
77
- # output = r.answer_query(query=query, topk_items=[""])
78
- # print(output)
 
 
37
 
38
  context = "_ " + "\n-".join(c for c in topk_items)
39
 
40
+ base_prompt = f"""Give time for yourself to read the context and then answer the query.
 
41
  Do not return thinking process, just return the answer.
42
  If you do not find the answer, or if the query is offesnsive or in any other way harmfull just return "I'm not aware of it"
43
  Now use the following context items to answer the user query.
44
+ context: {context}.
45
  user query : {query}
46
  """
47
 
 
55
  def answer_query(self, query: str, topk_items: list[str]):
56
 
57
  prompt = self.create_prompt(query, topk_items)
 
58
  input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
59
  output = self.model.generate(**input_ids, max_new_tokens=512)
60
  text = self.tokenizer.decode(output[0])
 
65
  if __name__ == "__main__":
66
 
67
  configs = load_configs(config_file="rag.configs.yml")
68
+ query = "what is computer vision"
69
+ g = GoogleSearch(query)
70
+ data = g.all_page_data
71
+ d = Document(data, 512)
72
+ doc_chunks = d.doc()
73
+ s = SemanticSearch(doc_chunks, "all-mpnet-base-v2", "mps")
74
+ topk, u = s.semantic_search(query=query, k=32)
75
+ r = RAGModel(configs)
76
+ output = r.answer_query(query=query, topk_items=topk)
77
+ print(output)
search.py CHANGED
@@ -34,12 +34,11 @@ class GoogleSearch:
34
  for link in sublist
35
  if len(link) > 0
36
  ]
37
- print(links)
38
  return links
39
 
40
  def read_url_page(self, url: str) -> str:
41
 
42
- print(url)
43
  response = requests.get(url, headers=self.headers)
44
  response.raise_for_status()
45
  soup = BeautifulSoup(response.text, "html.parser")
@@ -136,7 +135,7 @@ class SemanticSearch:
136
  )
137
 
138
  def semantic_search(self, query: str, k: int = 10):
139
- print("Searhing Top k in document...")
140
  query_embeding = self.get_embeding(query)
141
  doc_embeding = self.get_embeding(self.doc_chunks)
142
  scores = util.dot_score(a=query_embeding, b=doc_embeding)[0]
@@ -147,20 +146,3 @@ class SemanticSearch:
147
  def get_embeding(self, text: Union[list[str], str]):
148
  en = self.st.encode(text)
149
  return en
150
-
151
-
152
- if __name__ == "__main__":
153
-
154
- query = "what is LLM"
155
- g = GoogleSearch(query)
156
- data = g.all_page_data
157
- # d = Document(data, 333)
158
- # doc_chunks = d.doc()
159
- # s = SemanticSearch(doc_chunks, "all-mpnet-base-v2", "mps")
160
- # topk, u = s.semantic_search(query, k=64)
161
- # print(len(topk))
162
- # print(topk, u)
163
-
164
- # g = GoogleSearch("what is LLM")
165
- # d = Document(g.all_page_data)
166
- # print(len(d.doc()[0]))
 
34
  for link in sublist
35
  if len(link) > 0
36
  ]
37
+
38
  return links
39
 
40
  def read_url_page(self, url: str) -> str:
41
 
 
42
  response = requests.get(url, headers=self.headers)
43
  response.raise_for_status()
44
  soup = BeautifulSoup(response.text, "html.parser")
 
135
  )
136
 
137
  def semantic_search(self, query: str, k: int = 10):
138
+ print("Searching Top k in document...")
139
  query_embeding = self.get_embeding(query)
140
  doc_embeding = self.get_embeding(self.doc_chunks)
141
  scores = util.dot_score(a=query_embeding, b=doc_embeding)[0]
 
146
  def get_embeding(self, text: Union[list[str], str]):
147
  en = self.st.encode(text)
148
  return en