ASledziewska committed on
Commit
8ff5a08
1 Parent(s): 2cbbd1d

Update bm25_retreive_question.py

Browse files
Files changed (1) hide show
  1. bm25_retreive_question.py +32 -3
bm25_retreive_question.py CHANGED
@@ -1,12 +1,19 @@
1
  from rank_bm25 import BM25Okapi
2
  import nltk
3
  from nltk.tokenize import word_tokenize
 
 
 
 
 
4
 
5
  # Download NLTK data for tokenization
6
  nltk.download('punkt')
7
 
8
  class QuestionRetriever:
9
  def __init__(self):
 
 
10
  self.depression_questions = [
11
  "How often have you felt persistently low in mood or sad for most of the day?",
12
  "How often have you lost interest or pleasure in activities you used to enjoy?",
@@ -122,11 +129,33 @@ class QuestionRetriever:
122
  doc_scores = bm25.get_scores(tokenized_query)
123
 
124
  # Get the index of the most relevant document
125
- most_relevant_doc_index = doc_scores.argmax()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  # Fetch the corresponding response from the knowledge base
128
- response = knowledge_base[most_relevant_doc_index]
129
- return response
130
 
131
  if __name__ == "__main__":
132
  knowledge_base = depression_questions
 
1
  from rank_bm25 import BM25Okapi
2
  import nltk
3
  from nltk.tokenize import word_tokenize
4
+ import numpy as np
5
+ import streamlit as st
6
+ import random
7
+
8
+
9
 
10
  # Download NLTK data for tokenization
11
  nltk.download('punkt')
12
 
13
  class QuestionRetriever:
14
  def __init__(self):
15
+ self.question_history = []
16
+
17
  self.depression_questions = [
18
  "How often have you felt persistently low in mood or sad for most of the day?",
19
  "How often have you lost interest or pleasure in activities you used to enjoy?",
 
129
  doc_scores = bm25.get_scores(tokenized_query)
130
 
131
  # Get the index of the most relevant document
132
+ # most_relevant_doc_index = doc_scores.argmax()
133
+
134
+ # Get the indices of documents sorted by score in descending order
135
+ sorted_indices = np.argsort(doc_scores)[::-1]
136
+
137
+ # Flag to keep track of whether a relevant document index is found
138
+ found_new_question = False
139
+ self.question_history = st.session_state.asked_questions
140
+ print(self.question_history)
141
+
142
+ for i in range(len(sorted_indices)):
143
+ print(f"Checking {i+1} question - Index {sorted_indices[i]}th - {knowledge_base[sorted_indices[i]]}")
144
+ most_relevant_doc_index = sorted_indices[i]
145
+ if knowledge_base[most_relevant_doc_index] not in self.question_history:
146
+ found_new_question = True
147
+ question = knowledge_base[most_relevant_doc_index]
148
+ st.session_state.asked_questions.append(question)
149
+ print(f">>> Returning Top #{i+1} question")
150
+ return question
151
+
152
+ # If every candidate question has already been asked, fall back to a random question from the knowledge base
153
+ if not found_new_question:
154
+ question = random.choice(knowledge_base)
155
 
156
  # Fetch the corresponding response from the knowledge base
157
+ # response = knowledge_base[most_relevant_doc_index]
158
+ return question
159
 
160
  if __name__ == "__main__":
161
  knowledge_base = depression_questions