Spaces:
Sleeping
Sleeping
ASledziewska
committed on
Commit
•
8ff5a08
1
Parent(s):
2cbbd1d
Update bm25_retreive_question.py
Browse files- bm25_retreive_question.py +32 -3
bm25_retreive_question.py
CHANGED
@@ -1,12 +1,19 @@
|
|
1 |
from rank_bm25 import BM25Okapi
|
2 |
import nltk
|
3 |
from nltk.tokenize import word_tokenize
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
# Download NLTK data for tokenization
|
6 |
nltk.download('punkt')
|
7 |
|
8 |
class QuestionRetriever:
|
9 |
def __init__(self):
|
|
|
|
|
10 |
self.depression_questions = [
|
11 |
"How often have you felt persistently low in mood or sad for most of the day?",
|
12 |
"How often have you lost interest or pleasure in activities you used to enjoy?",
|
@@ -122,11 +129,33 @@ class QuestionRetriever:
|
|
122 |
doc_scores = bm25.get_scores(tokenized_query)
|
123 |
|
124 |
# Get the index of the most relevant document
|
125 |
-
most_relevant_doc_index = doc_scores.argmax()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
# Fetch the corresponding response from the knowledge base
|
128 |
-
response = knowledge_base[most_relevant_doc_index]
|
129 |
-
return
|
130 |
|
131 |
if __name__ == "__main__":
|
132 |
knowledge_base = depression_questions
|
|
|
1 |
from rank_bm25 import BM25Okapi
|
2 |
import nltk
|
3 |
from nltk.tokenize import word_tokenize
|
4 |
+
import numpy as np
|
5 |
+
import streamlit as st
|
6 |
+
import random
|
7 |
+
|
8 |
+
|
9 |
|
10 |
# Download NLTK data for tokenization
|
11 |
nltk.download('punkt')
|
12 |
|
13 |
class QuestionRetriever:
|
14 |
def __init__(self):
|
15 |
+
self.question_history = []
|
16 |
+
|
17 |
self.depression_questions = [
|
18 |
"How often have you felt persistently low in mood or sad for most of the day?",
|
19 |
"How often have you lost interest or pleasure in activities you used to enjoy?",
|
|
|
129 |
doc_scores = bm25.get_scores(tokenized_query)
|
130 |
|
131 |
# Get the index of the most relevant document
|
132 |
+
# most_relevant_doc_index = doc_scores.argmax()
|
133 |
+
|
134 |
+
# Get the indices of documents sorted by score in descending order
|
135 |
+
sorted_indices = np.argsort(doc_scores)[::-1]
|
136 |
+
|
137 |
+
# Flag tracking whether a question not already in the question history has been found
|
138 |
+
found_new_question = False
|
139 |
+
self.question_history = st.session_state.asked_questions
|
140 |
+
print(self.question_history)
|
141 |
+
|
142 |
+
for i in range(len(sorted_indices)):
|
143 |
+
print(f"Checking {i+1} question - Index {sorted_indices[i]}th - {knowledge_base[sorted_indices[i]]}")
|
144 |
+
most_relevant_doc_index = sorted_indices[i]
|
145 |
+
if knowledge_base[most_relevant_doc_index] not in self.question_history:
|
146 |
+
found_new_question = True
|
147 |
+
question = knowledge_base[most_relevant_doc_index]
|
148 |
+
st.session_state.asked_questions.append(question)
|
149 |
+
print(f">>> Returning Top #{i+1} question")
|
150 |
+
return question
|
151 |
+
|
152 |
+
# If every question in the knowledge base has already been asked, fall back to a randomly chosen question
|
153 |
+
if not found_new_question:
|
154 |
+
question = random.choice(knowledge_base)
|
155 |
|
156 |
# Fetch the corresponding response from the knowledge base
|
157 |
+
# response = knowledge_base[most_relevant_doc_index]
|
158 |
+
return question
|
159 |
|
160 |
if __name__ == "__main__":
|
161 |
knowledge_base = depression_questions
|