Amarsaish committed
Commit f9e7655 · verified · 1 Parent(s): 36d2dcd

Rename streamlit_merged.py to app.py

Files changed (1):
  streamlit_merged.py → app.py RENAMED (+272 -272)
Apart from the rename, the only substantive change is that the hard-coded local ffmpeg configuration is commented out:

@@ -13,6 +13,6 @@
 import tempfile
 
-ffmpeg_path = r"C:\Users\AMAR\Downloads\ffmpeg-7.0.2-essentials_build\ffmpeg-7.0.2-essentials_build\bin\ffmpeg.exe"
-os.environ["PATH"] += os.pathsep + os.path.dirname(ffmpeg_path)
-AudioSegment.converter = ffmpeg_path
+#ffmpeg_path = r"C:\Users\AMAR\Downloads\ffmpeg-7.0.2-essentials_build\ffmpeg-7.0.2-essentials_build\bin\ffmpeg.exe"
+#os.environ["PATH"] += os.pathsep + os.path.dirname(ffmpeg_path)
+#AudioSegment.converter = ffmpeg_path
 class VoiceStockMarketEvaluator:
 
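Disabling the hard-coded Windows binary is what lets the app run on a hosted Linux box, where pydub falls back to whatever ffmpeg it finds on PATH. For reference, a portable version of the same configuration (a sketch, not part of this commit) could probe PATH explicitly with the standard-library shutil.which:

import shutil
from pydub import AudioSegment

# Point pydub at an explicit ffmpeg binary only when one is found on PATH;
# otherwise leave its default "ffmpeg" lookup untouched.
ffmpeg_bin = shutil.which("ffmpeg")  # returns None if ffmpeg is not installed
if ffmpeg_bin:
    AudioSegment.converter = ffmpeg_bin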
app.py (full contents):

import streamlit as st
import os
import openai
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from PyPDF2 import PdfReader
import json
import pickle
from pathlib import Path
from pydub import AudioSegment
from groq import Groq
from typing import List, Dict
import tempfile

#ffmpeg_path = r"C:\Users\AMAR\Downloads\ffmpeg-7.0.2-essentials_build\ffmpeg-7.0.2-essentials_build\bin\ffmpeg.exe"
#os.environ["PATH"] += os.pathsep + os.path.dirname(ffmpeg_path)
#AudioSegment.converter = ffmpeg_path


class VoiceStockMarketEvaluator:
    def __init__(self, openai_api_key, groq_api_key):
        # OpenAI configuration
        self.openai_api_key = openai_api_key
        self.pdf_path = "STOCK1.pdf"  # Update this if your PDF lives elsewhere
        openai.api_key = self.openai_api_key
        self.llm_model = "gpt-3.5-turbo"

        # Groq configuration for audio transcription
        self.groq_api_key = groq_api_key
        self.groq_client = Groq(api_key=self.groq_api_key)
        self.whisper_model = 'whisper-large-v3'

        # Questions
        self.questions = {
            1: "What are the different major investors?",
            2: "Who are the major traders?",
            3: "Who are the participants of stock markets?"
        }

        # Load embeddings
        self.load_embeddings()

    def load_embeddings(self):
        """Load embeddings from the pickle file, or create them if it does not exist"""
        pickle_path = Path('stock_market_embeddings.pkl')

        if pickle_path.exists():
            with open(pickle_path, 'rb') as f:
                data = pickle.load(f)
                self.pdf_content = data['content']
                self.pdf_chunks = data['chunks']
                self.pdf_embeddings = data['embeddings']
        else:
            self.create_and_save_embeddings(pickle_path)

    def create_and_save_embeddings(self, pickle_path):
        """Create embeddings and save them to a pickle file"""
        reader = PdfReader(self.pdf_path)
        self.pdf_content = ""
        for page in reader.pages:
            self.pdf_content += page.extract_text()

        self.pdf_chunks = self._chunk_text(self.pdf_content)
        self.pdf_embeddings = self.get_openai_embeddings(self.pdf_chunks)

        data = {
            'content': self.pdf_content,
            'chunks': self.pdf_chunks,
            'embeddings': self.pdf_embeddings
        }
        with open(pickle_path, 'wb') as f:
            pickle.dump(data, f)

    def get_openai_embeddings(self, texts):
        """Generate embeddings using the OpenAI API (pre-1.0 SDK interface)"""
        response = openai.Embedding.create(
            model="text-embedding-ada-002",
            input=texts
        )
        return [embedding['embedding'] for embedding in response['data']]

    def _chunk_text(self, text, chunk_size=500, overlap=50):
        """Split text into overlapping chunks of roughly chunk_size words"""
        words = text.split()
        chunks = []
        # Step by chunk_size - overlap words, so consecutive chunks share
        # `overlap` words of context.
        for i in range(0, len(words), chunk_size - overlap):
            chunk = ' '.join(words[i:i + chunk_size])
            chunks.append(chunk)
        return chunks

    def process_audio_file(self, uploaded_file):
        """Save the uploaded audio file, converting it to WAV if necessary"""
        allowed_formats = ["flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm"]

        # Create a temporary directory to store the processed file
        with tempfile.TemporaryDirectory() as temp_dir:
            # Get the original file extension
            original_extension = uploaded_file.name.split('.')[-1].lower()
            temp_file_path = os.path.join(temp_dir, f"audio_file.{original_extension}")

            # Save the uploaded file
            with open(temp_file_path, 'wb') as f:
                f.write(uploaded_file.getbuffer())

            # If the format is not supported, convert to WAV
            if original_extension not in allowed_formats:
                output_path = os.path.join(temp_dir, "converted_audio.wav")
                audio = AudioSegment.from_file(temp_file_path)
                audio.export(output_path, format="wav")
                final_path = output_path
            else:
                final_path = temp_file_path

            # Transcribe while the temporary directory still exists;
            # audio_to_text opens the file itself.
            return self.audio_to_text(final_path)

    def audio_to_text(self, filepath):
        """Convert audio to English text via Groq's Whisper translations endpoint"""
        with open(filepath, "rb") as file:
            translation = self.groq_client.audio.translations.create(
                file=(filepath, file.read()),
                model=self.whisper_model,
            )
            return translation.text

    def _find_relevant_context(self, question, answer, top_k=3):
        """Find the PDF chunks most relevant to the question-answer pair"""
        search_text = f"{question} {answer}"
        search_embedding = self.get_openai_embeddings([search_text])[0]
        similarities = cosine_similarity([search_embedding], self.pdf_embeddings)[0]
        top_indices = similarities.argsort()[-top_k:][::-1]
        return ' '.join(self.pdf_chunks[i] for i in top_indices)

    def evaluate_answer(self, question_num, user_answer):
        """Evaluate the user's answer for a given question number"""
        if question_num not in self.questions:
            raise ValueError("Invalid question number")

        question = self.questions[question_num]
        relevant_context = self._find_relevant_context(question, user_answer)

        prompt = f"""
        You are an expert evaluating answers about the stock market. Compare the answer with the reference material and provide a detailed analysis.

        Question: {question}

        Reference Material:
        {relevant_context}

        Student Answer:
        {user_answer}

        Analyze this answer carefully and provide:
        1. Points that are correct according to the reference material
        2. Points that are incorrect or need clarification
        3. Important points from the reference material that were missing

        Provide your analysis in JSON format:
        {{
            "correct_points": ["point1", "point2"],
            "incorrect_points": {{"incorrect_statement": "correction_based_on_reference"}},
            "missing_points": ["point1", "point2"],
            "explanation": "Brief explanation of the score"
        }}
        """

        response = openai.ChatCompletion.create(
            model=self.llm_model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3
        )

        try:
            return json.loads(response.choices[0].message.content)
        except json.JSONDecodeError:
            return {"error": "Could not parse response"}


def main():
    st.set_page_config(page_title="Voice-based Stock Market Evaluator", layout="wide")

    st.title("Voice-based Stock Market Evaluator")

    # Create a sidebar for API keys
    st.sidebar.header("API Configuration")

    # Use session state to persist API keys
    if 'openai_key' not in st.session_state:
        st.session_state['openai_key'] = ''
    if 'groq_key' not in st.session_state:
        st.session_state['groq_key'] = ''

    # API key inputs in the sidebar
    openai_key = st.sidebar.text_input(
        "OpenAI API Key",
        type="password",
        value=st.session_state['openai_key'],
        help="Enter your OpenAI API key to use the evaluation features"
    )
    groq_key = st.sidebar.text_input(
        "Groq API Key",
        type="password",
        value=st.session_state['groq_key'],
        help="Enter your Groq API key for audio transcription"
    )

    # Update session state
    st.session_state['openai_key'] = openai_key
    st.session_state['groq_key'] = groq_key

    # Check that both API keys are provided
    if not openai_key or not groq_key:
        st.warning("Please enter both API keys in the sidebar to use the application.")
        return

    try:
        # Initialize the evaluator with the API keys
        evaluator = VoiceStockMarketEvaluator(openai_key, groq_key)

        st.write("Upload an audio file with your answer to get evaluated!")

        # Display questions
        st.header("Available Questions")
        question_num = st.radio(
            "Select a question:",
            options=list(evaluator.questions.keys()),
            format_func=lambda x: f"Question {x}: {evaluator.questions[x]}"
        )

        # File uploader; accept all file types and convert unsupported ones
        uploaded_file = st.file_uploader("Upload your audio response", type=None)

        if uploaded_file is not None:
            with st.spinner("Processing audio file..."):
                try:
                    # Process audio and get the transcription
                    transcribed_text = evaluator.process_audio_file(uploaded_file)

                    st.subheader("Transcribed Text")
                    st.write(transcribed_text)

                    # Evaluate the answer
                    with st.spinner("Analyzing your answer..."):
                        result = evaluator.evaluate_answer(question_num, transcribed_text)

                    # Display results
                    st.subheader("Analysis Results")

                    # Correct points
                    st.success("Correct Points:")
                    for point in result.get("correct_points", []):
                        st.write(f"✓ {point}")

                    # Incorrect points
                    st.error("Points Needing Correction:")
                    for statement, correction in result.get("incorrect_points", {}).items():
                        st.write(f"✗ {statement}")
                        st.write(f"Correction: {correction}")

                    # Missing points
                    st.warning("Missing Points:")
                    for point in result.get("missing_points", []):
                        st.write(f"• {point}")

                    # Explanation
                    st.info(f"Explanation: {result.get('explanation', 'No explanation provided')}")

                except Exception as e:
                    st.error(f"Error processing file: {str(e)}")

    except Exception as e:
        st.error(f"Error initializing the evaluator: {str(e)}")


if __name__ == "__main__":
    main()
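Note on running the renamed app: the openai.Embedding.create and openai.ChatCompletion.create calls only exist in the pre-1.0 OpenAI SDK, and the WAV-conversion branch needs an ffmpeg binary on PATH for pydub. A plausible requirements.txt (the exact pins are an assumption, not part of this commit):

streamlit
openai==0.28.1
scikit-learn
numpy
PyPDF2
pydub
groq

With STOCK1.pdf in the working directory, streamlit run app.py starts the app; the PDF embeddings are computed once and cached in stock_market_embeddings.pkl.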