DrishtiSharma committed on
Commit
0d8421c
·
verified ·
1 Parent(s): 242deae

Update test.py

Browse files
Files changed (1) hide show
  1. test.py +210 -89
test.py CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
8
  import plotly.express as px
9
  import tempfile
10
  import time
 
11
  import logging
12
 
13
  # Setup logging
@@ -28,8 +29,8 @@ stakeholder = st.text_input("Enter Stakeholder", value="Patent Attorneys")
28
  llm = None
29
 
30
  # Model Selection
31
- #st.header("Model Selection")
32
- model_choice = st.selectbox("Select LLM", ["OpenAI Model","Groq-based LLM"])
33
 
34
  # API Key Validation and LLM Initialization
35
  groq_api_key = os.getenv("GROQ_API_KEY")
@@ -37,19 +38,20 @@ openai_api_key = os.getenv("OPENAI_API_KEY")
37
 
38
  #llm = ChatGroq(groq_api_key=os.getenv("GROQ_API_KEY"), model="groq/llama-3.3-70b-versatile")
39
 
40
- if model_choice == "Groq-based LLM":
41
  if not groq_api_key:
42
  st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
43
  llm = None
44
  else:
45
  llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
46
- elif model_choice == "OpenAI Model":
47
  if not openai_api_key:
48
  st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")
49
  llm = None
50
  else:
51
  llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o")
52
 
 
53
  # Advanced Options
54
  st.sidebar.header("Advanced Options")
55
  enable_advanced_analysis = st.sidebar.checkbox("Enable Advanced Analysis", value=True)
@@ -70,13 +72,20 @@ with st.sidebar.expander("Customize Agent Goals", expanded=False):
70
  )
71
  analyst_goal = st.text_area(
72
  "Analyst Goal",
73
- value="Perform detailed statistical analysis of patent filings, growth trends, and innovation distribution."
 
 
 
 
74
  )
75
  else:
76
  planner_goal = "Research trends in patent filings and technological innovation, identify key players, and provide strategic recommendations."
77
  writer_goal = "Craft a professional insights document summarizing trends, strategies, and actionable outcomes for stakeholders."
78
- analyst_goal = "Perform detailed statistical analysis of patent filings, growth trends, and innovation distribution."
79
-
 
 
 
80
 
81
  # Agent Definitions
82
  planner = Agent(
@@ -119,7 +128,7 @@ plan = Task(
119
  "2. Identify key players and emerging technologies.\n"
120
  "3. Provide recommendations for stakeholders on strategic directions.\n"
121
  "4. Identify key statistics such as top regions, top players, and hot areas of innovation.\n"
122
- "5. Limit the output to 500 words."
123
  ),
124
  expected_output="A research document with structured insights, strategic recommendations, and key statistics.",
125
  agent=planner
@@ -131,7 +140,7 @@ write = Task(
131
  "2. Include key findings, visual aids, and actionable strategies.\n"
132
  "3. Suggest strategic directions and highlight untapped innovation areas.\n"
133
  "4. Incorporate summarized tables for key statistics and example inventions.\n"
134
- "5. Limit the document to 650 words."
135
  ),
136
  expected_output="A polished, stakeholder-ready patent insights document with actionable recommendations.",
137
  agent=writer
@@ -139,16 +148,29 @@ write = Task(
139
 
140
  analyse = Task(
141
  description=(
142
- "1. Perform statistical analysis of patent filing trends, innovation hot spots, and growth projections.\n"
143
- "2. Identify top regions, key players, and technology combinations.\n"
144
- "3. Generate visualizations such as heatmaps, bar charts, and multi-line charts for trends.\n"
145
- "4. Provide structured output with fields 'Category' and 'Values' for visualization.\n"
146
- "5. Collaborate with the Planner and Writer to align on data needs."
 
 
 
 
 
 
 
 
 
 
 
 
147
  ),
148
- expected_output="A detailed statistical analysis with actionable insights, heatmaps, and trends.",
149
  agent=analyst
150
  )
151
 
 
152
  crew = Crew(
153
  agents=[planner, analyst, writer],
154
  tasks=[plan, analyse, write],
@@ -212,95 +234,194 @@ def validate_analyst_output(analyst_output):
212
  def create_visualizations(analyst_output):
213
  chart_paths = []
214
  validated_data = validate_analyst_output(analyst_output)
 
215
  if validated_data:
216
- data = pd.DataFrame(validated_data)
217
- try:
218
- if data.empty:
219
- raise ValueError("Data for visualizations is empty.")
220
-
221
- bar_chart = px.bar(data, x="Category", y="Values", title="Patent Trends by Category")
222
- st.plotly_chart(bar_chart)
223
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
224
- bar_chart.write_image(temp_chart.name)
225
- chart_paths.append(temp_chart.name)
226
-
227
- pie_chart = px.pie(data, names="Category", values="Values", title="Category Distribution")
228
- st.plotly_chart(pie_chart)
229
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
230
- pie_chart.write_image(temp_chart.name)
231
- chart_paths.append(temp_chart.name)
232
-
233
- heatmap_chart = px.density_heatmap(data, x="Category", y="Values", title="Regional Patent Density")
234
- st.plotly_chart(heatmap_chart)
235
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
236
- heatmap_chart.write_image(temp_chart.name)
237
- chart_paths.append(temp_chart.name)
238
-
239
- multi_line_chart = px.line(data, x="Category", y="Values", title="Trends Over Time")
240
- st.plotly_chart(multi_line_chart)
241
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
242
- multi_line_chart.write_image(temp_chart.name)
243
- chart_paths.append(temp_chart.name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
- except Exception as e:
246
- logging.error(f"Error generating visualization: {e}")
247
- st.error(f"Error generating visualization: {e}")
248
  return chart_paths
249
 
 
 
250
  def display_table(analyst_output):
251
  table_data = []
252
  validated_data = validate_analyst_output(analyst_output)
 
253
  if validated_data:
254
- data = pd.DataFrame(validated_data)
255
- st.dataframe(data)
256
- table_data = data.to_dict(orient="records")
257
- return table_data
258
 
259
- # Main Execution Block
260
- if st.button("Generate Insights"):
261
- if llm is None:
262
- st.error("Cannot proceed without a valid API key for the selected model.")
263
- else:
264
- with st.spinner('Processing...'):
265
  try:
266
- start_time = time.time()
267
- results = crew.kickoff(inputs={"topic": patent_area, "stakeholder": stakeholder})
268
- elapsed_time = time.time() - start_time
269
-
270
- writer_output = getattr(results.tasks_output[2], "raw", "No details available.")
271
- if writer_output:
272
- st.markdown("### Final Report")
273
- st.write(writer_output)
 
 
 
 
 
 
 
 
 
 
 
 
274
  else:
275
- st.warning("No final report available.")
 
 
 
 
276
 
277
- with st.expander("Explore Detailed Insights"):
278
- tab1, tab2 = st.tabs(["Planner's Insights", "Analyst's Analysis"])
279
 
280
- with tab1:
281
- planner_output = getattr(results.tasks_output[0], "raw", "No details available.")
282
- st.write(planner_output)
283
 
284
- with tab2:
285
- analyst_output = getattr(results.tasks_output[1], "raw", "No details available.")
286
- st.write(analyst_output)
287
 
288
- charts = []
289
- if enable_advanced_analysis:
290
- charts = create_visualizations(analyst_output)
 
291
 
292
- table_data = display_table(analyst_output)
 
293
 
294
- st.success(f"Analysis completed in {elapsed_time:.2f} seconds.")
295
- pdf_path = generate_pdf_report(
296
- writer_output,
297
- charts=charts,
298
- table_data=table_data,
299
- metadata={"Technology Area": patent_area, "Stakeholder": stakeholder}
300
- )
301
- with open(pdf_path, "rb") as report_file:
302
- st.download_button("Download Report", data=report_file, file_name="Patent_Strategy_Report.pdf")
303
 
304
- except Exception as e:
305
- logging.error(f"An error occurred during execution: {e}")
306
- st.error(f"An error occurred during execution: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  import plotly.express as px
9
  import tempfile
10
  import time
11
+ import ast
12
  import logging
13
 
14
  # Setup logging
 
29
  llm = None
30
 
31
  # Model Selection
32
+ model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=1, horizontal=True)
33
+
34
 
35
  # API Key Validation and LLM Initialization
36
  groq_api_key = os.getenv("GROQ_API_KEY")
 
38
 
39
  #llm = ChatGroq(groq_api_key=os.getenv("GROQ_API_KEY"), model="groq/llama-3.3-70b-versatile")
40
 
41
+ if model_choice == "llama-3.3-70b":
42
  if not groq_api_key:
43
  st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
44
  llm = None
45
  else:
46
  llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
47
+ elif model_choice == "GPT-4o":
48
  if not openai_api_key:
49
  st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")
50
  llm = None
51
  else:
52
  llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o")
53
 
54
+
55
  # Advanced Options
56
  st.sidebar.header("Advanced Options")
57
  enable_advanced_analysis = st.sidebar.checkbox("Enable Advanced Analysis", value=True)
 
72
  )
73
  analyst_goal = st.text_area(
74
  "Analyst Goal",
75
+ value=(
76
+ "Perform detailed statistical analysis of patent filings, growth trends, and innovation distribution. "
77
+ "Identify top assignees/companies in the transparent antenna industry. "
78
+ "Provide structured output in a list of dictionaries with 'Category' and 'Values' keys for clear data presentation."
79
+ )
80
  )
81
  else:
82
  planner_goal = "Research trends in patent filings and technological innovation, identify key players, and provide strategic recommendations."
83
  writer_goal = "Craft a professional insights document summarizing trends, strategies, and actionable outcomes for stakeholders."
84
+ analyst_goal = (
85
+ "Perform detailed statistical analysis of patent filings, growth trends, and innovation distribution. "
86
+ "Identify top assignees/companies in the transparent antenna industry. "
87
+ "Provide structured output in a list of dictionaries with 'Category' and 'Values' keys for clear data presentation."
88
+ )
89
 
90
  # Agent Definitions
91
  planner = Agent(
 
128
  "2. Identify key players and emerging technologies.\n"
129
  "3. Provide recommendations for stakeholders on strategic directions.\n"
130
  "4. Identify key statistics such as top regions, top players, and hot areas of innovation.\n"
131
+ "5. Limit the output to 600 words."
132
  ),
133
  expected_output="A research document with structured insights, strategic recommendations, and key statistics.",
134
  agent=planner
 
140
  "2. Include key findings, visual aids, and actionable strategies.\n"
141
  "3. Suggest strategic directions and highlight untapped innovation areas.\n"
142
  "4. Incorporate summarized tables for key statistics and example inventions.\n"
143
+ "5. Limit the document to 600 words."
144
  ),
145
  expected_output="A polished, stakeholder-ready patent insights document with actionable recommendations.",
146
  agent=writer
 
148
 
149
  analyse = Task(
150
  description=(
151
+ "1. Conduct a comprehensive statistical analysis of patent filing trends, innovation hot spots, and future growth projections in the transparent antenna industry.\n"
152
+ "2. Identify and rank the top regions, leading assignees/companies driving innovation.\n"
153
+ "3. Highlight regional innovation trends and the distribution of emerging technologies across different geographies.\n"
154
+ "4. Provide actionable insights and strategic recommendations based on the data.\n"
155
+ "5. Deliver structured output in a list of dictionaries with 'Category' and 'Values' fields:\n"
156
+ " - 'Values' can be:\n"
157
+ " a) A dictionary with counts for quantitative data (e.g., {{'Region A': 120, 'Region B': 95}}),\n"
158
+ " b) A list of key items (technologies, companies, inventors), or\n"
159
+ " c) Descriptive text for qualitative insights.\n"
160
+ "6. Example Output Format:\n"
161
+ "[\n"
162
+ " {{'Category': 'Top Regions', 'Values': {{'North America': 120, 'Europe': 95, 'Asia-Pacific': 85}}}},\n"
163
+ " {{'Category': 'Top Assignees', 'Values': {{'Company A': 40, 'Company B': 35}}}},\n"
164
+ " {{'Category': 'Emerging Technologies', 'Values': ['Graphene Antennas', '5G Integration']}},\n"
165
+ " {{'Category': 'Strategic Insights', 'Values': 'Collaborations between automotive and material science industries are accelerating innovation.'}}\n"
166
+ "]\n"
167
+ "7. Ensure that the output is clean, well-structured, and formatted for use in visualizations and tables."
168
  ),
169
+ expected_output="A structured, well-organized dataset with numeric, list-based, and descriptive insights for comprehensive visual and tabular reporting.",
170
  agent=analyst
171
  )
172
 
173
+
174
  crew = Crew(
175
  agents=[planner, analyst, writer],
176
  tasks=[plan, analyse, write],
 
234
def create_visualizations(analyst_output):
    """Render a chart for each chartable analyst category and export it as PNG.

    ``analyst_output`` is passed through ``validate_analyst_output``; every
    validated item is read as a mapping with ``"Category"`` and ``"Values"``
    entries. The type of ``Values`` decides what happens:

    * ``dict``  - plotted as label/count pairs;
    * ``list``  - plotted as a frequency count of the list items;
    * ``str``   - shown as a one-row table, no chart is produced;
    * otherwise - a warning is shown and the category is skipped.

    A failure in one category is reported and logged, and processing
    continues with the next category.

    Returns:
        list: Paths of the PNG files written for the PDF export. The files
        are created with ``delete=False``, so they are not removed here.
    """
    chart_paths = []
    validated_data = validate_analyst_output(analyst_output)
    if not validated_data:
        return chart_paths

    for item in validated_data:
        category = item["Category"]
        values = item["Values"]

        try:
            if isinstance(values, dict):
                df = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
                chart = _build_count_chart(df, category, "Analysis")

            elif isinstance(values, list):
                # Count how often each item occurs instead of inventing values.
                df = pd.DataFrame(values, columns=["Label"])
                df = df["Label"].value_counts().reset_index()
                df.columns = ["Label", "Count"]
                chart = _build_count_chart(df, category, "Frequency")

            elif isinstance(values, str):
                st.subheader(f"{category} Insights")
                st.table(pd.DataFrame({"Insights": [values]}))
                continue  # Text has nothing to plot.

            else:
                st.warning(f"Unsupported data format for category: {category}")
                continue

            st.plotly_chart(chart)

            # Only reserve the path inside the ``with`` block and let the
            # handle close before Plotly writes to it: re-opening a still-open
            # NamedTemporaryFile by name is not portable (it fails on Windows).
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
                image_path = temp_chart.name
            chart.write_image(image_path)
            chart_paths.append(image_path)

        except Exception as e:
            st.error(f"Failed to generate visualization for {category}: {e}")
            logging.error(f"Error in {category} visualization: {e}")

    return chart_paths


def _build_count_chart(df, category, bar_suffix):
    """Return a pie chart for up to five rows, otherwise a bar chart.

    ``df`` must have ``Label`` and ``Count`` columns. ``bar_suffix`` is the
    word appended to the category in the bar chart title.
    """
    if len(df) <= 5:
        return px.pie(df, names="Label", values="Count", title=f"{category} Distribution")
    return px.bar(df, x="Label", y="Count", title=f"{category} {bar_suffix}")
290
 
291
+
292
+
293
def display_table(analyst_output):
    """Show every analyst category as a table and collect the displayed rows.

    ``analyst_output`` is passed through ``validate_analyst_output``; each
    validated item is read as a mapping with ``"Category"`` and ``"Values"``
    entries. Dict values are shown as Label/Count rows, list values as a
    single ``Items`` column, and strings as a one-row summary table. Any other
    type only produces a warning.

    Returns:
        list: The accumulated row dictionaries. Rows keep the shape of the
        view they came from (``Label``/``Count``, ``Items`` or
        ``Category``/``Values``).
    """
    collected_rows = []
    entries = validate_analyst_output(analyst_output)
    if not entries:
        return collected_rows

    for entry in entries:
        category = entry["Category"]
        values = entry["Values"]

        # A failure in one category must not stop the remaining ones.
        try:
            if isinstance(values, (dict, list)):
                if isinstance(values, dict):
                    frame = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
                    heading = f"{category} (Table View)"
                else:
                    frame = pd.DataFrame(values, columns=["Items"])
                    heading = f"{category} (List View)"
                st.subheader(heading)
                st.dataframe(frame)
                collected_rows += frame.to_dict(orient="records")
                continue

            if isinstance(values, str):
                st.subheader(f"{category} (Summary)")
                st.table(pd.DataFrame({"Insights": [values]}))
                collected_rows.append({"Category": category, "Values": values})
                continue

            st.warning(f"Unsupported data format for category: {category}")

        except Exception as e:
            logging.error(f"Error processing {category}: {e}")
            st.error(f"Failed to display {category} as a table due to an error.")

    return collected_rows
 
332
 
 
 
 
333
 
 
 
 
334
 
335
def parse_analyst_output(raw_output):
    """Convert the analyst's raw text into ``{"Category", "Values"}`` records.

    Two input layouts are understood:

    1. A Python-literal list of dictionaries, each with ``'Category'`` and
       ``'Values'`` keys (optionally wrapped in a Markdown code fence). This
       is the layout the analyst task description asks the model to produce.
    2. Line-oriented blocks: a ``Category: <name>`` line followed by value
       lines. A ``Values:`` header may stand alone or carry the first value
       on the same line.

    In the line-oriented layout every value line is evaluated with
    ``ast.literal_eval``; lines that are not valid literals are kept as plain
    text. A category with exactly one value stores that value directly,
    otherwise it stores the list of values. Categories without any value are
    dropped.

    Args:
        raw_output: Text produced by the analyst task.

    Returns:
        list: One dictionary per category; empty when nothing could be parsed
        or when ``raw_output`` is not a non-blank string.
    """
    if not isinstance(raw_output, str) or not raw_output.strip():
        return []

    # Prefer the whole-document literal form requested by the task prompt.
    records = _parse_literal_records(raw_output)
    if records is not None:
        return records

    category_prefix = "Category:"
    values_prefix = "Values:"

    structured_data = []
    current_category = None
    current_values = []

    for line in raw_output.split("\n"):
        line = line.strip()

        if line.startswith(category_prefix):
            _append_category(structured_data, current_category, current_values)
            # Strip only the leading marker; the name itself may contain it.
            current_category = line[len(category_prefix):].strip()
            current_values = []
            continue

        if line.startswith(values_prefix):
            # Keep anything written after the header on the same line.
            line = line[len(values_prefix):].strip()

        if line and current_category:
            current_values.append(_literal_or_text(line))

    _append_category(structured_data, current_category, current_values)
    return structured_data


def _append_category(structured_data, category, values):
    """Append a finished category, unwrapping a single value from its list."""
    if category and values:
        structured_data.append({
            "Category": category,
            "Values": values if len(values) > 1 else values[0],
        })


def _literal_or_text(text):
    """Return ``text`` evaluated as a Python literal, or ``text`` unchanged."""
    try:
        return ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return text


def _parse_literal_records(raw_output):
    """Return the output as a list of records if it is one literal list, else None."""
    text = raw_output.strip()

    if text.startswith("```"):
        # Drop the opening fence (with optional language tag) and closing fence.
        fenced = text.splitlines()[1:]
        if fenced and fenced[-1].strip().startswith("```"):
            fenced = fenced[:-1]
        text = "\n".join(fenced).strip()

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return None

    if not isinstance(parsed, list):
        return None
    for record in parsed:
        if not (isinstance(record, dict) and "Category" in record and "Values" in record):
            return None
    return parsed
380
+
381
+
382
+ # Main Execution Block
383
+ if st.button("Generate Patent Insights"):
384
+ with st.spinner('Processing...'):
385
+ try:
386
+ start_time = time.time()
387
+ results = crew.kickoff(inputs={"topic": patent_area, "stakeholder": stakeholder})
388
+ elapsed_time = time.time() - start_time
389
+
390
+ writer_output = getattr(results.tasks_output[2], "raw", "No details available.")
391
+ if writer_output:
392
+ st.markdown("### Final Report")
393
+ st.write(writer_output)
394
+ else:
395
+ st.warning("No final report available.")
396
+
397
+ with st.expander("Explore Detailed Insights"):
398
+ tab1, tab2 = st.tabs(["Planner's Insights", "Analyst's Analysis"])
399
+
400
+ with tab1:
401
+ planner_output = getattr(results.tasks_output[0], "raw", "No details available.")
402
+ st.write(planner_output)
403
+
404
+ with tab2:
405
+ analyst_output = getattr(results.tasks_output[1], "raw", "No details available.")
406
+ st.write(analyst_output)
407
+ # Convert raw text to structured data
408
+ if isinstance(analyst_output, str):
409
+ analyst_output = parse_analyst_output(analyst_output)
410
+ st.subheader("Structured Analyst Output")
411
+ st.write(analyst_output)
412
+
413
+
414
+ charts = []
415
+ if enable_advanced_analysis:
416
+ charts = create_visualizations(analyst_output)
417
+
418
+ table_data = display_table(analyst_output)
419
+
420
+ st.success(f"Analysis completed in {elapsed_time:.2f} seconds.")
421
+ pdf_path = generate_pdf_report(writer_output, charts=charts, table_data=table_data, metadata={"Technology Area": patent_area, "Stakeholder": stakeholder})
422
+ with open(pdf_path, "rb") as report_file:
423
+ st.download_button("Download Report", data=report_file, file_name="Patent_Strategy_Report.pdf")
424
+
425
+ except Exception as e:
426
+ logging.error(f"An error occurred during execution: {e}")
427
+ st.error(f"An error occurred during execution: {e}")