Spaces:
Sleeping
Sleeping
Update test.py
Browse files
test.py
CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
|
|
8 |
import plotly.express as px
|
9 |
import tempfile
|
10 |
import time
|
|
|
11 |
import logging
|
12 |
|
13 |
# Setup logging
|
@@ -28,8 +29,8 @@ stakeholder = st.text_input("Enter Stakeholder", value="Patent Attorneys")
|
|
28 |
llm = None
|
29 |
|
30 |
# Model Selection
|
31 |
-
|
32 |
-
|
33 |
|
34 |
# API Key Validation and LLM Initialization
|
35 |
groq_api_key = os.getenv("GROQ_API_KEY")
|
@@ -37,19 +38,20 @@ openai_api_key = os.getenv("OPENAI_API_KEY")
|
|
37 |
|
38 |
#llm = ChatGroq(groq_api_key=os.getenv("GROQ_API_KEY"), model="groq/llama-3.3-70b-versatile")
|
39 |
|
40 |
-
if model_choice == "
|
41 |
if not groq_api_key:
|
42 |
st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
|
43 |
llm = None
|
44 |
else:
|
45 |
llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
|
46 |
-
elif model_choice == "
|
47 |
if not openai_api_key:
|
48 |
st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")
|
49 |
llm = None
|
50 |
else:
|
51 |
llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o")
|
52 |
|
|
|
53 |
# Advanced Options
|
54 |
st.sidebar.header("Advanced Options")
|
55 |
enable_advanced_analysis = st.sidebar.checkbox("Enable Advanced Analysis", value=True)
|
@@ -70,13 +72,20 @@ with st.sidebar.expander("Customize Agent Goals", expanded=False):
|
|
70 |
)
|
71 |
analyst_goal = st.text_area(
|
72 |
"Analyst Goal",
|
73 |
-
value=
|
|
|
|
|
|
|
|
|
74 |
)
|
75 |
else:
|
76 |
planner_goal = "Research trends in patent filings and technological innovation, identify key players, and provide strategic recommendations."
|
77 |
writer_goal = "Craft a professional insights document summarizing trends, strategies, and actionable outcomes for stakeholders."
|
78 |
-
analyst_goal =
|
79 |
-
|
|
|
|
|
|
|
80 |
|
81 |
# Agent Definitions
|
82 |
planner = Agent(
|
@@ -119,7 +128,7 @@ plan = Task(
|
|
119 |
"2. Identify key players and emerging technologies.\n"
|
120 |
"3. Provide recommendations for stakeholders on strategic directions.\n"
|
121 |
"4. Identify key statistics such as top regions, top players, and hot areas of innovation.\n"
|
122 |
-
"5. Limit the output to
|
123 |
),
|
124 |
expected_output="A research document with structured insights, strategic recommendations, and key statistics.",
|
125 |
agent=planner
|
@@ -131,7 +140,7 @@ write = Task(
|
|
131 |
"2. Include key findings, visual aids, and actionable strategies.\n"
|
132 |
"3. Suggest strategic directions and highlight untapped innovation areas.\n"
|
133 |
"4. Incorporate summarized tables for key statistics and example inventions.\n"
|
134 |
-
"5. Limit the document to
|
135 |
),
|
136 |
expected_output="A polished, stakeholder-ready patent insights document with actionable recommendations.",
|
137 |
agent=writer
|
@@ -139,16 +148,29 @@ write = Task(
|
|
139 |
|
140 |
analyse = Task(
|
141 |
description=(
|
142 |
-
"1.
|
143 |
-
"2. Identify top regions,
|
144 |
-
"3.
|
145 |
-
"4. Provide
|
146 |
-
"5.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
),
|
148 |
-
expected_output="A
|
149 |
agent=analyst
|
150 |
)
|
151 |
|
|
|
152 |
crew = Crew(
|
153 |
agents=[planner, analyst, writer],
|
154 |
tasks=[plan, analyse, write],
|
@@ -212,95 +234,194 @@ def validate_analyst_output(analyst_output):
|
|
212 |
def create_visualizations(analyst_output):
|
213 |
chart_paths = []
|
214 |
validated_data = validate_analyst_output(analyst_output)
|
|
|
215 |
if validated_data:
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
|
245 |
-
except Exception as e:
|
246 |
-
logging.error(f"Error generating visualization: {e}")
|
247 |
-
st.error(f"Error generating visualization: {e}")
|
248 |
return chart_paths
|
249 |
|
|
|
|
|
250 |
def display_table(analyst_output):
|
251 |
table_data = []
|
252 |
validated_data = validate_analyst_output(analyst_output)
|
|
|
253 |
if validated_data:
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
return table_data
|
258 |
|
259 |
-
#
|
260 |
-
if st.button("Generate Insights"):
|
261 |
-
if llm is None:
|
262 |
-
st.error("Cannot proceed without a valid API key for the selected model.")
|
263 |
-
else:
|
264 |
-
with st.spinner('Processing...'):
|
265 |
try:
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
else:
|
275 |
-
st.warning("
|
|
|
|
|
|
|
|
|
276 |
|
277 |
-
|
278 |
-
tab1, tab2 = st.tabs(["Planner's Insights", "Analyst's Analysis"])
|
279 |
|
280 |
-
with tab1:
|
281 |
-
planner_output = getattr(results.tasks_output[0], "raw", "No details available.")
|
282 |
-
st.write(planner_output)
|
283 |
|
284 |
-
with tab2:
|
285 |
-
analyst_output = getattr(results.tasks_output[1], "raw", "No details available.")
|
286 |
-
st.write(analyst_output)
|
287 |
|
288 |
-
|
289 |
-
|
290 |
-
|
|
|
291 |
|
292 |
-
|
|
|
293 |
|
294 |
-
|
295 |
-
|
296 |
-
writer_output,
|
297 |
-
charts=charts,
|
298 |
-
table_data=table_data,
|
299 |
-
metadata={"Technology Area": patent_area, "Stakeholder": stakeholder}
|
300 |
-
)
|
301 |
-
with open(pdf_path, "rb") as report_file:
|
302 |
-
st.download_button("Download Report", data=report_file, file_name="Patent_Strategy_Report.pdf")
|
303 |
|
304 |
-
|
305 |
-
|
306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
import plotly.express as px
|
9 |
import tempfile
|
10 |
import time
|
11 |
+
import ast
|
12 |
import logging
|
13 |
|
14 |
# Setup logging
|
|
|
29 |
llm = None
|
30 |
|
31 |
# Model Selection
|
32 |
+
model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=1, horizontal=True)
|
33 |
+
|
34 |
|
35 |
# API Key Validation and LLM Initialization
|
36 |
groq_api_key = os.getenv("GROQ_API_KEY")
|
|
|
38 |
|
39 |
#llm = ChatGroq(groq_api_key=os.getenv("GROQ_API_KEY"), model="groq/llama-3.3-70b-versatile")
|
40 |
|
41 |
+
# Build the LLM client for the selected model. When the matching API key is
# absent an error is shown and `llm` stays None.
if model_choice == "llama-3.3-70b":
    if groq_api_key:
        llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
    else:
        st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
        llm = None
elif model_choice == "GPT-4o":
    if openai_api_key:
        llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o")
    else:
        st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")
        llm = None
|
53 |
|
54 |
+
|
55 |
# Advanced Options
|
56 |
st.sidebar.header("Advanced Options")
|
57 |
enable_advanced_analysis = st.sidebar.checkbox("Enable Advanced Analysis", value=True)
|
|
|
72 |
)
|
73 |
analyst_goal = st.text_area(
|
74 |
"Analyst Goal",
|
75 |
+
value=(
|
76 |
+
"Perform detailed statistical analysis of patent filings, growth trends, and innovation distribution. "
|
77 |
+
"Identify top assignees/companies in the transparent antenna industry. "
|
78 |
+
"Provide structured output in a list of dictionaries with 'Category' and 'Values' keys for clear data presentation."
|
79 |
+
)
|
80 |
)
|
81 |
else:
|
82 |
planner_goal = "Research trends in patent filings and technological innovation, identify key players, and provide strategic recommendations."
|
83 |
writer_goal = "Craft a professional insights document summarizing trends, strategies, and actionable outcomes for stakeholders."
|
84 |
+
analyst_goal = (
|
85 |
+
"Perform detailed statistical analysis of patent filings, growth trends, and innovation distribution. "
|
86 |
+
"Identify top assignees/companies in the transparent antenna industry. "
|
87 |
+
"Provide structured output in a list of dictionaries with 'Category' and 'Values' keys for clear data presentation."
|
88 |
+
)
|
89 |
|
90 |
# Agent Definitions
|
91 |
planner = Agent(
|
|
|
128 |
"2. Identify key players and emerging technologies.\n"
|
129 |
"3. Provide recommendations for stakeholders on strategic directions.\n"
|
130 |
"4. Identify key statistics such as top regions, top players, and hot areas of innovation.\n"
|
131 |
+
"5. Limit the output to 600 words."
|
132 |
),
|
133 |
expected_output="A research document with structured insights, strategic recommendations, and key statistics.",
|
134 |
agent=planner
|
|
|
140 |
"2. Include key findings, visual aids, and actionable strategies.\n"
|
141 |
"3. Suggest strategic directions and highlight untapped innovation areas.\n"
|
142 |
"4. Incorporate summarized tables for key statistics and example inventions.\n"
|
143 |
+
"5. Limit the document to 600 words."
|
144 |
),
|
145 |
expected_output="A polished, stakeholder-ready patent insights document with actionable recommendations.",
|
146 |
agent=writer
|
|
|
148 |
|
149 |
# Analyst task: asks for statistics as a Python-literal list of
# {'Category', 'Values'} dictionaries.
# `{topic}` is a placeholder filled from the `inputs` passed to `crew.kickoff`,
# so the analysis follows the technology area entered by the user instead of a
# hard-coded industry. Literal braces in the examples stay doubled (`{{`/`}}`).
analyse = Task(
    description=(
        "1. Conduct a comprehensive statistical analysis of patent filing trends, innovation hot spots, and future growth projections for {topic}.\n"
        "2. Identify and rank the top regions, leading assignees/companies driving innovation.\n"
        "3. Highlight regional innovation trends and the distribution of emerging technologies across different geographies.\n"
        "4. Provide actionable insights and strategic recommendations based on the data.\n"
        "5. Deliver structured output in a list of dictionaries with 'Category' and 'Values' fields:\n"
        "   - 'Values' can be:\n"
        "     a) A dictionary with counts for quantitative data (e.g., {{'Region A': 120, 'Region B': 95}}),\n"
        "     b) A list of key items (technologies, companies, inventors), or\n"
        "     c) Descriptive text for qualitative insights.\n"
        "6. Example Output Format:\n"
        "[\n"
        "  {{'Category': 'Top Regions', 'Values': {{'North America': 120, 'Europe': 95, 'Asia-Pacific': 85}}}},\n"
        "  {{'Category': 'Top Assignees', 'Values': {{'Company A': 40, 'Company B': 35}}}},\n"
        "  {{'Category': 'Emerging Technologies', 'Values': ['Graphene Antennas', '5G Integration']}},\n"
        "  {{'Category': 'Strategic Insights', 'Values': 'Collaborations between automotive and material science industries are accelerating innovation.'}}\n"
        "]\n"
        "7. Ensure that the output is clean, well-structured, and formatted for use in visualizations and tables."
    ),
    expected_output="A structured, well-organized dataset with numeric, list-based, and descriptive insights for comprehensive visual and tabular reporting.",
    agent=analyst
)
|
172 |
|
173 |
+
|
174 |
crew = Crew(
|
175 |
agents=[planner, analyst, writer],
|
176 |
tasks=[plan, analyse, write],
|
|
|
234 |
def create_visualizations(analyst_output):
    """Render one chart per chartable analyst entry and export each as a PNG.

    The input is first passed through ``validate_analyst_output``. For every
    validated entry:

    * dict ``Values`` are charted as label/count pairs;
    * list ``Values`` are charted by how often each item occurs;
    * str ``Values`` are shown as a one-row table and produce no chart;
    * any other type triggers a warning and is skipped.

    Charts with five or fewer rows are drawn as pies, larger ones as bars.
    A failure on one entry is reported and logged, and processing continues
    with the next entry.

    Args:
        analyst_output: Analyst data accepted by ``validate_analyst_output``.

    Returns:
        list[str]: Paths of the PNG files written, in entry order.
    """
    chart_paths = []
    validated_data = validate_analyst_output(analyst_output)
    if not validated_data:
        return chart_paths

    for item in validated_data:
        category = item["Category"]
        values = item["Values"]

        try:
            if isinstance(values, str):
                # Free text has nothing to plot; show it as a table instead.
                st.subheader(f"{category} Insights")
                st.table(pd.DataFrame({"Insights": [values]}))
                continue

            chart = _build_chart(category, values)
            if chart is None:
                st.warning(f"Unsupported data format for category: {category}")
                continue

            # Display the chart in Streamlit
            st.plotly_chart(chart)

            # Save the chart for PDF export
            chart_paths.append(_export_chart_image(chart))

        except Exception as e:
            st.error(f"Failed to generate visualization for {category}: {e}")
            logging.error(f"Error in {category} visualization: {e}")

    return chart_paths


def _build_chart(category, values):
    """Return a Plotly figure for dict or list ``values``; ``None`` for other types."""
    if isinstance(values, dict):
        df = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
        bar_title = f"{category} Analysis"
    elif isinstance(values, list):
        # Convert the list into a frequency count of its items.
        df = pd.DataFrame(values, columns=["Label"])
        df = df["Label"].value_counts().reset_index()
        df.columns = ["Label", "Count"]
        bar_title = f"{category} Frequency"
    else:
        return None

    # Pie chart for few rows, bar chart otherwise.
    if len(df) <= 5:
        return px.pie(df, names="Label", values="Count", title=f"{category} Distribution")
    return px.bar(df, x="Label", y="Count", title=bar_title)


def _export_chart_image(chart):
    """Write ``chart`` to a new temporary PNG file and return its path."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
        image_path = temp_chart.name
    # The handle is closed before exporting: writing to a path that is still
    # held open fails on Windows.
    try:
        chart.write_image(image_path)
    except Exception:
        # Do not leave an empty placeholder file behind when the export fails.
        if os.path.exists(image_path):
            os.remove(image_path)
        raise
    return image_path
|
290 |
|
291 |
+
|
292 |
+
|
293 |
def display_table(analyst_output):
    """Show each validated analyst entry as a table and collect its rows.

    The input is passed through ``validate_analyst_output``. dict ``Values``
    are rendered as a Label/Count dataframe, list ``Values`` as a single
    "Items" column, and str ``Values`` as a one-row "Insights" table. Other
    types produce a warning. An error on one entry is logged and reported,
    and the remaining entries are still processed.

    Args:
        analyst_output: Analyst data accepted by ``validate_analyst_output``.

    Returns:
        list[dict]: Row records from the dataframes, plus one
        ``{"Category", "Values"}`` record per text entry.
    """
    rows = []
    entries = validate_analyst_output(analyst_output)
    if not entries:
        return rows

    for entry in entries:
        category = entry["Category"]
        values = entry["Values"]

        # Keep one bad entry from aborting the remaining tables.
        try:
            if isinstance(values, dict):
                # Label/count pairs
                frame = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
                heading = f"{category} (Table View)"
            elif isinstance(values, list):
                # Plain list of items
                frame = pd.DataFrame(values, columns=["Items"])
                heading = f"{category} (List View)"
            elif isinstance(values, str):
                # Free-text summary
                st.subheader(f"{category} (Summary)")
                st.table(pd.DataFrame({"Insights": [values]}))
                rows.append({"Category": category, "Values": values})
                continue
            else:
                st.warning(f"Unsupported data format for category: {category}")
                continue

            st.subheader(heading)
            st.dataframe(frame)
            rows.extend(frame.to_dict(orient="records"))

        except Exception as e:
            logging.error(f"Error processing {category}: {e}")
            st.error(f"Failed to display {category} as a table due to an error.")

    return rows
|
|
|
332 |
|
|
|
|
|
|
|
333 |
|
|
|
|
|
|
|
334 |
|
335 |
+
def parse_analyst_output(raw_output):
    """Convert the analyst's raw text into a list of Category/Values dicts.

    Two layouts are understood:

    1. A Python-literal list of dictionaries that each carry ``'Category'``
       and ``'Values'`` keys, optionally wrapped in a Markdown code fence.
       The parsed list is returned as-is.
    2. A line-oriented layout::

           Category: Top Regions
           Values:
           {'North America': 120, 'Europe': 95}

       Every non-empty line after a ``Category:`` header is one value, as is
       any text following ``Values:`` on the same line. Each value is parsed
       with ``ast.literal_eval`` and kept as plain text when that fails.

    In the line layout a category with exactly one value stores that value
    directly; several values are stored as a list. Categories without a name
    or without any value are dropped.

    Args:
        raw_output: Raw text produced by the analyst task. ``None`` or an
            empty string yields an empty list.

    Returns:
        list[dict]: Entries of the form ``{"Category": ..., "Values": ...}``.
    """
    text = _strip_code_fence((raw_output or "").strip())
    if not text:
        return []

    literal_entries = _parse_literal_entries(text)
    if literal_entries is not None:
        return literal_entries

    structured_data = []
    current_category = None
    current_values = []

    for line in text.split('\n'):
        line = line.strip()

        if line.startswith("Category:"):
            # A new header closes the previous category.
            if current_category and current_values:
                structured_data.append(_make_entry(current_category, current_values))
            # Strip only the leading marker; the name itself may contain it.
            current_category = line[len("Category:"):].strip()
            current_values = []

        elif line.startswith("Values:"):
            # Keep data written on the same line as the header.
            inline = line[len("Values:"):].strip()
            if inline and current_category:
                current_values.append(_parse_value(inline))

        elif line and current_category:
            current_values.append(_parse_value(line))

    # Save the last processed category.
    if current_category and current_values:
        structured_data.append(_make_entry(current_category, current_values))

    return structured_data


# Every exception ast.literal_eval is documented to raise on bad input.
_LITERAL_ERRORS = (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)


def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence (``` or ```lang) if present."""
    if not text.startswith("```"):
        return text
    _, _, body = text.partition("\n")
    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


def _parse_literal_entries(text):
    """Return ``text`` parsed as a list of Category/Values dicts, or ``None``."""
    try:
        parsed = ast.literal_eval(text)
    except _LITERAL_ERRORS:
        return None
    if isinstance(parsed, dict):
        parsed = [parsed]
    if not isinstance(parsed, list) or not parsed:
        return None
    for entry in parsed:
        if not (isinstance(entry, dict) and "Category" in entry and "Values" in entry):
            return None
    return parsed


def _parse_value(text):
    """Parse ``text`` as a Python literal, falling back to the text itself."""
    try:
        return ast.literal_eval(text)
    except _LITERAL_ERRORS:
        return text


def _make_entry(category, values):
    """Build one output entry; a single value is stored unwrapped."""
    return {
        "Category": category,
        "Values": values if len(values) > 1 else values[0],
    }
|
380 |
+
|
381 |
+
|
382 |
+
# Main Execution Block
|
383 |
+
# Runs the crew when the button is pressed, shows the three task outputs,
# then builds charts, tables and the downloadable PDF report.
# NOTE(review): the nesting below was reconstructed from a flattened diff
# view; confirm that charts/tables belong outside the expander.
if st.button("Generate Patent Insights"):
    if llm is None:
        # No client was created for the selected model (its API key is
        # missing), so do not start the crew.
        st.error("Cannot proceed without a valid API key for the selected model.")
    else:
        with st.spinner('Processing...'):
            try:
                start_time = time.time()
                results = crew.kickoff(inputs={"topic": patent_area, "stakeholder": stakeholder})
                elapsed_time = time.time() - start_time

                # Task order in the crew is plan, analyse, write: index 2 is the writer.
                writer_output = getattr(results.tasks_output[2], "raw", "No details available.")
                if writer_output:
                    st.markdown("### Final Report")
                    st.write(writer_output)
                else:
                    st.warning("No final report available.")

                with st.expander("Explore Detailed Insights"):
                    tab1, tab2 = st.tabs(["Planner's Insights", "Analyst's Analysis"])

                    with tab1:
                        planner_output = getattr(results.tasks_output[0], "raw", "No details available.")
                        st.write(planner_output)

                    with tab2:
                        analyst_output = getattr(results.tasks_output[1], "raw", "No details available.")
                        st.write(analyst_output)
                        # Convert raw text to structured data
                        if isinstance(analyst_output, str):
                            analyst_output = parse_analyst_output(analyst_output)
                            st.subheader("Structured Analyst Output")
                            st.write(analyst_output)

                charts = []
                if enable_advanced_analysis:
                    charts = create_visualizations(analyst_output)

                table_data = display_table(analyst_output)

                st.success(f"Analysis completed in {elapsed_time:.2f} seconds.")
                pdf_path = generate_pdf_report(
                    writer_output,
                    charts=charts,
                    table_data=table_data,
                    metadata={"Technology Area": patent_area, "Stakeholder": stakeholder},
                )
                with open(pdf_path, "rb") as report_file:
                    st.download_button("Download Report", data=report_file, file_name="Patent_Strategy_Report.pdf")

            except Exception as e:
                logging.error(f"An error occurred during execution: {e}")
                st.error(f"An error occurred during execution: {e}")
|