# test-analyst-outputs / interim_v2.py
# DrishtiSharma's picture
# Update interim_v2.py
# 6f7e479 verified
import streamlit as st
from crewai import Agent, Task, Crew
import os
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from fpdf import FPDF
import pandas as pd
import plotly.express as px
import tempfile
import time
import ast
import logging
import traceback
# Setup logging: timestamped INFO-level records for the whole app.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)

# Title and Application Introduction
st.title("Patent Strategy and Innovation Consultant")
st.sidebar.write(
    "This application provides actionable insights and comprehensive analysis for patent-related strategies."
)

# User Input Section (the header is placed in the sidebar; the two text
# inputs are created on the main page).
st.sidebar.header("User Inputs")
patent_area = st.text_input("Enter Patent Technology Area", value="Transparent Antennas for Windshields")
stakeholder = st.text_input("Enter Stakeholder", value="Patent Attorneys")

# Model Selection
model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True)

# API keys are read from the environment only.
groq_api_key = os.getenv("GROQ_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# LLM Initialization: `llm` stays None whenever the key required by the
# selected model is missing; an error is shown to the user in that case.
llm = None
if model_choice == "llama-3.3-70b":
    if groq_api_key:
        llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
    else:
        st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
elif model_choice == "GPT-4o":
    if openai_api_key:
        llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o")
    else:
        st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")

# Advanced Options (both toggles default to enabled)
st.sidebar.header("Advanced Options")
enable_advanced_analysis = st.sidebar.checkbox("Enable Advanced Analysis", value=True)
enable_custom_visualization = st.sidebar.checkbox("Enable Custom Visualizations", value=True)
# Agent Customization
# Defines planner_goal, writer_goal and analyst_goal. The goals contain
# {topic} and {stakeholder} placeholders; they are left un-filled here.
# NOTE: adjacent string literals are concatenated with no separator, so every
# fragment that continues a sentence must end with an explicit space.
st.sidebar.header("Agent Customization")

# The writer goal is the same whether or not customization is enabled, so it
# is defined once and used both as the text-area default and as the fallback.
_DEFAULT_WRITER_GOAL = (
    "Develop a high-impact, professionally structured insights report that integrates verified research data and strategic analysis into the following sections:\n\n"
    "1. Executive Summary: Provide a concise overview of key insights and strategic recommendations strictly aligned with {stakeholder}'s strategic objectives.\n"
    "2. Market Trends & Competitive Landscape: Detailed profiles, benchmarking, and market entry insights.\n"
    "3. Emerging Technologies: Spotlight key technologies, TRL classification, adoption timelines.\n"
    "4. Untapped Innovation Hotspots: Highlight regions with potential, innovation clusters, and readiness analysis.\n"
    "5. Strategic Opportunities & Actionable Recommendations: Patent filing roadmap, risk mitigation, partnerships, and differentiation.\n"
    "6. Future Growth Projections: Market forecasts, technology adoption, scenario analysis.\n"
    "7. Industry Risk & Compliance Analysis: Risk matrix and mitigation strategies.\n"
    "8. Summary & Appendix: Bullet-point insights and supporting data.\n\n"
    "Ensure all insights, emerging technologies, and identified innovation gaps are fact-based, verifiable, and directly relevant to the {topic}. "
    "Present findings with clarity through well-structured sections, bullet points, and tables. Explicitly avoid hallucinated, fabricated, or speculative content throughout the report."
)

with st.sidebar.expander("Customize Agent Goals", expanded=False):
    enable_customization = st.checkbox("Enable Custom Goals")
    if enable_customization:
        # Editable goals: each text area is pre-filled with a suggested prompt.
        planner_goal = st.text_area(
            "Planner Goal",
            value=(
                "Conduct in-depth, data-driven research on leading companies' recent patent filings, technological innovation, R&D investments, "
                "strategic partnerships and market dynamics strictly within the {topic} sector. "
                "Identify key players, emerging technologies, competitor strategies, and market gaps with factually accurate and verifiable data. "
                "Develop a Competitor Benchmark Matrix highlighting leaders in material/structural/software innovations vs. integration techniques. "
                "Summarize key market trends and shifts in competitive positioning with bullet-point insights. "
                "Strictly avoid hallucinated, fabricated, or speculative findings. "
                "Develop a phased Patent Filing Roadmap targeting emerging technologies and high-growth regions tailored to the specific needs "
                "and strategic goals of {stakeholder}. Conduct a Risk Assessment covering regulatory barriers, supply chain risks, and material "
                "costs with mitigation strategies. Suggest Investment Opportunities by identifying startups or strategic partnerships in {topic}. "
                "Present findings using a Phased Patent Strategy Table (short-term, mid-term, long-term) and a Risk Matrix. "
                "Avoid unrelated or generic recommendations."
            )
        )
        writer_goal = st.text_area("Writer Goal", value=_DEFAULT_WRITER_GOAL)
        analyst_goal = st.text_area(
            "Analyst Goal",
            value=(
                "Perform precise, data-driven statistical analysis of patent filings, growth trends, and innovation distribution strictly within the {topic} sector, "
                "specifically customized to the strategic needs of {stakeholder}. "
                "Identify top regions, leading assignees/companies, and emerging technologies that are explicitly and directly relevant to {topic}. "
                "Strictly avoid hallucinated, fabricated, or speculative statistical data and patent numbers in the analysis. "
                "Conduct a thorough market gap analysis, identifying 4-5 highly actionable and verifiable innovation opportunities aligned with {topic}, "
                "emphasizing sustainability, emerging technology integration, industry collaboration, and competitor positioning. "
                "Evaluate competitor patent strategies with factual data to uncover untapped opportunities and competitive advantages. "
                "All innovation hotspots and emerging technology suggestions must be strictly aligned with {topic} - no generic or unrelated recommendations are allowed. "
                "Deliver highly actionable, data-driven insights to support strategic decision-making and long-term growth. "
                "Present findings in a structured, well-organized format using 'Category' and 'Values' keys for easy data interpretation."
            )
        )
    else:
        # Built-in goals used when customization is switched off.
        planner_goal = (
            "Conduct comprehensive, data-driven research on patent filings, technological innovations, and market dynamics strictly within the {topic} sector. "
            "Deliver factually accurate and verifiable insights with no hallucinated, fabricated, or speculative findings. Avoid unrelated or generic recommendations. "
            "Provide detailed outputs in the following structure:\n\n"
            "1. Market Trends & Competitive Landscape:\n"
            "- In-depth company profiles (recent patent filings, R&D investments, partnerships).\n"
            "- Competitor Benchmarking (material innovations vs. integration techniques, etc.).\n"
            "- Competitor Comparison Matrix (tabular comparison on innovation focus, market share, partnerships).\n"
            "- Competitor Expansion Plans (predict market entries, M&A, product launches).\n\n"
            "2. Emerging Technologies:\n"
            "- Identify 4–5 key emerging technologies shaping the {topic} market.\n"
            "- Classify technologies by Technology Readiness Level (TRL).\n"
            "- Highlight disruptive technologies (e.g., metamaterials, quantum antennas).\n"
            "- Analyze breakthroughs related to {topic} domain.\n"
            "- Predict adoption timelines with Technology Adoption Curves.\n\n"
            "3. Untapped Innovation Hotspots:\n"
            "- Provide a Regional Patent Trends.\n"
            "- Identify Innovation Clusters (universities, startups).\n"
            "- Conduct Regulatory Landscape Analysis.\n"
            "- Explore Application Expansion (non-traditional sectors).\n"
            "- Evaluate Infrastructure Readiness (supply chains, logistics).\n\n"
            "4. Strategic Opportunities & Actionable Recommendations:\n"
            "- Develop a Phased Patent Strategy (short-term, mid-term, long-term).\n"
            "- Conduct Risk Assessments (regulatory, supply chain, operational risks).\n"
            "- Suggest Open Innovation Partnerships (universities, labs).\n"
            "- Propose a Supply Chain Strategy (risk mitigation, diversification).\n"
            "- Identify Investment Opportunities (startups, partnerships).\n"
            "- Recommend Competitive Differentiation Strategies (blocking patents, cross-licensing).\n"
            "- Market Entry Roadmap aligning patent filings with market strategies.\n\n"
            "5. Future Growth Projections:\n"
            "- Provide Market Size Forecasts (CAGR, revenue projections for 1–3 and 5–10 years).\n"
            "- Analyze Demand Drivers and Barriers (tech adoption, regulatory hurdles, etc.).\n"
            "- Conduct Scenario Analysis (best-case, worst-case, base-case).\n"
            "- Predict Competitor Expansion Plans.\n"
            "- Present Growth Projection Graphs and Forecast Summaries.\n\n"
            "6. Industry Risk & Compliance Analysis:\n"
            "- Identify Market, Operational, and Regulatory Risks.\n"
            "- Develop Mitigation Strategies (compliance pathways, supply chain diversification).\n"
            "- Provide a Regulatory Risk Matrix.\n\n"
            "7. Summary & Appendix:\n"
            "- Summarize key insights in bullet-point format.\n"
            "- Include supplementary data, raw analysis, and research methodology."
        )
        writer_goal = _DEFAULT_WRITER_GOAL
        analyst_goal = (
            "Perform precise, data-driven statistical analysis of patent filings, growth trends, and innovation distribution strictly within the {topic} sector, "
            "specifically customized to the strategic needs of {stakeholder}. "
            "Identify top regions, leading assignees/companies, and emerging technologies that are explicitly and directly relevant to {topic}. "
            "Identify 4-5 emerging technologies shaping the {topic} market with brief impact analyses. "
            "Highlight untapped markets and geographic regions with low patent activity but high growth potential. "
            "Provide Technology Spotlight Cards detailing each technology's name, description, development stage, and potential market impact. "
            "Strictly avoid hallucinated, fabricated, or speculative statistical data and patent numbers in the analysis. "
            "Conduct a thorough market gap analysis, identifying 4-5 highly actionable and verifiable innovation opportunities aligned with {topic}, "
            "emphasizing sustainability, emerging technology integration, industry collaboration, and competitor positioning. "
            "Evaluate competitor patent strategies with factual data to uncover untapped opportunities and competitive advantages. "
            "All innovation hotspots and emerging technology suggestions must be strictly aligned with {topic} - no generic or unrelated recommendations are allowed. "
            "Deliver highly actionable, data-driven insights to support strategic decision-making and long-term growth. "
            "Present findings in a structured, well-organized format using 'Category' and 'Values' keys for easy data interpretation."
        )
# Agent Definitions
# All three agents run on the selected LLM, never delegate, and log verbosely;
# only their role, goal and backstory differ.
_shared_agent_settings = {
    "allow_delegation": False,
    "verbose": True,
    "llm": llm,
}

# Researches the topic; its output feeds the writer and the analyst.
planner = Agent(
    role="Patent Research Consultant",
    goal=planner_goal,
    backstory="You're tasked with researching {topic} patents and identifying key trends and players. Your work supports the Patent Writer and Data Analyst.",
    **_shared_agent_settings,
)

# Turns the research and analysis into the stakeholder-facing document.
writer = Agent(
    role="Patent Insights Writer",
    goal=writer_goal,
    backstory="Using the research from the Planner and data from the Analyst, craft a professional document summarizing patent insights for {stakeholder}.",
    **_shared_agent_settings,
)

# Produces the statistical insights that the report draws on.
analyst = Agent(
    role="Patent Data Analyst",
    goal=analyst_goal,
    backstory="Analyze patent filing data and innovation trends in {topic} to provide statistical insights. Your analysis will guide the Writer's final report.",
    **_shared_agent_settings,
)
# Task Definitions
# Each task pairs a numbered instruction list with the agent that executes it.
# {topic} and {stakeholder} are placeholders; literal braces in the analyst's
# example output are written doubled ({{ and }}) so they are not mistaken for
# placeholders.

# Research task for the planner agent.
plan = Task(
    description=(
        "1. Conduct comprehensive, fact-based research on recent trends in {topic} patent filings and innovation.\n"
        "2. Identify key players, emerging technologies, and market gaps that are strictly relevant to {topic}.\n"
        "3. Ensure all findings—especially emerging technologies and innovation hotspots—are explicitly aligned with {topic}.\n"
        "4. Avoid speculative, fabricated, or unrelated content entirely.\n"
        "5. Provide actionable, data-backed strategic recommendations aligned with {stakeholder}'s goals.\n"
        "6. Limit the output to 600 words."
    ),
    expected_output="A fact-driven research document with strictly relevant insights, strategic recommendations, and key statistics.",
    agent=planner
)

# Report-writing task for the writer agent.
write = Task(
    description=(
        "1. Use the Planner's and Analyst's strictly topic-aligned outputs to craft a professional patent insights document.\n"
        "2. Include key findings, visual aids, and actionable strategies strictly related to {topic}.\n"
        "3. Highlight strategic directions and strictly relevant innovation opportunities.\n"
        "4. Incorporate well-structured tables for key statistics and example inventions without using any fabricated data or fake patent numbers.\n"
        "5. Avoid any speculative, fabricated, or unrelated content.\n"
        "6. Limit the document to 600 words."
    ),
    expected_output="A polished, stakeholder-ready patent insights report with actionable, strictly relevant recommendations.",
    agent=writer
)

# Statistical-analysis task for the analyst agent. The example output is a
# list of dicts with 'Category', 'Type' and 'Values' keys.
analyse = Task(
    description=(
        "1. Conduct a comprehensive statistical analysis of patent filing trends, innovation hot spots, and future growth projections in the {topic} sector.\n"
        "2. Identify and rank the top regions, leading assignees/companies driving innovation.\n"
        "3. Highlight regional innovation trends and the distribution of emerging technologies across different geographies.\n"
        "4. Provide actionable insights and strategic recommendations based on the data.\n"
        "5. Categorize outputs as either:\n"
        " - 'Data Insight' for visualizations and tables (quantitative data, trends, technologies).\n"
        " - 'Key Insight' for strategic recommendations and innovation opportunities.\n"
        "6. Example Output Format:\n"
        "[\n"
        # Both the inner 'Values' dict and the outer item dict must be closed,
        # otherwise the example shown to the model is not a valid literal.
        " {{'Category': 'Top Regions', 'Type': 'Data Insight', 'Values': {{'North America': 120, 'Europe': 95}}}},\n"
        " {{'Category': 'Emerging Technologies', 'Type': 'Data Insight', 'Values': ['Transparent Conductive Films']}},\n"
        " {{'Category': 'Strategic Insights', 'Type': 'Key Insight', 'Values': 'Collaborate with material science companies to develop advanced transparent antennas.'}},\n"
        " {{'Category': 'Innovation Gaps', 'Type': 'Key Insight', 'Values': 'Limited patents in self-healing transparent materials present a growth opportunity.'}}\n"
        "]\n"
        "7. Ensure all data is factually accurate, verifiable, and strictly aligned with {topic}."
    ),
    expected_output="A structured dataset combining Data Insights for comprehensive visualizations and table reporting, and Key Insights for strategic actions.",
    agent=analyst
)

# The crew lists its tasks as plan, analyse, write; code that reads task
# outputs by position relies on this order.
crew = Crew(
    agents=[planner, analyst, writer],
    tasks=[plan, analyse, write],
    verbose=True
)
# PDF Report Generation
def generate_pdf_report(result, charts=None, table_data=None, metadata=None, key_insights=None):
    """Write the report to a temporary PDF file and return that file's path.

    Args:
        result: Report text placed on the first page.
        charts: Optional iterable of image paths; each is put on its own page.
        table_data: Optional iterable of rows; each row is printed via ``str``.
        metadata: Optional mapping printed as ``key: value`` lines under the title.
        key_insights: Optional iterable of insights printed as a dashed list.

    Returns:
        Path of the generated PDF. The file is created with ``delete=False``,
        so it is not removed automatically.
    """
    font_family = "DejaVu"
    font_files = (
        ('', '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf'),
        ('B', '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf'),
    )

    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as pdf_file:
        document = FPDF()
        document.add_page()

        # Register the regular and bold DejaVu faces from fixed system paths.
        for font_style, font_path in font_files:
            document.add_font('DejaVu', font_style, font_path, uni=True)
        document.set_font(font_family, size=12)
        document.set_auto_page_break(auto=True, margin=15)

        # Centered bold title.
        document.set_font(font_family, size=16, style="B")
        document.cell(200, 10, txt="Patent Strategy and Innovation Report", ln=True, align="C")
        document.ln(10)

        # One line per metadata entry.
        if metadata:
            document.set_font(font_family, size=10)
            for label, entry in metadata.items():
                document.cell(200, 10, txt=f"{label}: {entry}", ln=True)

        # Main report text.
        document.set_font(font_family, size=12)
        document.multi_cell(0, 10, txt=result)

        # Key insights start on a fresh page.
        if key_insights:
            document.add_page()
            document.set_font(font_family, size=14, style="B")
            document.cell(200, 10, txt="Key Strategic Insights", ln=True)
            document.ln(5)
            document.set_font(font_family, size=12)
            for insight in key_insights:
                document.multi_cell(0, 10, txt=f"- {insight}")

        # One page per chart; a chart that cannot be embedded is logged and skipped.
        if charts:
            for image_path in charts:
                try:
                    document.add_page()
                    document.image(image_path, x=10, y=20, w=180)
                except Exception as e:
                    logging.error(f"Error including chart: {e}")

        # Table rows are printed as their string representation, one per line.
        if table_data:
            document.add_page()
            document.set_font(font_family, size=10)
            document.cell(200, 10, txt="Consolidated Data Table:", ln=True, align="L")
            for record in table_data:
                document.cell(200, 10, txt=str(record), ln=True)

        document.output(pdf_file.name)
        return pdf_file.name
# Data Validation
def validate_analyst_output(analyst_output):
    """Return ``analyst_output`` unchanged if it is usable, otherwise ``None``.

    Usable means a non-empty list of dictionaries in which every dictionary
    has both a 'Category' and a 'Values' key. Each failed check shows a
    Streamlit warning before returning ``None``.
    """
    if not analyst_output:
        st.warning("No data available for analysis.")
        return None

    is_list_of_dicts = isinstance(analyst_output, list) and all(
        isinstance(entry, dict) for entry in analyst_output
    )
    if not is_list_of_dicts:
        st.warning("Analyst output must be a list of dictionaries.")
        return None

    required_keys = {'Category', 'Values'}
    for entry in analyst_output:
        if any(key not in entry for key in required_keys):
            st.warning(f"Each dictionary must contain keys: {required_keys}")
            return None

    return analyst_output
# Visualization and Table Display
def _pie_or_bar(frame, category, bar_title_suffix):
    """Build a pie chart for up to five rows, otherwise a bar chart."""
    if len(frame) <= 5:
        return px.pie(frame, names="Label", values="Count", title=f"{category} Distribution")
    return px.bar(frame, x="Label", y="Count", title=f"{category} {bar_title_suffix}")


def create_visualizations(analyst_output):
    """Render a chart per insight in Streamlit and save each chart as a PNG.

    Dict values are charted as label/count pairs, list values as a frequency
    count of their items, and string values are shown as a one-row table with
    no chart. Any other value type produces a warning. A failure while
    handling one insight is reported and logged; the remaining insights are
    still processed.

    Returns:
        List of paths to the temporary PNG files that were written.
    """
    chart_paths = []
    insights = validate_analyst_output(analyst_output)
    if not insights:
        return chart_paths

    for insight in insights:
        category = insight["Category"]
        values = insight["Values"]
        try:
            if isinstance(values, str):
                # Text gets a table instead of a chart.
                st.subheader(f"{category} Insights")
                st.table(pd.DataFrame({"Insights": [values]}))
                continue

            if isinstance(values, dict):
                frame = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
                figure = _pie_or_bar(frame, category, "Analysis")
            elif isinstance(values, list):
                # Count how often each list item occurs.
                counts = pd.DataFrame(values, columns=["Label"])["Label"].value_counts()
                frame = counts.reset_index()
                frame.columns = ["Label", "Count"]
                figure = _pie_or_bar(frame, category, "Frequency")
            else:
                st.warning(f"Unsupported data format for category: {category}")
                continue

            # Show the chart, then keep a PNG copy for the PDF export.
            st.plotly_chart(figure)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as png_file:
                figure.write_image(png_file.name)
                chart_paths.append(png_file.name)
        except Exception as e:
            st.error(f"Failed to generate visualization for {category}: {e}")
            logging.error(f"Error in {category} visualization: {e}")

    return chart_paths
def display_table(analyst_output):
    """Show each insight as a Streamlit table and collect the displayed rows.

    Dict values become a Label/Count dataframe, list values a single-column
    "Items" dataframe, and string values a one-row summary table. Other value
    types produce a warning. An error in one insight is logged and reported
    without stopping the rest.

    Returns:
        List of row dictionaries gathered from everything that was displayed.
    """
    table_data = []
    insights = validate_analyst_output(analyst_output)
    if not insights:
        return table_data

    for insight in insights:
        category = insight["Category"]
        values = insight["Values"]
        # Guard each insight separately so one bad entry cannot crash the page.
        try:
            if isinstance(values, str):
                # Summary view for free text.
                st.subheader(f"{category} (Summary)")
                st.table(pd.DataFrame({"Insights": [values]}))
                table_data.append({"Category": category, "Values": values})
            elif isinstance(values, dict):
                # Table view for label/count pairs.
                frame = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
                st.subheader(f"{category} (Table View)")
                st.dataframe(frame)
                table_data.extend(frame.to_dict(orient="records"))
            elif isinstance(values, list):
                # List view, one item per row.
                frame = pd.DataFrame(values, columns=["Items"])
                st.subheader(f"{category} (List View)")
                st.dataframe(frame)
                table_data.extend(frame.to_dict(orient="records"))
            else:
                st.warning(f"Unsupported data format for category: {category}")
        except Exception as e:
            logging.error(f"Error processing {category}: {e}")
            st.error(f"Failed to display {category} as a table due to an error.")

    return table_data
def _load_structured_output(raw_output):
    """Convert the analyst's raw output into Python objects.

    Non-string input is returned unchanged. For strings, the whole text is
    tried first, then the contents of a Markdown code fence, then the span
    from the first '[' to the last ']'. Each candidate is parsed as a Python
    literal and, failing that, as JSON.

    Raises:
        ValueError: If no candidate could be parsed.
    """
    # Local imports: json and re are not among the file-level imports.
    import json
    import re

    if not isinstance(raw_output, str):
        return raw_output

    text = raw_output.strip()
    candidates = [text]

    # Models often wrap the payload in ```python ... ``` or ```json ... ```.
    fenced = re.search(r"```[A-Za-z0-9_+-]*\s*(.*?)```", text, re.DOTALL)
    if fenced:
        candidates.append(fenced.group(1).strip())

    # Models also add prose before or after the list itself.
    start, end = text.find("["), text.rfind("]")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])

    last_error = None
    for candidate in dict.fromkeys(candidates):  # de-duplicate, keep order
        for loader in (ast.literal_eval, json.loads):
            try:
                return loader(candidate)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as exc:
                last_error = exc
    raise ValueError("analyst output is neither a Python literal nor JSON") from last_error


def parse_analyst_output(raw_output):
    """Split the analyst's output into key insights and data insights.

    Args:
        raw_output: Either a string holding a list of dictionaries (Python
            literal or JSON, optionally inside a Markdown code fence or
            surrounded by prose) or the already-parsed list.

    Returns:
        A ``(key_insights, data_insights)`` tuple. ``key_insights`` holds the
        'Values' of every item whose 'Type' is 'Key Insight'. ``data_insights``
        holds the remaining items; a 'Data Insight' item whose 'Values' is a
        list is expanded into one ``{'Category', 'Values'}`` entry per element.
        Items that are not dictionaries or lack 'Category'/'Values' are skipped.
        Parsing failures are logged and yield whatever was collected so far.
    """
    key_insights = []
    data_insights = []
    try:
        structured_data = _load_structured_output(raw_output)
        # A lone dictionary is treated as a one-item list instead of being
        # iterated key by key.
        if isinstance(structured_data, dict):
            structured_data = [structured_data]
        for item in structured_data:
            if not isinstance(item, dict):
                logging.warning(f"Skipping non-dictionary item: {item}")
                continue
            if "Category" not in item or "Values" not in item:
                logging.warning(f"Missing 'Category' or 'Values' in item: {item}")
                continue
            if item.get("Type") == "Key Insight":
                key_insights.append(item["Values"])
            elif item.get("Type") == "Data Insight":
                # Handle nested structures (e.g., Technology Spotlight Cards)
                if isinstance(item["Values"], list):
                    for sub_item in item["Values"]:
                        data_insights.append({"Category": item["Category"], "Values": sub_item})
                else:
                    data_insights.append(item)
            else:
                # Items with no recognised 'Type' are kept as data insights.
                data_insights.append(item)
    except Exception as e:
        logging.error(f"Error parsing analyst output: {e}")
    return key_insights, data_insights
# Main Execution Block
# On button press: run the crew, show each task's output, build charts and
# tables from the analyst's data, and offer the compiled PDF for download.
# Any exception is logged with its traceback and shown as an error message.
if st.button("Generate Patent Insights"):
    with st.spinner('Processing...'):
        try:
            # Start the timer
            start_time = time.time()
            # Kick off the crew with user inputs
            if not patent_area or not stakeholder:
                st.error("Please provide both Patent Technology Area and Stakeholder.")
            else:
                logging.info(f"Starting analysis with Topic: {patent_area}, Stakeholder: {stakeholder}")
                results = crew.kickoff(inputs={"topic": patent_area, "stakeholder": stakeholder})
                # Calculate elapsed time
                elapsed_time = time.time() - start_time

                # Bind these up front: the PDF step below reads them even when
                # the analyst returned nothing, which would otherwise raise a
                # NameError and surface as an "unexpected error".
                charts = []
                key_insights = []
                data_insights = []

                # Extract Writer's Output (tasks_output is indexed in crew task
                # order: 0 = plan, 1 = analyse, 2 = write).
                writer_output = getattr(results.tasks_output[2], "raw", "No details available.")
                if writer_output and writer_output.strip():
                    st.markdown("### Final Report")
                    st.write(writer_output)
                else:
                    st.warning("No final report available.")

                # Expandable section for detailed insights
                with st.expander("Explore Detailed Insights"):
                    tab1, tab2 = st.tabs(["Planner's Insights", "Analyst's Analysis"])

                    # Planner's Insights
                    with tab1:
                        planner_output = getattr(results.tasks_output[0], "raw", "No details available.")
                        if planner_output and planner_output.strip():
                            st.write(planner_output)
                        else:
                            st.warning("No planner insights available.")

                    # Analyst's Analysis
                    with tab2:
                        analyst_output = getattr(results.tasks_output[1], "raw", "No details available.")
                        if analyst_output and analyst_output.strip():
                            st.write(analyst_output)
                            # Parse Analyst Output (Key Insights + Data Insights)
                            key_insights, data_insights = parse_analyst_output(analyst_output)
                            st.subheader("Structured Analyst Output")
                            st.write(data_insights)
                            # Create Visualizations if enabled
                            if enable_advanced_analysis and data_insights:
                                charts = create_visualizations(data_insights)
                            else:
                                st.info("No data insights available for visualizations.")
                            # Display Data Tables
                            table_data = display_table(data_insights)
                        else:
                            st.warning("No analyst analysis available.")

                # Notify user that the analysis is complete
                st.success(f"Analysis completed in {elapsed_time:.2f} seconds.")

                # Generate the PDF report with Key Insights and Data Insights
                if writer_output:
                    pdf_path = generate_pdf_report(
                        result=writer_output,
                        charts=charts,
                        table_data=data_insights,
                        metadata={"Technology Area": patent_area, "Stakeholder": stakeholder},
                        key_insights=key_insights  # Pass key insights to the PDF
                    )
                    # Download button for the generated PDF
                    with open(pdf_path, "rb") as report_file:
                        st.download_button(
                            label="📄 Download Report",
                            data=report_file,
                            file_name="Patent_Strategy_Report.pdf",
                            mime="application/pdf"
                        )
                else:
                    st.warning("Report generation skipped due to missing content.")
        except Exception as e:
            error_message = traceback.format_exc()
            logging.error(f"An error occurred during execution:\n{error_message}")
            st.error(f"⚠️ An unexpected error occurred:\n{e}")