# test-analyst-outputs / interim.py
# Uploaded by DrishtiSharma -- commit 488f45f ("Create interim.py"), 24.2 kB
import streamlit as st
from crewai import Agent, Task, Crew
import os
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from fpdf import FPDF
import pandas as pd
import plotly.express as px
import tempfile
import time
import ast
import logging
import traceback
# Configure logging once for the whole app (timestamp - level - message).
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Page title, plus a one-line description of the app in the sidebar.
st.title("Patent Strategy and Innovation Consultant")
st.sidebar.write(
    "This application provides actionable insights and comprehensive analysis for patent-related strategies."
)
# User inputs: the technology area to research and the audience of the report.
st.sidebar.header("User Inputs")
patent_area = st.text_input("Enter Patent Technology Area", value="Transparent Antennas for Windshields")
stakeholder = st.text_input("Enter Stakeholder", value="Patent Attorneys")

# Which backing model the agents should use.
model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True)

# API keys are read from the environment only.
groq_api_key = os.getenv("GROQ_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# `llm` stays None whenever the key for the selected provider is missing;
# in that case an error is shown instead of building a client.
llm = None
if model_choice == "llama-3.3-70b":
    if groq_api_key:
        llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
    else:
        st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
elif model_choice == "GPT-4o":
    if openai_api_key:
        llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o")
    else:
        st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")
# Advanced Options
st.sidebar.header("Advanced Options")
enable_advanced_analysis = st.sidebar.checkbox("Enable Advanced Analysis", value=True)
enable_custom_visualization = st.sidebar.checkbox("Enable Custom Visualizations", value=True)

# Default agent goals, defined once so the editable text areas and the
# non-customized path can never drift apart. Each fragment ends with a space
# (or is the last one): Python joins adjacent string literals with no
# separator, so a missing trailing space glues two sentences/words together.
# "{topic}" and "{stakeholder}" are placeholders filled from the inputs passed
# to crew.kickoff().
DEFAULT_PLANNER_GOAL = (
    "Conduct comprehensive, data-driven research on patent filings, technological innovation, and market dynamics strictly within the {topic} sector. "
    "Avoid unrelated or generic recommendations. "
    "Identify key players, emerging technologies, competitor strategies, and market gaps with factually accurate and verifiable data. "
    "Strictly avoid hallucinated, fabricated, or speculative findings. "
    "Deliver precise, actionable suggestions tailored to the specific needs and strategic goals of {stakeholder}."
)
DEFAULT_WRITER_GOAL = (
    "Develop a high-impact, professionally structured insights report that integrates verified research data and strategic analysis "
    "into a cohesive and compelling narrative. "
    "Organize findings into well-defined, data-driven sections such as Market Trends, Competitive Landscape, Emerging Technologies, "
    "Untapped Innovation Hotspots, and Strategic Opportunities -- providing actionable insights and prioritized recommendations "
    "strictly aligned with {stakeholder}'s strategic objectives. "
    "Ensure all insights, emerging technologies, and identified innovation gaps are fact-based, verifiable, and directly relevant to the {topic}. "
    "Explicitly avoid hallucinated, fabricated, or speculative content throughout the report."
)
DEFAULT_ANALYST_GOAL = (
    "Perform precise, data-driven statistical analysis of patent filings, growth trends, and innovation distribution strictly within the {topic} sector, "
    "specifically customized to the strategic needs of {stakeholder}. "
    "Identify top regions, leading assignees/companies, and emerging technologies that are explicitly and directly relevant to {topic}. "
    "Strictly avoid hallucinated, fabricated, or speculative statistical data and patent numbers in the analysis. "
    "Conduct a thorough market gap analysis, identifying 4-5 highly actionable and verifiable innovation opportunities aligned with {topic}, "
    "emphasizing sustainability, emerging technology integration, industry collaboration, and competitor positioning. "
    "Evaluate competitor patent strategies with factual data to uncover untapped opportunities and competitive advantages. "
    "All innovation hotspots and emerging technology suggestions must be strictly aligned with {topic} - no generic or unrelated recommendations are allowed. "
    "Deliver highly actionable, data-driven insights to support strategic decision-making and long-term growth. "
    "Present findings in a structured, well-organized format using 'Category' and 'Values' keys for easy data interpretation."
)

# Start from the defaults; they are overridden below only if the user opts in.
planner_goal = DEFAULT_PLANNER_GOAL
writer_goal = DEFAULT_WRITER_GOAL
analyst_goal = DEFAULT_ANALYST_GOAL

# Agent Customization
st.sidebar.header("Agent Customization")
with st.sidebar.expander("Customize Agent Goals", expanded=False):
    enable_customization = st.checkbox("Enable Custom Goals")
    if enable_customization:
        # Each text area is pre-filled with the default goal so the user edits
        # rather than rewrites it.
        planner_goal = st.text_area("Planner Goal", value=DEFAULT_PLANNER_GOAL)
        writer_goal = st.text_area("Writer Goal", value=DEFAULT_WRITER_GOAL)
        analyst_goal = st.text_area("Analyst Goal", value=DEFAULT_ANALYST_GOAL)
# Agent Definitions
# Options common to all three agents: no delegation, verbose logging, and the
# LLM selected above.
_shared_agent_settings = dict(allow_delegation=False, verbose=True, llm=llm)

# Researches the topic.
planner = Agent(
    role="Patent Research Consultant",
    goal=planner_goal,
    backstory=(
        "You're tasked with researching {topic} patents and identifying key trends and players. Your work supports the Patent Writer and Data Analyst."
    ),
    **_shared_agent_settings,
)

# Turns the research and analysis into the stakeholder-facing report.
writer = Agent(
    role="Patent Insights Writer",
    goal=writer_goal,
    backstory=(
        "Using the research from the Planner and data from the Analyst, craft a professional document summarizing patent insights for {stakeholder}."
    ),
    **_shared_agent_settings,
)

# Produces the structured statistical output used for charts and tables.
analyst = Agent(
    role="Patent Data Analyst",
    goal=analyst_goal,
    backstory=(
        "Analyze patent filing data and innovation trends in {topic} to provide statistical insights. Your analysis will guide the Writer's final report."
    ),
    **_shared_agent_settings,
)
# Task Definitions
# "{topic}" / "{stakeholder}" are placeholders filled from the kickoff inputs;
# literal braces in the example output are therefore written doubled.

# Research task for the planner agent.
plan = Task(
    description=(
        "1. Conduct comprehensive, fact-based research on recent trends in {topic} patent filings and innovation.\n"
        "2. Identify key players, emerging technologies, and market gaps that are strictly relevant to {topic}.\n"
        "3. Ensure all findings—especially emerging technologies and innovation hotspots—are explicitly aligned with {topic}.\n"
        "4. Avoid speculative, fabricated, or unrelated content entirely.\n"
        "5. Provide actionable, data-backed strategic recommendations aligned with {stakeholder}'s goals.\n"
        "6. Limit the output to 600 words."
    ),
    expected_output="A fact-driven research document with strictly relevant insights, strategic recommendations, and key statistics.",
    agent=planner
)

# Report-writing task for the writer agent.
write = Task(
    description=(
        "1. Use the Planner's and Analyst's strictly topic-aligned outputs to craft a professional patent insights document.\n"
        "2. Include key findings, visual aids, and actionable strategies strictly related to {topic}.\n"
        "3. Highlight strategic directions and strictly relevant innovation opportunities.\n"
        "4. Incorporate well-structured tables for key statistics and example inventions without using any fabricated data or fake patent numbers.\n"
        "5. Avoid any speculative, fabricated, or unrelated content.\n"
        "6. Limit the document to 600 words."
    ),
    expected_output="A polished, stakeholder-ready patent insights report with actionable, strictly relevant recommendations.",
    agent=writer
)

# Analysis task for the analyst agent. Its output is later parsed as a list
# of dicts, so the example below must itself be a well-formed literal: every
# opening brace needs its closing counterpart (the 'Top Regions' row was
# previously missing the brace that closes the outer dict).
analyse = Task(
    description=(
        "1. Conduct a comprehensive statistical analysis of patent filing trends, innovation hot spots, and future growth projections in the {topic} sector.\n"
        "2. Identify and rank the top regions, leading assignees/companies driving innovation.\n"
        "3. Highlight regional innovation trends and the distribution of emerging technologies across different geographies.\n"
        "4. Provide actionable insights and strategic recommendations based on the data.\n"
        "5. Categorize outputs as either:\n"
        " - 'Data Insight' for visualizations and tables (quantitative data, trends, technologies).\n"
        " - 'Key Insight' for strategic recommendations and innovation opportunities.\n"
        "6. Example Output Format:\n"
        "[\n"
        " {{'Category': 'Top Regions', 'Type': 'Data Insight', 'Values': {{'North America': 120, 'Europe': 95}}}},\n"
        " {{'Category': 'Emerging Technologies', 'Type': 'Data Insight', 'Values': ['Transparent Conductive Films']}},\n"
        " {{'Category': 'Strategic Insights', 'Type': 'Key Insight', 'Values': 'Collaborate with material science companies to develop advanced transparent antennas.'}},\n"
        " {{'Category': 'Innovation Gaps', 'Type': 'Key Insight', 'Values': 'Limited patents in self-healing transparent materials present a growth opportunity.'}}\n"
        "]\n"
        "7. Ensure all data is factually accurate, verifiable, and strictly aligned with {topic}."
    ),
    expected_output="A structured dataset combining Data Insights for comprehensive visualizations and table reporting, and Key Insights for strategic actions.",
    agent=analyst
)

# The main block reads results.tasks_output by position
# (0 = plan, 1 = analyse, 2 = write), so keep this task order in sync with it.
crew = Crew(
    agents=[planner, analyst, writer],
    tasks=[plan, analyse, write],
    verbose=True
)
# PDF Report Generation
def _write_wrapped(pdf, text, line_height=10):
    """Write *text* as a wrapped, full-width paragraph starting at the left margin."""
    # NOTE(review): some FPDF releases appear to leave the cursor at the right
    # edge after multi_cell(); resetting x first keeps back-to-back paragraphs
    # working either way -- confirm against the installed fpdf version.
    pdf.set_x(pdf.l_margin)
    pdf.multi_cell(0, line_height, txt=text)


def generate_pdf_report(result, charts=None, table_data=None, metadata=None, key_insights=None):
    """Render the report to a temporary PDF file and return its path.

    Args:
        result: Main report text (the writer's output).
        charts: Optional iterable of image file paths; each goes on its own page.
        table_data: Optional iterable of rows; each row is rendered via ``str(row)``.
        metadata: Optional mapping printed as ``key: value`` lines under the title.
        key_insights: Optional iterable of insights rendered as a bulleted list.

    Returns:
        Path of the generated PDF. The file is created with ``delete=False``,
        so the caller is responsible for removing it.

    Raises:
        Whatever FPDF raises while building or writing the document (for
        example when the DejaVu font files are not installed at the hard-coded
        paths). A chart that cannot be embedded is logged and skipped instead.
    """
    # Reserve a unique path, then close the handle before FPDF writes to it:
    # writing to a file that is still open elsewhere fails on some platforms.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        pdf_path = temp_pdf.name

    try:
        pdf = FPDF()
        pdf.add_page()
        # Add DejaVu fonts (regular and bold) so non-Latin-1 text can be rendered.
        pdf.add_font('DejaVu', '', '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', uni=True)
        pdf.add_font('DejaVu', 'B', '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', uni=True)
        pdf.set_font("DejaVu", size=12)
        pdf.set_auto_page_break(auto=True, margin=15)

        # Title (bold)
        pdf.set_font("DejaVu", size=16, style="B")
        pdf.cell(200, 10, txt="Patent Strategy and Innovation Report", ln=True, align="C")
        pdf.ln(10)

        # Metadata section
        if metadata:
            pdf.set_font("DejaVu", size=10)
            for key, value in metadata.items():
                pdf.cell(200, 10, txt=f"{key}: {value}", ln=True)

        # Report content
        pdf.set_font("DejaVu", size=12)
        _write_wrapped(pdf, str(result))

        # Key insights section
        if key_insights:
            pdf.add_page()
            pdf.set_font("DejaVu", size=14, style="B")
            pdf.cell(200, 10, txt="Key Strategic Insights", ln=True)
            pdf.ln(5)
            pdf.set_font("DejaVu", size=12)
            for insight in key_insights:
                _write_wrapped(pdf, f"- {insight}")

        # Charts: one per page; a broken image must not abort the whole report.
        if charts:
            for chart_path in charts:
                try:
                    pdf.add_page()
                    pdf.image(chart_path, x=10, y=20, w=180)
                except Exception as e:
                    logging.error(f"Error including chart: {e}")

        # Tables: rows are stringified dicts that can be far wider than the
        # page, so they are wrapped rather than drawn as a single-line cell.
        if table_data:
            pdf.add_page()
            pdf.set_font("DejaVu", size=10)
            pdf.cell(200, 10, txt="Consolidated Data Table:", ln=True, align="L")
            for row in table_data:
                _write_wrapped(pdf, str(row))

        pdf.output(pdf_path)
    except Exception:
        # Do not leave an empty or partial temp file behind on failure.
        try:
            os.remove(pdf_path)
        except OSError:
            pass
        raise
    return pdf_path
# Data Validation
def validate_analyst_output(analyst_output):
    """Return *analyst_output* unchanged if it is usable, otherwise ``None``.

    Usable means: a non-empty list whose elements are all dicts that each
    contain both the 'Category' and 'Values' keys. Every rejection is
    reported to the user with a Streamlit warning.
    """
    required_keys = {'Category', 'Values'}

    if not analyst_output:
        st.warning("No data available for analysis.")
        return None

    is_list_of_dicts = isinstance(analyst_output, list) and all(
        isinstance(entry, dict) for entry in analyst_output
    )
    if not is_list_of_dicts:
        st.warning("Analyst output must be a list of dictionaries.")
        return None

    for entry in analyst_output:
        if not required_keys.issubset(entry.keys()):
            st.warning(f"Each dictionary must contain keys: {required_keys}")
            return None

    return analyst_output
# Visualization and Table Display
def create_visualizations(analyst_output):
    """Render one chart per chartable entry and return the saved image paths.

    Dict values are plotted as label/count pairs; list values are plotted as
    frequency counts of their items. Up to five rows give a pie chart, more
    give a bar chart. String values are shown as a one-row table and produce
    no chart. Each chart is displayed in Streamlit and also written to a
    temporary PNG whose path is collected for the PDF export. A failure on
    one entry is reported and logged without stopping the remaining entries.
    """
    saved_charts = []
    records = validate_analyst_output(analyst_output)
    if not records:
        return saved_charts

    for record in records:
        category = record["Category"]
        values = record["Values"]
        try:
            if isinstance(values, str):
                # Text data: show it, but there is nothing to plot.
                st.subheader(f"{category} Insights")
                st.table(pd.DataFrame({"Insights": [values]}))
                continue

            if isinstance(values, dict):
                df = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
                bar_title = f"{category} Analysis"
            elif isinstance(values, list):
                # Count how often each item occurs; no dummy values are added.
                df = pd.DataFrame(values, columns=["Label"])
                df = df["Label"].value_counts().reset_index()
                df.columns = ["Label", "Count"]
                bar_title = f"{category} Frequency"
            else:
                st.warning(f"Unsupported data format for category: {category}")
                continue

            # Pie chart for few categories, bar chart otherwise.
            if len(df) <= 5:
                chart = px.pie(df, names="Label", values="Count", title=f"{category} Distribution")
            else:
                chart = px.bar(df, x="Label", y="Count", title=bar_title)

            # Show the chart in the app ...
            st.plotly_chart(chart)

            # ... and keep an image copy for the PDF export.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
                chart.write_image(temp_chart.name)
                saved_charts.append(temp_chart.name)
        except Exception as e:
            st.error(f"Failed to generate visualization for {category}: {e}")
            logging.error(f"Error in {category} visualization: {e}")

    return saved_charts
def display_table(analyst_output):
    """Show every entry as a table in Streamlit and return the collected rows.

    Dict values become a Label/Count table, list values a single-column
    'Items' table, and string values a one-row summary table. The returned
    list holds the record dicts of the dict/list tables, plus one
    ``{"Category": ..., "Values": ...}`` dict per string entry. An entry that
    fails is logged and reported without stopping the remaining entries.
    """
    rows = []
    records = validate_analyst_output(analyst_output)
    if not records:
        return rows

    for record in records:
        category = record["Category"]
        values = record["Values"]
        # Keep one bad entry from crashing the whole display.
        try:
            if isinstance(values, (dict, list)):
                if isinstance(values, dict):
                    frame = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
                    heading = f"{category} (Table View)"
                else:
                    frame = pd.DataFrame(values, columns=["Items"])
                    heading = f"{category} (List View)"
                st.subheader(heading)
                st.dataframe(frame)
                rows.extend(frame.to_dict(orient="records"))
            elif isinstance(values, str):
                st.subheader(f"{category} (Summary)")
                st.table(pd.DataFrame({"Insights": [values]}))
                rows.append({"Category": category, "Values": values})
            else:
                st.warning(f"Unsupported data format for category: {category}")
        except Exception as e:
            logging.error(f"Error processing {category}: {e}")
            st.error(f"Failed to display {category} as a table due to an error.")

    return rows
def _coerce_analyst_payload(raw_output):
    """Convert the analyst's raw output into a Python object.

    Non-string input is returned unchanged. A string is parsed first as a
    Python literal and then as JSON; if the whole text fails, the span from
    the first '[' to the last ']' is tried the same way, which copes with
    output wrapped in prose or a Markdown code fence.

    Raises:
        ValueError: if no candidate text could be parsed.
    """
    import json  # local import: only needed for the JSON fallback

    if not isinstance(raw_output, str):
        return raw_output

    text = raw_output.strip()
    candidates = [text]
    start, end = text.find("["), text.rfind("]")
    if start != -1 and end > start:
        candidates.append(text[start:end + 1])

    last_error = None
    for candidate in candidates:
        # literal_eval handles Python-style quoting; json.loads handles
        # true/false/null, which are not Python literals.
        for loader in (ast.literal_eval, json.loads):
            try:
                return loader(candidate)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as exc:
                last_error = exc
    raise ValueError(f"could not parse analyst output: {last_error}") from last_error


def parse_analyst_output(raw_output):
    """Split the analyst's output into key insights and data insights.

    Args:
        raw_output: Either the raw text produced by the analyst task (a list
            of dicts written as a Python literal or JSON, optionally wrapped
            in prose or a code fence) or an already-parsed iterable of dicts.

    Returns:
        A ``(key_insights, data_insights)`` tuple. ``key_insights`` holds the
        'Values' of every item whose 'Type' is 'Key Insight'; ``data_insights``
        holds every other item that has a 'Category'. Both lists are empty
        when the output cannot be parsed. This function never raises for bad
        input: problems are logged and the offending item is skipped, so one
        malformed item no longer discards the items that follow it.
    """
    key_insights = []
    data_insights = []

    try:
        structured_data = _coerce_analyst_payload(raw_output)
    except ValueError as e:
        logging.error(f"Error parsing analyst output: {e}")
        return key_insights, data_insights

    # A single dict is treated as a one-item list.
    if isinstance(structured_data, dict):
        structured_data = [structured_data]

    # Strings are iterable but would only yield characters, so reject them
    # together with anything that cannot be iterated at all.
    if isinstance(structured_data, (str, bytes)):
        logging.error("Error parsing analyst output: expected a list of dictionaries")
        return key_insights, data_insights
    try:
        items = list(structured_data)
    except TypeError as e:
        logging.error(f"Error parsing analyst output: {e}")
        return key_insights, data_insights

    for item in items:
        if not isinstance(item, dict):
            logging.warning(f"Skipping non-dictionary item: {item}")
            continue
        if "Category" not in item:
            logging.warning(f"Missing 'Category' in item: {item}")
            continue
        if item.get("Type") == "Key Insight":
            if "Values" not in item:
                logging.warning(f"Missing 'Values' in item: {item}")
                continue
            key_insights.append(item["Values"])
        else:
            # 'Data Insight' and untyped items are both treated as data.
            data_insights.append(item)

    return key_insights, data_insights
# Main Execution Block
# Runs the crew when the button is pressed, shows each task's output, builds
# charts/tables from the analyst's structured data and offers the PDF report.
if st.button("Generate Patent Insights"):
    with st.spinner('Processing...'):
        try:
            # Start the timer
            start_time = time.time()

            if llm is None:
                # Without a configured client the agents would not run on the
                # model the user selected, so refuse instead of kicking off.
                st.error("The selected LLM is not configured. Please set the required API key and try again.")
            elif not patent_area or not stakeholder:
                st.error("Please provide both Patent Technology Area and Stakeholder.")
            else:
                # Kick off the crew with user inputs
                logging.info(f"Starting analysis with Topic: {patent_area}, Stakeholder: {stakeholder}")
                results = crew.kickoff(inputs={"topic": patent_area, "stakeholder": stakeholder})

                # Calculate elapsed time
                elapsed_time = time.time() - start_time

                # Task outputs are read by position: 0 = plan, 1 = analyse, 2 = write.
                writer_output = getattr(results.tasks_output[2], "raw", "No details available.")
                if writer_output and writer_output.strip():
                    st.markdown("### Final Report")
                    st.write(writer_output)
                else:
                    st.warning("No final report available.")

                # Bound up front so PDF generation below never hits an
                # undefined name when the analyst produced no output.
                key_insights = []
                data_insights = []
                charts = []

                # Expandable section for detailed insights
                with st.expander("Explore Detailed Insights"):
                    tab1, tab2 = st.tabs(["Planner's Insights", "Analyst's Analysis"])

                    # Planner's Insights
                    with tab1:
                        planner_output = getattr(results.tasks_output[0], "raw", "No details available.")
                        if planner_output and planner_output.strip():
                            st.write(planner_output)
                        else:
                            st.warning("No planner insights available.")

                    # Analyst's Analysis
                    with tab2:
                        analyst_output = getattr(results.tasks_output[1], "raw", "No details available.")
                        if analyst_output and analyst_output.strip():
                            st.write(analyst_output)

                            # Parse Analyst Output (Key Insights + Data Insights)
                            key_insights, data_insights = parse_analyst_output(analyst_output)
                            st.subheader("Structured Analyst Output")
                            st.write(data_insights)

                            # Create visualizations if enabled; say which of
                            # the two reasons applies when none are produced.
                            if not data_insights:
                                st.info("No data insights available for visualizations.")
                            elif enable_advanced_analysis:
                                charts = create_visualizations(data_insights)
                            else:
                                st.info("Visualizations are disabled. Enable Advanced Analysis in the sidebar to see them.")

                            # Display Data Tables
                            display_table(data_insights)
                        else:
                            st.warning("No analyst analysis available.")

                # Notify user that the analysis is complete
                st.success(f"Analysis completed in {elapsed_time:.2f} seconds.")

                # Generate the PDF report with Key Insights and Data Insights
                if writer_output:
                    temp_paths = list(charts)
                    try:
                        pdf_path = generate_pdf_report(
                            result=writer_output,
                            charts=charts,
                            table_data=data_insights,
                            metadata={"Technology Area": patent_area, "Stakeholder": stakeholder},
                            key_insights=key_insights
                        )
                        temp_paths.append(pdf_path)
                        # Read the report into memory so the temp files can be
                        # removed right away.
                        with open(pdf_path, "rb") as report_file:
                            pdf_bytes = report_file.read()
                    finally:
                        # Chart PNGs and the PDF are created with delete=False;
                        # remove them so repeated runs do not fill the temp dir.
                        for temp_path in temp_paths:
                            try:
                                os.remove(temp_path)
                            except OSError as cleanup_error:
                                logging.warning(f"Could not remove temporary file {temp_path}: {cleanup_error}")

                    # Download button for the generated PDF
                    st.download_button(
                        label="📄 Download Report",
                        data=pdf_bytes,
                        file_name="Patent_Strategy_Report.pdf",
                        mime="application/pdf"
                    )
                else:
                    st.warning("Report generation skipped due to missing content.")
        except Exception as e:
            # Top-level boundary: log the full traceback, show a short message.
            error_message = traceback.format_exc()
            logging.error(f"An error occurred during execution:\n{error_message}")
            st.error(f"⚠️ An unexpected error occurred:\n{e}")