test-analyst-outputs

Sleeping

App Files Files Community

test-analyst-outputs / interim.py

DrishtiSharma

Create interim.py

488f45f verified 24 days ago

raw

history blame

24.2 kB

	import streamlit as st
	from crewai import Agent, Task, Crew
	import os
	from langchain_groq import ChatGroq
	from langchain_openai import ChatOpenAI
	from fpdf import FPDF
	import pandas as pd
	import plotly.express as px
	import tempfile
	import time
	import ast
	import logging
	import traceback

	# Logging: INFO and above, each record stamped with time and level.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)

	# Page title, with a short description of the app placed in the sidebar.
	st.title("Patent Strategy and Innovation Consultant")
	_app_summary = (
	    "This application provides actionable insights and comprehensive analysis for patent-related strategies."
	)
	st.sidebar.write(_app_summary)

	# The "User Inputs" heading goes to the sidebar; the two text boxes are
	# created with st.text_input (not st.sidebar.text_input).
	st.sidebar.header("User Inputs")
	patent_area = st.text_input(
	    "Enter Patent Technology Area",
	    value="Transparent Antennas for Windshields",
	)
	stakeholder = st.text_input(
	    "Enter Stakeholder",
	    value="Patent Attorneys",
	)

	# LLM selection and initialisation.
	# `llm` stays None until one of the branches below constructs a client.
	llm = None

	# Model Selection
	model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True)

	# API keys are read from the environment.
	groq_api_key = os.getenv("GROQ_API_KEY")
	openai_api_key = os.getenv("OPENAI_API_KEY")

	if model_choice == "llama-3.3-70b":
	    if groq_api_key:
	        llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
	    else:
	        st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
	elif model_choice == "GPT-4o":
	    if openai_api_key:
	        llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o")
	    else:
	        st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")

	if llm is None:
	    # The agents further down are constructed with llm=llm. Without a
	    # configured model there is nothing useful to run, so end this script
	    # run here; the error above and the model selector remain visible.
	    st.stop()


	# Advanced Options
	st.sidebar.header("Advanced Options")
	enable_advanced_analysis = st.sidebar.checkbox("Enable Advanced Analysis", value=True)
	# NOTE(review): this flag is not read anywhere in the visible part of the file.
	enable_custom_visualization = st.sidebar.checkbox("Enable Custom Visualizations", value=True)

	# Default agent goals, defined once and used both as the initial text of the
	# customization boxes and as the goals when customization is off.
	# {topic} and {stakeholder} are placeholders named after the keys passed to
	# crew.kickoff(inputs=...). Every fragment ends with a space so that the
	# implicitly concatenated sentences do not run together.
	_DEFAULT_PLANNER_GOAL = (
	    "Conduct comprehensive, data-driven research on patent filings, technological innovation, and market dynamics strictly within the {topic} sector. "
	    "Avoid unrelated or generic recommendations. "
	    "Identify key players, emerging technologies, competitor strategies, and market gaps with factually accurate and verifiable data. "
	    "Strictly avoid hallucinated, fabricated, or speculative findings. "
	    "Deliver precise, actionable suggestions tailored to the specific needs and strategic goals of {stakeholder}."
	)
	_DEFAULT_WRITER_GOAL = (
	    "Develop a high-impact, professionally structured insights report that integrates verified research data and strategic analysis "
	    "into a cohesive and compelling narrative. "
	    "Organize findings into well-defined, data-driven sections such as Market Trends, Competitive Landscape, Emerging Technologies, "
	    "Untapped Innovation Hotspots, and Strategic Opportunities -- providing actionable insights and prioritized recommendations "
	    "strictly aligned with {stakeholder}'s strategic objectives. "
	    "Ensure all insights, emerging technologies, and identified innovation gaps are fact-based, verifiable, and directly relevant to the {topic}. "
	    "Explicitly avoid hallucinated, fabricated, or speculative content throughout the report."
	)
	_DEFAULT_ANALYST_GOAL = (
	    "Perform precise, data-driven statistical analysis of patent filings, growth trends, and innovation distribution strictly within the {topic} sector, "
	    "specifically customized to the strategic needs of {stakeholder}. "
	    "Identify top regions, leading assignees/companies, and emerging technologies that are explicitly and directly relevant to {topic}. "
	    "Strictly avoid hallucinated, fabricated, or speculative statistical data and patent numbers in the analysis. "
	    "Conduct a thorough market gap analysis, identifying 4-5 highly actionable and verifiable innovation opportunities aligned with {topic}, "
	    "emphasizing sustainability, emerging technology integration, industry collaboration, and competitor positioning. "
	    "Evaluate competitor patent strategies with factual data to uncover untapped opportunities and competitive advantages. "
	    "All innovation hotspots and emerging technology suggestions must be strictly aligned with {topic} - no generic or unrelated recommendations are allowed. "
	    "Deliver highly actionable, data-driven insights to support strategic decision-making and long-term growth. "
	    "Present findings in a structured, well-organized format using 'Category' and 'Values' keys for easy data interpretation."
	)

	# Agent Customization
	st.sidebar.header("Agent Customization")
	with st.sidebar.expander("Customize Agent Goals", expanded=False):
	    enable_customization = st.checkbox("Enable Custom Goals")
	    if enable_customization:
	        # Editable goals, pre-filled with the defaults.
	        planner_goal = st.text_area("Planner Goal", value=_DEFAULT_PLANNER_GOAL)
	        writer_goal = st.text_area("Writer Goal", value=_DEFAULT_WRITER_GOAL)
	        analyst_goal = st.text_area("Analyst Goal", value=_DEFAULT_ANALYST_GOAL)
	    else:
	        planner_goal = _DEFAULT_PLANNER_GOAL
	        writer_goal = _DEFAULT_WRITER_GOAL
	        analyst_goal = _DEFAULT_ANALYST_GOAL

	# Agent Definitions
	# Keyword arguments shared by all three agents.
	_shared_agent_kwargs = dict(allow_delegation=False, verbose=True, llm=llm)

	# Researcher: its goal comes from planner_goal.
	planner = Agent(
	    role="Patent Research Consultant",
	    goal=planner_goal,
	    backstory="You're tasked with researching {topic} patents and identifying key trends and players. Your work supports the Patent Writer and Data Analyst.",
	    **_shared_agent_kwargs,
	)

	# Report author: its goal comes from writer_goal.
	writer = Agent(
	    role="Patent Insights Writer",
	    goal=writer_goal,
	    backstory="Using the research from the Planner and data from the Analyst, craft a professional document summarizing patent insights for {stakeholder}.",
	    **_shared_agent_kwargs,
	)

	# Statistician: its goal comes from analyst_goal.
	analyst = Agent(
	    role="Patent Data Analyst",
	    goal=analyst_goal,
	    backstory="Analyze patent filing data and innovation trends in {topic} to provide statistical insights. Your analysis will guide the Writer's final report.",
	    **_shared_agent_kwargs,
	)

	# Task Definitions
	# Each task description uses the {topic} / {stakeholder} placeholders, named
	# after the keys passed to crew.kickoff(inputs=...).

	# Research task, assigned to the planner agent.
	plan = Task(
	    description=(
	        "1. Conduct comprehensive, fact-based research on recent trends in {topic} patent filings and innovation.\n"
	        "2. Identify key players, emerging technologies, and market gaps that are strictly relevant to {topic}.\n"
	        "3. Ensure all findings—especially emerging technologies and innovation hotspots—are explicitly aligned with {topic}.\n"
	        "4. Avoid speculative, fabricated, or unrelated content entirely.\n"
	        "5. Provide actionable, data-backed strategic recommendations aligned with {stakeholder}'s goals.\n"
	        "6. Limit the output to 600 words."
	    ),
	    expected_output="A fact-driven research document with strictly relevant insights, strategic recommendations, and key statistics.",
	    agent=planner
	)

	# Report-writing task, assigned to the writer agent.
	write = Task(
	    description=(
	        "1. Use the Planner's and Analyst's strictly topic-aligned outputs to craft a professional patent insights document.\n"
	        "2. Include key findings, visual aids, and actionable strategies strictly related to {topic}.\n"
	        "3. Highlight strategic directions and strictly relevant innovation opportunities.\n"
	        "4. Incorporate well-structured tables for key statistics and example inventions without using any fabricated data or fake patent numbers.\n"
	        "5. Avoid any speculative, fabricated, or unrelated content.\n"
	        "6. Limit the document to 600 words."
	    ),
	    expected_output="A polished, stakeholder-ready patent insights report with actionable, strictly relevant recommendations.",
	    agent=writer
	)

	# Statistical-analysis task, assigned to the analyst agent.
	# The example output is a list of records with 'Category', 'Type' and
	# 'Values' keys, the same keys parse_analyst_output and
	# validate_analyst_output look for. Literal braces are written doubled,
	# presumably so placeholder interpolation leaves them as single braces
	# (confirm against the installed crewAI version). Every opening pair must
	# have a matching closing pair so the example is itself a valid literal.
	analyse = Task(
	    description=(
	        "1. Conduct a comprehensive statistical analysis of patent filing trends, innovation hot spots, and future growth projections in the {topic} sector.\n"
	        "2. Identify and rank the top regions, leading assignees/companies driving innovation.\n"
	        "3. Highlight regional innovation trends and the distribution of emerging technologies across different geographies.\n"
	        "4. Provide actionable insights and strategic recommendations based on the data.\n"
	        "5. Categorize outputs as either:\n"
	        " - 'Data Insight' for visualizations and tables (quantitative data, trends, technologies).\n"
	        " - 'Key Insight' for strategic recommendations and innovation opportunities.\n"
	        "6. Example Output Format:\n"
	        "[\n"
	        " {{'Category': 'Top Regions', 'Type': 'Data Insight', 'Values': {{'North America': 120, 'Europe': 95}}}},\n"
	        " {{'Category': 'Emerging Technologies', 'Type': 'Data Insight', 'Values': ['Transparent Conductive Films']}},\n"
	        " {{'Category': 'Strategic Insights', 'Type': 'Key Insight', 'Values': 'Collaborate with material science companies to develop advanced transparent antennas.'}},\n"
	        " {{'Category': 'Innovation Gaps', 'Type': 'Key Insight', 'Values': 'Limited patents in self-healing transparent materials present a growth opportunity.'}}\n"
	        "]\n"
	        "7. Ensure all data is factually accurate, verifiable, and strictly aligned with {topic}."
	    ),
	    expected_output="A structured dataset combining Data Insights for comprehensive visualizations and table reporting, and Key Insights for strategic actions.",
	    agent=analyst
	)


	# Crew: tasks are listed as plan, analyse, write. The main block reads
	# results.tasks_output by index 0, 1, 2 in that same order.
	crew = Crew(
	    agents=[planner, analyst, writer],
	    tasks=[plan, analyse, write],
	    verbose=True
	)

	# PDF Report Generation
	def generate_pdf_report(result, charts=None, table_data=None, metadata=None, key_insights=None):
	    """Build the report PDF in a temporary file and return that file's path.

	    Args:
	        result: Main report text; converted with str() if it is not a string
	            (None becomes an empty string).
	        charts: Optional iterable of image file paths; each image gets its
	            own page. A failure to embed one image is logged and skipped.
	        table_data: Optional iterable of rows; each row is written as str(row).
	        metadata: Optional mapping printed as "key: value" lines under the title.
	        key_insights: Optional iterable of insights, written as a dashed list
	            on a separate page.

	    Returns:
	        Path of the generated PDF. The file is created with delete=False, so
	        removing it is the caller's responsibility.
	    """
	    pdf = FPDF()
	    pdf.add_page()

	    # DejaVu fonts (regular and bold) are loaded from fixed system paths.
	    pdf.add_font('DejaVu', '', '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', uni=True)
	    pdf.add_font('DejaVu', 'B', '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', uni=True)

	    pdf.set_font("DejaVu", size=12)
	    pdf.set_auto_page_break(auto=True, margin=15)

	    def write_wrapped(text, line_height=10):
	        # Free-form text goes through multi_cell so long lines wrap instead
	        # of running off the page; each paragraph starts at the left margin.
	        pdf.set_x(pdf.l_margin)
	        pdf.multi_cell(0, line_height, txt=text)

	    # Title (Bold)
	    pdf.set_font("DejaVu", size=16, style="B")
	    pdf.cell(200, 10, txt="Patent Strategy and Innovation Report", ln=True, align="C")
	    pdf.ln(10)

	    # Metadata Section
	    if metadata:
	        pdf.set_font("DejaVu", size=10)
	        for key, value in metadata.items():
	            write_wrapped(f"{key}: {value}")

	    # Report Content
	    pdf.set_font("DejaVu", size=12)
	    report_text = "" if result is None else str(result)
	    write_wrapped(report_text)

	    # Key Insights Section
	    if key_insights:
	        pdf.add_page()
	        pdf.set_font("DejaVu", size=14, style="B")
	        pdf.cell(200, 10, txt="Key Strategic Insights", ln=True)
	        pdf.ln(5)
	        pdf.set_font("DejaVu", size=12)
	        for insight in key_insights:
	            write_wrapped(f"- {insight}")

	    # Insert Charts
	    if charts:
	        for chart_path in charts:
	            try:
	                pdf.add_page()
	                pdf.image(chart_path, x=10, y=20, w=180)
	            except Exception as e:
	                # One unreadable image must not abort the whole report.
	                logging.error(f"Error including chart: {e}")

	    # Insert Tables
	    if table_data:
	        pdf.add_page()
	        pdf.set_font("DejaVu", size=10)
	        pdf.cell(200, 10, txt="Consolidated Data Table:", ln=True, align="L")
	        for row in table_data:
	            write_wrapped(str(row))

	    # Reserve the output path only once the document has been built, and
	    # close the handle before FPDF opens the same path for writing.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
	        pdf_path = temp_pdf.name
	    pdf.output(pdf_path)
	    return pdf_path


	# Data Validation
	def validate_analyst_output(analyst_output):
	    """Return analyst_output unchanged if it is well-formed, otherwise None.

	    Well-formed means: non-empty, a list, every element a dict, and every
	    dict containing both the 'Category' and 'Values' keys. Each failed
	    check shows a Streamlit warning before returning None.
	    """
	    if not analyst_output:
	        st.warning("No data available for analysis.")
	        return None

	    is_list = isinstance(analyst_output, list)
	    if not (is_list and all(isinstance(entry, dict) for entry in analyst_output)):
	        st.warning("Analyst output must be a list of dictionaries.")
	        return None

	    required_keys = {'Category', 'Values'}
	    for entry in analyst_output:
	        if not required_keys.issubset(entry.keys()):
	            st.warning(f"Each dictionary must contain keys: {required_keys}")
	            return None

	    return analyst_output

	# Visualization and Table Display
	def create_visualizations(analyst_output):
	    """Render one chart per dict/list record and return the saved PNG paths.

	    The input is first passed through validate_analyst_output. For each
	    record:
	      * dict 'Values' are plotted as label/count pairs;
	      * list 'Values' are plotted as the frequency of each distinct item;
	      * str 'Values' are shown as a one-row table and produce no chart;
	      * anything else produces a warning.
	    Up to five rows are drawn as a pie chart, more than five as a bar chart.
	    Each chart is shown in Streamlit and also written to a temporary PNG
	    (created with delete=False) whose path is collected for the PDF export.
	    Errors for a single record are reported and logged; the loop continues.
	    """
	    chart_paths = []
	    records = validate_analyst_output(analyst_output)
	    if not records:
	        return chart_paths

	    for record in records:
	        category = record["Category"]
	        values = record["Values"]

	        try:
	            from_mapping = isinstance(values, dict)

	            if from_mapping:
	                # Dictionary data: keys become labels, values become counts.
	                frame = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
	            elif isinstance(values, list):
	                # List data: count how often each item occurs.
	                labels = pd.DataFrame(values, columns=["Label"])
	                frame = labels["Label"].value_counts().reset_index()
	                frame.columns = ["Label", "Count"]
	            elif isinstance(values, str):
	                # Text data: table only, no chart.
	                st.subheader(f"{category} Insights")
	                st.table(pd.DataFrame({"Insights": [values]}))
	                continue
	            else:
	                st.warning(f"Unsupported data format for category: {category}")
	                continue

	            # Pie for small result sets, bar otherwise; the bar title
	            # depends on which kind of input produced the frame.
	            if len(frame) <= 5:
	                chart = px.pie(frame, names="Label", values="Count", title=f"{category} Distribution")
	            elif from_mapping:
	                chart = px.bar(frame, x="Label", y="Count", title=f"{category} Analysis")
	            else:
	                chart = px.bar(frame, x="Label", y="Count", title=f"{category} Frequency")

	            # Show the chart in the app.
	            st.plotly_chart(chart)

	            # Keep a PNG copy for the PDF export.
	            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
	                chart.write_image(temp_chart.name)
	                chart_paths.append(temp_chart.name)

	        except Exception as e:
	            st.error(f"Failed to generate visualization for {category}: {e}")
	            logging.error(f"Error in {category} visualization: {e}")

	    return chart_paths

	def display_table(analyst_output):
	    """Show each record as a table in Streamlit and return the collected rows.

	    The input is first passed through validate_analyst_output. dict and list
	    'Values' are displayed as DataFrames and contribute their rows (as
	    dicts) to the result; str 'Values' are displayed as a one-row table and
	    contribute a {"Category", "Values"} dict. Other types only produce a
	    warning. An error in one record is logged and reported without stopping
	    the loop.
	    """
	    collected_rows = []
	    records = validate_analyst_output(analyst_output)
	    if not records:
	        return collected_rows

	    for record in records:
	        category = record["Category"]
	        values = record["Values"]

	        # Guard each record separately so one bad entry cannot crash the page.
	        try:
	            if isinstance(values, (dict, list)):
	                if isinstance(values, dict):
	                    # Dictionary data (Table View)
	                    frame = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
	                    st.subheader(f"{category} (Table View)")
	                else:
	                    # List data (List View)
	                    frame = pd.DataFrame(values, columns=["Items"])
	                    st.subheader(f"{category} (List View)")
	                st.dataframe(frame)
	                collected_rows.extend(frame.to_dict(orient="records"))
	            elif isinstance(values, str):
	                # Text data (Summary View)
	                st.subheader(f"{category} (Summary)")
	                st.table(pd.DataFrame({"Insights": [values]}))
	                collected_rows.append({"Category": category, "Values": values})
	            else:
	                st.warning(f"Unsupported data format for category: {category}")

	        except Exception as e:
	            logging.error(f"Error processing {category}: {e}")
	            st.error(f"Failed to display {category} as a table due to an error.")

	    return collected_rows

	def parse_analyst_output(raw_output):
	    """Split the analyst's output into key insights and data insights.

	    Args:
	        raw_output: Either already-structured data (a list of dicts, or a
	            single dict) or a string containing such data. Strings may be a
	            Python literal or JSON, optionally wrapped in code fences or
	            surrounded by prose.

	    Returns:
	        A tuple (key_insights, data_insights). key_insights holds the
	        'Values' of every record whose 'Type' is 'Key Insight';
	        data_insights holds every other record that has a 'Category'.
	        Malformed records are skipped with a logged warning. Input that
	        cannot be parsed at all yields two empty lists and a logged error.
	    """
	    key_insights = []
	    data_insights = []

	    if isinstance(raw_output, str):
	        structured_data = _load_structured_text(raw_output)
	    else:
	        structured_data = raw_output

	    # A lone record is treated as a one-element list.
	    if isinstance(structured_data, dict):
	        structured_data = [structured_data]

	    try:
	        records = list(structured_data)
	    except TypeError:
	        logging.error("Error parsing analyst output: no list of records could be read from it")
	        return key_insights, data_insights

	    for item in records:
	        # Skip bad records one at a time so the rest are still parsed.
	        if not isinstance(item, dict) or "Category" not in item:
	            logging.warning(f"Missing 'Category' in item: {item}")
	            continue

	        if item.get("Type") == "Key Insight":
	            if "Values" not in item:
	                logging.warning(f"Missing 'Values' in key insight: {item}")
	                continue
	            key_insights.append(item["Values"])
	        else:
	            # 'Data Insight' and untyped records are both kept as data.
	            data_insights.append(item)

	    return key_insights, data_insights


	def _load_structured_text(text):
	    """Return the list/tuple/dict encoded in text, or None if there is none.

	    The whole (stripped) text is tried first; if that fails, the span from
	    the first '[' to the last ']' is tried, which covers replies wrapped in
	    code fences or prose. Each candidate is read as a Python literal and
	    then as JSON.
	    """
	    import json  # local import: json is not in this module's import block

	    stripped = text.strip()
	    candidates = [stripped]
	    start = stripped.find("[")
	    end = stripped.rfind("]")
	    if 0 <= start < end:
	        bracket_span = stripped[start:end + 1]
	        if bracket_span != stripped:
	            candidates.append(bracket_span)

	    for candidate in candidates:
	        for loader in (ast.literal_eval, json.loads):
	            try:
	                parsed = loader(candidate)
	            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	                continue
	            if isinstance(parsed, (list, tuple, dict)):
	                return parsed
	    return None


	# Main Execution Block
	def _remove_temp_files(paths):
	    """Best-effort removal of temporary files created for the PDF export."""
	    for path in paths:
	        try:
	            os.remove(path)
	        except OSError as cleanup_error:
	            logging.warning(f"Could not remove temporary file {path}: {cleanup_error}")


	# Main flow: on button press, run the crew with the user's inputs, show the
	# three task outputs, render charts/tables from the analyst's structured
	# output, and offer the report as a PDF download.
	if st.button("Generate Patent Insights"):
	    with st.spinner('Processing...'):
	        try:
	            if not patent_area or not stakeholder:
	                st.error("Please provide both Patent Technology Area and Stakeholder.")
	            else:
	                # Everything that needs `results` lives in this branch so
	                # missing inputs can never reach it.
	                start_time = time.time()
	                logging.info(f"Starting analysis with Topic: {patent_area}, Stakeholder: {stakeholder}")
	                results = crew.kickoff(inputs={"topic": patent_area, "stakeholder": stakeholder})
	                elapsed_time = time.time() - start_time

	                # Defaults so the PDF step is well-defined even when the
	                # analyst produced nothing usable.
	                key_insights = []
	                data_insights = []
	                charts = []

	                # The indices below assume tasks_output follows the order in
	                # which the tasks were given to Crew: plan, analyse, write.
	                writer_output = getattr(results.tasks_output[2], "raw", "No details available.")
	                has_report = bool(writer_output and writer_output.strip())
	                if has_report:
	                    st.markdown("### Final Report")
	                    st.write(writer_output)
	                else:
	                    st.warning("No final report available.")

	                # Expandable section for detailed insights
	                with st.expander("Explore Detailed Insights"):
	                    tab1, tab2 = st.tabs(["Planner's Insights", "Analyst's Analysis"])

	                    # Planner's Insights
	                    with tab1:
	                        planner_output = getattr(results.tasks_output[0], "raw", "No details available.")
	                        if planner_output and planner_output.strip():
	                            st.write(planner_output)
	                        else:
	                            st.warning("No planner insights available.")

	                    # Analyst's Analysis
	                    with tab2:
	                        analyst_output = getattr(results.tasks_output[1], "raw", "No details available.")
	                        if analyst_output and analyst_output.strip():
	                            st.write(analyst_output)

	                            # Parse Analyst Output (Key Insights + Data Insights)
	                            key_insights, data_insights = parse_analyst_output(analyst_output)
	                            st.subheader("Structured Analyst Output")
	                            st.write(data_insights)

	                            # Create Visualizations if enabled
	                            if enable_advanced_analysis and data_insights:
	                                charts = create_visualizations(data_insights)
	                            else:
	                                st.info("No data insights available for visualizations.")

	                            # Display Data Tables (called for its on-page
	                            # output; the PDF receives data_insights).
	                            display_table(data_insights)
	                        else:
	                            st.warning("No analyst analysis available.")

	                # Notify user that the analysis is complete
	                st.success(f"Analysis completed in {elapsed_time:.2f} seconds.")

	                # Generate the PDF report with Key Insights and Data Insights
	                temp_files = list(charts)
	                try:
	                    if has_report:
	                        pdf_path = generate_pdf_report(
	                            result=writer_output,
	                            charts=charts,
	                            table_data=data_insights,
	                            metadata={"Technology Area": patent_area, "Stakeholder": stakeholder},
	                            key_insights=key_insights
	                        )
	                        temp_files.append(pdf_path)

	                        # Read the PDF into memory so the temporary file can
	                        # be removed once the download button is set up.
	                        with open(pdf_path, "rb") as report_file:
	                            pdf_bytes = report_file.read()

	                        st.download_button(
	                            label="📄 Download Report",
	                            data=pdf_bytes,
	                            file_name="Patent_Strategy_Report.pdf",
	                            mime="application/pdf"
	                        )
	                    else:
	                        st.warning("Report generation skipped due to missing content.")
	                finally:
	                    # Chart PNGs and the PDF are created with delete=False;
	                    # remove them here so repeated runs do not pile up files.
	                    _remove_temp_files(temp_files)

	        except Exception as e:
	            # Top-level boundary: log the full traceback, show a short message.
	            error_message = traceback.format_exc()
	            logging.error(f"An error occurred during execution:\n{error_message}")
	            st.error(f"⚠️ An unexpected error occurred:\n{e}")