test-analyst-outputs

Sleeping

App Files Files Community

DrishtiSharma committed 24 days ago

Commit

488f45f

verified ·

1 Parent(s): fdd059c

Create interim.py

Browse files

Files changed (1) hide show

interim.py +509 -0

interim.py ADDED Viewed

	@@ -0,0 +1,509 @@

+import streamlit as st
+from crewai import Agent, Task, Crew
+import os
+from langchain_groq import ChatGroq
+from langchain_openai import ChatOpenAI
+from fpdf import FPDF
+import pandas as pd
+import plotly.express as px
+import tempfile
+import time
+import ast
+import logging
+import traceback
+# Setup logging
+# Configure the root logger at INFO level with "timestamp - level - message" lines.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Title and Application Introduction
+# The title is written with st.title; the one-line description is written to the sidebar.
+st.title("Patent Strategy and Innovation Consultant")
+st.sidebar.write(
+    "This application provides actionable insights and comprehensive analysis for patent-related strategies."
+)
+# User Input Section
+# Only the header is placed in the sidebar; the two text inputs below are created with
+# st.text_input rather than st.sidebar.text_input. Both are pre-filled with example
+# values and are later passed to crew.kickoff as the "topic" and "stakeholder" inputs.
+st.sidebar.header("User Inputs")
+patent_area = st.text_input("Enter Patent Technology Area", value="Transparent Antennas for Windshields")
+stakeholder = st.text_input("Enter Stakeholder", value="Patent Attorneys")
+# Initialize LLM
+# Stays None unless the selected provider has its API key available.
+llm = None
+# Model Selection
+model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True)
+# API Key Validation and LLM Initialization
+# Keys are read from the environment only; there is no UI field for them.
+groq_api_key = os.getenv("GROQ_API_KEY")
+openai_api_key = os.getenv("OPENAI_API_KEY")
+if model_choice == "llama-3.3-70b":
+    if groq_api_key:
+        llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
+    else:
+        st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
+elif model_choice == "GPT-4o":
+    if openai_api_key:
+        llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o")
+    else:
+        st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")
+# Advanced Options
+# Two sidebar checkboxes, both checked by default.
+st.sidebar.header("Advanced Options")
+# Read by the main execution block to decide whether create_visualizations is called.
+enable_advanced_analysis = st.sidebar.checkbox("Enable Advanced Analysis", value=True)
+# NOTE(review): this flag is not read anywhere else in the visible code - confirm
+# whether it is still needed or should gate the chart rendering.
+enable_custom_visualization = st.sidebar.checkbox("Enable Custom Visualizations", value=True)
+# Agent Customization
+# Default goal prompts, defined once and reused both as the pre-filled text of the
+# sidebar editors and as the values used when customization is switched off.
+# {topic} and {stakeholder} are placeholders filled from the inputs given to crew.kickoff.
+# Every fragment ends with a space so adjacent literals do not fuse words together
+# (the previous text produced "sector.Avoid", "analysisinto", "Technologies,Untapped").
+DEFAULT_PLANNER_GOAL = (
+    "Conduct comprehensive, data-driven research on patent filings, technological innovation, and market dynamics strictly within the {topic} sector. "
+    "Avoid unrelated or generic recommendations. "
+    "Identify key players, emerging technologies, competitor strategies, and market gaps with factually accurate and verifiable data. "
+    "Strictly avoid hallucinated, fabricated, or speculative findings. "
+    "Deliver precise, actionable suggestions tailored to the specific needs and strategic goals of {stakeholder}."
+)
+DEFAULT_WRITER_GOAL = (
+    "Develop a high-impact, professionally structured insights report that integrates verified research data and strategic analysis "
+    "into a cohesive and compelling narrative. "
+    "Organize findings into well-defined, data-driven sections such as Market Trends, Competitive Landscape, Emerging Technologies, "
+    "Untapped Innovation Hotspots, and Strategic Opportunities -- providing actionable insights and prioritized recommendations "
+    "strictly aligned with {stakeholder}'s strategic objectives. "
+    "Ensure all insights, emerging technologies, and identified innovation gaps are fact-based, verifiable, and directly relevant to the {topic}. "
+    "Explicitly avoid hallucinated, fabricated, or speculative content throughout the report."
+)
+DEFAULT_ANALYST_GOAL = (
+    "Perform precise, data-driven statistical analysis of patent filings, growth trends, and innovation distribution strictly within the {topic} sector, "
+    "specifically customized to the strategic needs of {stakeholder}. "
+    "Identify top regions, leading assignees/companies, and emerging technologies that are explicitly and directly relevant to {topic}. "
+    "Strictly avoid hallucinated, fabricated, or speculative statistical data and patent numbers in the analysis. "
+    "Conduct a thorough market gap analysis, identifying 4-5 highly actionable and verifiable innovation opportunities aligned with {topic}, "
+    "emphasizing sustainability, emerging technology integration, industry collaboration, and competitor positioning. "
+    "Evaluate competitor patent strategies with factual data to uncover untapped opportunities and competitive advantages. "
+    "All innovation hotspots and emerging technology suggestions must be strictly aligned with {topic} - no generic or unrelated recommendations are allowed. "
+    "Deliver highly actionable, data-driven insights to support strategic decision-making and long-term growth. "
+    "Present findings in a structured, well-organized format using 'Category' and 'Values' keys for easy data interpretation."
+)
+st.sidebar.header("Agent Customization")
+with st.sidebar.expander("Customize Agent Goals", expanded=False):
+    enable_customization = st.checkbox("Enable Custom Goals")
+    if enable_customization:
+        # Editable copies of the defaults; whatever the user leaves in the boxes is used.
+        planner_goal = st.text_area("Planner Goal", value=DEFAULT_PLANNER_GOAL)
+        writer_goal = st.text_area("Writer Goal", value=DEFAULT_WRITER_GOAL)
+        analyst_goal = st.text_area("Analyst Goal", value=DEFAULT_ANALYST_GOAL)
+    else:
+        planner_goal = DEFAULT_PLANNER_GOAL
+        writer_goal = DEFAULT_WRITER_GOAL
+        analyst_goal = DEFAULT_ANALYST_GOAL
+# Agent Definitions
+# Options common to all three agents: no delegation, verbose output, and the LLM chosen above.
+_shared_agent_options = dict(allow_delegation=False, verbose=True, llm=llm)
+# Researches the topic; its goal text comes from planner_goal.
+planner = Agent(
+    role="Patent Research Consultant",
+    goal=planner_goal,
+    backstory="You're tasked with researching {topic} patents and identifying key trends and players. Your work supports the Patent Writer and Data Analyst.",
+    **_shared_agent_options,
+)
+# Writes the stakeholder-facing report; its goal text comes from writer_goal.
+writer = Agent(
+    role="Patent Insights Writer",
+    goal=writer_goal,
+    backstory="Using the research from the Planner and data from the Analyst, craft a professional document summarizing patent insights for {stakeholder}.",
+    **_shared_agent_options,
+)
+# Produces the structured statistical output; its goal text comes from analyst_goal.
+analyst = Agent(
+    role="Patent Data Analyst",
+    goal=analyst_goal,
+    backstory="Analyze patent filing data and innovation trends in {topic} to provide statistical insights. Your analysis will guide the Writer's final report.",
+    **_shared_agent_options,
+)
+# Task Definitions
+# Each description is a numbered list of steps joined with newlines; {topic} and
+# {stakeholder} are placeholders filled from the inputs given to crew.kickoff.
+# Research task, assigned to the planner agent.
+plan = Task(
+    description="\n".join([
+        "1. Conduct comprehensive, fact-based research on recent trends in {topic} patent filings and innovation.",
+        "2. Identify key players, emerging technologies, and market gaps that are strictly relevant to {topic}.",
+        "3. Ensure all findings—especially emerging technologies and innovation hotspots—are explicitly aligned with {topic}.",
+        "4. Avoid speculative, fabricated, or unrelated content entirely.",
+        "5. Provide actionable, data-backed strategic recommendations aligned with {stakeholder}'s goals.",
+        "6. Limit the output to 600 words.",
+    ]),
+    expected_output="A fact-driven research document with strictly relevant insights, strategic recommendations, and key statistics.",
+    agent=planner,
+)
+# Report-writing task, assigned to the writer agent.
+write = Task(
+    description="\n".join([
+        "1. Use the Planner's and Analyst's strictly topic-aligned outputs to craft a professional patent insights document.",
+        "2. Include key findings, visual aids, and actionable strategies strictly related to {topic}.",
+        "3. Highlight strategic directions and strictly relevant innovation opportunities.",
+        "4. Incorporate well-structured tables for key statistics and example inventions without using any fabricated data or fake patent numbers.",
+        "5. Avoid any speculative, fabricated, or unrelated content.",
+        "6. Limit the document to 600 words.",
+    ]),
+    expected_output="A polished, stakeholder-ready patent insights report with actionable, strictly relevant recommendations.",
+    agent=writer,
+)
+# Statistical-analysis task, assigned to the analyst agent. Its output is what
+# parse_analyst_output later tries to read as a list of dictionaries, so the example
+# below must itself be a well-formed literal.
+# Braces in the example are doubled ({{ }}) so they are treated as literal braces
+# rather than as {placeholder} fields. The first example row previously lacked its
+# closing "}}", which showed the model an unbalanced dictionary.
+analyse = Task(
+    description=(
+        "1. Conduct a comprehensive statistical analysis of patent filing trends, innovation hot spots, and future growth projections in the {topic} sector.\n"
+        "2. Identify and rank the top regions, leading assignees/companies driving innovation.\n"
+        "3. Highlight regional innovation trends and the distribution of emerging technologies across different geographies.\n"
+        "4. Provide actionable insights and strategic recommendations based on the data.\n"
+        "5. Categorize outputs as either:\n"
+        "   - 'Data Insight' for visualizations and tables (quantitative data, trends, technologies).\n"
+        "   - 'Key Insight' for strategic recommendations and innovation opportunities.\n"
+        "6. Example Output Format:\n"
+        "[\n"
+        "  {{'Category': 'Top Regions', 'Type': 'Data Insight', 'Values': {{'North America': 120, 'Europe': 95}}}},\n"
+        "  {{'Category': 'Emerging Technologies', 'Type': 'Data Insight', 'Values': ['Transparent Conductive Films']}},\n"
+        "  {{'Category': 'Strategic Insights', 'Type': 'Key Insight', 'Values': 'Collaborate with material science companies to develop advanced transparent antennas.'}},\n"
+        "  {{'Category': 'Innovation Gaps', 'Type': 'Key Insight', 'Values': 'Limited patents in self-healing transparent materials present a growth opportunity.'}}\n"
+        "]\n"
+        "7. Ensure all data is factually accurate, verifiable, and strictly aligned with {topic}."
+    ),
+    expected_output="A structured dataset combining Data Insights for comprehensive visualizations and table reporting, and Key Insights for strategic actions.",
+    agent=analyst
+)
+# Assemble the crew. The task order matters: the main execution block reads
+# results.tasks_output[0] as the planner output, [1] as the analyst output and
+# [2] as the writer output, which matches tasks=[plan, analyse, write].
+crew = Crew(
+    agents=[planner, analyst, writer],
+    tasks=[plan, analyse, write],
+    verbose=True
+)
+# PDF Report Generation
+def generate_pdf_report(result, charts=None, table_data=None, metadata=None, key_insights=None):
+    """Write the report and its optional extras to a temporary PDF and return its path.
+
+    Args:
+        result: Main report text; converted with str() before being written.
+        charts: Optional iterable of image file paths, one page per image.
+        table_data: Optional iterable of rows; each row is written as str(row).
+        metadata: Optional mapping printed as "key: value" lines under the title.
+        key_insights: Optional iterable of insights listed on their own page.
+
+    Returns:
+        Path of the generated PDF. The file is created with delete=False, so the
+        caller is responsible for removing it.
+    """
+    # Reserve a unique path and close the handle before FPDF writes to it; writing to
+    # a path that is still held open by NamedTemporaryFile fails on Windows.
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
+        pdf_path = temp_pdf.name
+    pdf = FPDF()
+    pdf.add_page()
+    # Add DejaVu fonts (regular and bold) from the fixed system font directory.
+    pdf.add_font('DejaVu', '', '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', uni=True)
+    pdf.add_font('DejaVu', 'B', '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf', uni=True)
+    pdf.set_font("DejaVu", size=12)
+    pdf.set_auto_page_break(auto=True, margin=15)
+    # Title (Bold)
+    pdf.set_font("DejaVu", size=16, style="B")
+    pdf.cell(200, 10, txt="Patent Strategy and Innovation Report", ln=True, align="C")
+    pdf.ln(10)
+    # Metadata Section
+    if metadata:
+        pdf.set_font("DejaVu", size=10)
+        for key, value in metadata.items():
+            pdf.cell(200, 10, txt=f"{key}: {value}", ln=True)
+    # Report Content
+    pdf.set_font("DejaVu", size=12)
+    pdf.multi_cell(0, 10, txt=str(result))
+    # Key Insights Section
+    if key_insights:
+        pdf.add_page()
+        pdf.set_font("DejaVu", size=14, style="B")
+        pdf.cell(200, 10, txt="Key Strategic Insights", ln=True)
+        pdf.ln(5)
+        pdf.set_font("DejaVu", size=12)
+        for insight in key_insights:
+            pdf.multi_cell(0, 10, txt=f"- {insight}")
+    # Insert Charts
+    if charts:
+        for chart_path in charts:
+            # Skip missing files before adding a page, so a failed export does not
+            # leave an empty page in the report.
+            if not os.path.isfile(chart_path):
+                logging.error(f"Error including chart: file not found: {chart_path}")
+                continue
+            try:
+                pdf.add_page()
+                pdf.image(chart_path, x=10, y=20, w=180)
+            except Exception as e:
+                logging.error(f"Error including chart: {e}")
+    # Insert Tables
+    if table_data:
+        pdf.add_page()
+        pdf.set_font("DejaVu", size=10)
+        pdf.cell(200, 10, txt="Consolidated Data Table:", ln=True, align="L")
+        for row in table_data:
+            # multi_cell wraps long rows; a single-line cell would run past the page edge.
+            pdf.multi_cell(0, 10, txt=str(row))
+    pdf.output(pdf_path)
+    return pdf_path
+# Data Validation
+def validate_analyst_output(analyst_output):
+    """Return analyst_output unchanged when it is a usable list of dictionaries.
+
+    The input must be non-empty, a list, contain only dicts, and every dict must have
+    both the 'Category' and 'Values' keys. On the first failed check a Streamlit
+    warning is shown and None is returned.
+    """
+    if not analyst_output:
+        st.warning("No data available for analysis.")
+        return None
+    is_list_of_dicts = isinstance(analyst_output, list) and all(
+        isinstance(entry, dict) for entry in analyst_output
+    )
+    if not is_list_of_dicts:
+        st.warning("Analyst output must be a list of dictionaries.")
+        return None
+    required_keys = {'Category', 'Values'}
+    for entry in analyst_output:
+        if not required_keys.issubset(entry.keys()):
+            st.warning(f"Each dictionary must contain keys: {required_keys}")
+            return None
+    return analyst_output
+# Visualization and Table Display
+def _build_chart(category, values):
+    """Return a Plotly figure for dict or list values, or None for any other type."""
+    if isinstance(values, dict):
+        # Dictionary data: keys become labels, values become counts.
+        df = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
+        bar_title = f"{category} Analysis"
+    elif isinstance(values, list):
+        # List data: count how often each entry occurs.
+        df = pd.DataFrame(values, columns=["Label"])
+        df = df["Label"].value_counts().reset_index()
+        df.columns = ["Label", "Count"]
+        bar_title = f"{category} Frequency"
+    else:
+        return None
+    # Pie chart for five or fewer labels, bar chart otherwise.
+    if len(df) <= 5:
+        return px.pie(df, names="Label", values="Count", title=f"{category} Distribution")
+    return px.bar(df, x="Label", y="Count", title=bar_title)
+def create_visualizations(analyst_output):
+    """Render one chart per dict/list category and return the exported PNG paths.
+
+    String values are shown as a one-row table instead of a chart. Charts are
+    displayed with st.plotly_chart and then exported to temporary PNG files for the
+    PDF report. The files are created with delete=False and are not removed here.
+
+    Returns:
+        List of PNG paths for the charts that were exported successfully; empty when
+        the input fails validation.
+    """
+    chart_paths = []
+    validated_data = validate_analyst_output(analyst_output)
+    if not validated_data:
+        return chart_paths
+    for item in validated_data:
+        category = item["Category"]
+        values = item["Values"]
+        try:
+            # Handle text data: no chart, just a table.
+            if isinstance(values, str):
+                st.subheader(f"{category} Insights")
+                st.table(pd.DataFrame({"Insights": [values]}))
+                continue
+            chart = _build_chart(category, values)
+            if chart is None:
+                st.warning(f"Unsupported data format for category: {category}")
+                continue
+            # Display the chart in Streamlit
+            st.plotly_chart(chart)
+        except Exception as e:
+            st.error(f"Failed to generate visualization for {category}: {e}")
+            logging.error(f"Error in {category} visualization: {e}")
+            continue
+        # Export separately so an export problem is not reported as a failed
+        # visualization after the chart has already been displayed.
+        try:
+            # Close the handle first; writing to a path that is still open fails on Windows.
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
+                chart_path = temp_chart.name
+            chart.write_image(chart_path)
+            chart_paths.append(chart_path)
+        except Exception as e:
+            st.warning(f"The {category} chart could not be exported for the PDF report: {e}")
+            logging.error(f"Error exporting {category} chart: {e}")
+    return chart_paths
+def display_table(analyst_output):
+    """Show each category as a Streamlit table and collect the displayed rows.
+
+    Dict values become a Label/Count frame, list values a single "Items" column, and
+    string values a one-row summary table. Other types produce a warning. Errors for
+    one category are logged and reported without stopping the remaining categories.
+
+    Returns:
+        List of row dictionaries gathered from all categories; empty when the input
+        fails validation.
+    """
+    rows = []
+    entries = validate_analyst_output(analyst_output)
+    if not entries:
+        return rows
+    for entry in entries:
+        name, payload = entry["Category"], entry["Values"]
+        # Guard each category so one bad entry does not stop the rest.
+        try:
+            if isinstance(payload, str):
+                # Text data (Summary View)
+                st.subheader(f"{name} (Summary)")
+                st.table(pd.DataFrame({"Insights": [payload]}))
+                rows.append({"Category": name, "Values": payload})
+            elif isinstance(payload, dict):
+                # Dictionary data (Table View)
+                frame = pd.DataFrame(list(payload.items()), columns=["Label", "Count"])
+                st.subheader(f"{name} (Table View)")
+                st.dataframe(frame)
+                rows.extend(frame.to_dict(orient="records"))
+            elif isinstance(payload, list):
+                # List data (List View)
+                frame = pd.DataFrame(payload, columns=["Items"])
+                st.subheader(f"{name} (List View)")
+                st.dataframe(frame)
+                rows.extend(frame.to_dict(orient="records"))
+            else:
+                st.warning(f"Unsupported data format for category: {name}")
+        except Exception as e:
+            logging.error(f"Error processing {name}: {e}")
+            st.error(f"Failed to display {name} as a table due to an error.")
+    return rows
+def _load_structured_output(raw_output):
+    """Turn raw analyst output into a Python object; non-strings are returned as-is.
+
+    Tries the whole text first, then the span from the first "[" to the last "]",
+    which covers Markdown code fences and prose wrapped around the list. Each
+    candidate is tried as a Python literal and then as JSON (for null/true/false).
+
+    Raises:
+        ValueError: If no candidate can be parsed.
+    """
+    if not isinstance(raw_output, str):
+        return raw_output
+    import json  # local import: json is not among the file-level imports
+    text = raw_output.strip()
+    candidates = [text]
+    start, end = text.find("["), text.rfind("]")
+    if 0 <= start < end:
+        bracketed = text[start:end + 1]
+        if bracketed != text:
+            candidates.append(bracketed)
+    for candidate in candidates:
+        for loader in (ast.literal_eval, json.loads):
+            try:
+                return loader(candidate)
+            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
+                continue
+    raise ValueError("analyst output is not a Python or JSON literal")
+def parse_analyst_output(raw_output):
+    """Split analyst output into key insights and data insights.
+
+    Args:
+        raw_output: Either a string holding a list of dictionaries (Python literal or
+            JSON, optionally wrapped in a code fence or prose) or an already parsed
+            list. A single dictionary is treated as a one-item list.
+
+    Returns:
+        Tuple (key_insights, data_insights). key_insights holds the 'Values' of items
+        whose 'Type' is "Key Insight"; data_insights holds every other item that has
+        a 'Category'. Both are empty when the output cannot be parsed.
+    """
+    key_insights = []
+    data_insights = []
+    try:
+        structured_data = _load_structured_output(raw_output)
+    except ValueError as e:
+        logging.error(f"Error parsing analyst output: {e}")
+        return key_insights, data_insights
+    if isinstance(structured_data, dict):
+        structured_data = [structured_data]
+    if not isinstance(structured_data, (list, tuple)):
+        logging.error(f"Error parsing analyst output: expected a list, got {type(structured_data).__name__}")
+        return key_insights, data_insights
+    for item in structured_data:
+        # Skip malformed items one at a time instead of discarding everything after them.
+        if not isinstance(item, dict):
+            logging.warning(f"Skipping non-dictionary item: {item}")
+            continue
+        if "Category" not in item:
+            logging.warning(f"Missing 'Category' in item: {item}")
+            continue
+        if item.get("Type") == "Key Insight":
+            if "Values" not in item:
+                logging.warning(f"Missing 'Values' in key insight: {item}")
+                continue
+            key_insights.append(item["Values"])
+        else:
+            # "Data Insight" and untyped items are both treated as data.
+            data_insights.append(item)
+    return key_insights, data_insights
+# Main Execution Block
+# Runs the crew when the button is pressed, shows the three task outputs, and offers
+# the PDF report for download.
+if st.button("Generate Patent Insights"):
+    # Validate before starting: previously a missing input fell through to code that
+    # read an undefined `results` and surfaced as an unrelated NameError.
+    if not patent_area or not stakeholder:
+        st.error("Please provide both Patent Technology Area and Stakeholder.")
+    elif llm is None:
+        # The API-key check above already explained why; do not run the crew without
+        # the model the user selected.
+        st.error("No language model is configured. Set the API key for the selected model and try again.")
+    else:
+        with st.spinner('Processing...'):
+            try:
+                # Start the timer
+                start_time = time.time()
+                # Kick off the crew with user inputs
+                logging.info(f"Starting analysis with Topic: {patent_area}, Stakeholder: {stakeholder}")
+                results = crew.kickoff(inputs={"topic": patent_area, "stakeholder": stakeholder})
+                # Calculate elapsed time
+                elapsed_time = time.time() - start_time
+                # Defaults so the PDF step still works when the analyst output is empty.
+                charts = []
+                key_insights = []
+                data_insights = []
+                # Extract Writer's Output (tasks_output follows the crew's task order).
+                writer_output = getattr(results.tasks_output[2], "raw", "No details available.")
+                has_report = bool(writer_output and writer_output.strip())
+                if has_report:
+                    st.markdown("### Final Report")
+                    st.write(writer_output)
+                else:
+                    st.warning("No final report available.")
+                # Expandable section for detailed insights
+                with st.expander("Explore Detailed Insights"):
+                    tab1, tab2 = st.tabs(["Planner's Insights", "Analyst's Analysis"])
+                    # Planner's Insights
+                    with tab1:
+                        planner_output = getattr(results.tasks_output[0], "raw", "No details available.")
+                        if planner_output and planner_output.strip():
+                            st.write(planner_output)
+                        else:
+                            st.warning("No planner insights available.")
+                    # Analyst's Analysis
+                    with tab2:
+                        analyst_output = getattr(results.tasks_output[1], "raw", "No details available.")
+                        if analyst_output and analyst_output.strip():
+                            st.write(analyst_output)
+                            # Parse Analyst Output (Key Insights + Data Insights)
+                            key_insights, data_insights = parse_analyst_output(analyst_output)
+                            st.subheader("Structured Analyst Output")
+                            st.write(data_insights)
+                            # Create visualizations only when there is data and the
+                            # option is enabled, with a message that matches the reason.
+                            if not data_insights:
+                                st.info("No data insights available for visualizations.")
+                            elif enable_advanced_analysis:
+                                charts = create_visualizations(data_insights)
+                            else:
+                                st.info("Advanced analysis is disabled; skipping visualizations.")
+                            # Display Data Tables (called for its on-page output).
+                            display_table(data_insights)
+                        else:
+                            st.warning("No analyst analysis available.")
+                # Notify user that the analysis is complete
+                st.success(f"Analysis completed in {elapsed_time:.2f} seconds.")
+                # Generate the PDF report with Key Insights and Data Insights
+                if has_report:
+                    pdf_path = generate_pdf_report(
+                        result=writer_output,
+                        charts=charts,
+                        table_data=data_insights,
+                        metadata={"Technology Area": patent_area, "Stakeholder": stakeholder},
+                        key_insights=key_insights
+                    )
+                    # Download button for the generated PDF
+                    with open(pdf_path, "rb") as report_file:
+                        st.download_button(
+                            label="📄 Download Report",
+                            data=report_file,
+                            file_name="Patent_Strategy_Report.pdf",
+                            mime="application/pdf"
+                        )
+                else:
+                    st.warning("Report generation skipped due to missing content.")
+            except Exception as e:
+                # Top-level boundary: log the full traceback, show the short message.
+                error_message = traceback.format_exc()
+                logging.error(f"An error occurred during execution:\n{error_message}")
+                st.error(f"⚠️ An unexpected error occurred:\n{e}")