test-analyst-outputs

Sleeping

App Files Files Community

DrishtiSharma committed about 1 month ago

Commit

0d8421c

verified ·

1 Parent(s): 242deae

Update test.py

Browse files

Files changed (1) hide show

test.py +210 -89

test.py CHANGED Viewed

@@ -8,6 +8,7 @@ import pandas as pd
 import plotly.express as px
 import tempfile
 import time
 import logging
 # Setup logging
@@ -28,8 +29,8 @@ stakeholder = st.text_input("Enter Stakeholder", value="Patent Attorneys")
 llm = None
 # Model Selection
-#st.header("Model Selection")
-model_choice = st.selectbox("Select LLM", ["OpenAI Model","Groq-based LLM"])
 # API Key Validation and LLM Initialization
 groq_api_key = os.getenv("GROQ_API_KEY")
@@ -37,19 +38,20 @@ openai_api_key = os.getenv("OPENAI_API_KEY")
 #llm = ChatGroq(groq_api_key=os.getenv("GROQ_API_KEY"), model="groq/llama-3.3-70b-versatile")
-if model_choice == "Groq-based LLM":
     if not groq_api_key:
         st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
         llm = None
     else:
         llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
-elif model_choice == "OpenAI Model":
     if not openai_api_key:
         st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")
         llm = None
     else:
         llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o")
 # Advanced Options
 st.sidebar.header("Advanced Options")
 enable_advanced_analysis = st.sidebar.checkbox("Enable Advanced Analysis", value=True)
@@ -70,13 +72,20 @@ with st.sidebar.expander("Customize Agent Goals", expanded=False):
         )
         analyst_goal = st.text_area(
             "Analyst Goal",
-            value="Perform detailed statistical analysis of patent filings, growth trends, and innovation distribution."
         )
     else:
         planner_goal = "Research trends in patent filings and technological innovation, identify key players, and provide strategic recommendations."
         writer_goal = "Craft a professional insights document summarizing trends, strategies, and actionable outcomes for stakeholders."
-        analyst_goal = "Perform detailed statistical analysis of patent filings, growth trends, and innovation distribution."
 # Agent Definitions
 planner = Agent(
@@ -119,7 +128,7 @@ plan = Task(
         "2. Identify key players and emerging technologies.\n"
         "3. Provide recommendations for stakeholders on strategic directions.\n"
         "4. Identify key statistics such as top regions, top players, and hot areas of innovation.\n"
-        "5. Limit the output to 500 words."
     ),
     expected_output="A research document with structured insights, strategic recommendations, and key statistics.",
     agent=planner
@@ -131,7 +140,7 @@ write = Task(
         "2. Include key findings, visual aids, and actionable strategies.\n"
         "3. Suggest strategic directions and highlight untapped innovation areas.\n"
         "4. Incorporate summarized tables for key statistics and example inventions.\n"
-        "5. Limit the document to 650 words."
     ),
     expected_output="A polished, stakeholder-ready patent insights document with actionable recommendations.",
     agent=writer
@@ -139,16 +148,29 @@ write = Task(
 analyse = Task(
     description=(
-        "1. Perform statistical analysis of patent filing trends, innovation hot spots, and growth projections.\n"
-        "2. Identify top regions, key players, and technology combinations.\n"
-        "3. Generate visualizations such as heatmaps, bar charts, and multi-line charts for trends.\n"
-        "4. Provide structured output with fields 'Category' and 'Values' for visualization.\n"
-        "5. Collaborate with the Planner and Writer to align on data needs."
     ),
-    expected_output="A detailed statistical analysis with actionable insights, heatmaps, and trends.",
     agent=analyst
 )
 crew = Crew(
     agents=[planner, analyst, writer],
     tasks=[plan, analyse, write],
@@ -212,95 +234,194 @@ def validate_analyst_output(analyst_output):
 def create_visualizations(analyst_output):
     chart_paths = []
     validated_data = validate_analyst_output(analyst_output)
     if validated_data:
-        data = pd.DataFrame(validated_data)
-        try:
-            if data.empty:
-                raise ValueError("Data for visualizations is empty.")
-            bar_chart = px.bar(data, x="Category", y="Values", title="Patent Trends by Category")
-            st.plotly_chart(bar_chart)
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
-                bar_chart.write_image(temp_chart.name)
-                chart_paths.append(temp_chart.name)
-            pie_chart = px.pie(data, names="Category", values="Values", title="Category Distribution")
-            st.plotly_chart(pie_chart)
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
-                pie_chart.write_image(temp_chart.name)
-                chart_paths.append(temp_chart.name)
-            heatmap_chart = px.density_heatmap(data, x="Category", y="Values", title="Regional Patent Density")
-            st.plotly_chart(heatmap_chart)
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
-                heatmap_chart.write_image(temp_chart.name)
-                chart_paths.append(temp_chart.name)
-            multi_line_chart = px.line(data, x="Category", y="Values", title="Trends Over Time")
-            st.plotly_chart(multi_line_chart)
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_chart:
-                multi_line_chart.write_image(temp_chart.name)
-                chart_paths.append(temp_chart.name)
-        except Exception as e:
-            logging.error(f"Error generating visualization: {e}")
-            st.error(f"Error generating visualization: {e}")
     return chart_paths
 def display_table(analyst_output):
     table_data = []
     validated_data = validate_analyst_output(analyst_output)
     if validated_data:
-        data = pd.DataFrame(validated_data)
-        st.dataframe(data)
-        table_data = data.to_dict(orient="records")
-    return table_data
-# Main Execution Block
-if st.button("Generate Insights"):
-    if llm is None:
-        st.error("Cannot proceed without a valid API key for the selected model.")
-    else:
-        with st.spinner('Processing...'):
             try:
-                start_time = time.time()
-                results = crew.kickoff(inputs={"topic": patent_area, "stakeholder": stakeholder})
-                elapsed_time = time.time() - start_time
-                writer_output = getattr(results.tasks_output[2], "raw", "No details available.")
-                if writer_output:
-                    st.markdown("### Final Report")
-                    st.write(writer_output)
                 else:
-                    st.warning("No final report available.")
-                with st.expander("Explore Detailed Insights"):
-                    tab1, tab2 = st.tabs(["Planner's Insights", "Analyst's Analysis"])
-                    with tab1:
-                        planner_output = getattr(results.tasks_output[0], "raw", "No details available.")
-                        st.write(planner_output)
-                    with tab2:
-                        analyst_output = getattr(results.tasks_output[1], "raw", "No details available.")
-                        st.write(analyst_output)
-                        charts = []
-                        if enable_advanced_analysis:
-                            charts = create_visualizations(analyst_output)
-                        table_data = display_table(analyst_output)
-                st.success(f"Analysis completed in {elapsed_time:.2f} seconds.")
-                pdf_path = generate_pdf_report(
-                    writer_output,
-                    charts=charts,
-                    table_data=table_data,
-                    metadata={"Technology Area": patent_area, "Stakeholder": stakeholder}
-                )
-                with open(pdf_path, "rb") as report_file:
-                    st.download_button("Download Report", data=report_file, file_name="Patent_Strategy_Report.pdf")
-            except Exception as e:
-                logging.error(f"An error occurred during execution: {e}")
-                st.error(f"An error occurred during execution: {e}")

 import plotly.express as px
 import tempfile
 import time
+import ast
 import logging
 # Setup logging
 llm = None
 # Model Selection
+model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=1, horizontal=True)
 # API Key Validation and LLM Initialization
 groq_api_key = os.getenv("GROQ_API_KEY")
 #llm = ChatGroq(groq_api_key=os.getenv("GROQ_API_KEY"), model="groq/llama-3.3-70b-versatile")
+if model_choice == "llama-3.3-70b":
     if not groq_api_key:
         st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
         llm = None
     else:
         llm = ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
+elif model_choice == "GPT-4o":
     if not openai_api_key:
         st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")
         llm = None
     else:
         llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4o")
 # Advanced Options
 st.sidebar.header("Advanced Options")
 enable_advanced_analysis = st.sidebar.checkbox("Enable Advanced Analysis", value=True)
         )
         analyst_goal = st.text_area(
             "Analyst Goal",
+            value=(
+                "Perform detailed statistical analysis of patent filings, growth trends, and innovation distribution. "
+                "Identify top assignees/companies in the transparent antenna industry. "
+                "Provide structured output in a list of dictionaries with 'Category' and 'Values' keys for clear data presentation."
+            )
         )
     else:
         planner_goal = "Research trends in patent filings and technological innovation, identify key players, and provide strategic recommendations."
         writer_goal = "Craft a professional insights document summarizing trends, strategies, and actionable outcomes for stakeholders."
+        analyst_goal = (
+            "Perform detailed statistical analysis of patent filings, growth trends, and innovation distribution. "
+            "Identify top assignees/companies in the transparent antenna industry. "
+            "Provide structured output in a list of dictionaries with 'Category' and 'Values' keys for clear data presentation."
+        )
 # Agent Definitions
 planner = Agent(
         "2. Identify key players and emerging technologies.\n"
         "3. Provide recommendations for stakeholders on strategic directions.\n"
         "4. Identify key statistics such as top regions, top players, and hot areas of innovation.\n"
+        "5. Limit the output to 600 words."
     ),
     expected_output="A research document with structured insights, strategic recommendations, and key statistics.",
     agent=planner
         "2. Include key findings, visual aids, and actionable strategies.\n"
         "3. Suggest strategic directions and highlight untapped innovation areas.\n"
         "4. Incorporate summarized tables for key statistics and example inventions.\n"
+        "5. Limit the document to 600 words."
     ),
     expected_output="A polished, stakeholder-ready patent insights document with actionable recommendations.",
     agent=writer
 # Analyst task: the prompt that asks the analyst agent for structured
 # 'Category' / 'Values' records.
 # Braces inside the embedded examples are doubled ({{ }}); presumably this
 # escapes them from the template interpolation applied to task descriptions
 # -- TODO confirm against the CrewAI version in use.
 # NOTE(review): the prompt hard-codes "transparent antenna industry" while
 # crew.kickoff() is given a `topic` input -- confirm whether the prompt should
 # reference that input instead.
 analyse = Task(
     description=(
+        "1. Conduct a comprehensive statistical analysis of patent filing trends, innovation hot spots, and future growth projections in the transparent antenna industry.\n"
+        "2. Identify and rank the top regions, leading assignees/companies driving innovation.\n"
+        "3. Highlight regional innovation trends and the distribution of emerging technologies across different geographies.\n"
+        "4. Provide actionable insights and strategic recommendations based on the data.\n"
+        "5. Deliver structured output in a list of dictionaries with 'Category' and 'Values' fields:\n"
+        "   - 'Values' can be:\n"
+        "     a) A dictionary with counts for quantitative data (e.g., {{'Region A': 120, 'Region B': 95}}),\n"
+        "     b) A list of key items (technologies, companies, inventors), or\n"
+        "     c) Descriptive text for qualitative insights.\n"
+        "6. Example Output Format:\n"
+        "[\n"
+        "  {{'Category': 'Top Regions', 'Values': {{'North America': 120, 'Europe': 95, 'Asia-Pacific': 85}}}},\n"
+        "  {{'Category': 'Top Assignees', 'Values': {{'Company A': 40, 'Company B': 35}}}},\n"
+        "  {{'Category': 'Emerging Technologies', 'Values': ['Graphene Antennas', '5G Integration']}},\n"
+        "  {{'Category': 'Strategic Insights', 'Values': 'Collaborations between automotive and material science industries are accelerating innovation.'}}\n"
+        "]\n"
+        "7. Ensure that the output is clean, well-structured, and formatted for use in visualizations and tables."
     ),
+    expected_output="A structured, well-organized dataset with numeric, list-based, and descriptive insights for comprehensive visual and tabular reporting.",
     agent=analyst
 )
 crew = Crew(
     agents=[planner, analyst, writer],
     tasks=[plan, analyse, write],
 def create_visualizations(analyst_output):
     """Draw one chart per analyst category and return the saved PNG paths.

     The input is first passed through ``validate_analyst_output`` (defined
     elsewhere in this file); nothing is drawn when that returns a falsy value.
     For each record, the type of its ``"Values"`` entry decides the rendering:

     * dict  -> key/value pairs become ``Label``/``Count`` rows;
     * list  -> items are tallied with ``value_counts`` into ``Label``/``Count``;
     * str   -> shown as a one-row table, no chart is produced;
     * other -> a warning is shown, no chart is produced.

     Frames with at most five rows are drawn as a pie chart, larger ones as a
     bar chart. Every chart is shown in Streamlit and written to a temporary
     ``.png`` file that is intentionally left on disk (``delete=False``).

     Args:
         analyst_output: Analyst data handed to ``validate_analyst_output``.

     Returns:
         list: Paths of the PNG files written, in the order they were created.
     """
     saved_paths = []
     records = validate_analyst_output(analyst_output)
     if not records:
         return saved_paths

     for record in records:
         category = record["Category"]
         values = record["Values"]

         try:
             if isinstance(values, dict):
                 # Mapping of label -> count.
                 frame = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
                 bar_suffix = "Analysis"
             elif isinstance(values, list):
                 # Tally how often each item occurs in the list.
                 labels = pd.DataFrame(values, columns=["Label"])
                 frame = labels["Label"].value_counts().reset_index()
                 frame.columns = ["Label", "Count"]
                 bar_suffix = "Frequency"
             elif isinstance(values, str):
                 # Free text is displayed as a table; there is nothing to plot.
                 st.subheader(f"{category} Insights")
                 st.table(pd.DataFrame({"Insights": [values]}))
                 continue
             else:
                 st.warning(f"Unsupported data format for category: {category}")
                 continue

             # Few rows read better as a pie; anything larger becomes a bar chart.
             if len(frame) <= 5:
                 figure = px.pie(frame, names="Label", values="Count", title=f"{category} Distribution")
             else:
                 figure = px.bar(frame, x="Label", y="Count", title=f"{category} {bar_suffix}")

             st.plotly_chart(figure)

             # Keep a PNG copy so the caller can pass the paths on for the PDF export.
             with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as png_file:
                 figure.write_image(png_file.name)
                 saved_paths.append(png_file.name)
         except Exception as e:
             st.error(f"Failed to generate visualization for {category}: {e}")
             logging.error(f"Error in {category} visualization: {e}")

     return saved_paths
 def display_table(analyst_output):
     """Show each analyst category as a table and collect the displayed rows.

     The input is first passed through ``validate_analyst_output`` (defined
     elsewhere in this file); an empty list is returned when that yields a
     falsy value. Per record, the type of ``"Values"`` selects the view:

     * dict -> ``Label``/``Count`` dataframe ("Table View");
     * list -> single ``Items`` column dataframe ("List View");
     * str  -> one-row ``Insights`` table ("Summary");
     * any other type -> a warning is shown.

     Args:
         analyst_output: Analyst data handed to ``validate_analyst_output``.

     Returns:
         list: Row dictionaries. Dict/list categories contribute their
         dataframe rows; text categories contribute
         ``{"Category": ..., "Values": ...}``.
     """
     rows = []
     records = validate_analyst_output(analyst_output)
     if not records:
         return rows

     for record in records:
         category = record["Category"]
         values = record["Values"]

         # A failure in one category is reported and must not stop the others.
         try:
             if isinstance(values, (dict, list)):
                 if isinstance(values, dict):
                     frame = pd.DataFrame(list(values.items()), columns=["Label", "Count"])
                     heading = f"{category} (Table View)"
                 else:
                     frame = pd.DataFrame(values, columns=["Items"])
                     heading = f"{category} (List View)"
                 st.subheader(heading)
                 st.dataframe(frame)
                 rows.extend(frame.to_dict(orient="records"))
             elif isinstance(values, str):
                 st.subheader(f"{category} (Summary)")
                 st.table(pd.DataFrame({"Insights": [values]}))
                 rows.append({"Category": category, "Values": values})
             else:
                 st.warning(f"Unsupported data format for category: {category}")
         except Exception as e:
             logging.error(f"Error processing {category}: {e}")
             st.error(f"Failed to display {category} as a table due to an error.")

     return rows
def _literal_or_none(text):
    """Evaluate *text* as a Python literal, then as JSON; return None if neither works."""
    import json  # local import: only `ast` is known to be imported at file level

    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        pass
    try:
        return json.loads(text)
    except ValueError:
        return None


def _literal_or_text(text):
    """Evaluate *text* as a Python literal; fall back to the text itself."""
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return text


def _records_from_literal(text):
    """Extract Category/Values records from a list/dict literal embedded in *text*."""
    stripped = text.strip()
    # Try the whole text first, then the outermost [...] and {...} spans so that
    # Markdown code fences or surrounding prose do not defeat the parse.
    candidates = [stripped]
    for opener, closer in (("[", "]"), ("{", "}")):
        start = stripped.find(opener)
        end = stripped.rfind(closer)
        if 0 <= start < end:
            candidates.append(stripped[start:end + 1])

    for candidate in candidates:
        parsed = _literal_or_none(candidate)
        if isinstance(parsed, dict):
            parsed = [parsed]
        if not isinstance(parsed, list):
            continue
        records = [
            {"Category": entry["Category"], "Values": entry["Values"]}
            for entry in parsed
            if isinstance(entry, dict) and "Category" in entry and "Values" in entry
        ]
        if records:
            return records
    return []


def _records_from_lines(text):
    """Extract records from the ``Category:`` / ``Values:`` line-oriented layout."""
    records = []
    category = None
    values = []

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith("Category:"):
            # A new header closes the previous category (kept only if it has values).
            if category and values:
                records.append(
                    {"Category": category, "Values": values if len(values) > 1 else values[0]}
                )
            # Strip only the leading marker, not later occurrences in the name.
            category = line[len("Category:"):].strip()
            values = []
            continue

        if line.startswith("Values:"):
            # Keep a payload written on the same line as the "Values:" header.
            line = line[len("Values:"):].strip()
            if not line:
                continue

        if category:
            values.append(_literal_or_text(line))

    if category and values:
        records.append(
            {"Category": category, "Values": values if len(values) > 1 else values[0]}
        )
    return records


def parse_analyst_output(raw_output):
    """Turn the analyst's raw text into a list of Category/Values records.

    Two layouts are recognised, tried in this order:

    1. A Python or JSON literal: a list of dictionaries (or a single
       dictionary) with ``'Category'`` and ``'Values'`` keys. The literal may
       be wrapped in a Markdown code fence or surrounded by prose. Entries
       lacking either key are ignored.
    2. A line-oriented layout: a ``Category: <name>`` line followed by value
       lines, with an optional ``Values:`` header whose inline payload (if
       any) is kept. Each value line is evaluated with ``ast.literal_eval``
       and kept as plain text when that fails. A category with exactly one
       value stores that value directly instead of a one-element list;
       categories without any value are dropped.

    Args:
        raw_output: Text produced by the analyst task. Non-string or blank
            input yields an empty list.

    Returns:
        list[dict]: Records shaped ``{"Category": str, "Values": object}``.
    """
    if not isinstance(raw_output, str) or not raw_output.strip():
        return []

    records = _records_from_literal(raw_output)
    if records:
        return records
    return _records_from_lines(raw_output)
# Main Execution Block
# Runs the crew when the button is pressed, shows the three task outputs,
# renders charts/tables from the analyst output and offers the PDF report.
if st.button("Generate Patent Insights"):
    if llm is None:
        # `llm` stays None when the API key for the selected model is missing;
        # stop here with a clear message instead of a generic execution error.
        st.error("Cannot proceed without a valid API key for the selected model.")
    else:
        with st.spinner('Processing...'):
            try:
                start_time = time.time()
                results = crew.kickoff(inputs={"topic": patent_area, "stakeholder": stakeholder})
                elapsed_time = time.time() - start_time

                # tasks_output follows the crew's task order: plan, analyse, write.
                writer_output = getattr(results.tasks_output[2], "raw", "No details available.")
                if writer_output:
                    st.markdown("### Final Report")
                    st.write(writer_output)
                else:
                    st.warning("No final report available.")

                with st.expander("Explore Detailed Insights"):
                    tab1, tab2 = st.tabs(["Planner's Insights", "Analyst's Analysis"])

                    with tab1:
                        planner_output = getattr(results.tasks_output[0], "raw", "No details available.")
                        st.write(planner_output)

                    with tab2:
                        analyst_output = getattr(results.tasks_output[1], "raw", "No details available.")
                        st.write(analyst_output)

                        # Convert raw text to structured data
                        if isinstance(analyst_output, str):
                            analyst_output = parse_analyst_output(analyst_output)
                        st.subheader("Structured Analyst Output")
                        st.write(analyst_output)

                        # Charts are optional (sidebar toggle); the table view always runs.
                        charts = []
                        if enable_advanced_analysis:
                            charts = create_visualizations(analyst_output)
                        table_data = display_table(analyst_output)

                st.success(f"Analysis completed in {elapsed_time:.2f} seconds.")

                # generate_pdf_report is defined elsewhere in this file; it is used
                # here as returning the path of the written PDF.
                pdf_path = generate_pdf_report(
                    writer_output,
                    charts=charts,
                    table_data=table_data,
                    metadata={"Technology Area": patent_area, "Stakeholder": stakeholder},
                )
                with open(pdf_path, "rb") as report_file:
                    st.download_button("Download Report", data=report_file, file_name="Patent_Strategy_Report.pdf")
            except Exception as e:
                # Top-level boundary of the app: report the failure to the log and the UI.
                logging.error(f"An error occurred during execution: {e}")
                st.error(f"An error occurred during execution: {e}")