# Streamlit front end for patent analysis.
#
# Flow: check the OpenAI API key, collect a date range / keywords / fields
# from the user, download and filter patents via `patentwiz.preprocess_data`,
# then send the selected patents to `patentwiz.qa_agent` together with PROMPT
# and render the returned results and accumulated cost.
import os
from datetime import datetime

import chromadb
import streamlit as st

from patentwiz import preprocess_data, qa_agent

# Check if the API key is loaded
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    st.error(
        "OPENAI_API_KEY not found! Please set it in the environment variables "
        "or Hugging Face Secrets."
    )
    st.stop()

# Clear ChromaDB cache to fix tenant issue
chromadb.api.client.SharedSystemClient.clear_system_cache()

# Extraction instructions handed to the QA agent for every analyzed patent.
PROMPT = """
Task: Carefully review the provided patent text and extract all relevant technical information, specifically for RF devices, antennas, and related hardware. Focus on the following parameters:

1. **Physical Measurements**:
   - Frequency, impedance, voltage, current, power, gain, bandwidth, radiation efficiency, and other measurable RF parameters.
   - For each measurement, provide the following details:
     - Substance or component being measured.
     - Specific value or range of the measurement.
     - Unit of measurement (if provided).
     - Measurement type or context (e.g., frequency, impedance, gain, etc.).

2. **Patent Metadata**:
   - Title of the patent.
   - Abstract summarizing the technical focus.
   - Metadata, including:
     - Patent number.
     - Filing date.
     - Inventors.
     - Assignee (if applicable).

### Output Format:

The response should be formatted as a structured JSON object, as shown below:

{
  "Patent_Title": "Title",
  "Patent_Abstract": "Abstract",
  "Patent_Metadata": {
    "Patent_Number": "Number",
    "Filing_Date": "Date",
    "Inventors": ["Name1", "Name2"],
    "Assignee": "Assignee Name"
  },
  "Content": [
    {
      "Measurement_substance": "substance",
      "Measured_value": "value",
      "Measured_unit": "unit",
      "measurement_type": "type"
    }
    // Additional measurements
  ]
}
"""

DATE_FORMAT = "%Y-%m-%d"

# Title and description
# TODO(review): "Blah" looks like a placeholder title — replace with the real app name.
st.title("Blah")
st.write(
    "Analyze patents to extract physical measurements such as frequency, bandwidth, and more. "
    "Provide a date range to download patents and analyze them using GPT models."
)

# User Input Section
st.header("Enter Date Range for Patent Analysis")
start_date_input = st.text_input("Enter the start date (YYYY-MM-DD):", value="2024-06-20")
end_date_input = st.text_input("Enter the end date (YYYY-MM-DD):", value="2024-06-27")

num_patents_to_analyze = st.number_input(
    "Number of patents to analyze:",
    min_value=1,
    value=3,
    step=1,
    help="Specify how many patents you want to analyze.",
)

model_choice = st.selectbox(
    "Select a model for analysis:",
    ["gpt-3.5-turbo", "gpt-4"],
    help="Choose the OpenAI GPT model for the analysis.",
)

logging_enabled = st.checkbox(
    "Enable logging?", value=False, help="Toggle logging for debugging purposes."
)

# Keyword Management
st.header("Manage Keywords")
st.write("Add or delete keywords for filtering patents.")

default_keywords = [
    "RF", "Radio Frequency", "Wireless Communication", "Antenna", "Microwave",
    "Electromagnetic Waves", "Beamforming", "5G", "6G", "Patch Antenna",
    "Dipole Antenna", "Phased Array", "Radiation Pattern", "IoT", "Wireless Charging",
]
keywords_input = st.text_area(
    "Enter keywords for filtering (comma-separated):",
    value=", ".join(default_keywords),
)
# Drop empty entries so stray commas do not produce blank keywords.
user_keywords = [kw.strip() for kw in keywords_input.split(",") if kw.strip()]

# Field Selection
st.header("Choose Fields for Filtering")
fields = st.multiselect(
    "Select fields to search for keywords:",
    ["Title", "Abstract", "Claims", "Summary", "Detailed Description"],
    default=["Title", "Abstract"],
)

# Run Analysis Button
if st.button("Analyze Patents"):
    if not start_date_input or not end_date_input:
        st.error("Please enter both start and end dates!")
    elif not user_keywords:
        st.error("Please provide at least one keyword for filtering.")
    elif not fields:
        st.error("Please select at least one field for filtering.")
    else:
        # Parse date inputs. Only the parsing is guarded by the ValueError
        # handler, so a ValueError raised later by the download / filter /
        # QA steps is no longer misreported as an invalid date format.
        try:
            start_date = datetime.strptime(start_date_input.strip(), DATE_FORMAT)
            end_date = datetime.strptime(end_date_input.strip(), DATE_FORMAT)
        except ValueError as ve:
            st.error(f"Invalid date format: {ve}")
            st.stop()

        # Validate date range
        if start_date > end_date:
            st.error("End date must be after start date!")
            st.stop()

        try:
            # Step 1: Download and preprocess patents
            with st.spinner("Downloading and extracting patents..."):
                saved_patent_names = preprocess_data.parse_and_save_patents(
                    start_date, end_date, logging_enabled
                )
            if not saved_patent_names:
                st.error("No patents found for the given date range.")
                st.stop()
            st.success(f"{len(saved_patent_names)} patents found and processed!")

            # Step 2: Filter patents based on user input
            with st.spinner("Filtering patents..."):
                filtered_patents = preprocess_data.filter_rf_patents(
                    saved_patent_names, keywords=user_keywords, fields=fields
                )
            if not filtered_patents:
                st.error("No patents matched the filtering criteria.")
                st.stop()
            st.success(f"{len(filtered_patents)} relevant patents found and processed!")

            # Step 3: Analyze patents using GPT
            # Takes the first N filtered patents (a head slice, not a random sample).
            selected_patents = filtered_patents[:num_patents_to_analyze]
            total_cost = 0
            results = []

            st.write("Starting patent analysis...")
            for index, _patent_file in enumerate(selected_patents):
                # The index refers to a position in `selected_patents`, so that
                # same list is passed along with it. Previously the unfiltered
                # `saved_patent_names` was passed, which paired a filtered index
                # with the unfiltered list.
                # NOTE(review): presumably call_QA_to_json indexes into the list
                # it receives — confirm against patentwiz.qa_agent.
                cost, output = qa_agent.call_QA_to_json(
                    PROMPT,
                    start_date.year,
                    start_date.month,
                    start_date.day,
                    selected_patents,
                    index,
                    logging_enabled,
                    model_choice,
                )
                total_cost += cost
                results.append(output)

            # Step 4: Display results
            st.write(f"**Total Cost:** ${total_cost:.4f}")
            st.write("### Analysis Results:")
            for idx, result in enumerate(results):
                st.subheader(f"Patent {idx + 1}")
                st.json(result)
        except Exception as e:
            # Top-level boundary for the UI: surface the failure to the user
            # instead of letting the script crash with a traceback.
            st.error(f"An unexpected error occurred: {e}")