"""Gradio front end for a smolagents ``CodeAgent`` answering biomedical questions.

On import/run this module:

1. builds an OpenTelemetry ``TracerProvider`` tagged with the Phoenix project
   name and, when ``PHOENIX_API_KEY`` is set, exports smolagents spans to
   Arize Phoenix;
2. creates a ``CodeAgent`` backed by ``HfApiModel``;
3. launches a Gradio Blocks UI with a question box, a result box and one
   button per example question.
"""
import logging
import os
from typing import Union, Dict, List, Optional

import gradio as gr
from datasets import load_dataset
import pandas as pd
from smolagents import CodeAgent, HfApiModel, tool, GradioUI
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from openinference.instrumentation.smolagents import SmolagentsInstrumentor
from openinference.semconv.resource import ResourceAttributes
from opentelemetry.sdk.resources import Resource
import Bio

logger = logging.getLogger(__name__)

# Resource attributes attached to every exported span; PROJECT_NAME is what
# groups the traces under the 'hf-parsimony' project.
resource = Resource(attributes={
    ResourceAttributes.PROJECT_NAME: 'hf-parsimony'
})

PHOENIX_API_KEY = os.getenv("PHOENIX_API_KEY")
endpoint = "https://app.phoenix.arize.com/v1/traces"

# The provider must receive `resource`; without it the project name defined
# above is never attached to the spans.
trace_provider = TracerProvider(resource=resource)

if PHOENIX_API_KEY:
    api_key = f"api_key={PHOENIX_API_KEY}"
    os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = api_key
    os.environ["PHOENIX_CLIENT_HEADERS"] = api_key
    os.environ["PHOENIX_COLLECTOR_ENDPOINT"] = "https://app.phoenix.arize.com"

    trace_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint)))
    SmolagentsInstrumentor().instrument(tracer_provider=trace_provider)
else:
    # Without a key the header would be the literal string "api_key=None" and
    # the exporter could not authenticate, so skip exporting altogether.
    api_key = None
    logger.warning("PHOENIX_API_KEY is not set; Phoenix tracing is disabled.")

# Each example is a single-item list: [question text].
examples = [
    ["Validate whether FANCA interacts with PALB2 in homologous recombination repair using BioGRID and PubMed evidence."],
    ["Show the evolution of TP53-MDM2 interaction evidence from 2018-2023, highlighting key supporting papers."],
    ["Compare BRCA2 interaction networks between human and mouse homologs, marking conserved relationships."],
    ["Identify synthetic lethal partners for BRCA1-deficient cancers with confidence > 0.9 and clinical trial associations."],
    ["Find novel VHL interactors proposed in 2023 PubMed articles not yet in BioGRID."],
    ["Visualize the ATM interaction network with nodes sized by betweenness centrality and colored by validation source."],
    ["Explain the Fanconi Anemia pathway and show its core components with experimental validation status."],
    ["Correlate TP53BP1 protein interactions with mRNA co-expression patterns in TCGA breast cancer data."],
    ["Identify high-betweenness nodes in the KRAS interaction network with druggable protein products."],
    ["List all interactions with conflicting evidence between BioGRID and STRING, sorted by confidence delta."],
]

# Introductory text rendered under the page title.
_INTRO_MARKDOWN = (
    "An experimental knowledge platform that attempts to transforms fragmented "
    "biomedical data into answers. "
    "A word of caution... this is a journey of discovery to understand what the "
    "Smolagents coding approach can provide with a simple app and a handful of "
    "dependencies. "
    "In terms of the responses provided it has a ways to go until I would claim "
    "that it provides true knowledge or insight. "
    "If anything I think that it highlights the value of ontologies and "
    "associated vocabularies in guiding LLM behavior. "
    "Probably a good time to go back and review some notes on DSPy and layer on "
    "domain vocabularies..."
)


class GradioUIWithExamples(GradioUI):
    """``GradioUI`` variant that adds clickable example questions.

    Args:
        agent: The agent whose ``run`` method answers submitted questions.
        examples: Optional list of single-item lists, each holding one example
            question. When empty or ``None`` no example section is rendered.
        **kwargs: Forwarded unchanged to ``GradioUI.__init__``.
    """

    def __init__(self, agent, examples: Optional[List[List[str]]] = None, **kwargs):
        super().__init__(agent, **kwargs)
        self.examples = examples

    def build_interface(self) -> gr.Blocks:
        """Build and return the Blocks app (question box, result box, examples)."""
        with gr.Blocks() as demo:
            gr.Markdown("## Biomedical Answers")
            gr.Markdown(_INTRO_MARKDOWN)

            input_box = gr.Textbox(
                label="Your Question",
                placeholder="e.g., 'Find novel VHL interactors proposed in 2023 PubMed articles not yet in BioGRID.'"
            )
            output_box = gr.Textbox(
                label="Analysis Results",
                placeholder="Response will appear here...",
                interactive=False,
            )

            submit_button = gr.Button("Submit")
            submit_button.click(
                self.agent.run,
                inputs=input_box,
                outputs=output_box,
            )

            if self.examples:
                gr.Markdown("### Examples")
                for example in self.examples:
                    question = example[0]
                    # Bind the question as a default argument so each button
                    # keeps its own text (avoids the late-binding closure trap).
                    gr.Button(question).click(
                        lambda x=question: x,
                        inputs=[],
                        outputs=input_box,
                    )
        return demo

    def launch(self, **kwargs):
        """Build the interface and start the Gradio server.

        Args:
            **kwargs: Optional keyword arguments forwarded to
                ``gr.Blocks.launch``. Calling with no arguments behaves
                exactly like ``demo.launch()``.
        """
        demo = self.build_interface()
        demo.launch(**kwargs)


model = HfApiModel()
agent = CodeAgent(
    tools=[],
    model=model,
    # NOTE(review): the hyphenated entries ("google-cloud-bigquery",
    # "pandas-gbq") are distribution names; an import statement cannot contain
    # a hyphen, so they can never match. "pybigquery" and "execute_sql" should
    # also be checked against the real importable module names. Left unchanged
    # to preserve current behavior.
    additional_authorized_imports=["pyvis","aiohttp","os","sys","gradio","pandas","datasets","numpy","Bio","rdflib","networkx","requests","execute_sql","duckdb","pyarrow","google-cloud-bigquery","pandas-gbq","pybigquery","sqlalchemy"],
    add_base_tools=True
)

interface = GradioUIWithExamples(agent, examples=examples)
interface.launch()