|
import os |
|
import gradio as gr |
|
from datasets import load_dataset |
|
from typing import Union, Dict, List, Optional |
|
import pandas as pd |
|
from smolagents import CodeAgent, HfApiModel, tool, GradioUI |
|
from opentelemetry import trace |
|
from opentelemetry.sdk.trace import TracerProvider |
|
from opentelemetry.sdk.trace.export import BatchSpanProcessor |
|
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter |
|
from openinference.instrumentation.smolagents import SmolagentsInstrumentor |
|
from openinference.semconv.resource import ResourceAttributes |
|
from opentelemetry.sdk.resources import Resource |
|
import Bio |
|
|
|
# --- Tracing setup: export smolagents spans to Arize Phoenix over OTLP/HTTP ---

# Resource attributes for the tracer provider; carries the Phoenix project name.
resource = Resource(attributes={
    ResourceAttributes.PROJECT_NAME: 'hf-parsimony'
})

# The Phoenix API key comes from the environment. The header variables are
# only written when a key is actually present; otherwise the f-string would
# produce the literal header "api_key=None" and clobber any headers the
# deployment already configured.
PHOENIX_API_KEY = os.getenv("PHOENIX_API_KEY")
if PHOENIX_API_KEY:
    api_key = f"api_key={PHOENIX_API_KEY}"
    # Set before the exporter is constructed below so the headers are already
    # in the environment when it is created.
    os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = api_key
    os.environ["PHOENIX_CLIENT_HEADERS"] = api_key
else:
    api_key = None
    print("PHOENIX_API_KEY is not set; trace export to Phoenix will be unauthenticated.")
os.environ["PHOENIX_COLLECTOR_ENDPOINT"] = "https://app.phoenix.arize.com"

endpoint = "https://app.phoenix.arize.com/v1/traces"

# Pass the resource so the project name defined above is attached to the
# provider (it was previously created but never used).
trace_provider = TracerProvider(resource=resource)
trace_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint)))

# Instrument smolagents so agent activity is traced through this provider.
SmolagentsInstrumentor().instrument(tracer_provider=trace_provider)
|
|
|
# Sample prompts offered in the UI. The prompt texts are kept in a flat tuple
# and wrapped below, because `examples` is a list of one-element lists
# (one row per example, holding just the question text).
_EXAMPLE_QUESTIONS = (
    "Validate whether FANCA interacts with PALB2 in homologous recombination repair using BioGRID and PubMed evidence.",
    "Show the evolution of TP53-MDM2 interaction evidence from 2018-2023, highlighting key supporting papers.",
    "Compare BRCA2 interaction networks between human and mouse homologs, marking conserved relationships.",
    "Identify synthetic lethal partners for BRCA1-deficient cancers with confidence > 0.9 and clinical trial associations.",
    "Find novel VHL interactors proposed in 2023 PubMed articles not yet in BioGRID.",
    "Visualize the ATM interaction network with nodes sized by betweenness centrality and colored by validation source.",
    "Explain the Fanconi Anemia pathway and show its core components with experimental validation status.",
    "Correlate TP53BP1 protein interactions with mRNA co-expression patterns in TCGA breast cancer data.",
    "Identify high-betweenness nodes in the KRAS interaction network with druggable protein products.",
    "List all interactions with conflicting evidence between BioGRID and STRING, sorted by confidence delta.",
)

examples = [[question] for question in _EXAMPLE_QUESTIONS]
|
|
|
class GradioUIWithExamples(GradioUI):
    """GradioUI subclass that shows a question/answer form with example buttons.

    The interface consists of a question textbox, a read-only results textbox,
    a Submit button wired to ``self.agent.run``, and (optionally) one button
    per example that copies the example text into the question box.
    """

    def __init__(self, agent, examples=None, **kwargs):
        """Initialise the UI.

        Args:
            agent: Agent forwarded to the ``GradioUI`` initializer. This class
                later reads it back as ``self.agent`` (assumes the parent
                initializer stores it under that name).
            examples: Optional iterable of example prompts. Each entry may be
                a one-element sequence such as ``["question"]`` or a plain
                string. ``None`` or an empty value hides the examples section.
            **kwargs: Extra keyword arguments forwarded to ``GradioUI``.
        """
        super().__init__(agent, **kwargs)
        self.examples = examples

    @staticmethod
    def _example_text(example):
        """Return the prompt text of one example entry.

        A plain string is returned as-is; indexing it with ``[0]`` would
        silently yield only its first character. Any other entry is treated
        as a sequence whose first item is the prompt.
        """
        if isinstance(example, str):
            return example
        return example[0]

    def build_interface(self):
        """Build and return the ``gr.Blocks`` layout without launching it."""
        with gr.Blocks() as demo:
            gr.Markdown("## Biomedical Answers")
            gr.Markdown(
                "\n"
                "An experimental knowledge platform that attempts to transforms "
                "fragmented biomedical data into answers.\n"
                "\n"
                "A word of caution... this is a journey of discovery to understand "
                "what the Smolagents coding approach can provide with a simple app "
                "and a handful of dependencies. In terms of the responses provided "
                "it has a ways to go until I would claim that it provides true "
                "knowledge or insight.\n"
                "\n"
                "If anything I think that it highlights the value of ontologies and "
                "associated vocabularies in guiding LLM behavior. Probably a good "
                "time to go back and review some notes on DSPy and layer on domain "
                "vocabularies...\n"
            )

            input_box = gr.Textbox(
                label="Your Question",
                placeholder="e.g., 'Find novel VHL interactors proposed in 2023 PubMed articles not yet in BioGRID.'",
            )
            output_box = gr.Textbox(
                label="Analysis Results",
                placeholder="Response will appear here...",
                interactive=False,
            )
            submit_button = gr.Button("Submit")

            # Run the agent on the question and show its result in the output box.
            submit_button.click(
                self.agent.run,
                inputs=input_box,
                outputs=output_box,
            )

            if self.examples:
                gr.Markdown("### Examples")
                for example in self.examples:
                    text = self._example_text(example)
                    # Bind the text as a default argument so each button keeps
                    # its own prompt instead of the loop's last value.
                    gr.Button(text).click(
                        lambda x=text: x,
                        inputs=[],
                        outputs=input_box,
                    )
        return demo

    def launch(self, **kwargs):
        """Build the interface and launch it.

        Args:
            **kwargs: Forwarded unchanged to the ``launch`` method of the built
                ``gr.Blocks``. Calling ``launch()`` with no arguments behaves
                exactly as before.
        """
        demo = self.build_interface()
        demo.launch(**kwargs)
|
|
|
# Module names handed to the agent as `additional_authorized_imports`.
# NOTE(review): "execute_sql" does not look like an importable module name;
# confirm whether it belongs in this list.
AUTHORIZED_IMPORTS = [
    "gradio",
    "pandas",
    "datasets",
    "numpy",
    "Bio",
    "rdflib",
    "networkx",
    "requests",
    "execute_sql",
]

# Constructed with no arguments, so the library's default settings apply.
model = HfApiModel()

# No custom tools are registered; only the library's base tools are enabled.
agent = CodeAgent(
    model=model,
    tools=[],
    add_base_tools=True,
    additional_authorized_imports=AUTHORIZED_IMPORTS,
)

# Build the UI around the agent with the sample prompts and start serving.
interface = GradioUIWithExamples(agent, examples=examples)
interface.launch()