# parsimony / app.py
# dwb2023 - add sql capabilities (commit a77f78a, verified)
import os
import gradio as gr
from datasets import load_dataset
from typing import Union, Dict, List, Optional
import pandas as pd
from smolagents import CodeAgent, HfApiModel, tool, GradioUI
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from openinference.instrumentation.smolagents import SmolagentsInstrumentor
from openinference.semconv.resource import ResourceAttributes
from opentelemetry.sdk.resources import Resource
import Bio
# --- Tracing: send smolagents spans to Arize Phoenix over OTLP/HTTP ---------

# Resource attributes that tag this service's spans with the project name.
resource = Resource(attributes={
    ResourceAttributes.PROJECT_NAME: 'hf-parsimony'
})

PHOENIX_API_KEY = os.getenv("PHOENIX_API_KEY")
api_key = f"api_key={PHOENIX_API_KEY}"
os.environ["PHOENIX_COLLECTOR_ENDPOINT"] = "https://app.phoenix.arize.com"
endpoint = "https://app.phoenix.arize.com/v1/traces"

# The resource must be handed to the provider; previously it was created but
# never used, so the project name above was never attached to any span.
trace_provider = TracerProvider(resource=resource)

if PHOENIX_API_KEY:
    # The header env vars are set before the exporter is constructed, matching
    # the original statement order.
    os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = api_key
    os.environ["PHOENIX_CLIENT_HEADERS"] = api_key
    trace_provider.add_span_processor(
        BatchSpanProcessor(OTLPSpanExporter(endpoint))
    )
else:
    # Without a key the old code exported the literal header "api_key=None".
    # Skip the exporter instead so the app still runs, just without tracing.
    print("PHOENIX_API_KEY is not set; Phoenix trace export is disabled.")

SmolagentsInstrumentor().instrument(tracer_provider=trace_provider)
# Sample prompts offered in the UI. Each entry is a one-element list whose
# first item is the question text.
examples = [
    [question]
    for question in (
        "Validate whether FANCA interacts with PALB2 in homologous recombination repair using BioGRID and PubMed evidence.",
        "Show the evolution of TP53-MDM2 interaction evidence from 2018-2023, highlighting key supporting papers.",
        "Compare BRCA2 interaction networks between human and mouse homologs, marking conserved relationships.",
        "Identify synthetic lethal partners for BRCA1-deficient cancers with confidence > 0.9 and clinical trial associations.",
        "Find novel VHL interactors proposed in 2023 PubMed articles not yet in BioGRID.",
        "Visualize the ATM interaction network with nodes sized by betweenness centrality and colored by validation source.",
        "Explain the Fanconi Anemia pathway and show its core components with experimental validation status.",
        "Correlate TP53BP1 protein interactions with mRNA co-expression patterns in TCGA breast cancer data.",
        "Identify high-betweenness nodes in the KRAS interaction network with druggable protein products.",
        "List all interactions with conflicting evidence between BioGRID and STRING, sorted by confidence delta.",
    )
]
class GradioUIWithExamples(GradioUI):
    """A ``GradioUI`` variant with a single-question form and example buttons.

    The interface has one input textbox, one read-only output textbox, a
    submit button wired to ``agent.run``, and (optionally) one button per
    example that copies the example text into the input box.
    """

    def __init__(self, agent, examples: Optional[List[List[str]]] = None, **kwargs):
        """Store the agent (via the base class) and the example prompts.

        Args:
            agent: Object exposing ``run``; passed through to ``GradioUI``.
            examples: Optional list of examples; only the first item of each
                example is used, as the question text.
            **kwargs: Forwarded unchanged to ``GradioUI.__init__``.
        """
        super().__init__(agent, **kwargs)
        self.examples = examples

    def build_interface(self):
        """Build and return the ``gr.Blocks`` app without launching it."""
        with gr.Blocks() as demo:
            gr.Markdown("## Biomedical Answers")
            # The text below is kept flush-left so the string passed to
            # gr.Markdown is exactly the original one.
            gr.Markdown("""
An experimental knowledge platform that attempts to transforms fragmented biomedical data into answers.
A word of caution... this is a journey of discovery to understand what the Smolagents coding approach can provide with a simple app and a handful of dependencies. In terms of the responses provided it has a ways to go until I would claim that it provides true knowledge or insight.
If anything I think that it highlights the value of ontologies and associated vocabularies in guiding LLM behavior. Probably a good time to go back and review some notes on DSPy and layer on domain vocabularies...
""")
            input_box = gr.Textbox(
                label="Your Question",
                placeholder="e.g., 'Find novel VHL interactors proposed in 2023 PubMed articles not yet in BioGRID.'"
            )
            output_box = gr.Textbox(
                label="Analysis Results",
                placeholder="Response will appear here...",
                interactive=False,
            )
            submit_button = gr.Button("Submit")
            # The question text is passed to agent.run and its return value is
            # shown in the output box.
            submit_button.click(
                self.agent.run,
                inputs=input_box,
                outputs=output_box,
            )
            if self.examples:
                gr.Markdown("### Examples")
                for example in self.examples:
                    question = example[0]
                    # Bind the text as a default argument so every button keeps
                    # its own question (avoids the late-binding closure trap).
                    gr.Button(question).click(
                        lambda text=question: text,
                        inputs=[],
                        outputs=input_box,
                    )
        return demo

    def launch(self, **kwargs):
        """Build the interface and start the Gradio server.

        Args:
            **kwargs: Forwarded to ``demo.launch`` (for example ``share`` or
                ``server_name``). Calling ``launch()`` with no arguments
                behaves exactly as before; previously no options could be
                passed through this override at all.
        """
        demo = self.build_interface()
        demo.launch(**kwargs)
# --- Application wiring -----------------------------------------------------

# Model wrapper constructed with library defaults (no arguments given).
model = HfApiModel()

# Code-writing agent with no custom tools; add_base_tools=True asks smolagents
# to include its built-in tools.
agent = CodeAgent(
    model=model,
    tools=[],
    add_base_tools=True,
    additional_authorized_imports=[
        "gradio",
        "pandas",
        "datasets",
        "numpy",
        "Bio",
        "rdflib",
        "networkx",
        "requests",
        # NOTE(review): "execute_sql" does not look like an importable
        # module name - confirm this entry is intended.
        "execute_sql",
    ],
)

# Build the UI with the sample questions and start serving.
interface = GradioUIWithExamples(agent, examples=examples)
interface.launch()