# Hugging Face Space (status: Running)
import logging
import pathlib
from typing import List, Optional

import gradio as gr
import pandas as pd
from rdkit import Chem
from tqdm import tqdm

from configuration import GENE_EXPRESSION_METADATA
from submission import submission
# Module-level logger. The NullHandler means this module emits nothing on its
# own unless the hosting application configures logging.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Short display labels for long tissue-site names. Sites that are not listed
# here are looked up with a fallback to the original name in run_inference.
site_mapper = {
    "central_nervous_system": "CNS",
    "haematopoietic_and_lymphoid_tissue": "Haema_lymph",
    "upper_aerodigestive_tract": "digestive",
    "autonomic_ganglia": "ganglia",
}
def run_inference(
    smiles: Optional[str],
    smiles_path: Optional[str],
    omic_path: Optional[str],
    confidence: bool,
):
    """Predict IC50 values for one or more molecules and tabulate them.

    Args:
        smiles: A single SMILES string. An empty or whitespace-only string is
            treated as "not provided" so that an empty text field does not
            shadow an uploaded file.
        smiles_path: Path to a tab-separated file without header whose first
            column holds SMILES. Only used when ``smiles`` is not provided.
        omic_path: Path to a custom omics file that is forwarded to
            ``submission``; ``None`` uses the default data.
        confidence: Whether to request and report confidence estimates.

    Returns:
        A tuple holding the same ``pandas.DataFrame`` twice (one entry per
        output component). With the default omics data the prediction columns
        are framed by the cell-line metadata; with custom omics only the
        prediction columns are returned because no metadata is available.

    Raises:
        TypeError: If a path argument is neither ``None`` nor ``str``, or if
            neither a SMILES string nor a SMILES file is given.
        ValueError: If the SMILES file yields no entries or RDKit cannot
            parse one of the SMILES.
    """
    # Read SMILES
    if not isinstance(smiles_path, (str, type(None))):
        raise TypeError(
            f"SMILES file pass has to be None or str, not {type(smiles_path)}"
        )
    if isinstance(smiles, str):
        # An empty textbox arrives as "" rather than None.
        smiles = smiles.strip() or None
    if smiles is None and smiles_path is None:
        raise TypeError("Pass either single SMILES or a file")

    if smiles is not None:
        smiles_list = [smiles]
    else:
        # header=None marks the file as header-less (header=False is rejected
        # by pandas).
        smiles_data = pd.read_csv(smiles_path, sep="\t", header=None)
        smiles_list = [
            entry.strip() for entry in smiles_data[0].dropna().astype(str)
        ]
        if not smiles_list:
            raise ValueError(f"No SMILES found in {smiles_path}")

    for smi in smiles_list:
        # RDKit parses "" into an empty molecule instead of returning None,
        # so empty entries are rejected explicitly.
        if not smi or Chem.MolFromSmiles(smi) is None:
            raise ValueError(f"Found invalid SMILES {smi}")

    # Read omics and otherwise load baseline
    if not isinstance(omic_path, (str, type(None))):
        raise TypeError(f"Omics file pass has to be None or str, not {type(omic_path)}")

    results = {}
    for smi in tqdm(smiles_list, total=len(smiles_list)):
        result = submission(
            drug={"smiles": smi},
            workspace_id="emulated_workspace_id",
            task_id="emulated_task_id",
            estimate_confidence=confidence,
            omics_file=omic_path,
        )
        # For the moment no attention analysis: only the IC50 prediction and
        # (optionally) the confidence estimates are read from the result.
        results[f"IC50_{smi}"] = result["log_micromolar_IC50"].squeeze().round(3)
        if confidence:
            results[f"aleatoric_confidence_{smi}"] = (
                result["aleatoric_confidence"].squeeze().round(3)
            )
            # NOTE(review): assumes `submission` reports the epistemic estimate
            # under "epistemic_confidence" -- confirm against its return value.
            results[f"epistemic_confidence_{smi}"] = (
                result["epistemic_confidence"].squeeze().round(3)
            )
    logger.debug("Predictions: %s", results)
    predicted_df = pd.DataFrame(results)

    # Prepare DF to visualize
    if omic_path is not None:
        # No metadata is available for user-supplied omics data.
        return predicted_df, predicted_df

    # Work on a copy: dropping in place on the shared module-level frame
    # would make every later call fail on the already-removed columns.
    df = GENE_EXPRESSION_METADATA.drop(
        columns=[
            "histology",
            "cell_line_name",
            "IC50 (min/max scaled)",
            "IC50 (log(μmol))",
        ],
        errors="ignore",
    )
    df = df.assign(
        site=df["site"].apply(lambda x: site_mapper.get(x, x)),
        cell_line=df["cell_line"].apply(lambda x: x.split("_")[0]),
    )
    result_df = pd.concat(
        [df["cell_line"], predicted_df, df.drop(["cell_line"], axis=1)], axis=1
    )
    # NOTE(review): the second value is bound to a gr.File output in this
    # file, which presumably expects a file path rather than a DataFrame --
    # confirm against the Gradio version in use.
    return result_df, result_df
if __name__ == "__main__":
    # Build and launch the Gradio demo around run_inference.

    # Load metadata (example inputs and the markdown shown in the UI) from the
    # "model_cards" directory next to this file.
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    # The examples are loaded but currently not passed to the interface (see
    # the disabled `examples` argument below).
    examples = pd.read_csv(
        metadata_root.joinpath("examples.csv"), header=None, sep="|"
    ).fillna("")

    # Read the markdown with an explicit encoding so the result does not
    # depend on the locale of the host.
    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(
        encoding="utf-8"
    )

    # The inputs map positionally onto run_inference's parameters:
    # smiles, smiles_path, omic_path, confidence.
    demo = gr.Interface(
        fn=run_inference,
        title="PaccMann",
        inputs=[
            gr.Textbox(
                label="SMILES",
                placeholder="COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O",
                lines=1,
            ),
            gr.File(
                file_types=[".smi", ".tsv"],
                label="List of SMILES (tab-separated file with SMILES in first column)",
            ),
            gr.File(
                file_types=[".csv"],
                label="Transcriptomics data with cell lines in rows and genes in columns",
            ),
            gr.Radio(choices=[True, False], label="Estimate confidence", value=False),
        ],
        outputs=[gr.DataFrame(label="Output"), gr.File()],
        article=article,
        description=description,
        # examples=examples.values.tolist(),
    )
    demo.launch(debug=True, show_error=True)