Spaces:
Running
Running
File size: 5,050 Bytes
8e66b23 53dcc17 8e66b23 53dcc17 ec53722 8e66b23 53dcc17 ec53722 53dcc17 8e66b23 ec53722 8e66b23 ec53722 ed32193 ec53722 8e66b23 ec53722 4f58ce4 53dcc17 abf9cc5 eebbf31 6c631e4 ec53722 8aab0ae ec53722 ed32193 ec53722 eebbf31 ec53722 d6ae31f ec53722 d6ae31f ec53722 d6ae31f ec53722 d6ae31f 1feb58a ec53722 d6ae31f 1feb58a 8e66b23 eebbf31 ec53722 53dcc17 ec53722 66f9f84 8aab0ae 66f9f84 9eecb91 8e66b23 9eecb91 8e66b23 53dcc17 8e66b23 ec53722 8e66b23 53dcc17 8aab0ae f8ccafc 53dcc17 8e66b23 ec53722 8e66b23 ec53722 8e66b23 ec53722 8aab0ae 8e66b23 ec53722 53dcc17 8e66b23 ec53722 8e66b23 53dcc17 8e66b23 53dcc17 8e66b23 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import logging
import os
import pathlib
import tempfile
from typing import List, Optional
import gradio as gr
import pandas as pd
from rdkit import Chem
from tqdm import tqdm
from configuration import GENE_EXPRESSION_METADATA
from submission import submission
# Module-level logger; the NullHandler keeps the module silent unless the
# embedding application configures logging itself.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Short display labels for the longest tissue-site names. Sites that are not
# listed here are shown unchanged (lookups use ``site_mapper.get(x, x)``).
site_mapper = dict(
    central_nervous_system="CNS",
    haematopoietic_and_lymphoid_tissue="Haema_lymph",
    upper_aerodigestive_tract="digestive",
    autonomic_ganglia="ganglia",
)
def _load_smiles(smiles, smiles_path):
    """Return the requested SMILES strings as a list.

    Exactly one of ``smiles`` (a single SMILES string) and ``smiles_path``
    (a tab-separated file whose first column holds SMILES) must be given.
    ``None`` and blank strings both count as "no SMILES typed in".

    Raises:
        TypeError: if neither or both inputs are provided.
    """
    has_text = smiles is not None and smiles.strip() != ""
    has_file = smiles_path is not None

    if not has_text and not has_file:
        raise TypeError("Pass either single SMILES or a file")
    if has_text and has_file:
        raise TypeError("Pass either single SMILES or a file, not both")
    if has_text:
        return [smiles.strip()]

    # The upload may arrive as a file-like wrapper exposing ``.name`` or as a
    # plain path string; accept both.
    path = getattr(smiles_path, "name", smiles_path)
    smiles_data = pd.read_csv(path, sep="\t", header=None)
    return smiles_data[0].astype(str).tolist()


def run_inference(
    smiles: Optional[str],
    smiles_path: Optional[str],
    omic: Optional[str],
    confidence: bool,
):
    """Predict IC50 values for one or more molecules and save them as CSV.

    Args:
        smiles: A single SMILES string typed by the user (``None`` or an
            empty/blank string when a file is used instead).
        smiles_path: Uploaded tab-separated file with one SMILES per row in
            the first column, or ``None``. Either a path string or an object
            with a ``.name`` path attribute is accepted.
        omic: Optional uploaded omics file (path string or object with a
            ``.name`` attribute). When ``None``, ``omics_file=None`` is passed
            to ``submission`` and the bundled ``GENE_EXPRESSION_METADATA`` is
            joined to the predictions.
        confidence: Whether to request and report confidence estimates.

    Returns:
        A tuple ``(csv_path, preview)`` where ``csv_path`` is the path of the
        CSV holding the full result table and ``preview`` is its first 25
        rows.

    Raises:
        TypeError: if neither or both of ``smiles`` / ``smiles_path`` are set.
        ValueError: if any SMILES cannot be parsed by RDKit.
    """
    # Read SMILES and validate all of them before running any prediction.
    smiles = _load_smiles(smiles, smiles_path)
    for smi in smiles:
        if Chem.MolFromSmiles(smi) is None:
            raise ValueError(f"Found invalid SMILES {smi}")

    # Read omics and otherwise load baseline.
    omic_path = None if omic is None else getattr(omic, "name", omic)

    predicted_df = pd.DataFrame({})
    for smi in tqdm(smiles, total=len(smiles)):
        output = submission(
            drug={"smiles": smi},
            workspace_id="emulated_workspace_id",
            task_id="emulated_task_id",
            estimate_confidence=confidence,
            omics_file=omic_path,
        )
        # For the moment no attention analysis: discard the unused entries.
        for unused_key in ("gene_attention", "smiles_attention", "IC50"):
            output.pop(unused_key, None)

        predicted_df[f"IC50_{smi}"] = (
            output["log_micromolar_IC50"].squeeze().round(3)
        )
        if confidence:
            predicted_df[f"aleatoric_confidence_{smi}"] = (
                output["aleatoric_confidence"].squeeze().round(3)
            )
            # Previously this column was filled from "aleatoric_confidence",
            # duplicating the column above. NOTE(review): assumes submission
            # returns an "epistemic_confidence" entry when confidence is
            # requested -- confirm against submission().
            predicted_df[f"epistemic_confidence_{smi}"] = (
                output["epistemic_confidence"].squeeze().round(3)
            )

    # Prepare DF to visualize.
    if omic_path is None:
        df = GENE_EXPRESSION_METADATA.copy()
        df = df.drop(
            columns=[
                "histology",
                "cell_line_name",
                "IC50 (min/max scaled)",
                "IC50 (log(μmol))",
            ]
        )
        df["site"] = df["site"].apply(lambda x: site_mapper.get(x, x))
        df["cell_line"] = df["cell_line"].apply(lambda x: x.split("_")[0])
        if not confidence:
            # Hide any confidence columns shipped with the metadata when the
            # user did not ask for confidence estimates. Prediction columns
            # never need this: they are only created when confidence is True.
            df = df.drop(
                columns=["aleatoric_confidence", "epistemic_confidence"],
                errors="ignore",
            )
        result_df = pd.concat(
            [df["cell_line"], predicted_df, df.drop(["cell_line"], axis=1)],
            axis=1,
        )
    else:
        # With a user-supplied omics file only the predictions are returned.
        result_df = predicted_df

    # Save into a fresh per-call directory so simultaneous requests cannot
    # overwrite each other's file, while keeping the download name stable.
    out_dir = tempfile.mkdtemp(prefix="paccmann_")
    temp_path = os.path.join(out_dir, "paccmann_result.csv")
    result_df.to_csv(temp_path)
    return temp_path, result_df.head(25)
if __name__ == "__main__":
    # Static assets (article/description markdown) live next to this script.
    metadata_root = pathlib.Path(__file__).parent / "model_cards"

    # Each example row is [SMILES, SMILES file, omics file, confidence flag].
    examples = [
        ["COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O", None, None, False],
        ["COC1=C(C=C2C(=C1)N=CN=C2NC3=CC(=C(C=C3)F)Cl)OCCCN4CCOCC4", None, None, True],
        # [None, metadata_root.joinpath("molecules.smi"), None, False],
    ]

    article = (metadata_root / "article.md").read_text()
    description = (metadata_root / "description.md").read_text()

    # Input widgets, in the positional order expected by run_inference.
    input_components = [
        gr.Textbox(
            label="SMILES",
            placeholder="COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O",
            lines=1,
        ),
        gr.File(file_types=[".smi", ".tsv"], label="Multiple SMILES"),
        gr.File(file_types=[".csv"], label="Transcriptomics data file"),
        gr.Radio(choices=[True, False], label="Estimate confidence", value=False),
    ]

    # Output widgets: the CSV path and the preview frame run_inference returns.
    output_components = [
        gr.File(label="Download full results"),
        gr.DataFrame(label="Preview of results for 25 cell lines"),
    ]

    demo = gr.Interface(
        fn=run_inference,
        title="PaccMann",
        inputs=input_components,
        outputs=output_components,
        article=article,
        description=description,
        examples=examples,
    )
    demo.launch(debug=True, show_error=True)
|