paccmann

Running

App Files Files Community

jannisborn committed on Feb 1, 2023

Commit

8aab0ae

unverified ·

1 Parent(s): 53dcc17

update

Browse files

Files changed (2) hide show

app.py +9 -11
model_cards/article.md +8 -3

app.py CHANGED Viewed

@@ -30,19 +30,13 @@ def run_inference(
     confidence: bool,
 ):
-    print(smiles)
-    print(smiles_path)
     # Read SMILES
-    if not isinstance(smiles_path, (str, type(None))):
-        raise TypeError(
-            f"SMILES file pass has to be None or str, not {type(smiles_path)}"
-        )
     if smiles is None and smiles_path is None:
         raise TypeError("Pass either single SMILES or a file")
     elif smiles is not None:
         smiles = [smiles]
     elif smiles_path is not None:
-        smiles_data = pd.read_csv(smiles_path, sep="\t", header=False)
         smiles = smiles_data[0]
         for smi in smiles:
             if Chem.MolFromSmiles(smi) is None:
@@ -93,6 +87,10 @@ def run_inference(
         )
         df["site"] = df["site"].apply(lambda x: site_mapper.get(x, x))
         df["cell_line"] = df["cell_line"].apply(lambda x: x.split("_")[0])
     else:
         pass
@@ -113,9 +111,9 @@ if __name__ == "__main__":
     metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
     examples = [
-        ["COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O", "", "", False],
-        ["COC1=C(C=C2C(=C1)N=CN=C2NC3=CC(=C(C=C3)F)Cl)OCCCN4CCOCC4", "", "", True],
-        ["", metadata_root.joinpath("molecules.smi"), "", False],
     ]
     with open(metadata_root.joinpath("article.md"), "r") as f:
         article = f.read()
@@ -133,7 +131,7 @@ if __name__ == "__main__":
             ),
             gr.File(
                 file_types=[".smi", ".tsv"],
-                label="Tab-separated file with SMILES in 1st column)",
             ),
             gr.File(
                 file_types=[".csv"],

     confidence: bool,
 ):
     # Read SMILES
     if smiles is None and smiles_path is None:
         raise TypeError("Pass either single SMILES or a file")
     elif smiles is not None:
         smiles = [smiles]
     elif smiles_path is not None:
+        smiles_data = pd.read_csv(smiles_path.name, sep="\t", header=None)
         smiles = smiles_data[0]
         for smi in smiles:
             if Chem.MolFromSmiles(smi) is None:
         )
         df["site"] = df["site"].apply(lambda x: site_mapper.get(x, x))
         df["cell_line"] = df["cell_line"].apply(lambda x: x.split("_")[0])
+        if confidence:
+            df.drop(
+                ["aleatoric_confidence", "epistemic_confidence"], axis=1, inplace=True
+            )
     else:
         pass
     metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")
     examples = [
+        ["COc1cc(O)c2c(c1)C=CCC(O)C(O)C(=O)C=CCC(C)OC2=O", None, None, False],
+        ["COC1=C(C=C2C(=C1)N=CN=C2NC3=CC(=C(C=C3)F)Cl)OCCCN4CCOCC4", None, None, True],
+        [None, metadata_root.joinpath("molecules.smi"), None, False],
     ]
     with open(metadata_root.joinpath("article.md"), "r") as f:
         article = f.read()
             ),
             gr.File(
                 file_types=[".smi", ".tsv"],
+                label="Multiple SMILES",
             ),
             gr.File(
                 file_types=[".csv"],

model_cards/article.md CHANGED Viewed

@@ -1,11 +1,16 @@
-# Model documentation
-**SMILES**:
-ell lines in rows and genes in columns
 ## Citation

+## Model documentation
+**SMILES**: A single SMILES representing a drug for which the prediction should be performed.
+**Multiple SMILES**: Alternatively, you can upload a `.smi` or a `.tsv` file that is tab-separated and contains SMILES in the first column. Note that it **must not** contain a header. Moreover, provide *either* a single SMILES *or* a file, not both!
+**Transcriptomics data file**: Here, you can optionally upload an omics file with cell lines in rows and genes in columns. If not provided, predictions will be performed on the cell lines available in the [GDSC](https://academic.oup.com/nar/article/41/D1/D955/1059448) and [CCLE](https://sites.broadinstitute.org/ccle/) databases.
+**Confidence**: This toggle determines whether the model returns confidence estimates. If toggled on, the prediction will take ~15 times longer to run. The model will return two estimates, one for aleatoric and one for epistemic uncertainty.
+## NOTE
+If you are a user of the old, deprecated PaccMann webservice (that was hosted on IBM Cloud) and you miss certain functionalities, such as analysing the SMILES or the gene attention, please reach out to {jab,tte}@zurich.ibm.com and we will try to provide those features in a timely manner.
 ## Citation