Spaces:
Sleeping
Sleeping
Ana Sanchez
committed on
Commit
·
5bd2a17
1
Parent(s):
66c0de1
Add data folder
Browse files
app.py
CHANGED
@@ -27,17 +27,16 @@ from rdkit.Chem import AllChem
|
|
27 |
from rdkit.Chem import DataStructs
|
28 |
|
29 |
|
30 |
-
|
31 |
-
|
32 |
basepath = os.path.dirname(__file__)
|
|
|
33 |
|
34 |
-
MODEL_PATH = os.path.join(basepath, "epoch_55.pt")
|
35 |
CLOOME_PATH = "/home/ana/gitrepos/hti-cloob"
|
36 |
-
|
|
|
|
|
|
|
|
|
37 |
imgname = "I1"
|
38 |
-
molecule_features = "all_molecule_cellpainting_features.pkl"
|
39 |
-
image_features = "subset_image_cellpainting_features.pkl"
|
40 |
-
images_arr = "subset_npzs_dict_.npz"
|
41 |
|
42 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
43 |
model_type = "RN50"
|
@@ -113,7 +112,6 @@ def get_features(dataset, model, device):
|
|
113 |
|
114 |
all_ids.append(ids)
|
115 |
|
116 |
-
|
117 |
all_ids = list(chain.from_iterable(all_ids))
|
118 |
|
119 |
if imgs is not None and mols is not None:
|
@@ -158,9 +156,6 @@ def main(df, model_path, model, img_path=None, mol_path=None, image_resolution=N
|
|
158 |
val_img_features, val_ids = result
|
159 |
return val_img_features, val_ids
|
160 |
|
161 |
-
#val_img_features, val_ids = get_features(val, model, device)
|
162 |
-
|
163 |
-
#return val_img_features, val_text_features, val_ids
|
164 |
|
165 |
def img_to_numpy(file):
|
166 |
img = Image.open(file)
|
@@ -305,9 +300,6 @@ def reshape_image(arr):
|
|
305 |
|
306 |
# missing functions: save morgan to to_hdf, create index, load features, calculate similarities
|
307 |
|
308 |
-
|
309 |
-
#model = load(MODEL_PATH, device, model_type, image_resolution)
|
310 |
-
|
311 |
##### STREAMLIT FUNCTIONS ######
|
312 |
st.title('CLOOME: Contrastive Learning for Molecule Representation with Microscopy Images and Chemical Structures')
|
313 |
|
@@ -375,22 +367,22 @@ def molecules_from_image():
|
|
375 |
morgan = [morgan_from_smiles(s) for s in smiles]
|
376 |
molnames = [f"M{i}" for i in range(len(morgan))]
|
377 |
mol_index_fname = "mol_index.csv"
|
378 |
-
mol_index = create_index(
|
379 |
-
molpath = os.path.join(
|
380 |
fps_fname = save_hdf(morgan, molnames, molpath)
|
381 |
mol_imgs = draw_molecules(smiles)
|
382 |
mol_features, mol_ids = main(mol_index, MODEL_PATH, model_type, mol_path=molpath, image_resolution=image_resolution)
|
383 |
predefined_features = False
|
384 |
else:
|
385 |
mol_index = pd.read_csv("cellpainting-unique-molecule.csv")
|
386 |
-
mol_features_torch = torch.load(
|
387 |
mol_features = mol_features_torch["mol_features"]
|
388 |
mol_ids = mol_features_torch["mol_ids"]
|
389 |
print(len(mol_ids))
|
390 |
predefined_features = True
|
391 |
|
392 |
img_index_fname = "img_index.csv"
|
393 |
-
img_index = create_index(
|
394 |
img_features, img_ids = main(img_index, MODEL_PATH, model_type, img_path=npzs, image_resolution=image_resolution)
|
395 |
|
396 |
print(img_features.shape)
|
@@ -434,8 +426,8 @@ def images_from_molecule():
|
|
434 |
morgan = [morgan_from_smiles(s) for s in smiles]
|
435 |
molnames = [f"M{i}" for i in range(len(morgan))]
|
436 |
mol_index_fname = "mol_index.csv"
|
437 |
-
mol_index = create_index(
|
438 |
-
molpath = os.path.join(
|
439 |
fps_fname = save_hdf(morgan, molnames, molpath)
|
440 |
mol_imgs = draw_molecules(smiles)
|
441 |
|
@@ -493,6 +485,3 @@ page_names_to_funcs = {
|
|
493 |
|
494 |
selected_page = st.sidebar.selectbox("What would you like to retrieve?", page_names_to_funcs.keys())
|
495 |
page_names_to_funcs[selected_page]()
|
496 |
-
|
497 |
-
# print(img_features.shape)
|
498 |
-
# print(img_ids)
|
|
|
27 |
from rdkit.Chem import DataStructs
|
28 |
|
29 |
|
|
|
|
|
30 |
basepath = os.path.dirname(__file__)
|
31 |
+
datapath = os.path.join(basepath, "data")
|
32 |
|
|
|
33 |
CLOOME_PATH = "/home/ana/gitrepos/hti-cloob"
|
34 |
+
MODEL_PATH = os.path.join(datapath, "epoch_55.pt")
|
35 |
+
npzs = os.path.join(datapath, "npzs")
|
36 |
+
molecule_features = os.path.join(datapath, "all_molecule_cellpainting_features.pkl")
|
37 |
+
image_features = os.path.join(datapath, "subset_image_cellpainting_features.pkl")
|
38 |
+
images_arr = os.path.join(datapath, "subset_npzs_dict_.npz")
|
39 |
imgname = "I1"
|
|
|
|
|
|
|
40 |
|
41 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
42 |
model_type = "RN50"
|
|
|
112 |
|
113 |
all_ids.append(ids)
|
114 |
|
|
|
115 |
all_ids = list(chain.from_iterable(all_ids))
|
116 |
|
117 |
if imgs is not None and mols is not None:
|
|
|
156 |
val_img_features, val_ids = result
|
157 |
return val_img_features, val_ids
|
158 |
|
|
|
|
|
|
|
159 |
|
160 |
def img_to_numpy(file):
|
161 |
img = Image.open(file)
|
|
|
300 |
|
301 |
# missing functions: save morgan to to_hdf, create index, load features, calculate similarities
|
302 |
|
|
|
|
|
|
|
303 |
##### STREAMLIT FUNCTIONS ######
|
304 |
st.title('CLOOME: Contrastive Learning for Molecule Representation with Microscopy Images and Chemical Structures')
|
305 |
|
|
|
367 |
morgan = [morgan_from_smiles(s) for s in smiles]
|
368 |
molnames = [f"M{i}" for i in range(len(morgan))]
|
369 |
mol_index_fname = "mol_index.csv"
|
370 |
+
mol_index = create_index(datapath, molnames, mol_index_fname)
|
371 |
+
molpath = os.path.join(datapath, "mols.hdf")
|
372 |
fps_fname = save_hdf(morgan, molnames, molpath)
|
373 |
mol_imgs = draw_molecules(smiles)
|
374 |
mol_features, mol_ids = main(mol_index, MODEL_PATH, model_type, mol_path=molpath, image_resolution=image_resolution)
|
375 |
predefined_features = False
|
376 |
else:
|
377 |
mol_index = pd.read_csv("cellpainting-unique-molecule.csv")
|
378 |
+
mol_features_torch = torch.load(molecule_features, map_location=device)
|
379 |
mol_features = mol_features_torch["mol_features"]
|
380 |
mol_ids = mol_features_torch["mol_ids"]
|
381 |
print(len(mol_ids))
|
382 |
predefined_features = True
|
383 |
|
384 |
img_index_fname = "img_index.csv"
|
385 |
+
img_index = create_index(datapath, imgname, img_index_fname)
|
386 |
img_features, img_ids = main(img_index, MODEL_PATH, model_type, img_path=npzs, image_resolution=image_resolution)
|
387 |
|
388 |
print(img_features.shape)
|
|
|
426 |
morgan = [morgan_from_smiles(s) for s in smiles]
|
427 |
molnames = [f"M{i}" for i in range(len(morgan))]
|
428 |
mol_index_fname = "mol_index.csv"
|
429 |
+
mol_index = create_index(datapath, molnames, mol_index_fname)
|
430 |
+
molpath = os.path.join(datapath, "mols.hdf")
|
431 |
fps_fname = save_hdf(morgan, molnames, molpath)
|
432 |
mol_imgs = draw_molecules(smiles)
|
433 |
|
|
|
485 |
|
486 |
selected_page = st.sidebar.selectbox("What would you like to retrieve?", page_names_to_funcs.keys())
|
487 |
page_names_to_funcs[selected_page]()
|
|
|
|
|
|
data/all_molecule_cellpainting_features.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8979250025350c6ff67f986c657c14a881710cfe73e315ef5d126abaecf50b4b
|
3 |
+
size 62906027
|
data/cellpainting-all-imgpermol.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a10cc3285a7b1c3275c30b2aa3654d00651ae4211d5e057118f32c40725e09ff
|
3 |
+
size 14270985
|
data/cellpainting-unique-molecule.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65592c0ee09203c2ba5be15c159c7944c49feea65a24cfb7862bb49af7cd112a
|
3 |
+
size 14265091
|
data/epoch_55.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c612c6da6f943caac839b9102fe98ba944838600942897aaa035f552d9a535bd
|
3 |
+
size 352013623
|
data/subset_image_cellpainting_features.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a7cbfdf80d0ee6197f4e9118be2d7569399601d6c55c4db7fbc2dcbeadd9d6a
|
3 |
+
size 62906027
|
data/subset_npzs_dict_.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ddb3451d5d46a1eed6613c914777e013a6d9a392cfc4f9448d0b9488d099da1
|
3 |
+
size 3656596390
|