Spaces:
Sleeping
Sleeping
Commit
·
2c449b4
1
Parent(s):
1acd869
fix bugs
Browse files
cas12.py
CHANGED
@@ -145,10 +145,22 @@ def process_gene(gene_symbol, model_path):
|
|
145 |
|
146 |
return all_data, gene_sequence
|
147 |
|
|
|
148 |
def create_genbank_features(formatted_data):
|
149 |
features = []
|
150 |
for data in formatted_data:
|
151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
feature = SeqFeature(location=location, type="misc_feature", qualifiers={
|
153 |
'label': data[5], # gRNA as label
|
154 |
'note': f"Prediction: {data[6]}" # Prediction score in note
|
|
|
145 |
|
146 |
return all_data, gene_sequence
|
147 |
|
148 |
+
|
149 |
def create_genbank_features(formatted_data):
|
150 |
features = []
|
151 |
for data in formatted_data:
|
152 |
+
try:
|
153 |
+
# Attempt to convert start and end positions to integers
|
154 |
+
start = int(data[1])
|
155 |
+
end = int(data[2])
|
156 |
+
except ValueError as e:
|
157 |
+
# Log the error and skip this iteration if conversion fails
|
158 |
+
print(f"Error converting start/end to int: {data[1]}, {data[2]} - {e}")
|
159 |
+
continue # Skip this iteration
|
160 |
+
|
161 |
+
# Proceed as normal if conversion is successful
|
162 |
+
strand = 1 if data[3] == '+' else -1
|
163 |
+
location = FeatureLocation(start=start, end=end, strand=strand)
|
164 |
feature = SeqFeature(location=location, type="misc_feature", qualifiers={
|
165 |
'label': data[5], # gRNA as label
|
166 |
'note': f"Prediction: {data[6]}" # Prediction score in note
|
cas9on.py
CHANGED
@@ -40,23 +40,24 @@ class DCModelOntar:
|
|
40 |
return yp.ravel()
|
41 |
|
42 |
# Function to predict on-target efficiency and format output
|
43 |
-
def format_prediction_output(
|
44 |
dcModel = DCModelOntar(model_path)
|
45 |
formatted_data = []
|
46 |
|
47 |
-
for
|
48 |
# Encode the gRNA sequence
|
49 |
-
encoded_seq = get_seqcode(
|
50 |
|
51 |
# Predict on-target efficiency using the model
|
52 |
prediction = dcModel.ontar_predict(encoded_seq)
|
53 |
|
54 |
# Format output
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
60 |
|
61 |
return formatted_data
|
62 |
|
|
|
40 |
return yp.ravel()
|
41 |
|
42 |
# Function to predict on-target efficiency and format output
|
43 |
+
def format_prediction_output(targets, model_path):
|
44 |
dcModel = DCModelOntar(model_path)
|
45 |
formatted_data = []
|
46 |
|
47 |
+
for target in targets:
|
48 |
# Encode the gRNA sequence
|
49 |
+
encoded_seq = get_seqcode(target[0]).reshape(-1,4,1,23)
|
50 |
|
51 |
# Predict on-target efficiency using the model
|
52 |
prediction = dcModel.ontar_predict(encoded_seq)
|
53 |
|
54 |
# Format output
|
55 |
+
gRNA = target[1]
|
56 |
+
chr = target[2]
|
57 |
+
start = target[3]
|
58 |
+
end = target[4]
|
59 |
+
strand = target[5]
|
60 |
+
formatted_data.append([chr, start, end, strand, target[0], gRNA, prediction[0]])
|
61 |
|
62 |
return formatted_data
|
63 |
|