Spaces:

NiniCat
/

CRISPRTool

Sleeping

supercat666 committed on Feb 15, 2024

Commit

2c449b4

1 Parent(s): 1acd869

fix bugs

Files changed (2) hide show

cas12.py CHANGED Viewed

@@ -145,10 +145,22 @@ def process_gene(gene_symbol, model_path):
     return all_data, gene_sequence
 def create_genbank_features(formatted_data):
     features = []
     for data in formatted_data:
-        location = FeatureLocation(start=int(data[1]), end=int(data[2]), strand=(1 if data[3] == '+' else -1))
         feature = SeqFeature(location=location, type="misc_feature", qualifiers={
             'label': data[5],  # gRNA as label
             'note': f"Prediction: {data[6]}"  # Prediction score in note

     return all_data, gene_sequence
 def create_genbank_features(formatted_data):
     features = []
     for data in formatted_data:
+        try:
+            # Attempt to convert start and end positions to integers
+            start = int(data[1])
+            end = int(data[2])
+        except ValueError as e:
+            # Log the error and skip this iteration if conversion fails
+            print(f"Error converting start/end to int: {data[1]}, {data[2]} - {e}")
+            continue  # Skip this iteration
+        # Proceed as normal if conversion is successful
+        strand = 1 if data[3] == '+' else -1
+        location = FeatureLocation(start=start, end=end, strand=strand)
         feature = SeqFeature(location=location, type="misc_feature", qualifiers={
             'label': data[5],  # gRNA as label
             'note': f"Prediction: {data[6]}"  # Prediction score in note

cas9on.py CHANGED Viewed

@@ -40,23 +40,24 @@ class DCModelOntar:
         return yp.ravel()
 # Function to predict on-target efficiency and format output
-def format_prediction_output(gRNAs, model_path):
     dcModel = DCModelOntar(model_path)
     formatted_data = []
-    for gRNA in gRNAs:
         # Encode the gRNA sequence
-        encoded_seq = get_seqcode(gRNA[0]).reshape(-1,4,1,23)
         # Predict on-target efficiency using the model
         prediction = dcModel.ontar_predict(encoded_seq)
         # Format output
-        chr = gRNA[1]
-        start = gRNA[2]
-        end = gRNA[3]
-        strand = gRNA[4]
-        formatted_data.append([chr, start, end, strand, gRNA[0], prediction[0]])
     return formatted_data

         return yp.ravel()
 # Function to predict on-target efficiency and format output
def format_prediction_output(targets, model_path):
    """Predict on-target efficiency for each target and format the results.

    Args:
        targets: Iterable of indexable records. Index 0 is the sequence that
            is encoded with ``get_seqcode``; indices 1 through 5 are read as
            the gRNA, chromosome, start, end and strand, in that order.
        model_path: Passed unchanged to ``DCModelOntar`` to build the model.

    Returns:
        A list with one row per target, laid out as
        ``[chromosome, start, end, strand, target[0], gRNA, prediction[0]]``.
    """
    model = DCModelOntar(model_path)
    formatted_data = []
    for target in targets:
        # Encode the sequence and reshape it to (-1, 4, 1, 23) before
        # handing it to the model.
        encoded_seq = get_seqcode(target[0]).reshape(-1, 4, 1, 23)
        # Predict on-target efficiency using the model
        prediction = model.ontar_predict(encoded_seq)
        # Format output. The chromosome is bound to "chrom" rather than
        # "chr" so the builtin chr() is not shadowed inside this function.
        gRNA = target[1]
        chrom = target[2]
        start = target[3]
        end = target[4]
        strand = target[5]
        # Only the first element of the prediction is kept for each target.
        formatted_data.append(
            [chrom, start, end, strand, target[0], gRNA, prediction[0]]
        )
    return formatted_data