Update app.py
Browse files
app.py
CHANGED
@@ -54,71 +54,98 @@ RESISTANCE_GENES = {
|
|
54 |
|
55 |
def read_fasta_file(file_path):
|
56 |
"""Read a FASTA file from disk"""
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
62 |
|
63 |
def read_fasta_from_upload(uploaded_file):
|
64 |
"""Read a FASTA file from Streamlit upload"""
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
69 |
|
70 |
def extract_gene_region(genome_seq, gene_start, gene_end):
|
71 |
"""Extract a gene region with additional context"""
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
def find_mutations_with_context(ref_seq, query_seq, gene_start, gene_end, offset=0):
|
78 |
"""Find mutations with sequence context"""
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
'
|
110 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
}
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
116 |
|
117 |
def analyze_resistance(mutations, gene_info):
|
118 |
"""Analyze mutations for drug resistance patterns"""
|
119 |
resistance_found = []
|
120 |
|
|
|
|
|
121 |
for mut in mutations:
|
|
|
122 |
codon_pos = str(mut['gene_position'] // 3 + 1)
|
123 |
if codon_pos in gene_info['mutations']:
|
124 |
pattern = gene_info['mutations'][codon_pos]
|
@@ -132,154 +159,122 @@ def analyze_resistance(mutations, gene_info):
|
|
132 |
|
133 |
return resistance_found
|
134 |
|
135 |
-
def create_resistance_report(all_results):
    """Summarise, per gene, the resistance hits recorded in ``all_results``.

    Args:
        all_results: Mapping of gene name to a result dict; only the
            ``'resistance'`` entry (a list of hit dicts) is read here.

    Returns:
        A list with one dict per gene whose ``'resistance'`` list is
        non-empty, holding the gene, its drug (looked up in
        ``RESISTANCE_GENES``), the hit count, the hits themselves and the
        maximum ``'confidence'`` value among the hits.
    """
    summary = []
    for gene_name, outcome in all_results.items():
        hits = outcome['resistance']
        if not hits:
            # Genes without resistance hits are left out of the report.
            continue
        summary.append({
            'gene': gene_name,
            'drug': RESISTANCE_GENES[gene_name]['drug'],
            'mutations_found': len(hits),
            'mutations': hits,
            # NOTE(review): max() uses the natural ordering of the stored
            # confidence values; confirm that ordering is meaningful.
            'confidence': max(hit['confidence'] for hit in hits),
        })
    return summary
|
151 |
-
|
152 |
-
def plot_gene_mutations(mutations_by_gene, genome_length):
    """Build a Plotly figure showing each resistance gene and its mutations.

    Every gene in ``RESISTANCE_GENES`` is drawn as a thick horizontal line
    from its ``'start'`` to its ``'end'`` at y=1; any mutations listed for
    that gene are drawn as star markers at y=1.2.

    Args:
        mutations_by_gene: Mapping of gene name to a list of mutation dicts;
            only each mutation's ``'position'`` is read.
        genome_length: Accepted for interface compatibility; not used.

    Returns:
        The populated ``go.Figure``.
    """
    palette = {'rpoB': 'red', 'katG': 'blue', 'inhA': 'green', 'gyrA': 'purple'}
    figure = go.Figure()

    for gene_name, details in RESISTANCE_GENES.items():
        shade = palette.get(gene_name, 'gray')
        hits = mutations_by_gene.get(gene_name, [])

        # Gene extent as a thick bar.
        gene_bar = go.Scatter(
            x=[details['start'], details['end']],
            y=[1, 1],
            mode='lines',
            name=f"{gene_name} ({details['drug']})",
            line=dict(color=shade, width=20, dash='solid'),
        )
        figure.add_trace(gene_bar)

        if not hits:
            continue

        # Mutation markers slightly above the gene bar.
        positions = [hit['position'] for hit in hits]
        markers = go.Scatter(
            x=positions,
            y=[1.2] * len(positions),
            mode='markers',
            name=f'{gene_name} mutations',
            marker=dict(color=shade, size=10, symbol='star'),
        )
        figure.add_trace(markers)

    figure.update_layout(
        title="Resistance-associated Mutations",
        xaxis_title="Genome Position",
        yaxis_visible=False,
        showlegend=True,
        height=400,
        margin=dict(l=50, r=50, t=50, b=50),
    )

    return figure
|
192 |
-
|
193 |
def main():
|
194 |
st.title("M. tuberculosis Drug Resistance Analysis")
|
195 |
|
196 |
st.markdown("""
|
197 |
### Automated Drug Resistance Analysis Tool
|
198 |
Upload your query genome (clinical isolate) in FASTA format for comparison with H37Rv reference.
|
199 |
-
The tool will automatically analyze resistance-associated genes and provide a detailed report.
|
200 |
""")
|
201 |
|
|
|
|
|
|
|
202 |
# Load reference genome
|
203 |
-
|
204 |
-
|
205 |
-
st.success("Reference genome
|
206 |
-
|
207 |
-
st.error(
|
208 |
return
|
209 |
|
210 |
-
# Query genome upload
|
211 |
query_file = st.file_uploader("Upload Query Genome (FASTA)", type=['fasta', 'fa'])
|
212 |
|
213 |
if query_file:
|
214 |
if st.button("Analyze Drug Resistance"):
|
215 |
-
|
216 |
-
|
|
|
|
|
|
|
|
|
|
|
217 |
|
218 |
-
#
|
219 |
all_results = {}
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
|
|
224 |
|
225 |
-
|
226 |
-
|
|
|
227 |
|
228 |
-
#
|
229 |
-
|
|
|
230 |
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
|
236 |
-
#
|
237 |
-
|
|
|
238 |
|
239 |
# Display Results
|
240 |
-
st.header("
|
241 |
|
242 |
-
|
243 |
-
|
|
|
|
|
244 |
|
245 |
-
|
246 |
-
|
247 |
-
st.subheader(f"🧬 {entry['gene']} - {RESISTANCE_GENES[entry['gene']]['drug']}")
|
248 |
-
st.write(f"Confidence: {entry['confidence']}")
|
249 |
-
st.write(f"Mutations found: {entry['mutations_found']}")
|
250 |
-
|
251 |
-
# Create detailed mutation table
|
252 |
-
mutations_df = pd.DataFrame(entry['mutations'])
|
253 |
-
st.dataframe(mutations_df)
|
254 |
-
|
255 |
-
st.markdown("---")
|
256 |
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
|
270 |
-
|
271 |
-
report_df = pd.DataFrame(resistance_report)
|
272 |
csv = report_df.to_csv(index=False)
|
273 |
st.download_button(
|
274 |
-
"Download
|
275 |
csv,
|
276 |
-
"
|
277 |
-
"text/csv"
|
278 |
-
key='download-csv'
|
279 |
)
|
280 |
-
else:
|
281 |
-
st.success("No known resistance mutations detected")
|
282 |
-
st.info("Note: This does not guarantee drug susceptibility. Consider phenotypic testing.")
|
283 |
|
284 |
if __name__ == "__main__":
|
285 |
main()
|
|
|
54 |
|
55 |
def read_fasta_file(file_path):
    """Read a FASTA file from disk and return its sequence as one string.

    Header lines (those starting with ``>``) are skipped wherever they
    appear, so the headers of additional records never leak into the
    returned sequence. All whitespace inside sequence lines is removed and
    the result is upper-cased. Sequence lines of every record are
    concatenated in file order.

    Args:
        file_path: Path of the FASTA file to read.

    Returns:
        The upper-cased sequence, or ``None`` (after reporting the problem
        with ``st.error``) when the file cannot be read or holds no
        sequence data.
    """
    try:
        # utf-8-sig transparently drops a leading BOM, which would otherwise
        # hide the '>' of the first header line.
        with open(file_path, 'r', encoding='utf-8-sig') as handle:
            content = handle.read()
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError for undecodable files.
        st.error(f"Error reading file {file_path}: {str(e)}")
        return None

    sequence = ''.join(
        ''.join(line.split())  # strips spaces, tabs and stray '\r'
        for line in content.splitlines()
        if not line.lstrip().startswith('>')
    ).upper()

    if not sequence:
        st.error(f"Error reading file {file_path}: no sequence data found")
        return None
    return sequence
|
66 |
|
67 |
def read_fasta_from_upload(uploaded_file):
    """Read a FASTA file from a Streamlit upload and return its sequence.

    The uploaded bytes are decoded as UTF-8 (a leading BOM is dropped).
    Lines are split with ``str.splitlines`` so Windows (CRLF) files do not
    leave ``\\r`` characters inside the sequence. Header lines (starting
    with ``>``) are skipped wherever they appear, all whitespace is removed
    from sequence lines, and the result is upper-cased.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            file bytes (as the value returned by ``st.file_uploader`` does).

    Returns:
        The upper-cased sequence, or ``None`` (after reporting the problem
        with ``st.error``) when the upload cannot be decoded or holds no
        sequence data.
    """
    try:
        content = uploaded_file.getvalue().decode('utf-8-sig')
    except (AttributeError, UnicodeDecodeError) as e:
        st.error(f"Error reading uploaded file: {str(e)}")
        return None

    sequence = ''.join(
        ''.join(line.split())  # strips spaces, tabs and stray '\r'
        for line in content.splitlines()
        if not line.lstrip().startswith('>')
    ).upper()

    if not sequence:
        st.error("Error reading uploaded file: no sequence data found")
        return None
    return sequence
|
77 |
|
78 |
def extract_gene_region(genome_seq, gene_start, gene_end, flank=200):
    """Extract a gene region plus flanking context from a genome sequence.

    The slice runs from ``gene_start - flank`` to ``gene_end + flank``,
    clipped to the bounds of ``genome_seq``. The length of the extracted
    slice is written to the Streamlit page.

    Args:
        genome_seq: Full genome sequence (sliceable, supports ``len``).
        gene_start: Start coordinate of the gene.
        gene_end: End coordinate of the gene.
        flank: Number of extra positions to include on each side.
            Defaults to 200, the value previously hard-coded here.

    Returns:
        ``(extracted_seq, start)`` where ``start`` is the slice start index
        actually used, or ``(None, None)`` (after reporting with
        ``st.error``) when the arguments have unusable types.
    """
    try:
        start = max(0, gene_start - flank)
        end = min(len(genome_seq), gene_end + flank)
        extracted_seq = genome_seq[start:end]
    except TypeError as e:
        # e.g. genome_seq is None or a coordinate is not a number.
        st.error(f"Error extracting gene region: {str(e)}")
        return None, None

    st.write(f"Extracted sequence length: {len(extracted_seq)}bp")
    return extracted_seq, start
|
90 |
|
91 |
def find_mutations_with_context(ref_seq, query_seq, gene_start, gene_end, offset=0):
    """Find mutations with sequence context.

    Globally aligns ``ref_seq`` against ``query_seq`` and reports every
    alignment column where the two differ and whose reference coordinate
    (``offset`` + 1-based position in ``ref_seq``) lies within
    ``gene_start``..``gene_end`` inclusive.

    Args:
        ref_seq: Reference sequence of the region being compared.
        query_seq: Query sequence of the same region.
        gene_start: First coordinate of the gene (inclusive).
        gene_end: Last coordinate of the gene (inclusive).
        offset: Value added to positions within ``ref_seq`` to obtain
            coordinates comparable with ``gene_start``/``gene_end``.

    Returns:
        A list of mutation dicts with the keys ``position``,
        ``gene_position``, ``ref_base``, ``query_base``, ``type``,
        ``codon_position`` and ``context``. An empty list is returned when
        no alignment is produced or when any error occurs (the error is
        reported with ``st.error``).
    """
    try:
        st.write(f"Aligning sequences (lengths: ref={len(ref_seq)}, query={len(query_seq)})")

        # Only the first alignment is used, so ask for just one instead of
        # enumerating every co-optimal alignment.
        alignments = pairwise2.align.globalms(
            ref_seq, query_seq,
            match=2,
            mismatch=-3,
            open=-10,
            extend=-0.5,
            one_alignment_only=True,
        )

        if not alignments:
            st.warning("No alignments found")
            return []

        alignment = alignments[0]
        ref_aligned, query_aligned = alignment[0], alignment[1]

        st.write(f"Alignment lengths: ref={len(ref_aligned)}, query={len(query_aligned)}")

        mutations = _mutations_from_alignment(
            ref_aligned, query_aligned, gene_start, gene_end, offset
        )

        st.write(f"Found {len(mutations)} mutations")
        return mutations
    except Exception as e:
        # UI boundary: report the failure on the page rather than crash.
        st.error(f"Error in mutation analysis: {str(e)}")
        return []


def _mutations_from_alignment(ref_aligned, query_aligned, gene_start, gene_end, offset):
    """Collect the differing alignment columns that fall inside the gene."""
    mutations = []
    ref_pos = 0  # number of reference bases consumed so far (1-based position)

    for col, (ref_base, query_base) in enumerate(zip(ref_aligned, query_aligned)):
        if ref_base != '-':
            ref_pos += 1

        if ref_base == query_base:
            continue

        genome_pos = offset + ref_pos
        if not gene_start <= genome_pos <= gene_end:
            continue

        gene_pos = genome_pos - gene_start + 1
        is_snp = ref_base != '-' and query_base != '-'
        mutations.append({
            'position': genome_pos,
            'gene_position': gene_pos,
            'ref_base': ref_base,
            'query_base': query_base if query_base != '-' else 'None',
            'type': 'SNP' if is_snp else 'INDEL',
            # Position within the codon must be counted from the gene
            # start, not from the start of the flank-padded region.
            # NOTE(review): assumes the gene is read from gene_start
            # upward on the given strand - confirm.
            'codon_position': (gene_pos - 1) % 3 + 1,
            'context': {
                'ref': ref_aligned[max(0, col - 5):col] + '[' + ref_base + ']'
                       + ref_aligned[col + 1:col + 6],
                'query': query_aligned[max(0, col - 5):col] + '[' + query_base + ']'
                         + query_aligned[col + 1:col + 6],
            },
        })

    return mutations
|
140 |
|
141 |
def analyze_resistance(mutations, gene_info):
|
142 |
"""Analyze mutations for drug resistance patterns"""
|
143 |
resistance_found = []
|
144 |
|
145 |
+
st.write(f"Analyzing {len(mutations)} mutations for resistance patterns")
|
146 |
+
|
147 |
for mut in mutations:
|
148 |
+
st.write(f"Mutation at position {mut['position']}: {mut['ref_base']} -> {mut['query_base']}")
|
149 |
codon_pos = str(mut['gene_position'] // 3 + 1)
|
150 |
if codon_pos in gene_info['mutations']:
|
151 |
pattern = gene_info['mutations'][codon_pos]
|
|
|
159 |
|
160 |
return resistance_found
|
161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
def main():
    """Run the Streamlit page for M. tuberculosis drug-resistance analysis.

    Flow: load the reference genome from ``NC_000962.3.fasta``, let the user
    upload a query FASTA, and - once "Analyze Drug Resistance" is clicked -
    compare every gene in ``RESISTANCE_GENES`` between reference and query,
    show the mutations and resistance hits per gene, and offer all
    mutations as a CSV download.
    """
    st.title("M. tuberculosis Drug Resistance Analysis")

    st.markdown("""
    ### Automated Drug Resistance Analysis Tool
    Upload your query genome (clinical isolate) in FASTA format for comparison with H37Rv reference.
    """)

    # Debug mode toggle
    debug_mode = st.checkbox("Enable debug mode")

    # Load reference genome
    ref_genome = read_fasta_file("NC_000962.3.fasta")
    if not ref_genome:
        st.error("Failed to load reference genome")
        return
    st.success(f"Reference genome loaded successfully (length: {len(ref_genome)}bp)")

    query_file = st.file_uploader("Upload Query Genome (FASTA)", type=['fasta', 'fa'])
    if not query_file:
        return
    if not st.button("Analyze Drug Resistance"):
        return

    query_genome = read_fasta_from_upload(query_file)
    if not query_genome:
        return
    st.success(f"Query genome loaded successfully (length: {len(query_genome)}bp)")

    # Analysis progress tracking
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Store all results
    all_results = {}

    # Analyze each gene
    for i, (gene, info) in enumerate(RESISTANCE_GENES.items()):
        status_text.text(f"Analyzing {gene} ({info['drug']})...")
        progress_bar.progress((i + 1) / len(RESISTANCE_GENES))

        if debug_mode:
            st.subheader(f"Analyzing {gene}")
            st.write(f"Gene region: {info['start']}-{info['end']}")

        # Extract the same coordinate window from both genomes
        ref_region, ref_start = extract_gene_region(ref_genome, info['start'], info['end'])
        query_region, _ = extract_gene_region(query_genome, info['start'], info['end'])

        if not (ref_region and query_region):
            st.error(f"Failed to analyze {gene}")
            continue

        mutations = find_mutations_with_context(
            ref_region, query_region,
            info['start'], info['end'],
            ref_start
        )
        resistance = analyze_resistance(mutations, info)

        all_results[gene] = {
            'mutations': mutations,
            'resistance': resistance
        }

        if debug_mode:
            st.write(f"Found {len(mutations)} mutations")
            st.write(f"Identified {len(resistance)} resistance patterns")

    # Clear progress indicators
    progress_bar.empty()
    status_text.empty()

    # Display Results
    st.header("Analysis Results")

    for gene, results in all_results.items():
        st.subheader(f"{gene} Analysis")
        info = RESISTANCE_GENES[gene]

        st.write(f"Drug: {info['drug']}")
        st.write(f"Total mutations found: {len(results['mutations'])}")

        if results['mutations']:
            mutations_df = pd.DataFrame(results['mutations'])
            st.write("All mutations found:")
            st.dataframe(mutations_df)

        if results['resistance']:
            st.warning(f"Potential resistance mutations found in {gene}")
            resistance_df = pd.DataFrame(results['resistance'])
            st.dataframe(resistance_df)
        else:
            st.info(f"No known resistance mutations found in {gene}")

    # Download complete results.
    # The download button is rendered directly: wrapping it in a second
    # st.button nested inside the "Analyze" branch can never work, because
    # clicking the inner button reruns the script with the outer button
    # reporting False, so the inner branch is never reached.
    report_data = [
        {
            'Gene': gene,
            'Drug': RESISTANCE_GENES[gene]['drug'],
            **mut
        }
        for gene, results in all_results.items()
        for mut in results['mutations']
    ]

    if report_data:
        report_df = pd.DataFrame(report_data)
        csv = report_df.to_csv(index=False)
        st.download_button(
            label="Download Full Report (CSV)",
            data=csv,
            file_name="mtb_analysis_report.csv",
            mime="text/csv"
        )
|
|
|
|
|
|
|
278 |
|
279 |
if __name__ == "__main__":
|
280 |
main()
|