Spaces:
Runtime error
Runtime error
import re | |
import gradio as gr | |
import os | |
import accelerate | |
import spaces | |
from tqdm import tqdm | |
import subprocess | |
from huggingface_hub import hf_hub_download | |
from llama_cpp import Llama | |
from huggingface_hub import login | |
from docling.document_converter import DocumentConverter | |
# Authenticate with the Hugging Face Hub only when a token is configured.
# Calling login() with token=None makes huggingface_hub fall back to an
# interactive prompt, which cannot be answered in a headless deployment.
_hf_token = os.getenv('HF_TOKEN')
if _hf_token:
    login(token=_hf_token)

# Location of the quantized GGUF weights on the Hub and on local disk.
repo_id = "QuantFactory/Meta-Llama-3-70B-Instruct-GGUF"
model_id = "Meta-Llama-3-70B-Instruct.Q2_K.gguf"
local_dir = "models"

# Fetch the model file at import time so it is on disk under
# `local_dir` before the first request is served.
hf_hub_download(
    repo_id=repo_id,
    filename=model_id,
    local_dir=local_dir,
)
# Instruction prompt used when the caller does not supply one.
_DEFAULT_HARMONIZE_PROMPT = """
Please reformat the provided medical report into the following standardized structure:
1. Hospital Information:
- Name of Hospital: [Name of hospital]
- Department: [Relevant department or 'N/A']
2. Patient Information:
- Name: [Full Name]
- Gender: [Gender]
- Date of Birth: [Date of Birth]
- Address: [Full Address or 'N/A']
- ID Numbers:
- [Relevant identifiers such as NHS Number, Case Number, etc.]
3. Procedure Details:
- Date of Procedure: [Date]
- Referring Doctor: [Name or 'N/A']
- Performed By:
- Consultant: [Name or 'N/A']
- Additional Clinicians: [Name(s) or 'N/A']
- Nurses: [Name(s) or 'N/A']
- Details:
- Indications: [Symptoms, reasons for procedure]
- Instrument: [Instrument details or 'N/A']
- Co-morbidities: [Relevant conditions or 'N/A']
- ASA Status: [ASA classification or 'N/A']
- Procedure: [Details of patient preparation and exact description of procedures performed as in the original report or 'N/A']
- Findings: [Exact findings from the report, including any locations, measurements, or observations]
- Specimens Taken: [Details on specimens, if any, or 'N/A']
- Comments: [Additional notes, advice, or remarks from the report]
4. Diagnosis and Outcomes:
- Diagnosis: [Exact diagnosis or 'N/A']
- Therapeutic Actions: [Treatments performed or 'N/A']
- Complications: [Details on complications or 'No complications']
- Follow-Up: [Exact follow-up recommendations from the report]
Instructions for Output:
1. Use the exact wording and details from the original report wherever possible. Do not summarize or interpret information.
2. If any information is missing in the original report, use 'N/A' for the corresponding field.
3. Ensure the output matches the given structure exactly, without omitting any fields.
4. Retain all medical terms, values, and phrases as stated in the report.
"""


def harmonize_doc(llm, pdftext, prompt, maxtokens, temperature, top_probability, model_name):
    """Ask the chat model to rewrite one report into the standard structure.

    Args:
        llm: Object exposing ``create_chat_completion(messages=..., ...)``
            and returning an OpenAI-style response dict.
        pdftext: Text of the report to be harmonized.
        prompt: Instruction text for the model. When empty or blank, the
            built-in medical-report template is used instead.
        maxtokens: Upper bound on generated tokens (``max_tokens``).
        temperature: Sampling temperature.
        top_probability: Nucleus-sampling threshold, forwarded as ``top_p``.
        model_name: Accepted for interface compatibility; not used here.

    Returns:
        The model's reply. If the reply contains the first three words of
        ``pdftext``, everything before that point is dropped and the
        remainder is stripped of surrounding whitespace.
    """
    # Honour a caller-supplied prompt; fall back to the default template.
    instructions = prompt if prompt and prompt.strip() else _DEFAULT_HARMONIZE_PROMPT

    response = llm.create_chat_completion(
        messages=[
            # Instructions belong in the system turn, not a fake assistant turn.
            {"role": "system", "content": instructions},
            {"role": "user", "content": pdftext},
        ],
        max_tokens=maxtokens,
        temperature=temperature,
        top_p=top_probability,
    )
    output = response['choices'][0]['message']['content']

    # Drop any preamble the model emitted before echoing the start of the
    # report (identified by the report's first three words).
    leading_words = ' '.join(pdftext.split()[:3])
    find_index = output.find(leading_words)
    if find_index != -1:
        output = output[find_index:].strip()
    return output
def pdf_to_text(files, input_text='', prompt='', model_name='default', temperature=0, maxtokens=2048, top_probability=0.95):
    """Convert uploaded documents to text and harmonize each with the LLM.

    Args:
        files: Iterable of uploaded documents (paths, or objects carrying
            the path in a ``name`` attribute). ``None`` or empty yields ''.
        input_text: Accepted for interface compatibility; the text of each
            converted document is used instead.
        prompt: Optional instruction prompt passed to ``harmonize_doc``.
        model_name: Passed through to ``harmonize_doc``.
        temperature: Sampling temperature.
        maxtokens: Maximum number of tokens generated per document.
        top_probability: Nucleus-sampling threshold.

    Returns:
        The harmonized reports concatenated in input order, each followed
        by a dashed separator line.
    """
    # Nothing uploaded: skip the expensive model load entirely.
    if not files:
        return ''

    # The model is loaded per call, as in the original implementation.
    llm = Llama(
        model_path=os.path.join(local_dir, model_id),
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    # One converter instance is reused for every document in the batch.
    converter = DocumentConverter()
    separator = '\n\n-----------------------------------------------------------------\n\n'

    sections = []
    for file in files:
        # Upload handlers may pass plain paths or tempfile-like wrappers
        # whose path is in `.name` -- presumably depends on the Gradio
        # version; verify against the deployed one.
        if isinstance(file, (str, os.PathLike)):
            source = file
        else:
            source = getattr(file, "name", file)
        pdftext = converter.convert(source).document.export_to_markdown()
        harmonized = harmonize_doc(
            llm, pdftext, prompt, maxtokens, temperature, top_probability, model_name
        )
        sections.append(harmonized + separator)
    return ''.join(sections)
# Stand-alone Gradio components. Only `input_files` is passed to the
# Interface below; the remaining components are created but not wired in,
# so pdf_to_text runs with its own default sampling parameters.
temp_slider = gr.Slider(
    minimum=0,
    maximum=2,
    value=0.9,
    label="Temperature Value",
)
model_name = gr.Dropdown(
    ["default", "fine-tuned"],
    label="LLama Model",
)
max_tokens = gr.Number(
    value=600,
    label="Max Tokens",
)
input_text = gr.Text(label='Input Text')
input_prompt = gr.Text(label='Prompt')
input_files = gr.File(file_count="multiple")
output_path_component = gr.File(label="Select Output Path")

# Single-function UI: uploaded files in, harmonized text out.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs=input_files,
    outputs=['text'],
    title='COBIx Endoscopy Report Harmonization',
    description="This application helps standardize medical reports into a consistent format",
    theme=gr.themes.Soft(),
)

# Start the web server as soon as the module is executed.
iface.launch()