Spaces:
Runtime error
Runtime error
File size: 5,096 Bytes
152a01f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import re
import gradio as gr
import os
import accelerate
import spaces
from tqdm import tqdm
import subprocess
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from huggingface_hub import login
from docling.document_converter import DocumentConverter
# Authenticate with the Hugging Face Hub only when a token is configured.
# Calling login(token=None) falls back to an interactive prompt, which cannot
# be answered when the app runs headless.
hf_token = os.getenv('HF_TOKEN')
if hf_token:
    login(token=hf_token)

# Quantized GGUF checkpoint served by this app, and where it is stored locally.
repo_id = "QuantFactory/Meta-Llama-3-70B-Instruct-GGUF"
model_id = "Meta-Llama-3-70B-Instruct.Q2_K.gguf"
local_dir = "models"

# Download the model file at start-up so it is on disk before the first request.
hf_hub_download(
    repo_id=repo_id,
    filename=model_id,
    local_dir=local_dir,
)
# Instructions sent to the model when the caller does not supply a prompt.
_DEFAULT_HARMONIZE_PROMPT = """
Please reformat the provided medical report into the following standardized structure:
1. Hospital Information:
- Name of Hospital: [Name of hospital]
- Department: [Relevant department or 'N/A']
2. Patient Information:
- Name: [Full Name]
- Gender: [Gender]
- Date of Birth: [Date of Birth]
- Address: [Full Address or 'N/A']
- ID Numbers:
- [Relevant identifiers such as NHS Number, Case Number, etc.]
3. Procedure Details:
- Date of Procedure: [Date]
- Referring Doctor: [Name or 'N/A']
- Performed By:
- Consultant: [Name or 'N/A']
- Additional Clinicians: [Name(s) or 'N/A']
- Nurses: [Name(s) or 'N/A']
- Details:
- Indications: [Symptoms, reasons for procedure]
- Instrument: [Instrument details or 'N/A']
- Co-morbidities: [Relevant conditions or 'N/A']
- ASA Status: [ASA classification or 'N/A']
- Procedure: [Details of patient preparation and exact description of procedures performed as in the original report or 'N/A']
- Findings: [Exact findings from the report, including any locations, measurements, or observations]
- Specimens Taken: [Details on specimens, if any, or 'N/A']
- Comments: [Additional notes, advice, or remarks from the report]
4. Diagnosis and Outcomes:
- Diagnosis: [Exact diagnosis or 'N/A']
- Therapeutic Actions: [Treatments performed or 'N/A']
- Complications: [Details on complications or 'No complications']
- Follow-Up: [Exact follow-up recommendations from the report]
Instructions for Output:
1. Use the exact wording and details from the original report wherever possible. Do not summarize or interpret information.
2. If any information is missing in the original report, use 'N/A' for the corresponding field.
3. Ensure the output matches the given structure exactly, without omitting any fields.
4. Retain all medical terms, values, and phrases as stated in the report.
"""


def harmonize_doc(llm, pdftext, prompt, maxtokens, temperature, top_probability, model_name):
    """Ask the chat model to rewrite one report into the standardized structure.

    Args:
        llm: Object exposing ``create_chat_completion(messages=..., ...)`` and
            returning a dict with ``['choices'][0]['message']['content']``.
        pdftext: Text of the report to harmonize; sent as the user message.
        prompt: Instructions for the model. When empty or whitespace-only, the
            built-in harmonization instructions are used instead.
        maxtokens: Forwarded as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_probability: Forwarded as ``top_p``.
        model_name: Currently unused; kept so existing callers keep working.

    Returns:
        The model's reply. If the reply contains the first three
        whitespace-separated words of ``pdftext``, everything before that
        point is dropped and the remainder is stripped; otherwise the reply
        is returned unchanged.
    """
    # Honour a caller-supplied prompt instead of always overwriting it.
    instructions = prompt if prompt and prompt.strip() else _DEFAULT_HARMONIZE_PROMPT

    response = llm.create_chat_completion(
        messages=[
            # Instructions go in the system turn so the model treats them as
            # directives rather than as something it said earlier.
            {"role": "system", "content": instructions},
            {"role": "user", "content": pdftext},
        ],
        max_tokens=maxtokens,
        temperature=temperature,
        top_p=top_probability,
    )
    reply = response['choices'][0]['message']['content']

    # Drop any preamble the model emitted before it started echoing the report.
    opening_words = ' '.join(pdftext.split()[:3])
    start = reply.find(opening_words)
    if start != -1:
        reply = reply[start:].strip()
    return reply
@spaces.GPU(duration=120)
def pdf_to_text(files, input_text='', prompt='', model_name='default', temperature=0, maxtokens=2048, top_probability=0.95):
    """Convert each uploaded document to Markdown and harmonize it with the LLM.

    Args:
        files: Iterable of uploaded files accepted by
            ``DocumentConverter.convert``. ``None`` or an empty collection
            yields an empty result.
        input_text: Currently unused; kept for interface compatibility.
        prompt: Forwarded to ``harmonize_doc``.
        model_name: Forwarded to ``harmonize_doc``.
        temperature: Forwarded to ``harmonize_doc``.
        maxtokens: Forwarded to ``harmonize_doc``.
        top_probability: Forwarded to ``harmonize_doc``.

    Returns:
        The harmonized reports concatenated in upload order, each one followed
        by a dashed separator line; ``''`` when no files were supplied.
    """
    # Nothing uploaded: return early instead of failing on iteration, and
    # before paying for the model load.
    if not files:
        return ''

    llm = Llama(
        # Same directory the start-up download writes to.
        model_path=os.path.join(local_dir, model_id),
        flash_attn=True,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    # One converter is enough for the whole batch.
    converter = DocumentConverter()
    separator = '\n\n-----------------------------------------------------------------\n\n'

    sections = []
    for file in files:
        result = converter.convert(file)
        pdftext = result.document.export_to_markdown()
        sections.append(
            harmonize_doc(llm, pdftext, prompt, maxtokens, temperature, top_probability, model_name)
        )
    # Every report, including the last, is followed by the separator.
    return ''.join(section + separator for section in sections)
# Components defined for the UI. Only `input_files` is wired into the
# interface below; the others are created but not currently used as inputs
# or outputs.
temp_slider = gr.Slider(minimum=0, maximum=2, value=0.9, label="Temperature Value")
model_name = gr.Dropdown(["default", "fine-tuned"], label="LLama Model")
max_tokens = gr.Number(value=600, label="Max Tokens")
input_text = gr.Text(label='Input Text')
input_prompt = gr.Text(label='Prompt')
input_files = gr.File(file_count="multiple")
output_path_component = gr.File(label="Select Output Path")

# Single-input interface: the uploaded files go to pdf_to_text, which runs
# with its default values for every other parameter.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs=input_files,
    outputs=['text'],
    title='COBIx Endoscopy Report Harmonization',
    description="This application helps standardize medical reports into a consistent format",
    theme=gr.themes.Soft(),
)

iface.launch()