Spaces:
Paused
Paused
import functools

import fitz  # PyMuPDF for PDF handling
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        The ``page.get_text()`` output of each page, joined in page order
        with no separator. An empty string if the document has no pages.
    """
    # Opening the document as a context manager guarantees the underlying
    # file handle is released, even when a page fails to parse.
    with fitz.open(pdf_path) as doc:
        # join() avoids rebuilding the accumulated string once per page.
        return "".join(page.get_text() for page in doc)
# Checkpoint handed to the transformers Auto* loaders.
# NOTE(review): this checkpoint appears to be published as a text-embedding
# model -- confirm it is actually suitable for causal-LM generation.
_MODEL_NAME = "Alibaba-NLP/gte-Qwen1.5-7B-instruct"

# Upper bound on generated tokens. Without an explicit budget, generate()
# falls back to the library default length cap (20 tokens unless the
# checkpoint overrides it), which counts the prompt and leaves no room for
# output after a long document.
_MAX_NEW_TOKENS = 512

_UNSUPPORTED_MESSAGE = "Unsupported file format. Please upload a PDF or TXT file."
_NO_FILE_MESSAGE = "No file uploaded. Please upload a PDF or TXT file."


@functools.lru_cache(maxsize=1)
def _load_tokenizer_and_model():
    """Load the tokenizer and model once; later calls reuse the same pair."""
    tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(_MODEL_NAME)
    return tokenizer, model


def _read_document_text(path):
    """Return the text stored at ``path``, or ``None`` for other extensions."""
    # Compare in lower case so "REPORT.PDF" and "notes.TXT" are accepted too.
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(path)
    if lowered.endswith(".txt"):
        # Read from the path instead of the upload object: the object may be
        # a plain path string or a handle whose position is not at the start.
        with open(path, "rb") as handle:
            return handle.read().decode("utf-8")
    return None


# Function to handle file upload and text input
def analyze_document(file, prompt):
    """Run the language model over an uploaded document plus a user prompt.

    Args:
        file: The uploaded document, either a filesystem path string or an
            object whose ``name`` attribute is that path. ``None`` means
            nothing was uploaded.
        prompt: Instruction text appended after the document content.

    Returns:
        The decoded model output (this includes the echoed input text), or a
        short explanatory message when no file was supplied or the file
        extension is neither ``.pdf`` nor ``.txt``.

    Raises:
        UnicodeDecodeError: If a ``.txt`` upload is not valid UTF-8.
    """
    if file is None:
        return _NO_FILE_MESSAGE

    path = file if isinstance(file, str) else file.name
    text = _read_document_text(path)
    if text is None:
        return _UNSUPPORTED_MESSAGE

    # Loading the checkpoint is by far the slowest step, so it is done once
    # and cached rather than repeated on every request.
    tokenizer, model = _load_tokenizer_and_model()

    # Generate input for the model
    input_text = f"Document content:\n{text}\n\nPrompt:\n{prompt}"
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=_MAX_NEW_TOKENS)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Define Gradio interface
# Components come from the top-level gr.File / gr.Textbox classes: the older
# gr.inputs.* namespace is deprecated in Gradio 3 and removed in Gradio 4.
iface = gr.Interface(
    fn=analyze_document,
    inputs=[
        gr.File(label="Upload TXT or PDF Document"),
        gr.Textbox(label="Prompt", placeholder="Enter your structured prompt here"),
    ],
    outputs="text",
    title="Document Analysis with GPT Model",
    description="Upload a TXT or PDF document and enter a prompt to get an analysis.",
)

# Launch the interface
iface.launch()