# Anupam251272's picture
# Update app.py
# b348401 verified
import gradio as gr
import PyPDF2
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from deep_translator import GoogleTranslator # More stable than googletrans
import logging
from typing import Optional, Dict
import time
from pathlib import Path
import os
# Root logging setup: INFO and above, each record prefixed with a timestamp
# and its level name.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)
# Supported output languages as (code, display name, description) rows.
# Row order is the order in which the languages are listed in the UI.
_LANGUAGE_ROWS = (
    ("hi", "Hindi - हिन्दी", "Official language of India, written in Devanagari script"),
    ("ta", "Tamil - தமிழ்", "Classical language of Tamil Nadu, written in Tamil script"),
    ("te", "Telugu - తెలుగు", "Official language of Andhra Pradesh and Telangana"),
    ("bn", "Bengali - বাংলা", "Official language of West Bengal and Bangladesh"),
    ("mr", "Marathi - मराठी", "Official language of Maharashtra"),
)

# Language code -> {"name", "description", "deep_translator_code"}.
# For every supported language the translator code equals the language code.
LANGUAGE_MAPPING = {
    code: {
        "name": display_name,
        "description": description,
        "deep_translator_code": code,
    }
    for code, display_name, description in _LANGUAGE_ROWS
}
class PDFQueryTranslator:
    """Answer a question about a PDF with a small causal LM and translate the answer.

    The pipeline is: extract the PDF text, build a prompt from the query and
    the beginning of that text, generate a continuation with the model, then
    translate the generated answer into the requested language.

    Args:
        max_retries: Number of attempts for model loading and for translation.
        retry_delay: Seconds to wait between failed attempts.
    """

    # Checkpoint loaded by ``setup_model``; a small model is used for stability.
    MODEL_NAME = "facebook/opt-125m"
    # Number of leading PDF characters placed into the prompt.
    MAX_CONTEXT_CHARS = 1000
    # Upper bound on the number of tokens generated for the answer.
    MAX_NEW_TOKENS = 200
    # Maximum characters sent to the translator per request.
    TRANSLATION_CHUNK_SIZE = 4500

    def __init__(self, max_retries=3, retry_delay=1):
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.setup_device()
        self.setup_model()
        logger.info(f"Initialization complete. Using device: {self.device}")

    def setup_device(self):
        """Select CUDA when available, otherwise CPU; fall back to CPU on any error."""
        try:
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if self.device.type == "cuda":
                # Start from a clean allocator cache and report the card's memory.
                torch.cuda.empty_cache()
                logger.info(f"Available CUDA memory: {torch.cuda.get_device_properties(0).total_memory}")
        except Exception as e:
            logger.warning(f"Error setting up CUDA device: {e}. Falling back to CPU.")
            self.device = torch.device("cpu")

    def setup_model(self):
        """Load the tokenizer and model onto ``self.device``, retrying on failure.

        Raises:
            Exception: If every attempt fails (or no attempt is allowed). The
                last underlying error is attached as the cause.
        """
        last_error = None
        for attempt in range(self.max_retries):
            try:
                self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
                # Half precision only on GPU; CPU stays in float32.
                dtype = torch.float16 if self.device.type == "cuda" else torch.float32
                self.model = AutoModelForCausalLM.from_pretrained(
                    self.MODEL_NAME,
                    torch_dtype=dtype,
                ).to(self.device)
                if self.device.type == "cuda":
                    torch.cuda.empty_cache()  # Clear CUDA cache
                logger.info(f"Model loaded successfully on {self.device}")
                return
            except Exception as e:
                last_error = e
                logger.error(f"Attempt {attempt + 1} failed: {str(e)}")
                if attempt < self.max_retries - 1:
                    time.sleep(self.retry_delay)
        # Reached only when no attempt succeeded; keep the root cause attached.
        raise Exception("Failed to load model after maximum retries") from last_error

    def _read_pdf_text(self, pdf_file):
        """Return the text of all pages joined by newlines; raise on file-level failure.

        A page that fails to extract is logged and replaced by a placeholder
        line so the remaining pages are still returned.
        """
        if not os.path.exists(pdf_file):
            raise FileNotFoundError(f"PDF file not found: {pdf_file}")
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        pages = []
        for page_num in range(len(pdf_reader.pages)):
            try:
                # Page lookup stays inside the try so one bad page cannot
                # abort the whole document.
                page_text = pdf_reader.pages[page_num].extract_text()
                # Guard against a missing result so the join below cannot fail.
                pages.append(page_text or "")
            except Exception as e:
                logger.error(f"Error extracting text from page {page_num}: {e}")
                pages.append(f"[Error extracting page {page_num}]")
        return "\n".join(pages)

    def extract_text_from_pdf(self, pdf_file: str) -> str:
        """Extract text from a PDF, never raising.

        Args:
            pdf_file: Path to the PDF on disk.

        Returns:
            The extracted text, or a message starting with
            ``"Error processing PDF: "`` if the file could not be read.
        """
        try:
            return self._read_pdf_text(pdf_file)
        except Exception as e:
            logger.error(f"Error processing PDF: {str(e)}")
            return f"Error processing PDF: {str(e)}"

    @staticmethod
    def _split_into_chunks(text, max_chunk_size):
        """Split ``text`` into pieces of at most ``max_chunk_size`` characters.

        Cuts are placed just after the last space or newline inside each
        window so words are not split; a window with no whitespace is cut at
        the size limit. Concatenating the pieces reproduces ``text``.
        """
        if max_chunk_size < 1:
            raise ValueError("max_chunk_size must be at least 1")
        chunks = []
        start = 0
        length = len(text)
        while start < length:
            end = min(start + max_chunk_size, length)
            if end < length:
                cut = max(text.rfind(" ", start, end), text.rfind("\n", start, end))
                if cut > start:
                    end = cut + 1  # keep the whitespace with the left piece
            chunks.append(text[start:end])
            start = end
        return chunks

    def translate_text(self, text: str, target_lang: str) -> str:
        """Translate ``text`` into ``target_lang`` with deep_translator, retrying on failure.

        Long text is sent in chunks and the translated chunks are joined with
        single spaces. A retry resumes at the chunk that failed instead of
        starting over.

        Args:
            text: Text to translate.
            target_lang: Target language code passed to ``GoogleTranslator``.

        Returns:
            The translated text, or a message starting with
            ``"Translation error: "`` if all attempts fail.
        """
        chunks = self._split_into_chunks(text, self.TRANSLATION_CHUNK_SIZE)
        translated_chunks = []
        last_error = None
        for attempt in range(self.max_retries):
            try:
                translator = GoogleTranslator(source='auto', target=target_lang)
                # Continue after the chunks that were already translated.
                for index in range(len(translated_chunks), len(chunks)):
                    if index > 0:
                        time.sleep(0.5)  # Rate limiting between requests
                    translated = translator.translate(chunks[index])
                    translated_chunks.append(translated or "")
                return ' '.join(translated_chunks)
            except Exception as e:
                last_error = e
                logger.error(f"Translation attempt {attempt + 1} failed: {str(e)}")
                if attempt < self.max_retries - 1:
                    time.sleep(self.retry_delay)
        if last_error is None:
            # max_retries <= 0: nothing was attempted.
            return "Translation error: no translation attempt was made"
        return f"Translation error: {str(last_error)}"

    def process_query(self, pdf_file: str, query: str, language: str) -> str:
        """Answer ``query`` about ``pdf_file`` and return the answer translated.

        Args:
            pdf_file: Path to the uploaded PDF.
            query: The user's question.
            language: A key of ``LANGUAGE_MAPPING``.

        Returns:
            The translated answer, or a human-readable validation/error
            message. This method does not raise.
        """
        try:
            # Validate inputs
            if not pdf_file or not os.path.exists(pdf_file):
                return "Please provide a valid PDF file."
            if not query or not query.strip():
                return "Please provide a valid query."
            if language not in LANGUAGE_MAPPING:
                return "Please select a valid language."

            # Extract text. Failure is detected via the exception rather than
            # by inspecting the text, so a document that happens to start
            # with "Error" is still processed.
            try:
                pdf_text = self._read_pdf_text(pdf_file)
            except Exception as e:
                logger.error(f"Error processing PDF: {str(e)}")
                return f"Error processing PDF: {str(e)}"

            # Generate response
            context = pdf_text[:self.MAX_CONTEXT_CHARS]  # Limit content length
            prompt = f"Query: {query}\n\nContent: {context}\n\nAnswer:"
            encoded = self.tokenizer(prompt, return_tensors="pt")
            input_ids = encoded.input_ids.to(self.device)
            attention_mask = encoded.attention_mask.to(self.device)
            prompt_length = input_ids.shape[-1]
            with torch.no_grad():
                output = self.model.generate(
                    input_ids,
                    attention_mask=attention_mask,
                    # Budget applies to generated tokens only; ``max_length``
                    # would also count the (long) prompt.
                    max_new_tokens=self.MAX_NEW_TOKENS,
                    num_return_sequences=1,
                    # Temperature only takes effect when sampling is enabled.
                    do_sample=True,
                    temperature=0.7,
                    pad_token_id=self.tokenizer.eos_token_id
                )
            # The output sequence starts with the prompt; keep only the answer
            # so the query and PDF excerpt are not translated back to the user.
            answer_ids = output[0][prompt_length:]
            response = self.tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

            # Translate
            target_lang = LANGUAGE_MAPPING[language]["deep_translator_code"]
            return self.translate_text(response, target_lang)
        except Exception as e:
            logger.error(f"Error in process_query: {str(e)}")
            return f"An error occurred: {str(e)}"
# Gradio interface
def create_interface():
    """Build the Gradio Blocks UI for PDF question answering with translation.

    Creates one ``PDFQueryTranslator`` (which loads the model) and wires it to
    a file upload, a question box, a language dropdown with a read-only
    description box, an output box, and a submit button.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    pdf_processor = PDFQueryTranslator()

    # Dropdown labels have the form "<code> - <name>". The default label is
    # derived from the mapping's first entry so it always matches a choice.
    language_choices = [f"{code} - {info['name']}" for code, info in LANGUAGE_MAPPING.items()]
    default_code = next(iter(LANGUAGE_MAPPING))
    default_choice = f"{default_code} - {LANGUAGE_MAPPING[default_code]['name']}"

    def code_from_choice(selected):
        """Return the language code at the start of a dropdown label, or None if empty."""
        if not selected:
            return None
        return selected.split(" - ")[0]

    with gr.Blocks() as demo:
        gr.Markdown("### PDF Query and Translation System")
        with gr.Row():
            with gr.Column():
                pdf_input = gr.File(
                    label="Upload PDF Document",
                    type="filepath"
                )
                query_input = gr.Textbox(
                    label="Enter your question about the PDF",
                    placeholder="What would you like to know about the document?"
                )
                language_input = gr.Dropdown(
                    label="Select Output Language",
                    choices=language_choices,
                    value=default_choice
                )
                language_description = gr.Textbox(
                    label="Language Information",
                    value=LANGUAGE_MAPPING[default_code]['description'],
                    interactive=False
                )
        with gr.Row():
            output_text = gr.Textbox(
                label="Translated Answer",
                placeholder="Translation will appear here...",
                lines=5
            )

        def update_description(selected):
            """Return the description for the selected language, or "" if none/unknown."""
            info = LANGUAGE_MAPPING.get(code_from_choice(selected))
            return info['description'] if info else ""

        def process_and_translate(pdf_file, query, language):
            """Run the query pipeline and turn any unexpected failure into a message."""
            try:
                # An empty selection yields None, which process_query reports
                # as an invalid language.
                return pdf_processor.process_query(pdf_file, query, code_from_choice(language))
            except Exception as e:
                return f"Error processing request: {str(e)}"

        # Event handlers
        language_input.change(
            fn=update_description,
            inputs=[language_input],
            outputs=[language_description]
        )
        submit_button = gr.Button("Get Answer")
        submit_button.click(
            fn=process_and_translate,
            inputs=[pdf_input, query_input, language_input],
            outputs=output_text
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, turn on request queueing, then start
    # the server with a shareable link.
    demo = create_interface()
    demo.queue()  # Enable queueing
    demo.launch(share=True)