Daemontatox committed on
Commit 82d4c23 · verified · 1 Parent(s): f204970

Update app.py

Files changed (1)
  1. app.py +130 -100
app.py CHANGED
@@ -4,125 +4,155 @@ import torch
 from threading import Thread
 import gradio as gr
 import spaces
-import fitz  # PyMuPDF for PDF processing
-from io import BytesIO

 # Load model and processor
 ckpt = "Qwen/Qwen2.5-VL-7B-Instruct"
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    ckpt,
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True
-).to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt, trust_remote_code=True)

-def process_pdf(file_path):
-    """Convert first page of PDF to PIL Image"""
-    pdf_doc = fitz.open(file_path)
-    page = pdf_doc.load_page(0)
-    pix = page.get_pixmap()
-    img_bytes = pix.tobytes("ppm")
-    image = Image.open(BytesIO(img_bytes)).convert("RGB")
-    pdf_doc.close()
-    return image

-@spaces.GPU(duration=240)
-def bot_streaming(message, history, max_new_tokens=2048):
-    txt = message["text"]
-    images = []
-    messages = []

-    # Process history
-    for i, (user_msg, bot_msg) in enumerate(history):
-        if isinstance(user_msg, list):  # Contains files
-            hist_images = []
-            content = [{"type": "text", "text": user_msg[0]["text"]}]
-            for file_info in user_msg[1:]:
-                file_path = file_info["path"] if isinstance(file_info, dict) else file_info
-                if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
-                    img = Image.open(file_path).convert("RGB")
-                    hist_images.append(img)
-                    content.append({"type": "image"})
-                elif file_path.lower().endswith('.pdf'):
-                    try:
-                        img = process_pdf(file_path)
-                        hist_images.append(img)
-                        content.append({"type": "image"})
-                    except Exception as e:
-                        print(f"Error processing PDF: {e}")
-            images.extend(hist_images)
-            messages.append({"role": "user", "content": content})
-            messages.append({"role": "assistant", "content": bot_msg})
-        else:
-            messages.extend([
-                {"role": "user", "content": [{"type": "text", "text": user_msg}]},
-                {"role": "assistant", "content": [{"type": "text", "text": bot_msg}]}
-            ])

-    # Process current message
-    current_images = []
-    content = [{"type": "text", "text": txt}]
-
-    if message["files"]:
-        for file_info in message["files"]:
-            file_path = file_info["path"] if isinstance(file_info, dict) else file_info
             try:
-                if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
-                    img = Image.open(file_path).convert("RGB")
-                    current_images.append(img)
-                    content.append({"type": "image"})
-                elif file_path.lower().endswith('.pdf'):
-                    img = process_pdf(file_path)
-                    current_images.append(img)
-                    content.append({"type": "image"})
             except Exception as e:
-                print(f"File processing error: {e}")

-        images.extend(current_images)
-        messages.append({"role": "user", "content": content})
-    else:
-        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})

-    # Generate response
-    inputs = processor(
-        text=processor.apply_chat_template(messages, add_generation_prompt=True),
-        images=images if images else None,
-        return_tensors="pt"
-    ).to("cuda")

-    streamer = TextIteratorStreamer(processor, skip_special_tokens=True)
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)

-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()

-    buffer = ""
-    for new_text in streamer:
-        buffer += new_text
-        yield buffer
-
-# Configure Gradio interface
-textbox = gr.MultimodalTextbox(
-    file_upload_kwargs={
-        "file_count": "multiple",
-        "file_types": ["image", ".pdf"]
-    },
-    placeholder="Input message or upload files...",
-    show_label=False
-)
-
-demo = gr.ChatInterface(
-    fn=bot_streaming,
-    title="MultiFile AI Assistant",
-    examples=[],
-    textbox=textbox,
-    additional_inputs=[
-        gr.Slider(10, 4096, value=512, label="Max New Tokens")
-    ],
-    css=".gradio-container {background: #fafafa}",
-    allow_flagging="never"
-)

 if __name__ == "__main__":
     demo.launch(debug=True)
 
 from threading import Thread
 import gradio as gr
 import spaces
+import fitz  # PyMuPDF
+import io
+import logging
+from typing import List, Dict, Any
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

 # Load model and processor
 ckpt = "Qwen/Qwen2.5-VL-7B-Instruct"
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    ckpt,
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True
+)
 processor = AutoProcessor.from_pretrained(ckpt, trust_remote_code=True)

+class ChatState:
+    def __init__(self):
+        self.conversation_history: List[Dict[str, Any]] = []
+        self.current_images: List[Image.Image] = []
+
+    def add_message(self, role: str, content: Any, images: List[Image.Image] = None):
+        self.conversation_history.append({
+            "role": role,
+            "content": content,
+            "images": images or []
+        })
+
+    def clear(self):
+        self.conversation_history = []
+        self.current_images = []

+chat_state = ChatState()
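
A note on the `ChatState` singleton added above: module-level state in a Gradio Space is shared by every connected user, so two concurrent visitors would interleave their conversation histories. Gradio's per-session mechanism is `gr.State`; a minimal sketch of that alternative (component names here are illustrative, not part of this commit):

```python
import gradio as gr

# gr.State stores a value per browser session rather than per process,
# so concurrent users each get their own history list.
with gr.Blocks() as demo:
    session_history = gr.State([])

    def respond(user_text, history):
        history = history + [user_text]  # copy-and-extend this session's history
        return f"Message {len(history)} received.", history

    box = gr.Textbox(label="Message")
    out = gr.Textbox(label="Reply")
    box.submit(respond, inputs=[box, session_history], outputs=[out, session_history])

demo.launch()
```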

+def process_pdf(file_path: str, max_pages: int = 5) -> List[Image.Image]:
+    """Process PDF file into images (first 5 pages max for demo)"""
+    try:
+        doc = fitz.open(file_path)
+        images = []
+        for page_num in range(min(doc.page_count, max_pages)):
+            page = doc.load_page(page_num)
+            pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
+            img_data = pix.tobytes("ppm")
+            images.append(Image.open(io.BytesIO(img_data)).convert("RGB"))
+        doc.close()
+        return images
+    except Exception as e:
+        logger.error(f"PDF processing error: {str(e)}")
+        return []
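
Unlike the removed `process_pdf`, which rasterized only the first page, this version renders up to `max_pages` pages, and `fitz.Matrix(2, 2)` scales each page 2× per axis (roughly 144 DPI instead of PyMuPDF's 72 DPI default), which helps the vision model read small text. The rasterization pattern in isolation (the file name is a placeholder):

```python
import io

import fitz  # PyMuPDF
from PIL import Image

doc = fitz.open("example.pdf")  # placeholder path
page = doc.load_page(0)
# Matrix(2, 2) doubles the render scale in x and y; larger values
# trade memory and time for sharper images.
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
image = Image.open(io.BytesIO(pix.tobytes("ppm"))).convert("RGB")
print(image.size)
doc.close()
```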

+def handle_file_upload(files: List[str]) -> List[Image.Image]:
+    """Handle uploaded files (PDF or images)"""
+    images = []
+    for file_path in files:
+        if file_path.lower().endswith('.pdf'):
+            images.extend(process_pdf(file_path))
+        else:
             try:
+                images.append(Image.open(file_path).convert("RGB"))
             except Exception as e:
+                logger.error(f"Image processing error: {str(e)}")
+    return images
+
+@spaces.GPU
+def chat_streaming(message: Dict, history: List, max_new_tokens: int = 1024):
+    try:
+        # Process user input
+        user_text = message["text"]
+        user_images = handle_file_upload([f["path"] for f in message["files"]]) if message["files"] else []
+
+        # Update chat state
+        chat_state.add_message("user", user_text, user_images)
+
+        # Build conversation history for model
+        messages = []
+        for msg in chat_state.conversation_history:
+            content = []
+            if msg["role"] == "user":
+                content.append({"type": "text", "text": msg["content"]})
+                for img in msg["images"]:
+                    content.append({"type": "image"})
+                messages.append({"role": "user", "content": content})
+            else:
+                messages.append({"role": "assistant", "content": msg["content"]})
+
+        # Prepare model inputs
+        model_inputs = processor.apply_chat_template(
+            messages,
+            add_generation_prompt=True,
+            tokenize=False
+        )
+
+        # Get all images from history
+        all_images = [img for msg in chat_state.conversation_history for img in msg["images"]]
+
+        inputs = processor(
+            text=model_inputs,
+            images=all_images if all_images else None,
+            return_tensors="pt"
+        ).to(model.device)
+
+        # Stream response
+        streamer = TextIteratorStreamer(processor, skip_special_tokens=True)
+        generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
+
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
+        thread.start()
+
+        buffer = ""
+        for new_text in streamer:
+            buffer += new_text
+            yield buffer
+
+        # Save final response
+        chat_state.add_message("assistant", buffer)
+
+    except Exception as e:
+        logger.error(f"Chat error: {str(e)}")
+        yield "An error occurred. Please try again."
+
+def clear_chat():
+    """Clear chat history"""
+    chat_state.clear()
+    return "Chat history cleared. Start a new conversation."

+# Create Gradio interface
+with gr.Blocks(title="Multimodal Chat Assistant") as demo:
+    gr.Markdown("# Multimodal Chat Assistant")
+    gr.Markdown("Chat with documents and images! Upload PDFs or images and ask questions.")
+
+    chat_interface = gr.ChatInterface(
+        fn=chat_streaming,
+        additional_inputs=[
+            gr.Slider(100, 4096, value=1024, label="Max Response Length"),
+            gr.File(file_count="multiple", file_types=["image", "pdf"], label="Upload Files")
+        ],
+        stop_btn="Stop",
+        retry_btn="Retry",
+        undo_btn="Undo",
+        clear_btn="Clear History"
+    )
+
+    chat_interface.clear_btn.click(
+        fn=clear_chat,
+        outputs=[chat_interface.chatbot]
+    )

 if __name__ == "__main__":
     demo.launch(debug=True)
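
Two wiring details worth flagging for anyone reusing this interface: `gr.ChatInterface` passes each `additional_inputs` component to the callback as its own positional argument, so with both a slider and a `gr.File` listed, `chat_streaming(message, history, max_new_tokens)` would need a fourth parameter; and a callback that reads `message["text"]` and `message["files"]` expects the multimodal mode, which routes uploads through the textbox rather than a separate file component. A minimal sketch of that wiring, assuming Gradio 4.x (the handler body is illustrative):

```python
import gradio as gr

def respond(message, history, max_new_tokens):
    # In multimodal mode, message is a dict: {"text": str, "files": [paths]}
    names = ", ".join(message["files"]) or "no files"
    yield f"Got {message['text']!r} with {names} (limit: {max_new_tokens} tokens)."

demo = gr.ChatInterface(
    fn=respond,
    multimodal=True,  # textbox accepts uploads and delivers dict messages
    textbox=gr.MultimodalTextbox(file_count="multiple", file_types=["image", ".pdf"]),
    additional_inputs=[gr.Slider(100, 4096, value=1024, label="Max Response Length")],
)

if __name__ == "__main__":
    demo.launch()
```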