Daemontatox committed on
Commit 82d4c23 · verified · 1 Parent(s): f204970

Update app.py

Files changed (1)
  1. app.py +130 -100
app.py CHANGED
@@ -4,125 +4,155 @@ import torch
 from threading import Thread
 import gradio as gr
 import spaces
-import fitz  # PyMuPDF for PDF processing
-from io import BytesIO

 # Load model and processor
 ckpt = "Qwen/Qwen2.5-VL-7B-Instruct"
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-    ckpt,
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True
-).to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt, trust_remote_code=True)

-def process_pdf(file_path):
-    """Convert first page of PDF to PIL Image"""
-    pdf_doc = fitz.open(file_path)
-    page = pdf_doc.load_page(0)
-    pix = page.get_pixmap()
-    img_bytes = pix.tobytes("ppm")
-    image = Image.open(BytesIO(img_bytes)).convert("RGB")
-    pdf_doc.close()
-    return image

-@spaces.GPU(duration=240)
-def bot_streaming(message, history, max_new_tokens=2048):
-    txt = message["text"]
-    images = []
-    messages = []

-    # Process history
-    for i, (user_msg, bot_msg) in enumerate(history):
-        if isinstance(user_msg, list):  # Contains files
-            hist_images = []
-            content = [{"type": "text", "text": user_msg[0]["text"]}]
-            for file_info in user_msg[1:]:
-                file_path = file_info["path"] if isinstance(file_info, dict) else file_info
-                if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
-                    img = Image.open(file_path).convert("RGB")
-                    hist_images.append(img)
-                    content.append({"type": "image"})
-                elif file_path.lower().endswith('.pdf'):
-                    try:
-                        img = process_pdf(file_path)
-                        hist_images.append(img)
-                        content.append({"type": "image"})
-                    except Exception as e:
-                        print(f"Error processing PDF: {e}")
-            images.extend(hist_images)
-            messages.append({"role": "user", "content": content})
-            messages.append({"role": "assistant", "content": bot_msg})
-        else:
-            messages.extend([
-                {"role": "user", "content": [{"type": "text", "text": user_msg}]},
-                {"role": "assistant", "content": [{"type": "text", "text": bot_msg}]}
-            ])

-    # Process current message
-    current_images = []
-    content = [{"type": "text", "text": txt}]
-
-    if message["files"]:
-        for file_info in message["files"]:
-            file_path = file_info["path"] if isinstance(file_info, dict) else file_info
             try:
-                if file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
-                    img = Image.open(file_path).convert("RGB")
-                    current_images.append(img)
-                    content.append({"type": "image"})
-                elif file_path.lower().endswith('.pdf'):
-                    img = process_pdf(file_path)
-                    current_images.append(img)
-                    content.append({"type": "image"})
             except Exception as e:
-                print(f"File processing error: {e}")

-        images.extend(current_images)
-        messages.append({"role": "user", "content": content})
-    else:
-        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})

-    # Generate response
-    inputs = processor(
-        text=processor.apply_chat_template(messages, add_generation_prompt=True),
-        images=images if images else None,
-        return_tensors="pt"
-    ).to("cuda")

-    streamer = TextIteratorStreamer(processor, skip_special_tokens=True)
-    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)

-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()

-    buffer = ""
-    for new_text in streamer:
-        buffer += new_text
-        yield buffer
-
-# Configure Gradio interface
-textbox = gr.MultimodalTextbox(
-    file_upload_kwargs={
-        "file_count": "multiple",
-        "file_types": ["image", ".pdf"]
-    },
-    placeholder="Input message or upload files...",
-    show_label=False
-)
-
-demo = gr.ChatInterface(
-    fn=bot_streaming,
-    title="MultiFile AI Assistant",
-    examples=[],
-    textbox=textbox,
-    additional_inputs=[
-        gr.Slider(10, 4096, value=512, label="Max New Tokens")
-    ],
-    css=".gradio-container {background: #fafafa}",
-    allow_flagging="never"
-)

 if __name__ == "__main__":
     demo.launch(debug=True)
 
 from threading import Thread
 import gradio as gr
 import spaces
+import fitz  # PyMuPDF
+import io
+import logging
+from typing import List, Dict, Any
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

 # Load model and processor
 ckpt = "Qwen/Qwen2.5-VL-7B-Instruct"
 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    ckpt,
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True
+)
 processor = AutoProcessor.from_pretrained(ckpt, trust_remote_code=True)

+class ChatState:
+    def __init__(self):
+        self.conversation_history: List[Dict[str, Any]] = []
+        self.current_images: List[Image.Image] = []
+
+    def add_message(self, role: str, content: Any, images: List[Image.Image] = None):
+        self.conversation_history.append({
+            "role": role,
+            "content": content,
+            "images": images or []
+        })
+
+    def clear(self):
+        self.conversation_history = []
+        self.current_images = []

+chat_state = ChatState()
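
A note on the `ChatState` singleton added above: module-level state in a Gradio Space is shared by every connected user, so two concurrent visitors would interleave their conversation histories. Gradio's per-session mechanism is `gr.State`; a minimal sketch of that alternative (component names here are illustrative, not part of this commit):

```python
import gradio as gr

# gr.State stores a value per browser session rather than per process,
# so concurrent users each get their own history list.
with gr.Blocks() as demo:
    session_history = gr.State([])

    def respond(user_text, history):
        history = history + [user_text]  # copy-and-extend this session's history
        return f"Message {len(history)} received.", history

    box = gr.Textbox(label="Message")
    out = gr.Textbox(label="Reply")
    box.submit(respond, inputs=[box, session_history], outputs=[out, session_history])

demo.launch()
```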

+def process_pdf(file_path: str, max_pages: int = 5) -> List[Image.Image]:
+    """Process PDF file into images (first 5 pages max for demo)"""
+    try:
+        doc = fitz.open(file_path)
+        images = []
+        for page_num in range(min(doc.page_count, max_pages)):
+            page = doc.load_page(page_num)
+            pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
+            img_data = pix.tobytes("ppm")
+            images.append(Image.open(io.BytesIO(img_data)).convert("RGB"))
+        doc.close()
+        return images
+    except Exception as e:
+        logger.error(f"PDF processing error: {str(e)}")
+        return []
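
Unlike the removed `process_pdf`, which rasterized only the first page, this version renders up to `max_pages` pages, and `fitz.Matrix(2, 2)` scales each page 2× per axis (roughly 144 DPI instead of PyMuPDF's 72 DPI default), which helps the vision model read small text. The rasterization pattern in isolation (the file name is a placeholder):

```python
import io

import fitz  # PyMuPDF
from PIL import Image

doc = fitz.open("example.pdf")  # placeholder path
page = doc.load_page(0)
# Matrix(2, 2) doubles the render scale in x and y; larger values
# trade memory and time for sharper images.
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
image = Image.open(io.BytesIO(pix.tobytes("ppm"))).convert("RGB")
print(image.size)
doc.close()
```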

+def handle_file_upload(files: List[str]) -> List[Image.Image]:
+    """Handle uploaded files (PDF or images)"""
+    images = []
+    for file_path in files:
+        if file_path.lower().endswith('.pdf'):
+            images.extend(process_pdf(file_path))
+        else:
             try:
+                images.append(Image.open(file_path).convert("RGB"))
             except Exception as e:
+                logger.error(f"Image processing error: {str(e)}")
+    return images
+
+@spaces.GPU
+def chat_streaming(message: Dict, history: List, max_new_tokens: int = 1024):
+    try:
+        # Process user input
+        user_text = message["text"]
+        user_images = handle_file_upload([f["path"] for f in message["files"]]) if message["files"] else []
+
+        # Update chat state
+        chat_state.add_message("user", user_text, user_images)
+
+        # Build conversation history for model
+        messages = []
+        for msg in chat_state.conversation_history:
+            content = []
+            if msg["role"] == "user":
+                content.append({"type": "text", "text": msg["content"]})
+                for img in msg["images"]:
+                    content.append({"type": "image"})
+                messages.append({"role": "user", "content": content})
+            else:
+                messages.append({"role": "assistant", "content": msg["content"]})
+
+        # Prepare model inputs
+        model_inputs = processor.apply_chat_template(
+            messages,
+            add_generation_prompt=True,
+            tokenize=False
+        )
+
+        # Get all images from history
+        all_images = [img for msg in chat_state.conversation_history for img in msg["images"]]
+
+        inputs = processor(
+            text=model_inputs,
+            images=all_images if all_images else None,
+            return_tensors="pt"
+        ).to(model.device)
+
+        # Stream response
+        streamer = TextIteratorStreamer(processor, skip_special_tokens=True)
+        generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
+
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
+        thread.start()
+
+        buffer = ""
+        for new_text in streamer:
+            buffer += new_text
+            yield buffer
+
+        # Save final response
+        chat_state.add_message("assistant", buffer)
+
+    except Exception as e:
+        logger.error(f"Chat error: {str(e)}")
+        yield "An error occurred. Please try again."
+
+def clear_chat():
+    """Clear chat history"""
+    chat_state.clear()
+    return "Chat history cleared. Start a new conversation."

+# Create Gradio interface
+with gr.Blocks(title="Multimodal Chat Assistant") as demo:
+    gr.Markdown("# Multimodal Chat Assistant")
+    gr.Markdown("Chat with documents and images! Upload PDFs or images and ask questions.")
+
+    chat_interface = gr.ChatInterface(
+        fn=chat_streaming,
+        additional_inputs=[
+            gr.Slider(100, 4096, value=1024, label="Max Response Length"),
+            gr.File(file_count="multiple", file_types=["image", "pdf"], label="Upload Files")
+        ],
+        stop_btn="Stop",
+        retry_btn="Retry",
+        undo_btn="Undo",
+        clear_btn="Clear History"
+    )
+
+    chat_interface.clear_btn.click(
+        fn=clear_chat,
+        outputs=[chat_interface.chatbot]
+    )

 if __name__ == "__main__":
     demo.launch(debug=True)
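
Two wiring details worth flagging for anyone reusing this interface: `gr.ChatInterface` passes each `additional_inputs` component to the callback as its own positional argument, so with both a slider and a `gr.File` listed, `chat_streaming(message, history, max_new_tokens)` would need a fourth parameter; and a callback that reads `message["text"]` and `message["files"]` expects the multimodal mode, which routes uploads through the textbox rather than a separate file component. A minimal sketch of that wiring, assuming Gradio 4.x (the handler body is illustrative):

```python
import gradio as gr

def respond(message, history, max_new_tokens):
    # In multimodal mode, message is a dict: {"text": str, "files": [paths]}
    names = ", ".join(message["files"]) or "no files"
    yield f"Got {message['text']!r} with {names} (limit: {max_new_tokens} tokens)."

demo = gr.ChatInterface(
    fn=respond,
    multimodal=True,  # textbox accepts uploads and delivers dict messages
    textbox=gr.MultimodalTextbox(file_count="multiple", file_types=["image", ".pdf"]),
    additional_inputs=[gr.Slider(100, 4096, value=1024, label="Max Response Length")],
)

if __name__ == "__main__":
    demo.launch()
```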