muhammadsalmanalfaridzi committed (verified)
Commit a1d4dd3 · 1 Parent(s): bcec267

Update app.py

Files changed (1):
  1. app.py +67 -33
app.py CHANGED
@@ -2,7 +2,6 @@ import os
 import gradio as gr
 from argparse import ArgumentParser
 from groq import Groq
-from PIL import Image
 import base64
 import io
 
@@ -19,37 +18,51 @@ def _get_args():
     return parser.parse_args()
 
 def process_image(image):
-    # Convert image to bytes for Groq API
     buffered = io.BytesIO()
     image.save(buffered, format="JPEG")
     return buffered.getvalue()
 
-def create_messages(query, image_data):
-    messages = []
-
-    # User query as text
-    if query:
-        messages.append({'role': 'user', 'content': query})
+def translate_audio(audio_file):
+    with open(audio_file, "rb") as file:
+        translation = client.audio.translations.create(
+            file=(audio_file, file.read()),
+            model="whisper-large-v3",
+            response_format="json",
+            temperature=0.0
+        )
+    return translation.text
 
-    # Include image if provided
-    if image_data:
-        image_base64 = f"data:image/jpeg;base64,{base64.b64encode(image_data).decode()}"
-        messages.append({
-            'role': 'user',
-            'content': [
-                {"type": "text", "text": "Please analyze this image."},
-                {"type": "image_url", "image_url": {"url": image_base64}}
-            ]
-        })
+def transcribe_audio(audio_file):
+    with open(audio_file, "rb") as file:
+        transcription = client.audio.transcriptions.create(
+            file=(audio_file, file.read()),
+            model="whisper-large-v3",
+            response_format="json",
+            temperature=0.0
+        )
+    return transcription.text
+
+def predict(chat_history, query, image, audio, translate):
+    final_query = query.strip()
 
-    return messages
+    if audio:
+        audio_file_path = audio
+        if translate:
+            translation_text = translate_audio(audio_file_path)
+            final_query = translation_text.strip()
+            chat_history.append({'role': 'assistant', 'content': translation_text})
+        else:
+            transcribed_text = transcribe_audio(audio_file_path)
+            final_query = f"{final_query} {transcribed_text}".strip()
 
-def predict(chat_history, query, image):
-    # Process the image if provided
     image_data = process_image(image) if image else None
-    messages = create_messages(query, image_data)
+    messages = create_messages(final_query, image_data)
+
+    if not messages:
+        error_message = "No valid input provided. Please enter a query or upload an image/audio."
+        chat_history.append({'role': 'assistant', 'content': error_message})
+        return chat_history
 
-    # Call the Groq API with the messages
     try:
         completion = client.chat.completions.create(
             model="llama-3.2-90b-vision-preview",
@@ -61,32 +74,53 @@ def predict(chat_history, query, image):
         )
 
         response_text = completion.choices[0].message.content.strip()
+        chat_history.append({'role': 'user', 'content': final_query})
+        chat_history.append({'role': 'assistant', 'content': response_text})
     except Exception as e:
        response_text = f"Error: {str(e)}"
+        chat_history.append({'role': 'user', 'content': final_query})
+        chat_history.append({'role': 'assistant', 'content': response_text})
 
-    chat_history.append((query, response_text))
     return chat_history
 
+def create_messages(query, image_data):
+    messages = []
+    if query:
+        messages.append({'role': 'user', 'content': query})
+    if image_data:
+        image_base64 = f"data:image/jpeg;base64,{base64.b64encode(image_data).decode()}"
+        messages.append({
+            'role': 'user',
+            'content': [
+                {"type": "text", "text": "Please analyze this image."},
+                {"type": "image_url", "image_url": {"url": image_base64}}
+            ]
+        })
+    return messages
+
 def clear_history():
     return []
 
 def main():
     args = _get_args()
 
-    with gr.Blocks() as demo:
-        gr.Markdown("<h1 style='text-align: center;'>Llama-3.2-90b-vision-preview</h1>")
+    with gr.Blocks(css="#chatbox {height: 400px; background-color: #f9f9f9; padding: 20px; border-radius: 10px; }") as demo:
+        gr.Markdown("<h1 style='text-align: center; color: #4a4a4a;'>Llama-3.2-90b-vision-preview</h1>")
 
-        chatbox = gr.Chatbot()
-        query = gr.Textbox(label="Input", placeholder="Type your query here...")
+        chatbox = gr.Chatbot(type='messages', elem_id="chatbox")
+        query = gr.Textbox(label="Type your query here...", placeholder="Enter your question or command...", lines=2)
         image_input = gr.Image(type="pil", label="Upload Image")
+        audio_input = gr.Audio(type="filepath", label="Upload Audio")
+        translate_checkbox = gr.Checkbox(label="Translate Audio to English Text")
+
+        with gr.Row():
+            submit_btn = gr.Button("Submit", variant="primary", elem_id="submit-btn")
+            clear_btn = gr.Button("Clear History", variant="secondary", elem_id="clear-btn")
 
-        submit_btn = gr.Button("Submit")
-        clear_btn = gr.Button("Clear History")
-
-        submit_btn.click(predict, inputs=[chatbox, query, image_input], outputs=chatbox)
+        submit_btn.click(predict, inputs=[chatbox, query, image_input, audio_input, translate_checkbox], outputs=chatbox)
        clear_btn.click(clear_history, outputs=chatbox)
 
     demo.launch(share=args.share)
 
 if __name__ == '__main__':
-    main()
+    main()
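
A note on the chat-history shape, since this commit switches both the Chatbot component and predict() to message dicts: with gr.Chatbot(type='messages') the history is a list of {'role', 'content'} dicts rather than (user, bot) tuples. A minimal sketch of that shape, assuming a Gradio version that supports Chatbot(type='messages'); the example strings are illustrative only, not taken from the Space:

# Hedged sketch (not part of app.py): the history format that
# gr.Chatbot(type="messages") renders and that the updated predict() appends to.
chat_history = []  # clear_history() resets the component to the same empty list
chat_history.append({"role": "user", "content": "Describe the uploaded image."})
chat_history.append({"role": "assistant", "content": "The image shows a red bicycle."})
# The previous version stored tuple pairs instead:
# chat_history.append((query, response_text))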