Jiangxz01 committed on
Commit
41f5990
·
verified ·
1 Parent(s): f5db0ff

Upload 2 files

Browse files

AI Visual Storytelling

Files changed (2) hide show
  1. app.py +319 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # 財政部財政資訊中心 江信宗
3
+
4
import base64
import html
import io
import json
import os
import time
import uuid

import requests
import streamlit as st
from dotenv import load_dotenv
from gtts import gTTS
from gtts import gTTSError
from litellm import completion
from PIL import Image
16
+
17
+ load_dotenv()
18
+
19
def compress_image(image, max_size=(800, 800), quality=95):
    """Return a downscaled, JPEG-encoded copy of ``image`` as bytes.

    The caller's image is left untouched: a copy is shrunk with
    ``thumbnail`` to fit inside ``max_size`` and then encoded as JPEG.

    Args:
        image: Pillow image to compress.
        max_size: ``(width, height)`` bounding box handed to ``thumbnail``.
        quality: JPEG quality setting handed to ``save``.

    Returns:
        The encoded JPEG data as ``bytes``.
    """
    img_copy = image.copy()
    img_copy.thumbnail(max_size)
    # JPEG cannot store an alpha channel or a palette. PNG uploads are often
    # RGBA / LA / P, and saving those as JPEG raises OSError, so normalise
    # everything that is not already plain RGB or greyscale.
    if img_copy.mode not in ("RGB", "L"):
        img_copy = img_copy.convert("RGB")
    buffered = io.BytesIO()
    img_copy.save(buffered, format="JPEG", quality=quality)
    return buffered.getvalue()
25
+
26
def analyze_image(image, api_key, model):
    """Ask a vision-capable chat model for a detailed description of ``image``.

    The image is compressed with ``compress_image``, embedded as a base64
    ``data:`` URL and sent together with a fixed instruction prompt through
    ``completion``.

    Args:
        image: Pillow image to describe.
        api_key: Provider API key. Forwarded to ``completion`` when non-empty;
            when empty the call is made without it, as before.
        model: Model identifier passed to ``completion``.

    Returns:
        The model's reply with surrounding whitespace removed, or ``""`` when
        the reply carries no text content.

    Raises:
        Any exception raised by ``completion`` or ``compress_image``.
    """
    img_str = base64.b64encode(compress_image(image)).decode()
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Carefully observe this image and describe it in as much detail as possible. Please address the following aspects: primary subject matter, background setting, color palette, emotional conveyance, and specific details."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{img_str}"
                    },
                },
            ],
        }
    ]
    request = {"model": model, "messages": messages, "max_tokens": 1024}
    # The key typed into the UI used to be silently ignored; hand it to the
    # client explicitly so it is the key actually used for this request.
    if api_key:
        request["api_key"] = api_key
    response = completion(**request)
    # ``content`` can be None (e.g. a refusal or an empty completion).
    content = response.choices[0].message.content
    return (content or "").strip()
45
+
46
def translate_to_chinese(text, api_key, model):
    """Translate English ``text`` into Traditional Chinese via a chat model.

    When ``model`` contains ``"groq/"`` the translation is routed to
    ``groq/gemma2-9b-it`` instead of the given model; otherwise ``model``
    itself is used.

    Args:
        text: English text to translate.
        api_key: Provider API key. Forwarded to ``completion`` when non-empty;
            when empty the call is made without it, as before.
        model: Model identifier selected for image analysis.

    Returns:
        The model's reply with surrounding whitespace removed, or ``""`` when
        the reply carries no text content.

    Raises:
        Any exception raised by ``completion``.
    """
    translation_model = "groq/gemma2-9b-it" if "groq/" in model else model
    messages = [
        {
            "role": "system",
            "content": "You are an expert translator proficient in both Traditional Chinese and English, with 40 years of translation experience and extensive cross-disciplinary knowledge. You have been deeply involved in the Chinese translations of The New York Times and Bloomberg, and have a deep understanding of the translation of current events and academic papers. I would like you to translate the following English text into Traditional Chinese, with a style similar to the Chinese versions of the aforementioned magazines. I would like to request a translation of the following English content into Traditional Chinese. Please ensure that the translation is accurate and natural-sounding."
        },
        {
            "role": "user",
            "content": f"THAT'S IMPORTANT OTHERWISE I'LL DIE. Translate the Text ``` {text} ``` into \"Traditional Chinese\". Must reply to me in Traditional Chinese."
        },
    ]
    request = {"model": translation_model, "messages": messages, "max_tokens": 1024}
    # The key typed into the UI used to be silently ignored; hand it to the
    # client explicitly so it is the key actually used for this request.
    if api_key:
        request["api_key"] = api_key
    response = completion(**request)
    # ``content`` can be None (e.g. a refusal or an empty completion).
    content = response.choices[0].message.content
    return (content or "").strip()
63
+
64
def resize_image(image, target_height=400):
    """Return a copy of ``image`` scaled to ``target_height`` pixels tall.

    The width is derived from the original aspect ratio, so the picture is
    not distorted. Resampling uses ``Image.LANCZOS``.

    Args:
        image: Pillow image to scale.
        target_height: Height of the result in pixels.

    Returns:
        The resized image produced by ``image.resize``.
    """
    original_width, original_height = image.size
    aspect_ratio = original_width / original_height
    # Very tall, narrow images would truncate to a width of 0, which
    # ``resize`` rejects; keep at least one pixel column.
    target_width = max(1, int(target_height * aspect_ratio))
    return image.resize((target_width, target_height), Image.LANCZOS)
70
+
71
+ def main():
72
+ st.set_page_config(
73
+ layout="wide",
74
+ page_title="AI-Powered Visual Storytelling",
75
+ page_icon="๐Ÿ–ผ๏ธ",
76
+ menu_items={
77
+ 'Get Help': None,
78
+ 'Report a bug': None,
79
+ 'About': '# ๅœ–็‰‡AI่พจ่ญ˜ๆ‡‰็”จ\nไฝฟ็”จAIๅˆ†ๆžๅœ–็‰‡ๅ…งๅฎนไน‹็ถฒ้ ็จ‹ๅผใ€‚'
80
+ }
81
+ )
82
+ st.markdown("""
83
+ <style>
84
+ .stApp {
85
+ background-image: linear-gradient(to bottom, #e6f3ff, #ffffff);
86
+ }
87
+ .stTitle, .stMarkdown, .stRadio, .stFileUploader, .stTextInput > label, p {
88
+ color: black !important;
89
+ }
90
+ .stTitle h1 {
91
+ color: black !important;
92
+ }
93
+ .stButton>button {
94
+ background-color: #3498db;
95
+ color: white;
96
+ }
97
+ .stTextInput>div>div>input {
98
+ background-color: #ecf0f1;
99
+ color: #2c3e50;
100
+ }
101
+ .custom-image-container {
102
+ border: 2px solid #bdc3c7;
103
+ border-radius: 10px;
104
+ overflow: hidden;
105
+ }
106
+ .custom-image {
107
+ width: 100%;
108
+ height: 400px;
109
+ object-fit: cover;
110
+ border-radius: 10px;
111
+ }
112
+ .description-box {
113
+ background-color: rgba(52, 152, 219, 0.1);
114
+ border-left: 5px solid #3498db;
115
+ padding: 12px;
116
+ border-radius: 0 6px 6px 0;
117
+ transition: all 0.3s ease;
118
+ margin-bottom: 5px;
119
+ }
120
+ .description-box:hover {
121
+ background-color: rgba(52, 152, 219, 0.2);
122
+ box-shadow: 0 0 10px rgba(52, 152, 219, 0.5);
123
+ }
124
+ .description-box p {
125
+ color: #2c3e50;
126
+ font-size: 16px;
127
+ line-height: 1.6;
128
+ transition: all 0.3s ease;
129
+ }
130
+ .description-box:hover p {
131
+ font-weight: bold;
132
+ }
133
+ .info-box {
134
+ background-color: rgba(52, 152, 219, 0.1);
135
+ border-left: 5px solid #3498db;
136
+ padding: 10px;
137
+ border-radius: 0 10px 10px 0;
138
+ transition: all 0.3s ease;
139
+ margin-bottom: 5px;
140
+ }
141
+ .info-box:hover {
142
+ background-color: rgba(52, 152, 219, 0.2);
143
+ box-shadow: 0 0 10px rgba(52, 152, 219, 0.5);
144
+ }
145
+ .info-box p {
146
+ color: #2c3e50;
147
+ font-size: 16px;
148
+ line-height: 1.6;
149
+ transition: all 0.3s ease;
150
+ margin: 0;
151
+ }
152
+ .info-box:hover p {
153
+ font-weight: bold;
154
+ }
155
+ .stTextInput > div > div > input {
156
+ background-color: #ffffff;
157
+ color: #2c3e50;
158
+ border: 2px solid #3498db;
159
+ border-radius: 5px;
160
+ padding: 8px 12px;
161
+ }
162
+ .stButton > button {
163
+ background-color: #3498db;
164
+ color: white;
165
+ border: none;
166
+ border-radius: 5px;
167
+ padding: 8px 16px;
168
+ font-weight: bold;
169
+ transition: all 0.3s ease;
170
+ }
171
+ .stButton > button:hover {
172
+ background-color: #2980b9;
173
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
174
+ }
175
+ [data-testid=stSidebar] {
176
+ background-color: #f0f8ff;
177
+ padding: 20px;
178
+ }
179
+ [data-testid=stSidebar] .stTitle h1 {
180
+ color: #2c3e50 !important;
181
+ font-size: 24px;
182
+ margin-bottom: 20px;
183
+ }
184
+ .main-content {
185
+ padding-left: 0 !important;
186
+ }
187
+ .stColumns {
188
+ gap: 1rem !important;
189
+ }
190
+ .streamlit-expanderHeader {
191
+ background-color: #3498db;
192
+ color: white !important;
193
+ border-radius: 5px;
194
+ padding: 10px 15px;
195
+ font-weight: bold;
196
+ transition: all 0.3s ease;
197
+ }
198
+ .streamlit-expanderHeader:hover {
199
+ background-color: #2980b9;
200
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
201
+ }
202
+ .streamlit-expanderContent {
203
+ border: 1px solid #3498db;
204
+ border-radius: 0 0 5px 5px;
205
+ padding: 10px;
206
+ }
207
+ </style>
208
+
209
+ <script>
210
+ const mutationObserver = new MutationObserver(function(mutations) {
211
+ mutations.forEach(function(mutation) {
212
+ if (mutation.type === 'childList') {
213
+ const descriptionBoxes = document.querySelectorAll('.description-box');
214
+ descriptionBoxes.forEach(box => {
215
+ const paragraphs = box.querySelectorAll('p');
216
+ paragraphs.forEach(p => {
217
+ p.textContent = p.textContent.replace(/^<strong>|<\/strong>$/g, '');
218
+ });
219
+ });
220
+ }
221
+ });
222
+ });
223
+
224
+ mutationObserver.observe(document.body, {
225
+ childList: true,
226
+ subtree: true
227
+ });
228
+ </script>
229
+ """, unsafe_allow_html=True)
230
+
231
+ with st.sidebar:
232
+ st.title("๐Ÿ–ผ๏ธ ๅœ–็‰‡ๅˆ†ๆž")
233
+ if 'uploaded_files' not in st.session_state:
234
+ st.session_state.uploaded_files = []
235
+ new_uploads = st.file_uploader("ๆ–ฐๅขž/ๅˆช้™คๅœ–็‰‡", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
236
+ current_files = {f.name: f for f in new_uploads} if new_uploads else {}
237
+ st.session_state.uploaded_files = [f for f in st.session_state.uploaded_files if f.name in current_files]
238
+ for file_name, file in current_files.items():
239
+ if file_name not in [f.name for f in st.session_state.uploaded_files]:
240
+ st.session_state.uploaded_files.append(file)
241
+ uploaded_files = st.session_state.uploaded_files
242
+ with st.expander("่ฉฎ้‡‹ๅœ–็‰‡่ชž่จ€", expanded=False):
243
+ language = st.radio("", ["็น้ซ”ไธญๆ–‡", "English"], index=0)
244
+ st.markdown("### ๐Ÿค– Model Settings")
245
+ model_options = ["gpt-4o", "gemini-1.5-pro", "gpt-4o-mini", "custom"]
246
+ selected_model = st.selectbox("Select Model", model_options)
247
+ if selected_model == "custom":
248
+ custom_model = st.text_input("Enter custom model name")
249
+ model = custom_model if custom_model else "groq/llava-v1.5-7b-4096-preview"
250
+ else:
251
+ model = selected_model
252
+ st.markdown("### ๐Ÿ”‘ API Settings")
253
+ api_key = st.text_input("API Key", type="password", value=os.getenv("OPENAI_API_KEY", ""))
254
+ api_base = st.text_input("API Base URL", value=os.getenv("OPENAI_API_BASE", "")) or "https://api.groq.com/openai/v1/"
255
+ if st.button("Save API Settings"):
256
+ os.environ["OPENAI_API_KEY"] = api_key
257
+ os.environ["OPENAI_API_BASE"] = api_base
258
+ st.success("API settings saved successfully")
259
+ st.markdown("""
260
+ <div class="info-box">
261
+ <p>็ณป็ตฑ้ƒจ็ฝฒ๏ผšๆฑŸไฟกๅฎ—<br>Vision Language Models</p>
262
+ </div>
263
+ """, unsafe_allow_html=True)
264
+
265
+ st.markdown('<div class="main-content">', unsafe_allow_html=True)
266
+ st.title("๐ŸŒ„ AI-Powered Visual Storytelling")
267
+ if api_key and uploaded_files:
268
+ if 'analyzed_files' not in st.session_state:
269
+ st.session_state.analyzed_files = {}
270
+ files_to_remove = set(st.session_state.analyzed_files.keys()) - set(f.name for f in uploaded_files)
271
+ for file_name in files_to_remove:
272
+ del st.session_state.analyzed_files[file_name]
273
+ for i in range(0, len(uploaded_files), 2):
274
+ img_col1, img_col2 = st.columns(2)
275
+ for j in range(2):
276
+ if i + j < len(uploaded_files):
277
+ with img_col1 if j == 0 else img_col2:
278
+ uploaded_file = uploaded_files[i + j]
279
+ image = Image.open(uploaded_file)
280
+ resized_image = resize_image(image)
281
+ buffered = io.BytesIO()
282
+ resized_image.save(buffered, format="PNG")
283
+ img_str = base64.b64encode(buffered.getvalue()).decode()
284
+ st.markdown(f"""
285
+ <div class="custom-image-container">
286
+ <img src="data:image/png;base64,{img_str}" class="custom-image">
287
+ </div>
288
+ <p style="text-align: center; color: black;">{uploaded_file.name}</p>
289
+ """, unsafe_allow_html=True)
290
+ if uploaded_file.name not in st.session_state.analyzed_files:
291
+ with st.spinner("ๅˆ†ๆžๅœ–็‰‡ๅŠ็”Ÿๆˆ่ชž้Ÿณไธญ..."):
292
+ try:
293
+ description = analyze_image(image, api_key, model)
294
+ if language == "็น้ซ”ไธญๆ–‡":
295
+ with st.spinner("็ฟป่ญฏไธญ..."):
296
+ description = translate_to_chinese(description, api_key, model)
297
+ st.session_state.analyzed_files[uploaded_file.name] = description
298
+ time.sleep(1)
299
+ except Exception as e:
300
+ st.error(f"่™•็†ๅœ–็‰‡ๆ™‚็™ผ็”Ÿ้Œฏ่ชค: {str(e)}")
301
+ continue
302
+ description = st.session_state.analyzed_files[uploaded_file.name]
303
+ paragraphs = [p.strip() for p in description.split('\n') if p.strip()]
304
+ if paragraphs:
305
+ formatted_description = ''.join([f'<p style="margin: 0;">{p}</p>' for p in paragraphs])
306
+ st.markdown(f'<div class="description-box">{formatted_description}</div>', unsafe_allow_html=True)
307
+ tts = gTTS(text=description, lang='zh-tw' if language == "็น้ซ”ไธญๆ–‡" else 'en')
308
+ audio_file = f"audio_{uuid.uuid4()}.mp3"
309
+ tts.save(audio_file)
310
+ st.audio(audio_file)
311
+ os.remove(audio_file)
312
+ else:
313
+ st.warning("็„กๆณ•็ฒๅ–ๅœ–็‰‡ๆ่ฟฐใ€‚")
314
+ elif uploaded_files:
315
+ st.warning("่ซ‹่ผธๅ…ฅๆœ‰ๆ•ˆ็š„ API Key ไปฅๅˆ†ๆžๅœ–็‰‡ใ€‚")
316
+ st.markdown('</div>', unsafe_allow_html=True)
317
+
318
+ if __name__ == "__main__":
319
+ main()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ litellm
2
+ gTTS