Spaces:
Running
Running
Pylint & CVE fix (#1)
Browse files
- Fix (some) Pylint warnings & remove some unused code (ae312d00decf61d78caf4d9aba0cd2239eb93aeb)
- Upgrade package versions (4785fdbdfadefafdcfd3da9f323c3c1bcdb02eb6)
- app.py +30 -34
- global_config.py +2 -16
- llm_helper.py +10 -88
- pptx_helper.py +11 -6
- requirements.txt +2 -2
app.py
CHANGED
@@ -12,7 +12,7 @@ import pptx_helper
|
|
12 |
from global_config import GlobalConfig
|
13 |
|
14 |
|
15 |
-
APP_TEXT = json5.loads(open(GlobalConfig.APP_STRINGS_FILE, 'r').read())
|
16 |
GB_CONVERTER = 2 ** 30
|
17 |
|
18 |
|
@@ -68,18 +68,6 @@ def get_web_search_results_wrapper(text: str) -> List[Tuple[str, str]]:
|
|
68 |
return results
|
69 |
|
70 |
|
71 |
-
@st.cache_data
|
72 |
-
def get_ai_image_wrapper(text: str) -> str:
|
73 |
-
"""
|
74 |
-
Fetch and cache a Base 64-encoded image by calling an external API.
|
75 |
-
|
76 |
-
:param text: The image prompt
|
77 |
-
:return: The Base 64-encoded image
|
78 |
-
"""
|
79 |
-
|
80 |
-
return llm_helper.get_ai_image(text)
|
81 |
-
|
82 |
-
|
83 |
# def get_disk_used_percentage() -> float:
|
84 |
# """
|
85 |
# Compute the disk usage.
|
@@ -111,14 +99,19 @@ def build_ui():
|
|
111 |
|
112 |
st.title(APP_TEXT['app_name'])
|
113 |
st.subheader(APP_TEXT['caption'])
|
114 |
-
st.markdown(
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
with st.form('my_form'):
|
119 |
# Topic input
|
120 |
try:
|
121 |
-
with open(GlobalConfig.PRELOAD_DATA_FILE, 'r') as in_file:
|
122 |
preload_data = json5.loads(in_file.read())
|
123 |
except (FileExistsError, FileNotFoundError):
|
124 |
preload_data = {'topic': '', 'audience': ''}
|
@@ -158,7 +151,8 @@ def build_ui():
|
|
158 |
st.text(APP_TEXT['tos2'])
|
159 |
|
160 |
st.markdown(
|
161 |
-
'![Visitors]
|
|
|
162 |
)
|
163 |
|
164 |
|
@@ -173,20 +167,17 @@ def generate_presentation(topic: str, pptx_template: str, progress_bar):
|
|
173 |
"""
|
174 |
|
175 |
topic_length = len(topic)
|
176 |
-
logging.debug(
|
177 |
|
178 |
if topic_length >= 10:
|
179 |
-
logging.debug(
|
180 |
-
f'Topic: {topic}\n'
|
181 |
-
)
|
182 |
-
|
183 |
target_length = min(topic_length, GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH)
|
184 |
|
185 |
try:
|
186 |
# Step 1: Generate the contents in JSON format using an LLM
|
187 |
json_str = process_slides_contents(topic[:target_length], progress_bar)
|
188 |
-
logging.debug(
|
189 |
-
logging.debug(
|
190 |
|
191 |
# Step 2: Generate the slide deck based on the template specified
|
192 |
if len(json_str) > 0:
|
@@ -196,8 +187,10 @@ def generate_presentation(topic: str, pptx_template: str, progress_bar):
|
|
196 |
icon="💡️"
|
197 |
)
|
198 |
else:
|
199 |
-
st.error(
|
200 |
-
|
|
|
|
|
201 |
return
|
202 |
|
203 |
all_headers = generate_slide_deck(json_str, pptx_template, progress_bar)
|
@@ -225,15 +218,14 @@ def process_slides_contents(text: str, progress_bar: st.progress) -> str:
|
|
225 |
json_str = ''
|
226 |
|
227 |
try:
|
228 |
-
logging.info(
|
229 |
json_str = get_contents_wrapper(text)
|
230 |
except Exception as ex:
|
231 |
-
st.error(
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
# logging.debug(f'JSON: {json_str}')
|
237 |
|
238 |
progress_bar.progress(50, text='Contents generated')
|
239 |
|
@@ -316,6 +308,10 @@ def show_bonus_stuff(ppt_headers: List[str]):
|
|
316 |
|
317 |
|
318 |
def main():
|
|
|
|
|
|
|
|
|
319 |
build_ui()
|
320 |
|
321 |
|
|
|
12 |
from global_config import GlobalConfig
|
13 |
|
14 |
|
15 |
+
APP_TEXT = json5.loads(open(GlobalConfig.APP_STRINGS_FILE, 'r', encoding='utf-8').read())
|
16 |
GB_CONVERTER = 2 ** 30
|
17 |
|
18 |
|
|
|
68 |
return results
|
69 |
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
# def get_disk_used_percentage() -> float:
|
72 |
# """
|
73 |
# Compute the disk usage.
|
|
|
99 |
|
100 |
st.title(APP_TEXT['app_name'])
|
101 |
st.subheader(APP_TEXT['caption'])
|
102 |
+
st.markdown(
|
103 |
+
'Powered by'
|
104 |
+
' [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2).'
|
105 |
+
)
|
106 |
+
st.markdown(
|
107 |
+
'*If the JSON is generated or parsed incorrectly, try again later by making minor changes'
|
108 |
+
' to the input text.*'
|
109 |
+
)
|
110 |
|
111 |
with st.form('my_form'):
|
112 |
# Topic input
|
113 |
try:
|
114 |
+
with open(GlobalConfig.PRELOAD_DATA_FILE, 'r', encoding='utf-8') as in_file:
|
115 |
preload_data = json5.loads(in_file.read())
|
116 |
except (FileExistsError, FileNotFoundError):
|
117 |
preload_data = {'topic': '', 'audience': ''}
|
|
|
151 |
st.text(APP_TEXT['tos2'])
|
152 |
|
153 |
st.markdown(
|
154 |
+
'![Visitors]'
|
155 |
+
'(https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fbarunsaha%2Fslide-deck-ai&countColor=%23263759)'
|
156 |
)
|
157 |
|
158 |
|
|
|
167 |
"""
|
168 |
|
169 |
topic_length = len(topic)
|
170 |
+
logging.debug('Input length:: topic: %s', topic_length)
|
171 |
|
172 |
if topic_length >= 10:
|
173 |
+
logging.debug('Topic: %s', topic)
|
|
|
|
|
|
|
174 |
target_length = min(topic_length, GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH)
|
175 |
|
176 |
try:
|
177 |
# Step 1: Generate the contents in JSON format using an LLM
|
178 |
json_str = process_slides_contents(topic[:target_length], progress_bar)
|
179 |
+
logging.debug('Truncated topic: %s', topic[:target_length])
|
180 |
+
logging.debug('Length of JSON: %d', len(json_str))
|
181 |
|
182 |
# Step 2: Generate the slide deck based on the template specified
|
183 |
if len(json_str) > 0:
|
|
|
187 |
icon="💡️"
|
188 |
)
|
189 |
else:
|
190 |
+
st.error(
|
191 |
+
'Unfortunately, JSON generation failed, so the next steps would lead'
|
192 |
+
' to nowhere. Try again or come back later.'
|
193 |
+
)
|
194 |
return
|
195 |
|
196 |
all_headers = generate_slide_deck(json_str, pptx_template, progress_bar)
|
|
|
218 |
json_str = ''
|
219 |
|
220 |
try:
|
221 |
+
logging.info('Calling LLM for content generation on the topic: %s', text)
|
222 |
json_str = get_contents_wrapper(text)
|
223 |
except Exception as ex:
|
224 |
+
st.error(
|
225 |
+
f'An exception occurred while trying to convert to JSON. It could be because of heavy'
|
226 |
+
f' traffic or something else. Try doing it again or try again later.'
|
227 |
+
f'\nError message: {ex}'
|
228 |
+
)
|
|
|
229 |
|
230 |
progress_bar.progress(50, text='Contents generated')
|
231 |
|
|
|
308 |
|
309 |
|
310 |
def main():
|
311 |
+
"""
|
312 |
+
Trigger application run.
|
313 |
+
"""
|
314 |
+
|
315 |
build_ui()
|
316 |
|
317 |
|
global_config.py
CHANGED
@@ -1,6 +1,7 @@
|
|
|
|
|
|
1 |
from dataclasses import dataclass
|
2 |
from dotenv import load_dotenv
|
3 |
-
import os
|
4 |
|
5 |
|
6 |
load_dotenv()
|
@@ -8,20 +9,6 @@ load_dotenv()
|
|
8 |
|
9 |
@dataclass(frozen=True)
|
10 |
class GlobalConfig:
|
11 |
-
# CLARIFAI_PAT = os.environ.get('CLARIFAI_PAT', '')
|
12 |
-
# CLARIFAI_USER_ID = 'meta'
|
13 |
-
# CLARIFAI_APP_ID = 'Llama-2'
|
14 |
-
# CLARIFAI_MODEL_ID = 'llama2-13b-chat'
|
15 |
-
#
|
16 |
-
# CLARIFAI_USER_ID_GPT = 'openai'
|
17 |
-
# CLARIFAI_APP_ID_GPT = 'chat-completion'
|
18 |
-
# CLARIFAI_MODEL_ID_GPT = 'GPT-4' # 'GPT-3_5-turbo'
|
19 |
-
#
|
20 |
-
# CLARIFAI_USER_ID_SD = 'stability-ai'
|
21 |
-
# CLARIFAI_APP_ID_SD = 'stable-diffusion-2'
|
22 |
-
# CLARIFAI_MODEL_ID_SD = 'stable-diffusion-xl'
|
23 |
-
# CLARIFAI_MODEL_VERSION_ID_SD = '0c919cc1edfc455dbc96207753f178d7'
|
24 |
-
|
25 |
HF_LLM_MODEL_NAME = 'mistralai/Mistral-7B-Instruct-v0.2'
|
26 |
LLM_MODEL_TEMPERATURE: float = 0.2
|
27 |
LLM_MODEL_MIN_OUTPUT_LENGTH: int = 50
|
@@ -51,4 +38,3 @@ class GlobalConfig:
|
|
51 |
'caption': 'Marvel in a monochrome dream'
|
52 |
}
|
53 |
}
|
54 |
-
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
from dataclasses import dataclass
|
4 |
from dotenv import load_dotenv
|
|
|
5 |
|
6 |
|
7 |
load_dotenv()
|
|
|
9 |
|
10 |
@dataclass(frozen=True)
|
11 |
class GlobalConfig:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
HF_LLM_MODEL_NAME = 'mistralai/Mistral-7B-Instruct-v0.2'
|
13 |
LLM_MODEL_TEMPERATURE: float = 0.2
|
14 |
LLM_MODEL_MIN_OUTPUT_LENGTH: int = 50
|
|
|
38 |
'caption': 'Marvel in a monochrome dream'
|
39 |
}
|
40 |
}
|
|
llm_helper.py
CHANGED
@@ -1,8 +1,5 @@
|
|
1 |
-
import json
|
2 |
import logging
|
3 |
-
import time
|
4 |
import requests
|
5 |
-
from langchain.llms import Clarifai
|
6 |
|
7 |
from global_config import GlobalConfig
|
8 |
|
@@ -18,36 +15,6 @@ logging.basicConfig(
|
|
18 |
# llm = None
|
19 |
|
20 |
|
21 |
-
def get_llm(use_gpt: bool) -> Clarifai:
|
22 |
-
"""
|
23 |
-
Get a large language model (hosted by Clarifai).
|
24 |
-
|
25 |
-
:param use_gpt: True if GPT-3.5 is required; False is Llama 2 is required
|
26 |
-
"""
|
27 |
-
|
28 |
-
if use_gpt:
|
29 |
-
_ = Clarifai(
|
30 |
-
pat=GlobalConfig.CLARIFAI_PAT,
|
31 |
-
user_id=GlobalConfig.CLARIFAI_USER_ID_GPT,
|
32 |
-
app_id=GlobalConfig.CLARIFAI_APP_ID_GPT,
|
33 |
-
model_id=GlobalConfig.CLARIFAI_MODEL_ID_GPT,
|
34 |
-
verbose=True,
|
35 |
-
# temperature=0.1,
|
36 |
-
)
|
37 |
-
else:
|
38 |
-
_ = Clarifai(
|
39 |
-
pat=GlobalConfig.CLARIFAI_PAT,
|
40 |
-
user_id=GlobalConfig.CLARIFAI_USER_ID,
|
41 |
-
app_id=GlobalConfig.CLARIFAI_APP_ID,
|
42 |
-
model_id=GlobalConfig.CLARIFAI_MODEL_ID,
|
43 |
-
verbose=True,
|
44 |
-
# temperature=0.1,
|
45 |
-
)
|
46 |
-
# print(llm)
|
47 |
-
|
48 |
-
return _
|
49 |
-
|
50 |
-
|
51 |
def hf_api_query(payload: dict):
|
52 |
"""
|
53 |
Invoke HF inference end-point API.
|
@@ -56,9 +23,14 @@ def hf_api_query(payload: dict):
|
|
56 |
:return: The output from the LLM
|
57 |
"""
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
|
64 |
def generate_slides_content(topic: str) -> str:
|
@@ -69,7 +41,7 @@ def generate_slides_content(topic: str) -> str:
|
|
69 |
:return: The content in JSON format
|
70 |
"""
|
71 |
|
72 |
-
with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r') as in_file:
|
73 |
template_txt = in_file.read().strip()
|
74 |
template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
|
75 |
|
@@ -98,61 +70,11 @@ def generate_slides_content(topic: str) -> str:
|
|
98 |
# logging.debug(f'{json_end_idx=}')
|
99 |
output = output[:json_end_idx]
|
100 |
|
101 |
-
logging.debug(
|
102 |
|
103 |
return output
|
104 |
|
105 |
|
106 |
-
def get_ai_image(text: str) -> str:
|
107 |
-
"""
|
108 |
-
Get a Stable Diffusion-generated image based on a given text.
|
109 |
-
|
110 |
-
:param text: The input text
|
111 |
-
:return: The Base 64-encoded image
|
112 |
-
"""
|
113 |
-
|
114 |
-
url = f'''https://api.clarifai.com/v2/users/{GlobalConfig.CLARIFAI_USER_ID_SD}/apps/{GlobalConfig.CLARIFAI_APP_ID_SD}/models/{GlobalConfig.CLARIFAI_MODEL_ID_SD}/versions/{GlobalConfig.CLARIFAI_MODEL_VERSION_ID_SD}/outputs'''
|
115 |
-
headers = {
|
116 |
-
"Content-Type": "application/json",
|
117 |
-
"Authorization": f'Key {GlobalConfig.CLARIFAI_PAT}'
|
118 |
-
}
|
119 |
-
data = {
|
120 |
-
"inputs": [
|
121 |
-
{
|
122 |
-
"data": {
|
123 |
-
"text": {
|
124 |
-
"raw": text
|
125 |
-
}
|
126 |
-
}
|
127 |
-
}
|
128 |
-
]
|
129 |
-
}
|
130 |
-
|
131 |
-
# print('*** AI image generator...')
|
132 |
-
# print(url)
|
133 |
-
|
134 |
-
start = time.time()
|
135 |
-
response = requests.post(
|
136 |
-
url=url,
|
137 |
-
headers=headers,
|
138 |
-
data=json.dumps(data)
|
139 |
-
)
|
140 |
-
stop = time.time()
|
141 |
-
|
142 |
-
# print('Response:', response, response.status_code)
|
143 |
-
logging.debug('Image generation took', stop - start, 'seconds')
|
144 |
-
img_data = ''
|
145 |
-
|
146 |
-
if response.ok:
|
147 |
-
# print('*** Clarifai SDXL request: Response OK')
|
148 |
-
json_data = json.loads(response.text)
|
149 |
-
img_data = json_data['outputs'][0]['data']['image']['base64']
|
150 |
-
else:
|
151 |
-
logging.error('*** Image generation failed:', response.text)
|
152 |
-
|
153 |
-
return img_data
|
154 |
-
|
155 |
-
|
156 |
if __name__ == '__main__':
|
157 |
# results = get_related_websites('5G AI WiFi 6')
|
158 |
#
|
|
|
|
|
1 |
import logging
|
|
|
2 |
import requests
|
|
|
3 |
|
4 |
from global_config import GlobalConfig
|
5 |
|
|
|
15 |
# llm = None
|
16 |
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def hf_api_query(payload: dict):
|
19 |
"""
|
20 |
Invoke HF inference end-point API.
|
|
|
23 |
:return: The output from the LLM
|
24 |
"""
|
25 |
|
26 |
+
try:
|
27 |
+
response = requests.post(HF_API_URL, headers=HF_API_HEADERS, json=payload, timeout=15)
|
28 |
+
result = response.json()
|
29 |
+
except requests.exceptions.Timeout as te:
|
30 |
+
logging.error('*** Error: hf_api_query timeout! %s', str(te))
|
31 |
+
result = {}
|
32 |
+
|
33 |
+
return result
|
34 |
|
35 |
|
36 |
def generate_slides_content(topic: str) -> str:
|
|
|
41 |
:return: The content in JSON format
|
42 |
"""
|
43 |
|
44 |
+
with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r', encoding='utf-8') as in_file:
|
45 |
template_txt = in_file.read().strip()
|
46 |
template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
|
47 |
|
|
|
70 |
# logging.debug(f'{json_end_idx=}')
|
71 |
output = output[:json_end_idx]
|
72 |
|
73 |
+
logging.debug('generate_slides_content: output: %s', output)
|
74 |
|
75 |
return output
|
76 |
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
if __name__ == '__main__':
|
79 |
# results = get_related_websites('5G AI WiFi 6')
|
80 |
#
|
pptx_helper.py
CHANGED
@@ -1,10 +1,11 @@
|
|
|
|
1 |
import pathlib
|
|
|
2 |
import tempfile
|
3 |
from typing import List, Tuple
|
|
|
4 |
import json5
|
5 |
-
import logging
|
6 |
import pptx
|
7 |
-
import re
|
8 |
import yaml
|
9 |
|
10 |
from global_config import GlobalConfig
|
@@ -57,7 +58,8 @@ def generate_powerpoint_presentation(
|
|
57 |
"""
|
58 |
Create and save a PowerPoint presentation file containing the contents in JSON or YAML format.
|
59 |
|
60 |
-
:param structured_data: The presentation contents as "JSON" (may contain trailing commas) or
|
|
|
61 |
:param as_yaml: True if the input data is in YAML format; False if it is in JSON format
|
62 |
:param slides_template: The PPTX template to use
|
63 |
:param output_file_path: The path of the PPTX file to save as
|
@@ -69,13 +71,16 @@ def generate_powerpoint_presentation(
|
|
69 |
try:
|
70 |
parsed_data = yaml.safe_load(structured_data)
|
71 |
except yaml.parser.ParserError as ype:
|
72 |
-
logging.error(
|
73 |
parsed_data = {'title': '', 'slides': []}
|
74 |
else:
|
75 |
# The structured "JSON" might contain trailing commas, so using json5
|
76 |
parsed_data = json5.loads(structured_data)
|
77 |
|
78 |
-
logging.debug(
|
|
|
|
|
|
|
79 |
presentation = pptx.Presentation(GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file'])
|
80 |
|
81 |
# The title slide
|
@@ -84,7 +89,7 @@ def generate_powerpoint_presentation(
|
|
84 |
title = slide.shapes.title
|
85 |
subtitle = slide.placeholders[1]
|
86 |
title.text = parsed_data['title']
|
87 |
-
logging.debug(
|
88 |
subtitle.text = 'by Myself and SlideDeck AI :)'
|
89 |
all_headers = [title.text, ]
|
90 |
|
|
|
1 |
+
import logging
|
2 |
import pathlib
|
3 |
+
import re
|
4 |
import tempfile
|
5 |
from typing import List, Tuple
|
6 |
+
|
7 |
import json5
|
|
|
8 |
import pptx
|
|
|
9 |
import yaml
|
10 |
|
11 |
from global_config import GlobalConfig
|
|
|
58 |
"""
|
59 |
Create and save a PowerPoint presentation file containing the contents in JSON or YAML format.
|
60 |
|
61 |
+
:param structured_data: The presentation contents as "JSON" (may contain trailing commas) or
|
62 |
+
YAML
|
63 |
:param as_yaml: True if the input data is in YAML format; False if it is in JSON format
|
64 |
:param slides_template: The PPTX template to use
|
65 |
:param output_file_path: The path of the PPTX file to save as
|
|
|
71 |
try:
|
72 |
parsed_data = yaml.safe_load(structured_data)
|
73 |
except yaml.parser.ParserError as ype:
|
74 |
+
logging.error('*** YAML parse error: %s', str(ype))
|
75 |
parsed_data = {'title': '', 'slides': []}
|
76 |
else:
|
77 |
# The structured "JSON" might contain trailing commas, so using json5
|
78 |
parsed_data = json5.loads(structured_data)
|
79 |
|
80 |
+
logging.debug(
|
81 |
+
"*** Using PPTX template: %s",
|
82 |
+
GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file']
|
83 |
+
)
|
84 |
presentation = pptx.Presentation(GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file'])
|
85 |
|
86 |
# The title slide
|
|
|
89 |
title = slide.shapes.title
|
90 |
subtitle = slide.placeholders[1]
|
91 |
title.text = parsed_data['title']
|
92 |
+
logging.debug('Presentation title is: %s', title.text)
|
93 |
subtitle.text = 'by Myself and SlideDeck AI :)'
|
94 |
all_headers = [title.text, ]
|
95 |
|
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
python-dotenv[cli]~=1.0.0
|
2 |
-
langchain~=0.
|
3 |
# huggingface_hub
|
4 |
-
streamlit~=1.
|
5 |
clarifai==9.7.4
|
6 |
|
7 |
python-pptx
|
|
|
1 |
python-dotenv[cli]~=1.0.0
|
2 |
+
langchain~=0.1.13
|
3 |
# huggingface_hub
|
4 |
+
streamlit~=1.32.2
|
5 |
clarifai==9.7.4
|
6 |
|
7 |
python-pptx
|