AIRider committed on
Commit
567a892
•
1 Parent(s): 33f5248

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +252 -0
app.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
import random
import re
import time
from urllib.parse import urlencode

import gradio as gr
import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
10
+
11
def setup_session():
    """Build a ``requests`` session that retries transient HTTPS failures.

    An ``HTTPAdapter`` is mounted on ``https://`` with a retry policy of up to
    5 attempts, a back-off factor of 1, and forced retries on HTTP 502, 503
    and 504 responses.

    Returns:
        The configured ``requests.Session``, or ``None`` if it could not be
        created (callers check for ``None``).
    """
    try:
        session = requests.Session()
        retry_policy = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
        session.mount("https://", HTTPAdapter(max_retries=retry_policy))
        return session
    except Exception:
        # Best-effort: keep the None contract, but record why setup failed
        # instead of silently discarding the error.
        logging.getLogger(__name__).exception("Failed to set up HTTP session")
        return None
19
+
20
def generate_naver_search_url(query):
    """Return the Naver blog-tab search URL for *query*.

    The query string is percent-encoded with ``urlencode`` so that spaces,
    ``&``, ``#`` and non-ASCII (e.g. Hangul) characters cannot corrupt the URL.

    Args:
        query: Free-text search terms.

    Returns:
        The full search URL as a string.
    """
    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    return base_url + urlencode(params)
25
+
26
def crawl_blog_content(url, session, timeout=10, delay_range=(1, 2)):
    """Fetch one blog post and return the text of its main content container.

    Args:
        url: Address of the blog post to download.
        session: Object exposing a ``requests``-style
            ``get(url, headers=..., timeout=...)`` method.
        timeout: Seconds to wait for the HTTP response before giving up.
        delay_range: ``(low, high)`` bounds, in seconds, of the random pause
            taken before the request is sent.

    Returns:
        The whitespace-normalised text of the ``div.se-main-container``
        element, or ``""`` when the status code is not 200, the element is
        missing, or any error occurs.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        # NOTE(review): "br" is advertised here; confirm the runtime can decode
        # Brotli responses, otherwise the body may arrive still compressed.
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        # Random pause before each request.
        time.sleep(random.uniform(*delay_range))

        # Without a timeout a stalled server would block the caller forever.
        response = session.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return ""

        soup = BeautifulSoup(response.content, "html.parser")
        container = soup.find("div", attrs={"class": "se-main-container"})
        if not container:
            return ""
        return clean_text(container.get_text())
    except Exception:
        # Best-effort crawl: report the failure, then fall back to "no content".
        logging.getLogger(__name__).exception("Failed to crawl blog content: %s", url)
        return ""
54
+
55
def crawl_naver_search_results(url, session, max_results=10, timeout=10):
    """Collect blog links from a Naver search results page.

    Walks ``li`` elements whose class matches ``bx.*``, then their
    ``div.detail_box`` / ``div.title_area`` descendants, and keeps every anchor
    whose ``href`` contains ``"blog.naver"``. Kept links are rewritten to the
    ``m.`` host.

    Args:
        url: Search results page to download.
        session: Object exposing a ``requests``-style
            ``get(url, headers=..., timeout=...)`` method.
        max_results: Maximum number of entries to return.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``{"제목": title, "링크": link}`` dicts (title / link), or
        ``[]`` when the status code is not 200 or any error occurs.
    """
    # NOTE(review): nesting was reconstructed from a flattened diff; it is
    # assumed that only "blog.naver" links are appended - confirm.
    if max_results <= 0:
        return []

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        # Without a timeout a stalled server would block the caller forever.
        response = session.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return []

        soup = BeautifulSoup(response.content, "html.parser")
        results = []
        for item in soup.find_all("li", class_=re.compile("bx.*")):
            for box in item.find_all("div", class_="detail_box"):
                for title_area in box.find_all("div", class_="title_area"):
                    title = title_area.text.strip()
                    for anchor in title_area.find_all("a", href=True):
                        link = anchor["href"]
                        if "blog.naver" not in link:
                            continue
                        # Rewrite only the scheme prefix, not any "https://"
                        # that may appear later inside the URL.
                        link = link.replace("https://", "https://m.", 1)
                        results.append({"제목": title, "링크": link})
                        # Returning here replaces the chain of per-loop breaks.
                        if len(results) >= max_results:
                            return results
        return results
    except Exception:
        # Best-effort crawl: report the failure, then fall back to "no results".
        logging.getLogger(__name__).exception("Failed to crawl search results: %s", url)
        return []
94
+
95
def clean_text(text):
    """Collapse every run of whitespace in *text* to one space and trim the ends.

    Args:
        text: String to normalise; ``None`` or ``""`` is treated as empty.

    Returns:
        The normalised string (``""`` for empty input).
    """
    if not text:
        return ""
    return re.sub(r"\s+", " ", text).strip()
98
+
99
def analyze_info(category, topic, references1, references2, references3):
    """Format the user's selections into the text block sent to the model.

    The Korean labels read: selected category, blog topic, reference post 1-3.

    Args:
        category: Selected posting category.
        topic: Blog topic or keywords.
        references1: Text of the first reference post.
        references2: Text of the second reference post.
        references3: Text of the third reference post.

    Returns:
        A five-line string, one labelled line per argument.
    """
    return (
        f"선택한 카테고리: {category}\n"
        f"블로그 주제: {topic}\n"
        f"참고 글1: {references1}\n"
        f"참고 글2: {references2}\n"
        f"참고 글3: {references3}"
    )
101
+
102
def suggest_title(category, topic, references1, references2, references3, system_message, max_tokens, temperature, top_p):
    """Ask the model for title suggestions based on the topic and references.

    The five content arguments are formatted by ``analyze_info`` and passed,
    with the system message and sampling settings, to ``call_api``.

    Returns:
        Whatever ``call_api`` returns.
    """
    # NOTE(review): call_api is not defined or imported anywhere in the
    # visible file - confirm where it is supposed to come from.
    full_content = analyze_info(category, topic, references1, references2, references3)
    return call_api(full_content, system_message, max_tokens, temperature, top_p)
106
+
107
def generate_outline(category, topic, references1, references2, references3, title, system_message, max_tokens, temperature, top_p):
    """Ask the model for an outline for the chosen blog title.

    The content formatted by ``analyze_info`` is extended with a ``Title:``
    line and passed, with the system message and sampling settings, to
    ``call_api``.

    Returns:
        Whatever ``call_api`` returns.
    """
    # NOTE(review): call_api is not defined or imported anywhere in the
    # visible file - confirm where it is supposed to come from.
    full_content = analyze_info(category, topic, references1, references2, references3)
    content = f"{full_content}\nTitle: {title}"
    return call_api(content, system_message, max_tokens, temperature, top_p)
112
+
113
def generate_blog_post(category, topic, references1, references2, references3, title, outline, system_message, max_tokens, temperature, top_p):
    """Ask the model for the full blog post and double-space the result.

    The content formatted by ``analyze_info`` is extended with ``Title:`` and
    ``Outline:`` lines and passed, with the system message and sampling
    settings, to ``call_api``.

    Returns:
        The ``call_api`` result with every newline replaced by two newlines.
    """
    # NOTE(review): call_api is not defined or imported anywhere in the
    # visible file - confirm where it is supposed to come from.
    full_content = analyze_info(category, topic, references1, references2, references3)
    content = f"{full_content}\nTitle: {title}\nOutline: {outline}"
    generated = call_api(content, system_message, max_tokens, temperature, top_p)
    # Turn each line break into a blank line between lines.
    return generated.replace("\n", "\n\n")
119
+
120
def fetch_references(topic):
    """Search Naver blogs for *topic* and return up to three crawled posts.

    Args:
        topic: Search terms used to build the Naver search URL.

    Returns:
        A 4-tuple ``(status, reference1, reference2, reference3)``. On failure
        the status is an error message and the references are ``""``. On
        success the status is "참고글 생성 완료" (references ready) and each
        reference is "제목: <title>\\n내용: <content>" (title / content);
        slots with no search result are ``""``.
    """
    search_url = generate_naver_search_url(topic)
    session = setup_session()
    if session is None:
        return "Failed to set up session.", "", "", ""

    results = crawl_naver_search_results(search_url, session)
    if not results:
        return "No results found.", "", "", ""

    # Pick up to three of the collected posts at random. random.sample raises
    # ValueError if asked for more items than exist, so cap the sample size.
    picked = random.sample(results, min(3, len(results)))
    references = [
        f"제목: {entry['제목']}\n내용: {crawl_blog_content(entry['링크'], session)}"
        for entry in picked
    ]
    # Pad so the three output textboxes always receive a value.
    references += [""] * (3 - len(references))

    return "참고글 생성 완료", references[0], references[1], references[2]
136
+
137
def get_title_prompt(category):
    """Return the title-generation system prompt for *category*.

    Args:
        category: Posting category label; "일반" (general) and "건강정보"
            (health information) are recognised.

    Returns:
        The prompt text for the category, or ``None`` for any other value.
    """
    prompts = {
        "일반": "\n# 블로그 제목 생성 규칙(일반)\n",
        "건강정보": "\n# 블로그 제목 생성 규칙(건강정보)\n",
    }
    # Unknown categories yield None, matching the original fall-through.
    return prompts.get(category)
146
+
147
def get_outline_prompt(category):
    """Return the outline (subtopic) generation system prompt for *category*.

    Args:
        category: Posting category label; "일반" (general) and "건강정보"
            (health information) are recognised.

    Returns:
        The prompt text for the category, or ``None`` for any other value.
    """
    prompts = {
        "일반": "\n# 블로그 소주제(Subtopic) 생성 규칙(일반)\n",
        "건강정보": "\n# 블로그 소주제(Subtopic) 생성 규칙(건강정보)\n",
    }
    # Unknown categories yield None, matching the original fall-through.
    return prompts.get(category)
156
+
157
def get_blog_post_prompt(category):
    """Return the blog-text generation system prompt for *category*.

    Args:
        category: Posting category label; "일반" (general) and "건강정보"
            (health information) are recognised.

    Returns:
        The prompt text for the category, or ``None`` for any other value.
    """
    prompts = {
        "일반": "\n# 블로그 텍스트 생성 규칙(일반)\n",
        "건강정보": "\n# 블로그 텍스트 생성 규칙(건강정보)\n",
    }
    # Unknown categories yield None, matching the original fall-through.
    return prompts.get(category)
166
+
167
# Gradio interface setup: page heading shown at the top of the app
# ("Per-category blog post generator (Play Ground)").
title = "카테고리별 블로그 글 생성기(Play Ground)"
169
+
170
def update_prompts(category):
    """Return the three system prompts that belong to *category*.

    Args:
        category: Posting category label passed to each prompt getter.

    Returns:
        A ``(title_prompt, outline_prompt, blog_post_prompt)`` tuple.
    """
    return (
        get_title_prompt(category),
        get_outline_prompt(category),
        get_blog_post_prompt(category),
    )
175
+
176
# Gradio UI. User-facing Korean strings were restored from mis-encoded text.
# NOTE(review): block nesting was reconstructed from a flattened diff; each
# Accordion is assumed to hold only its four settings widgets - confirm.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")

    # Step 1: choose the posting category.
    gr.Markdown("### 1단계 : 포스팅 카테고리를 지정해주세요")
    category = gr.Radio(choices=["일반", "건강정보"], label="포스팅 카테고리", value="일반")

    # Step 2: enter the blog topic or keywords.
    gr.Markdown("### 2단계 : 블로그 주제, 또는 키워드를 상세히 입력하세요")
    topic = gr.Textbox(label="블로그 주제(예시: 오징어 무침회(X), 오징어 무침회 레시피(O))", placeholder="예시: 여행지 추천(X), 8월 국내 여행지 추천(O)")

    # Step 3: reference-post fields, defined up front so later handlers can
    # use them as inputs and outputs. They are created hidden.
    references1 = gr.Textbox(label="참고 글 1", placeholder="참고할 블로그 포스팅글을 복사하여 붙여넣으세요", lines=10, visible=False)
    references2 = gr.Textbox(label="참고 글 2", placeholder="참고할 블로그 포스팅글을 복사하여 붙여넣으세요", lines=10, visible=False)
    references3 = gr.Textbox(label="참고 글 3", placeholder="참고할 블로그 포스팅글을 복사하여 붙여넣으세요", lines=10, visible=False)

    # Output textbox that shows progress / status messages.
    progress_output = gr.Textbox(label="진행 상황", lines=2, visible=True)

    # Button that fetches the reference posts.
    fetch_references_btn = gr.Button("참고글 생성하기")
    fetch_references_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])

    # Button that fetches the reference posts again (same handler).
    refill_btn = gr.Button("참고글 다시 넣기")
    refill_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])

    # Step 5: blog title.
    gr.Markdown("### 5단계 : 블로그 제목을 입력하세요")

    with gr.Accordion("제목 설정", open=True):
        title_system_message = gr.Textbox(label="시스템 메시지", value=get_title_prompt("일반"), lines=15)
        title_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=5000, step=1000)
        title_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        title_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)

    title_btn = gr.Button("제목 추천하기")
    title_suggestions = gr.Textbox(label="제목 추천", lines=10)

    title_btn.click(fn=suggest_title, inputs=[category, topic, references1, references2, references3, title_system_message, title_max_tokens, title_temperature, title_top_p], outputs=[title_suggestions])
    gr.HTML("<span style='color: grey;'>[제목 추천 항목을 참고하여 하나를 복사하여 사용하셔도 됩니다.]</span>")
    blog_title = gr.Textbox(label="블로그 제목", placeholder="블로그 제목을 입력해주세요")

    # Step 6: outline.
    gr.Markdown("### 6단계 : 아웃라인을 작성해주세요")
    gr.HTML("<span style='color: grey;'>[아웃라인에서 나온 결과를 수정해서 사용해주세요]</span>")

    with gr.Accordion("아웃라인 설정", open=True):
        outline_system_message = gr.Textbox(label="시스템 메시지", value=get_outline_prompt("일반"), lines=20)
        outline_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=6000, step=1000)
        outline_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        outline_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)

    outline_generate_btn = gr.Button("아웃라인 생성하기")
    outline_result = gr.Textbox(label="아웃라인 결과", lines=15)
    outline_input = gr.Textbox(label="작성할 아웃라인을 입력해주세요", placeholder="생성된 아웃라인 복사, 수정해서 사용하세요", lines=10)

    outline_generate_btn.click(fn=generate_outline, inputs=[category, topic, references1, references2, references3, blog_title, outline_system_message, outline_max_tokens, outline_temperature, outline_top_p], outputs=[outline_result])

    # Step 7: generate the post.
    gr.Markdown("### 7단계 : 글 생성하기")
    gr.HTML("<span style='color: grey;'>[아웃라인 별 텍스트량을 정하고 글 생성하기 버튼을 선택해주세요]</span>")

    with gr.Accordion("블로그 글 설정", open=True):
        blog_system_message = gr.Textbox(label="시스템 메시지", value=get_blog_post_prompt("일반"), lines=20)
        blog_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=12000, value=8000, step=1000)
        blog_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        blog_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)

    generate_btn = gr.Button("블로그 글 생성하기")
    output = gr.Textbox(label="생성된 블로그 글", lines=30)

    generate_btn.click(fn=generate_blog_post, inputs=[category, topic, references1, references2, references3, blog_title, outline_input, blog_system_message, blog_max_tokens, blog_temperature, blog_top_p], outputs=[output])

    # Swap all three system prompts whenever the category changes.
    category.change(fn=update_prompts, inputs=category, outputs=[title_system_message, outline_system_message, blog_system_message])

demo.launch()