Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import requests
|
3 |
+
from bs4 import BeautifulSoup
|
4 |
+
from requests.adapters import HTTPAdapter
|
5 |
+
from requests.packages.urllib3.util.retry import Retry
|
6 |
+
import re
|
7 |
+
import time
|
8 |
+
import random
|
9 |
+
import os
|
10 |
+
|
11 |
+
def setup_session():
    """Create a ``requests`` session whose HTTPS requests are retried.

    The retry policy allows up to 5 retries with a backoff factor of 1 and
    forces a retry on HTTP 502, 503 and 504 responses. The retrying adapter
    is mounted for the ``https://`` prefix only.

    Returns:
        The configured session, or ``None`` if anything raised while it was
        being built.
    """
    try:
        retry_policy = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
        http_session = requests.Session()
        http_session.mount('https://', HTTPAdapter(max_retries=retry_policy))
    except Exception:
        # Callers test the result against None instead of handling an exception.
        return None
    return http_session
|
19 |
+
|
20 |
+
def generate_naver_search_url(query):
    """Build the Naver blog-tab search URL for *query*.

    The query is percent-encoded, so characters that are significant inside
    a URL (``&``, ``=``, ``#``, ``+``, spaces, non-ASCII text) are sent as
    part of the search term instead of corrupting the query string.

    Args:
        query: Search phrase typed by the user.

    Returns:
        The full search URL as a string.
    """
    # Local import keeps this block self-contained; urllib is standard library.
    from urllib.parse import urlencode

    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    return base_url + urlencode(params)
|
25 |
+
|
26 |
+
def crawl_blog_content(url, session, timeout=10):
    """Download a blog page and return the cleaned text of its main container.

    Args:
        url: Address of the blog post to fetch.
        session: Object exposing ``get(url, headers=..., timeout=...)``; the
            caller in this file passes the result of ``setup_session()``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The whitespace-normalised text of the first
        ``<div class="se-main-container">`` element, or ``""`` when the
        request raises (including on timeout), the status code is not 200,
        or no such element is found.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        # Random 1-2 second pause before every request.
        # NOTE(review): presumably to avoid being rate-limited - confirm.
        time.sleep(random.uniform(1, 2))

        response = session.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return ""

        soup = BeautifulSoup(response.content, "html.parser")
        container = soup.find("div", attrs={'class': 'se-main-container'})
        if not container:
            return ""
        return clean_text(container.get_text())
    except Exception:
        # Best-effort crawl: any failure is reported as "no content".
        return ""
|
54 |
+
|
55 |
+
def crawl_naver_search_results(url, session):
|
56 |
+
try:
|
57 |
+
headers = {
|
58 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
|
59 |
+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
|
60 |
+
"Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
|
61 |
+
"Accept-Encoding": "gzip, deflate, br",
|
62 |
+
"Connection": "keep-alive",
|
63 |
+
"Referer": "https://search.naver.com/search.naver",
|
64 |
+
}
|
65 |
+
response = session.get(url, headers=headers)
|
66 |
+
if response.status_code != 200:
|
67 |
+
return []
|
68 |
+
|
69 |
+
soup = BeautifulSoup(response.content, "html.parser")
|
70 |
+
results = []
|
71 |
+
count = 0
|
72 |
+
for li in soup.find_all("li", class_=re.compile("bx.*")):
|
73 |
+
if count >= 10:
|
74 |
+
break
|
75 |
+
for div in li.find_all("div", class_="detail_box"):
|
76 |
+
for div2 in div.find_all("div", class_="title_area"):
|
77 |
+
title = div2.text.strip()
|
78 |
+
for a in div2.find_all("a", href=True):
|
79 |
+
link = a["href"]
|
80 |
+
if "blog.naver" in link:
|
81 |
+
link = link.replace("https://", "https://m.")
|
82 |
+
results.append({"์ ๋ชฉ": title, "๋งํฌ": link})
|
83 |
+
count += 1
|
84 |
+
if count >= 10:
|
85 |
+
break
|
86 |
+
if count >= 10:
|
87 |
+
break
|
88 |
+
if count >= 10:
|
89 |
+
break
|
90 |
+
|
91 |
+
return results
|
92 |
+
except Exception as e:
|
93 |
+
return []
|
94 |
+
|
95 |
+
def clean_text(text):
    """Collapse every run of whitespace in *text* to a single space.

    Leading and trailing whitespace is removed from the result.
    """
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()
|
98 |
+
|
99 |
+
def analyze_info(category, topic, references1, references2, references3):
|
100 |
+
return f"์ ํํ ์นดํ
๊ณ ๋ฆฌ: {category}\n๋ธ๋ก๊ทธ ์ฃผ์ : {topic}\n์ฐธ๊ณ ๊ธ1: {references1}\n์ฐธ๊ณ ๊ธ2: {references2}\n์ฐธ๊ณ ๊ธ3: {references3}"
|
101 |
+
|
102 |
+
def suggest_title(category, topic, references1, references2, references3, system_message, max_tokens, temperature, top_p):
    """Ask the text-generation backend for title suggestions.

    The category, topic and three reference texts are combined by
    ``analyze_info``; that string is passed to ``call_api`` together with
    the system message and sampling settings.

    Returns:
        Whatever ``call_api`` returns, unchanged.
    """
    # NOTE(review): call_api is not defined in the visible part of this
    # file - confirm it is provided elsewhere.
    prompt_body = analyze_info(category, topic, references1, references2, references3)
    return call_api(prompt_body, system_message, max_tokens, temperature, top_p)
|
106 |
+
|
107 |
+
def generate_outline(category, topic, references1, references2, references3, title, system_message, max_tokens, temperature, top_p):
    """Ask the text-generation backend for an outline of the post.

    The prompt is the string built by ``analyze_info`` followed by a
    ``Title:`` line carrying *title*.

    Returns:
        Whatever ``call_api`` returns, unchanged.
    """
    # NOTE(review): call_api is not defined in the visible part of this
    # file - confirm it is provided elsewhere.
    reference_summary = analyze_info(category, topic, references1, references2, references3)
    prompt = f"{reference_summary}\nTitle: {title}"
    return call_api(prompt, system_message, max_tokens, temperature, top_p)
|
112 |
+
|
113 |
+
def generate_blog_post(category, topic, references1, references2, references3, title, outline, system_message, max_tokens, temperature, top_p):
    """Ask the text-generation backend for the full blog post.

    The prompt is the string built by ``analyze_info`` followed by a
    ``Title:`` line and an ``Outline:`` line.

    Returns:
        The text returned by ``call_api`` with every newline doubled, so each
        line break in the generated text becomes a blank line.
    """
    # NOTE(review): call_api is not defined in the visible part of this
    # file - confirm it is provided elsewhere.
    reference_summary = analyze_info(category, topic, references1, references2, references3)
    prompt = f"{reference_summary}\nTitle: {title}\nOutline: {outline}"
    draft = call_api(prompt, system_message, max_tokens, temperature, top_p)
    return draft.replace('\n', '\n\n')
|
119 |
+
|
120 |
+
def fetch_references(topic):
|
121 |
+
search_url = generate_naver_search_url(topic)
|
122 |
+
session = setup_session()
|
123 |
+
if session is None:
|
124 |
+
return "Failed to set up session.", "", "", ""
|
125 |
+
results = crawl_naver_search_results(search_url, session)
|
126 |
+
if not results:
|
127 |
+
return "No results found.", "", "", ""
|
128 |
+
|
129 |
+
# ์์ 10๊ฐ์ ๋ธ๋ก๊ทธ ์ค ๋๋ค์ผ๋ก 3๊ฐ ์ ํ
|
130 |
+
selected_results = random.sample(results, 3)
|
131 |
+
references1_content = f"์ ๋ชฉ: {selected_results[0]['์ ๋ชฉ']}\n๋ด์ฉ: {crawl_blog_content(selected_results[0]['๋งํฌ'], session)}"
|
132 |
+
references2_content = f"์ ๋ชฉ: {selected_results[1]['์ ๋ชฉ']}\n๋ด์ฉ: {crawl_blog_content(selected_results[1]['๋งํฌ'], session)}"
|
133 |
+
references3_content = f"์ ๋ชฉ: {selected_results[2]['์ ๋ชฉ']}\n๋ด์ฉ: {crawl_blog_content(selected_results[2]['๋งํฌ'], session)}"
|
134 |
+
|
135 |
+
return "์ฐธ๊ณ ๊ธ ์์ฑ ์๋ฃ", references1_content, references2_content, references3_content
|
136 |
+
|
137 |
+
def get_title_prompt(category):
|
138 |
+
if (category == "์ผ๋ฐ"):
|
139 |
+
return """
|
140 |
+
# ๋ธ๋ก๊ทธ ์ ๋ชฉ ์์ฑ ๊ท์น(์ผ๋ฐ)
|
141 |
+
"""
|
142 |
+
elif (category == "๊ฑด๊ฐ์ ๋ณด"):
|
143 |
+
return """
|
144 |
+
# ๋ธ๋ก๊ทธ ์ ๋ชฉ ์์ฑ ๊ท์น(๊ฑด๊ฐ์ ๋ณด)
|
145 |
+
"""
|
146 |
+
|
147 |
+
def get_outline_prompt(category):
|
148 |
+
if (category == "์ผ๋ฐ"):
|
149 |
+
return """
|
150 |
+
# ๋ธ๋ก๊ทธ ์์ฃผ์ (Subtopic) ์์ฑ ๊ท์น(์ผ๋ฐ)
|
151 |
+
"""
|
152 |
+
elif (category == "๊ฑด๊ฐ์ ๋ณด"):
|
153 |
+
return """
|
154 |
+
# ๋ธ๋ก๊ทธ ์์ฃผ์ (Subtopic) ์์ฑ ๊ท์น(๊ฑด๊ฐ์ ๋ณด)
|
155 |
+
"""
|
156 |
+
|
157 |
+
def get_blog_post_prompt(category):
|
158 |
+
if (category == "์ผ๋ฐ"):
|
159 |
+
return """
|
160 |
+
# ๋ธ๋ก๊ทธ ํ
์คํธ ์์ฑ ๊ท์น(์ผ๋ฐ)
|
161 |
+
"""
|
162 |
+
elif (category == "๊ฑด๊ฐ์ ๋ณด"):
|
163 |
+
return """
|
164 |
+
# ๋ธ๋ก๊ทธ ํ
์คํธ ์์ฑ ๊ท์น(๊ฑด๊ฐ์ ๋ณด)
|
165 |
+
"""
|
166 |
+
|
167 |
+
# Gradio ์ธํฐํ์ด์ค ๊ตฌ์ฑ
|
168 |
+
title = "์นดํ
๊ณ ๋ฆฌ๋ณ ๋ธ๋ก๊ทธ ๊ธ ์์ฑ๊ธฐ(Play Ground)"
|
169 |
+
|
170 |
+
def update_prompts(category):
    """Return the three system prompts that belong to *category*.

    Returns:
        A ``(title_prompt, outline_prompt, blog_post_prompt)`` tuple, in the
        order the ``category.change`` handler lists its output components.
    """
    return (
        get_title_prompt(category),
        get_outline_prompt(category),
        get_blog_post_prompt(category),
    )
|
175 |
+
|
176 |
+
with gr.Blocks() as demo:
|
177 |
+
gr.Markdown(f"# {title}")
|
178 |
+
|
179 |
+
# 1๋จ๊ณ
|
180 |
+
gr.Markdown("### 1๋จ๊ณ : ํฌ์คํ
์นดํ
๊ณ ๋ฆฌ๋ฅผ ์ง์ ํด์ฃผ์ธ์")
|
181 |
+
category = gr.Radio(choices=["์ผ๋ฐ", "๊ฑด๊ฐ์ ๋ณด"], label="ํฌ์คํ
์นดํ
๊ณ ๋ฆฌ", value="์ผ๋ฐ")
|
182 |
+
|
183 |
+
# 2๋จ๊ณ
|
184 |
+
gr.Markdown("### 2๋จ๊ณ : ๋ธ๋ก๊ทธ ์ฃผ์ , ๋๋ ํค์๋๋ฅผ ์์ธํ ์
๋ ฅํ์ธ์")
|
185 |
+
topic = gr.Textbox(label="๋ธ๋ก๊ทธ ์ฃผ์ (์์: ์ค์ง์ด ๋ฌด์นจํ(X), ์ค์ง์ด ๋ฌด์นจํ ๋ ์ํผ(O))", placeholder="์์: ์ฌํ์ง ์ถ์ฒ(X), 8์ ๊ตญ๋ด ์ฌํ์ง ์ถ์ฒ(O)")
|
186 |
+
|
187 |
+
# 3๋จ๊ณ: ์ฐธ๊ณ ๊ธ์ ์ํ ๋ณ์๋ค ๋ฏธ๋ฆฌ ์ ์
|
188 |
+
references1 = gr.Textbox(label="์ฐธ๊ณ ๊ธ 1", placeholder="์ฐธ๊ณ ํ ๋ธ๋ก๊ทธ ํฌ์คํ
๊ธ์ ๋ณต์ฌํ์ฌ ๋ถ์ฌ๋ฃ์ผ์ธ์", lines=10, visible=False)
|
189 |
+
references2 = gr.Textbox(label="์ฐธ๊ณ ๊ธ 2", placeholder="์ฐธ๊ณ ํ ๋ธ๋ก๊ทธ ํฌ์คํ
๊ธ์ ๋ณต์ฌํ์ฌ ๋ถ์ฌ๋ฃ์ผ์ธ์", lines=10, visible=False)
|
190 |
+
references3 = gr.Textbox(label="์ฐธ๊ณ ๊ธ 3", placeholder="์ฐธ๊ณ ํ ๋ธ๋ก๊ทธ ํฌ์คํ
๊ธ์ ๋ณต์ฌํ์ฌ ๋ถ์ฌ๋ฃ์ผ์ธ์", lines=10, visible=False)
|
191 |
+
|
192 |
+
# ์งํ ์ํฉ ํ์๋ฅผ ์ํ ์ถ๋ ฅ ํ
์คํธ๋ฐ์ค
|
193 |
+
progress_output = gr.Textbox(label="์งํ ์ํฉ", lines=2, visible=True)
|
194 |
+
|
195 |
+
# ์ฐธ๊ณ ๊ธ ๊ฐ์ ธ์ค๊ธฐ ๋ฒํผ
|
196 |
+
fetch_references_btn = gr.Button("์ฐธ๊ณ ๊ธ ์์ฑํ๊ธฐ")
|
197 |
+
fetch_references_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])
|
198 |
+
|
199 |
+
# ์ฐธ๊ณ ๊ธ ๋ค์ ๋ฃ๊ธฐ ๋ฒํผ
|
200 |
+
refill_btn = gr.Button("์ฐธ๊ณ ๊ธ ๋ค์ ๋ฃ๊ธฐ")
|
201 |
+
refill_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])
|
202 |
+
|
203 |
+
# 5๋จ๊ณ: ๋ธ๋ก๊ทธ ์ ๋ชฉ์ ์
๋ ฅํ์ธ์
|
204 |
+
gr.Markdown("### 5๋จ๊ณ : ๋ธ๋ก๊ทธ ์ ๋ชฉ์ ์
๋ ฅํ์ธ์")
|
205 |
+
|
206 |
+
with gr.Accordion("์ ๋ชฉ ์ค์ ", open=True):
|
207 |
+
title_system_message = gr.Textbox(label="์์คํ
๋ฉ์์ง", value=get_title_prompt("์ผ๋ฐ"), lines=15)
|
208 |
+
title_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=5000, step=1000)
|
209 |
+
title_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
|
210 |
+
title_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
|
211 |
+
|
212 |
+
title_btn = gr.Button("์ ๋ชฉ ์ถ์ฒํ๊ธฐ")
|
213 |
+
title_suggestions = gr.Textbox(label="์ ๋ชฉ ์ถ์ฒ", lines=10)
|
214 |
+
|
215 |
+
title_btn.click(fn=suggest_title, inputs=[category, topic, references1, references2, references3, title_system_message, title_max_tokens, title_temperature, title_top_p], outputs=[title_suggestions])
|
216 |
+
gr.HTML("<span style='color: grey;'>[์ ๋ชฉ ์ถ์ฒ ํญ๋ชฉ์ ์ฐธ๊ณ ํ์ฌ ํ๋๋ฅผ ๋ณต์ฌํ์ฌ ์ฌ์ฉํ์
๋ ๋ฉ๋๋ค.]</span>")
|
217 |
+
blog_title = gr.Textbox(label="๋ธ๋ก๊ทธ ์ ๋ชฉ", placeholder="๋ธ๋ก๊ทธ ์ ๋ชฉ์ ์
๋ ฅํด์ฃผ์ธ์")
|
218 |
+
|
219 |
+
# 6๋จ๊ณ: ์์๋ผ์ธ์ ์ ํด์ฃผ์ธ์
|
220 |
+
gr.Markdown("### 6๋จ๊ณ : ์์๋ผ์ธ์ ์์ฑํด์ฃผ์ธ์")
|
221 |
+
gr.HTML("<span style='color: grey;'>[์์๋ผ์ธ์์ ๋์จ ๊ฒฐ๊ณผ๋ฅผ ์์ ํด์ ์ฌ์ฉํด์ฃผ์ธ์]</span>")
|
222 |
+
|
223 |
+
with gr.Accordion("์์๋ผ์ธ ์ค์ ", open=True):
|
224 |
+
outline_system_message = gr.Textbox(label="์์คํ
๋ฉ์์ง", value=get_outline_prompt("์ผ๋ฐ"), lines=20)
|
225 |
+
outline_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=6000, step=1000)
|
226 |
+
outline_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
|
227 |
+
outline_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
|
228 |
+
|
229 |
+
outline_generate_btn = gr.Button("์์๋ผ์ธ ์์ฑํ๊ธฐ")
|
230 |
+
outline_result = gr.Textbox(label="์์๋ผ์ธ ๊ฒฐ๊ณผ", lines=15)
|
231 |
+
outline_input = gr.Textbox(label="์์ฑํ ์์๋ผ์ธ์ ์
๋ ฅํด์ฃผ์ธ์", placeholder="์์ฑ๋ ์์๋ผ์ธ ๋ณต์ฌ, ์์ ํด์ ์ฌ์ฉํ์ธ์", lines=10)
|
232 |
+
|
233 |
+
outline_generate_btn.click(fn=generate_outline, inputs=[category, topic, references1, references2, references3, blog_title, outline_system_message, outline_max_tokens, outline_temperature, outline_top_p], outputs=[outline_result])
|
234 |
+
|
235 |
+
# 7๋จ๊ณ: ๊ธ ์์ฑํ๊ธฐ
|
236 |
+
gr.Markdown("### 7๋จ๊ณ : ๊ธ ์์ฑํ๊ธฐ")
|
237 |
+
gr.HTML("<span style='color: grey;'>[์์๋ผ์ธ ๋ณ ํ
์คํธ๋์ ์ ํ๊ณ ๊ธ ์์ฑํ๊ธฐ ๋ฒํผ์ ์ ํํด์ฃผ์ธ์]</span>")
|
238 |
+
|
239 |
+
with gr.Accordion("๋ธ๋ก๊ทธ ๊ธ ์ค์ ", open=True):
|
240 |
+
blog_system_message = gr.Textbox(label="์์คํ
๋ฉ์์ง", value=get_blog_post_prompt("์ผ๋ฐ"), lines=20)
|
241 |
+
blog_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=12000, value=8000, step=1000)
|
242 |
+
blog_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
|
243 |
+
blog_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
|
244 |
+
|
245 |
+
generate_btn = gr.Button("๋ธ๋ก๊ทธ ๊ธ ์์ฑํ๊ธฐ")
|
246 |
+
output = gr.Textbox(label="์์ฑ๋ ๋ธ๋ก๊ทธ ๊ธ", lines=30)
|
247 |
+
|
248 |
+
generate_btn.click(fn=generate_blog_post, inputs=[category, topic, references1, references2, references3, blog_title, outline_input, blog_system_message, blog_max_tokens, blog_temperature, blog_top_p], outputs=[output])
|
249 |
+
|
250 |
+
category.change(fn=update_prompts, inputs=category, outputs=[title_system_message, outline_system_message, blog_system_message])
|
251 |
+
|
252 |
+
demo.launch()
|