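# Page residue from the hosting site's file viewer, kept as a comment so the
# module parses: AIRider's picture | Update app.py | 43764c5 verified | raw |
# history blame | 13.1 kB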
import gradio as gr
import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import re
import time
import random
import os
from huggingface_hub import InferenceClient
def setup_session():
    """Create a ``requests`` session that retries transient gateway errors.

    Returns:
        A session whose ``https://`` adapter retries up to 5 times (backoff
        factor 1) on HTTP 502, 503 and 504, or ``None`` if anything goes
        wrong while building it.
    """
    try:
        retry_policy = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[502, 503, 504],
        )
        http = requests.Session()
        http.mount('https://', HTTPAdapter(max_retries=retry_policy))
    except Exception:
        # Callers treat None as "session could not be set up".
        return None
    return http
def generate_naver_search_url(query):
    """Return the Naver blog-tab search URL for ``query``.

    The parameters are percent-encoded, so spaces, ``&``, ``=``, ``#`` and
    non-ASCII text (e.g. Korean keywords) in ``query`` cannot break or
    truncate the query string.

    Args:
        query: Free-text search phrase.

    Returns:
        The full search URL as a string.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlencode

    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    return base_url + urlencode(params)
def crawl_blog_content(url, session):
    """Download one blog post and return its main body as cleaned text.

    Args:
        url: Address of the blog post.
        session: Object exposing ``get(url, headers=..., timeout=...)``,
            normally the session built by ``setup_session``.

    Returns:
        The whitespace-normalised text of the ``div.se-main-container``
        element, or ``""`` when the response is not HTTP 200, the element is
        missing, or any error occurs (best-effort: failures never propagate).
    """
    # NOTE(review): advertising "br" presumably relies on a brotli decoder
    # being installed for requests/urllib3 -- confirm in the deployment.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        # Random 1-2 second pause before each request.
        time.sleep(random.uniform(1, 2))
        # requests applies no timeout by default; without one a stalled
        # server would block the caller indefinitely.
        response = session.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return ""
        soup = BeautifulSoup(response.content, "html.parser")
        body = soup.find("div", attrs={'class': 'se-main-container'})
        if body is None:
            return ""
        return clean_text(body.get_text())
    except Exception:
        # Deliberate best-effort: a post that cannot be fetched or parsed
        # simply contributes no text.
        return ""
def crawl_naver_search_results(url, session):
    """Scrape up to ten Naver blog links from a search-results page.

    Args:
        url: Search-results URL to fetch.
        session: Object exposing ``get(url, headers=..., timeout=...)``,
            normally the session built by ``setup_session``.

    Returns:
        A list of at most ten dicts, each holding the title text and the
        (mobile) link under the two keys used in the literal below. An empty
        list is returned when the response is not HTTP 200 or on any error
        (best-effort: failures never propagate).
    """
    max_results = 10
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        # requests applies no timeout by default; bound the wait so a stalled
        # server cannot hang the caller.
        response = session.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return []
        soup = BeautifulSoup(response.content, "html.parser")
        results = []
        for li in soup.find_all("li", class_=re.compile("bx.*")):
            for box in li.find_all("div", class_="detail_box"):
                for title_area in box.find_all("div", class_="title_area"):
                    title = title_area.text.strip()
                    for anchor in title_area.find_all("a", href=True):
                        link = anchor["href"]
                        # NOTE(review): the original indentation was lost;
                        # this assumes only Naver blog links are collected.
                        if "blog.naver" not in link:
                            continue
                        # Point at the mobile site. Rewrite only the scheme
                        # prefix and skip links that are already mobile, so
                        # "https://m." never becomes "https://m.m.".
                        if link.startswith("https://") and not link.startswith("https://m."):
                            link = "https://m." + link[len("https://"):]
                        results.append({"์ œ๋ชฉ": title, "๋งํฌ": link})
                        if len(results) >= max_results:
                            return results
        return results
    except Exception:
        # Deliberate best-effort: any network or parsing failure yields
        # "no results" rather than an exception.
        return []
def clean_text(text):
    """Collapse each run of whitespace to one space and trim both ends."""
    squeezed = re.sub(r'\s+', ' ', text)
    return squeezed.strip()
def create_client(model_name):
    """Build an ``InferenceClient`` for ``model_name``.

    The access token is read from the ``HF_TOKEN`` environment variable;
    ``None`` is passed when the variable is unset.
    """
    hf_token = os.getenv("HF_TOKEN")
    return InferenceClient(model_name, token=hf_token)
# Module-level inference client used by call_api; created once at import time.
client = create_client("OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5")
def call_api(content, system_message, max_tokens, temperature, top_p):
    """Send one system + user exchange to the module-level client.

    Args:
        content: Text of the user message.
        system_message: Text of the system message.
        max_tokens: Forwarded to ``chat_completion`` unchanged.
        temperature: Forwarded to ``chat_completion`` unchanged.
        top_p: Forwarded to ``chat_completion`` unchanged.

    Returns:
        The message content of the first choice in the response.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": content}
    # A new seed is drawn on every call.
    seed = random.randint(0, 1000000)
    completion = client.chat_completion(
        messages=[system_turn, user_turn],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        seed=seed,
    )
    return completion.choices[0].message.content
def analyze_info(category, topic, references1, references2, references3):
    """Format the category, topic and three references as five labelled lines.

    Returns:
        A single string with one ``label: value`` line per argument, in
        argument order, joined by newlines.
    """
    labelled_lines = (
        f"์„ ํƒํ•œ ์นดํ…Œ๊ณ ๋ฆฌ: {category}",
        f"๋ธ”๋กœ๊ทธ ์ฃผ์ œ: {topic}",
        f"์ฐธ๊ณ  ๊ธ€1: {references1}",
        f"์ฐธ๊ณ  ๊ธ€2: {references2}",
        f"์ฐธ๊ณ  ๊ธ€3: {references3}",
    )
    return "\n".join(labelled_lines)
def suggest_title(category, topic, references1, references2, references3, system_message, max_tokens, temperature, top_p):
    """Ask the model for title suggestions.

    The user message is the five-line summary built by ``analyze_info``; the
    sampling arguments are passed to ``call_api`` unchanged.

    Returns:
        The text returned by ``call_api``.
    """
    summary = analyze_info(category, topic, references1, references2, references3)
    return call_api(summary, system_message, max_tokens, temperature, top_p)
def generate_outline(category, topic, references1, references2, references3, title, system_message, max_tokens, temperature, top_p):
    """Ask the model for an outline for the given title.

    The user message is the ``analyze_info`` summary followed by a
    ``Title: ...`` line; the sampling arguments go to ``call_api`` unchanged.

    Returns:
        The text returned by ``call_api``.
    """
    summary = analyze_info(category, topic, references1, references2, references3)
    prompt = f"{summary}\nTitle: {title}"
    return call_api(prompt, system_message, max_tokens, temperature, top_p)
def generate_blog_post(category, topic, references1, references2, references3, title, outline, system_message, max_tokens, temperature, top_p):
    """Ask the model for the full post and double every line break.

    The user message is the ``analyze_info`` summary followed by
    ``Title: ...`` and ``Outline: ...`` lines.

    Returns:
        The model's text with each newline replaced by two newlines.
    """
    summary = analyze_info(category, topic, references1, references2, references3)
    prompt = f"{summary}\nTitle: {title}\nOutline: {outline}"
    draft = call_api(prompt, system_message, max_tokens, temperature, top_p)
    # Splitting on "\n" and re-joining with "\n\n" doubles each line break.
    return "\n\n".join(draft.split("\n"))
def fetch_references(topic):
    """Search Naver for ``topic`` and crawl up to three random blog posts.

    Args:
        topic: Search phrase for the blog search.

    Returns:
        A 4-tuple ``(status_message, reference1, reference2, reference3)``.
        Each reference is a title line plus the crawled body text. When the
        session cannot be created or the search yields nothing, the three
        references are empty strings. When fewer than three results exist,
        the missing references are empty strings.
    """
    search_url = generate_naver_search_url(topic)
    session = setup_session()
    if session is None:
        return "Failed to set up session.", "", "", ""
    try:
        results = crawl_naver_search_results(search_url, session)
        if not results:
            return "No results found.", "", "", ""
        # Pick three of the collected results at random. random.sample raises
        # ValueError if asked for more items than exist, so cap the count.
        picked = random.sample(results, min(3, len(results)))
        references = []
        for item in picked:
            item_title = item['์ œ๋ชฉ']
            body = crawl_blog_content(item['๋งํฌ'], session)
            references.append(f"์ œ๋ชฉ: {item_title}\n๋‚ด์šฉ: {body}")
    finally:
        # Release pooled connections whether or not crawling succeeded.
        session.close()
    # Always hand back exactly three reference slots.
    references += [""] * (3 - len(references))
    return "์ฐธ๊ณ ๊ธ€ ์ƒ์„ฑ ์™„๋ฃŒ", references[0], references[1], references[2]
def get_title_prompt(category):
    """Return the title-generation system prompt for ``category``.

    Returns:
        The prompt text for one of the two known category labels, or
        ``None`` when ``category`` equals neither.
    """
    prompt_table = (
        ("์ผ๋ฐ˜", "\n# ๋ธ”๋กœ๊ทธ ์ œ๋ชฉ ์ƒ์„ฑ ๊ทœ์น™(์ผ๋ฐ˜)\n"),
        ("๊ฑด๊ฐ•์ •๋ณด", "\n# ๋ธ”๋กœ๊ทธ ์ œ๋ชฉ ์ƒ์„ฑ ๊ทœ์น™(๊ฑด๊ฐ•์ •๋ณด)\n"),
    )
    for label, prompt in prompt_table:
        if category == label:
            return prompt
    return None
def get_outline_prompt(category):
    """Return the outline (subtopic) system prompt for ``category``.

    Returns:
        The prompt text for one of the two known category labels, or
        ``None`` when ``category`` equals neither.
    """
    prompt_table = (
        ("์ผ๋ฐ˜", "\n# ๋ธ”๋กœ๊ทธ ์†Œ์ฃผ์ œ(Subtopic) ์ƒ์„ฑ ๊ทœ์น™(์ผ๋ฐ˜)\n"),
        ("๊ฑด๊ฐ•์ •๋ณด", "\n# ๋ธ”๋กœ๊ทธ ์†Œ์ฃผ์ œ(Subtopic) ์ƒ์„ฑ ๊ทœ์น™(๊ฑด๊ฐ•์ •๋ณด)\n"),
    )
    for label, prompt in prompt_table:
        if category == label:
            return prompt
    return None
def get_blog_post_prompt(category):
    """Return the post-body system prompt for ``category``.

    Returns:
        The prompt text for one of the two known category labels, or
        ``None`` when ``category`` equals neither.
    """
    prompt_table = (
        ("์ผ๋ฐ˜", "\n# ๋ธ”๋กœ๊ทธ ํ…์ŠคํŠธ ์ƒ์„ฑ ๊ทœ์น™(์ผ๋ฐ˜)\n"),
        ("๊ฑด๊ฐ•์ •๋ณด", "\n# ๋ธ”๋กœ๊ทธ ํ…์ŠคํŠธ ์ƒ์„ฑ ๊ทœ์น™(๊ฑด๊ฐ•์ •๋ณด)\n"),
    )
    for label, prompt in prompt_table:
        if category == label:
            return prompt
    return None
# Gradio interface setup.
# Page heading; rendered as a level-1 Markdown header inside the Blocks layout.
title = "์นดํ…Œ๊ณ ๋ฆฌ๋ณ„ ๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑ๊ธฐ(Play Ground)"
def update_prompts(category):
    """Return the three system prompts for ``category``.

    Returns:
        A tuple ``(title_prompt, outline_prompt, blog_post_prompt)`` in that
        order, matching the outputs of the category ``change`` handler.
    """
    return (
        get_title_prompt(category),
        get_outline_prompt(category),
        get_blog_post_prompt(category),
    )
# Builds the whole UI: category and topic inputs, hidden reference boxes
# filled by fetch_references, then title, outline and post generation.
# NOTE(review): the original indentation was lost. The nesting below assumes
# each Accordion holds only its system-message box and three sliders, with the
# buttons and result boxes outside it -- confirm against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    # Step 1
    gr.Markdown("### 1๋‹จ๊ณ„ : ํฌ์ŠคํŒ… ์นดํ…Œ๊ณ ๋ฆฌ๋ฅผ ์ง€์ •ํ•ด์ฃผ์„ธ์š”")
    category = gr.Radio(choices=["์ผ๋ฐ˜", "๊ฑด๊ฐ•์ •๋ณด"], label="ํฌ์ŠคํŒ… ์นดํ…Œ๊ณ ๋ฆฌ", value="์ผ๋ฐ˜")
    # Step 2
    gr.Markdown("### 2๋‹จ๊ณ„ : ๋ธ”๋กœ๊ทธ ์ฃผ์ œ, ๋˜๋Š” ํ‚ค์›Œ๋“œ๋ฅผ ์ƒ์„ธํžˆ ์ž…๋ ฅํ•˜์„ธ์š”")
    topic = gr.Textbox(label="๋ธ”๋กœ๊ทธ ์ฃผ์ œ(์˜ˆ์‹œ: ์˜ค์ง•์–ด ๋ฌด์นจํšŒ(X), ์˜ค์ง•์–ด ๋ฌด์นจํšŒ ๋ ˆ์‹œํ”ผ(O))", placeholder="์˜ˆ์‹œ: ์—ฌํ–‰์ง€ ์ถ”์ฒœ(X), 8์›” ๊ตญ๋‚ด ์—ฌํ–‰์ง€ ์ถ”์ฒœ(O)")
    # Step 3: pre-define the reference-text components (created hidden)
    references1 = gr.Textbox(label="์ฐธ๊ณ  ๊ธ€ 1", placeholder="์ฐธ๊ณ ํ•  ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŒ…๊ธ€์„ ๋ณต์‚ฌํ•˜์—ฌ ๋ถ™์—ฌ๋„ฃ์œผ์„ธ์š”", lines=10, visible=False)
    references2 = gr.Textbox(label="์ฐธ๊ณ  ๊ธ€ 2", placeholder="์ฐธ๊ณ ํ•  ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŒ…๊ธ€์„ ๋ณต์‚ฌํ•˜์—ฌ ๋ถ™์—ฌ๋„ฃ์œผ์„ธ์š”", lines=10, visible=False)
    references3 = gr.Textbox(label="์ฐธ๊ณ  ๊ธ€ 3", placeholder="์ฐธ๊ณ ํ•  ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŒ…๊ธ€์„ ๋ณต์‚ฌํ•˜์—ฌ ๋ถ™์—ฌ๋„ฃ์œผ์„ธ์š”", lines=10, visible=False)
    # Output textbox that shows the status message returned by fetch_references
    progress_output = gr.Textbox(label="์ง„ํ–‰ ์ƒํ™ฉ", lines=2, visible=True)
    # Fetch-references button
    fetch_references_btn = gr.Button("์ฐธ๊ณ ๊ธ€ ์ƒ์„ฑํ•˜๊ธฐ")
    fetch_references_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])
    # Re-fetch references button (wired to the same handler as the button above)
    refill_btn = gr.Button("์ฐธ๊ณ ๊ธ€ ๋‹ค์‹œ ๋„ฃ๊ธฐ")
    refill_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])
    # Step 5: enter the blog title
    gr.Markdown("### 5๋‹จ๊ณ„ : ๋ธ”๋กœ๊ทธ ์ œ๋ชฉ์„ ์ž…๋ ฅํ•˜์„ธ์š”")
    with gr.Accordion("์ œ๋ชฉ ์„ค์ •", open=True):
        title_system_message = gr.Textbox(label="์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€", value=get_title_prompt("์ผ๋ฐ˜"), lines=15)
        title_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=5000, step=1000)
        title_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        title_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    title_btn = gr.Button("์ œ๋ชฉ ์ถ”์ฒœํ•˜๊ธฐ")
    title_suggestions = gr.Textbox(label="์ œ๋ชฉ ์ถ”์ฒœ", lines=10)
    title_btn.click(fn=suggest_title, inputs=[category, topic, references1, references2, references3, title_system_message, title_max_tokens, title_temperature, title_top_p], outputs=[title_suggestions])
    gr.HTML("<span style='color: grey;'>[์ œ๋ชฉ ์ถ”์ฒœ ํ•ญ๋ชฉ์„ ์ฐธ๊ณ ํ•˜์—ฌ ํ•˜๋‚˜๋ฅผ ๋ณต์‚ฌํ•˜์—ฌ ์‚ฌ์šฉํ•˜์…”๋„ ๋ฉ๋‹ˆ๋‹ค.]</span>")
    # Title actually used by the outline and post steps (typed by the user)
    blog_title = gr.Textbox(label="๋ธ”๋กœ๊ทธ ์ œ๋ชฉ", placeholder="๋ธ”๋กœ๊ทธ ์ œ๋ชฉ์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”")
    # Step 6: decide the outline
    gr.Markdown("### 6๋‹จ๊ณ„ : ์•„์›ƒ๋ผ์ธ์„ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”")
    gr.HTML("<span style='color: grey;'>[์•„์›ƒ๋ผ์ธ์—์„œ ๋‚˜์˜จ ๊ฒฐ๊ณผ๋ฅผ ์ˆ˜์ •ํ•ด์„œ ์‚ฌ์šฉํ•ด์ฃผ์„ธ์š”]</span>")
    with gr.Accordion("์•„์›ƒ๋ผ์ธ ์„ค์ •", open=True):
        outline_system_message = gr.Textbox(label="์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€", value=get_outline_prompt("์ผ๋ฐ˜"), lines=20)
        outline_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=6000, step=1000)
        outline_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        outline_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    outline_generate_btn = gr.Button("์•„์›ƒ๋ผ์ธ ์ƒ์„ฑํ•˜๊ธฐ")
    outline_result = gr.Textbox(label="์•„์›ƒ๋ผ์ธ ๊ฒฐ๊ณผ", lines=15)
    # The post step reads outline_input, not outline_result.
    outline_input = gr.Textbox(label="์ž‘์„ฑํ•  ์•„์›ƒ๋ผ์ธ์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”", placeholder="์ƒ์„ฑ๋œ ์•„์›ƒ๋ผ์ธ ๋ณต์‚ฌ, ์ˆ˜์ •ํ•ด์„œ ์‚ฌ์šฉํ•˜์„ธ์š”", lines=10)
    outline_generate_btn.click(fn=generate_outline, inputs=[category, topic, references1, references2, references3, blog_title, outline_system_message, outline_max_tokens, outline_temperature, outline_top_p], outputs=[outline_result])
    # Step 7: generate the post
    gr.Markdown("### 7๋‹จ๊ณ„ : ๊ธ€ ์ƒ์„ฑํ•˜๊ธฐ")
    gr.HTML("<span style='color: grey;'>[์•„์›ƒ๋ผ์ธ ๋ณ„ ํ…์ŠคํŠธ๋Ÿ‰์„ ์ •ํ•˜๊ณ  ๊ธ€ ์ƒ์„ฑํ•˜๊ธฐ ๋ฒ„ํŠผ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”]</span>")
    with gr.Accordion("๋ธ”๋กœ๊ทธ ๊ธ€ ์„ค์ •", open=True):
        blog_system_message = gr.Textbox(label="์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€", value=get_blog_post_prompt("์ผ๋ฐ˜"), lines=20)
        blog_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=12000, value=8000, step=1000)
        blog_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        blog_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    generate_btn = gr.Button("๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑํ•˜๊ธฐ")
    output = gr.Textbox(label="์ƒ์„ฑ๋œ ๋ธ”๋กœ๊ทธ ๊ธ€", lines=30)
    generate_btn.click(fn=generate_blog_post, inputs=[category, topic, references1, references2, references3, blog_title, outline_input, blog_system_message, blog_max_tokens, blog_temperature, blog_top_p], outputs=[output])
    # Changing the category reloads all three system-message boxes.
    category.change(fn=update_prompts, inputs=category, outputs=[title_system_message, outline_system_message, blog_system_message])
# Start the Gradio app; this runs at import time because there is no main guard.
demo.launch()