Spaces:
Sleeping
Sleeping
import gradio as gr | |
import requests | |
from bs4 import BeautifulSoup | |
from requests.adapters import HTTPAdapter | |
from requests.packages.urllib3.util.retry import Retry | |
import re | |
import time | |
import random | |
import os | |
from huggingface_hub import InferenceClient | |
def setup_session():
    """Build a ``requests`` session that retries failed HTTPS requests.

    Returns:
        A ``requests.Session`` whose ``https://`` adapter retries up to 5
        times (backoff factor 1) on HTTP 502/503/504 responses, or ``None``
        if constructing the session raised any exception.
    """
    try:
        retry_policy = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[502, 503, 504],
        )
        https_adapter = HTTPAdapter(max_retries=retry_policy)
        http_session = requests.Session()
        http_session.mount('https://', https_adapter)
    except Exception:
        # Callers check for None instead of handling an exception.
        return None
    return http_session
def generate_naver_search_url(query):
    """Return the Naver blog-tab search URL for *query*.

    The query is percent-encoded, so spaces, ``&``, ``=``, ``#`` and
    non-ASCII text cannot break or truncate the query string.

    Args:
        query: Free-text search phrase.

    Returns:
        The full search URL as a string.
    """
    from urllib.parse import urlencode

    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    return base_url + urlencode(params)
def crawl_blog_content(url, session):
    """Fetch a blog post page and return its main text, whitespace-normalized.

    Sleeps for a random 1-2 seconds before the request, then extracts the
    text of the ``div`` whose class is ``se-main-container``.

    Args:
        url: Address of the blog post page.
        session: Object exposing a ``requests.Session``-style ``get`` method.

    Returns:
        The cleaned text of the container, or ``""`` when the response status
        is not 200, the container is missing, or any exception (including a
        request timeout) occurs.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        # Random delay before each request.
        time.sleep(random.uniform(1, 2))
        # Without a timeout a stalled connection would block the caller forever.
        response = session.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return ""
        soup = BeautifulSoup(response.content, "html.parser")
        container = soup.find("div", attrs={'class': 'se-main-container'})
        if not container:
            return ""
        return clean_text(container.get_text())
    except Exception:
        # Best-effort crawl: any failure yields an empty reference body.
        return ""
def crawl_naver_search_results(url, session):
    """Collect up to 10 Naver blog links from a search-results page.

    Walks ``li`` elements whose class matches ``bx.*``, then their
    ``div.detail_box`` / ``div.title_area`` descendants, and keeps every
    anchor whose ``href`` contains ``blog.naver``. Kept links have
    ``https://`` rewritten to ``https://m.``.

    Args:
        url: Search-results URL to fetch.
        session: Object exposing a ``requests.Session``-style ``get`` method.

    Returns:
        A list of at most 10 dicts, each holding the title-area text and the
        rewritten link. Returns ``[]`` when the response status is not 200 or
        any exception (including a request timeout) occurs.
    """
    max_results = 10
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = session.get(url, headers=headers, timeout=10)
        if response.status_code != 200:
            return []
        soup = BeautifulSoup(response.content, "html.parser")
        results = []
        # NOTE(review): the source's indentation was lost; this nesting assumes
        # only blog.naver links are appended -- confirm against the original.
        for li in soup.find_all("li", class_=re.compile("bx.*")):
            for detail_box in li.find_all("div", class_="detail_box"):
                for title_area in detail_box.find_all("div", class_="title_area"):
                    title = title_area.text.strip()
                    for a in title_area.find_all("a", href=True):
                        link = a["href"]
                        if "blog.naver" not in link:
                            continue
                        link = link.replace("https://", "https://m.")
                        results.append({"์ ๋ชฉ": title, "๋งํฌ": link})
                        # Returning here replaces the cascade of per-loop breaks.
                        if len(results) >= max_results:
                            return results
        return results
    except Exception:
        # Best-effort crawl: any failure is reported as "no results".
        return []
def clean_text(text):
    """Collapse every whitespace run in *text* to one space and trim the ends."""
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()
def create_client(model_name):
    """Return an ``InferenceClient`` for *model_name*.

    The access token is read from the ``HF_TOKEN`` environment variable
    (``None`` when the variable is unset).
    """
    hf_token = os.getenv("HF_TOKEN")
    return InferenceClient(model_name, token=hf_token)


# Module-level client used by call_api().
client = create_client("OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5")
def call_api(content, system_message, max_tokens, temperature, top_p):
    """Send one system + user exchange to the module-level client.

    A fresh random seed in ``[0, 1000000]`` is drawn for every call.

    Returns:
        The message content of the first choice in the completion.
    """
    seed = random.randint(0, 1000000)
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    completion = client.chat_completion(
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        seed=seed,
    )
    return completion.choices[0].message.content
def analyze_info(category, topic, references1, references2, references3):
    """Format the category, topic and three references as five labelled lines.

    Returns:
        A single string with one ``label: value`` entry per line, in the
        order category, topic, reference 1, reference 2, reference 3.
    """
    labelled_lines = [
        f"์ ํํ ์นดํ ๊ณ ๋ฆฌ: {category}",
        f"๋ธ๋ก๊ทธ ์ฃผ์ : {topic}",
        f"์ฐธ๊ณ ๊ธ1: {references1}",
        f"์ฐธ๊ณ ๊ธ2: {references2}",
        f"์ฐธ๊ณ ๊ธ3: {references3}",
    ]
    return "\n".join(labelled_lines)
def suggest_title(category, topic, references1, references2, references3, system_message, max_tokens, temperature, top_p):
    """Ask the model for title suggestions based on the summarized inputs.

    The category, topic and references are formatted by ``analyze_info`` and
    sent as the user message through ``call_api``.

    Returns:
        The text returned by ``call_api``.
    """
    summary = analyze_info(category, topic, references1, references2, references3)
    return call_api(summary, system_message, max_tokens, temperature, top_p)
def generate_outline(category, topic, references1, references2, references3, title, system_message, max_tokens, temperature, top_p):
    """Ask the model for an outline given the summarized inputs and a title.

    The user message is the ``analyze_info`` summary followed by a
    ``Title:`` line.

    Returns:
        The text returned by ``call_api``.
    """
    summary = analyze_info(category, topic, references1, references2, references3)
    prompt = summary + f"\nTitle: {title}"
    return call_api(prompt, system_message, max_tokens, temperature, top_p)
def generate_blog_post(category, topic, references1, references2, references3, title, outline, system_message, max_tokens, temperature, top_p):
    """Ask the model for the blog post and double every newline in the reply.

    The user message is the ``analyze_info`` summary followed by ``Title:``
    and ``Outline:`` lines.

    Returns:
        The text from ``call_api`` with each ``\\n`` replaced by ``\\n\\n``.
    """
    summary = analyze_info(category, topic, references1, references2, references3)
    prompt = summary + f"\nTitle: {title}\nOutline: {outline}"
    generated = call_api(prompt, system_message, max_tokens, temperature, top_p)
    # Doubling newlines turns each line break into a blank-line separator.
    return generated.replace('\n', '\n\n')
def fetch_references(topic):
    """Search Naver blogs for *topic* and crawl up to three random results.

    Args:
        topic: Search phrase passed to ``generate_naver_search_url``.

    Returns:
        A 4-tuple ``(status, reference1, reference2, reference3)``. On a
        session or search failure the status carries the error message and
        the references are empty strings. When fewer than three results are
        found, the missing reference slots are empty strings.
    """
    search_url = generate_naver_search_url(topic)
    session = setup_session()
    if session is None:
        return "Failed to set up session.", "", "", ""
    try:
        results = crawl_naver_search_results(search_url, session)
        if not results:
            return "No results found.", "", "", ""
        # random.sample raises ValueError when asked for more items than
        # exist, so cap the sample size at the number of results found.
        picked = random.sample(results, min(3, len(results)))
        references = [
            f"์ ๋ชฉ: {item['์ ๋ชฉ']}\n๋ด์ฉ: {crawl_blog_content(item['๋งํฌ'], session)}"
            for item in picked
        ]
    finally:
        # Release pooled connections; the session is not reused after this call.
        session.close()
    # Pad so the caller always receives exactly three reference slots.
    references += [""] * (3 - len(references))
    return "์ฐธ๊ณ ๊ธ ์์ฑ ์๋ฃ", references[0], references[1], references[2]
def get_title_prompt(category):
    """Return the title-writing system prompt for *category*.

    Returns:
        The prompt string for a known category, or ``None`` otherwise.
    """
    prompt_table = (
        ("์ผ๋ฐ", "\n# ๋ธ๋ก๊ทธ ์ ๋ชฉ ์์ฑ ๊ท์น(์ผ๋ฐ)\n"),
        ("๊ฑด๊ฐ์ ๋ณด", "\n# ๋ธ๋ก๊ทธ ์ ๋ชฉ ์์ฑ ๊ท์น(๊ฑด๊ฐ์ ๋ณด)\n"),
    )
    for known_category, prompt in prompt_table:
        if category == known_category:
            return prompt
    return None
def get_outline_prompt(category):
    """Return the outline (subtopic) system prompt for *category*.

    Returns:
        The prompt string for a known category, or ``None`` otherwise.
    """
    prompt_table = (
        ("์ผ๋ฐ", "\n# ๋ธ๋ก๊ทธ ์์ฃผ์ (Subtopic) ์์ฑ ๊ท์น(์ผ๋ฐ)\n"),
        ("๊ฑด๊ฐ์ ๋ณด", "\n# ๋ธ๋ก๊ทธ ์์ฃผ์ (Subtopic) ์์ฑ ๊ท์น(๊ฑด๊ฐ์ ๋ณด)\n"),
    )
    for known_category, prompt in prompt_table:
        if category == known_category:
            return prompt
    return None
def get_blog_post_prompt(category):
    """Return the blog-post-writing system prompt for *category*.

    Returns:
        The prompt string for a known category, or ``None`` otherwise.
    """
    prompt_table = (
        ("์ผ๋ฐ", "\n# ๋ธ๋ก๊ทธ ํ ์คํธ ์์ฑ ๊ท์น(์ผ๋ฐ)\n"),
        ("๊ฑด๊ฐ์ ๋ณด", "\n# ๋ธ๋ก๊ทธ ํ ์คํธ ์์ฑ ๊ท์น(๊ฑด๊ฐ์ ๋ณด)\n"),
    )
    for known_category, prompt in prompt_table:
        if category == known_category:
            return prompt
    return None
# Gradio interface setup: page heading rendered at the top of the Blocks UI.
title = "์นดํ ๊ณ ๋ฆฌ๋ณ ๋ธ๋ก๊ทธ ๊ธ ์์ฑ๊ธฐ(Play Ground)"
def update_prompts(category):
    """Return the (title, outline, blog-post) system prompts for *category*.

    Used as the ``category.change`` callback to refresh the three
    system-message textboxes.
    """
    return (
        get_title_prompt(category),
        get_outline_prompt(category),
        get_blog_post_prompt(category),
    )
# Build the Gradio UI. Components are created in display order, and each
# button's click handler is wired right after its inputs and outputs exist.
# NOTE(review): the source's indentation was lost; the nesting under each
# gr.Accordion (system message + three sliders) is reconstructed -- confirm.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    # Step 1: category selection.
    gr.Markdown("### 1๋จ๊ณ : ํฌ์คํ ์นดํ ๊ณ ๋ฆฌ๋ฅผ ์ง์ ํด์ฃผ์ธ์")
    category = gr.Radio(choices=["์ผ๋ฐ", "๊ฑด๊ฐ์ ๋ณด"], label="ํฌ์คํ ์นดํ ๊ณ ๋ฆฌ", value="์ผ๋ฐ")
    # Step 2: blog topic / keyword input.
    gr.Markdown("### 2๋จ๊ณ : ๋ธ๋ก๊ทธ ์ฃผ์ , ๋๋ ํค์๋๋ฅผ ์์ธํ ์ ๋ ฅํ์ธ์")
    topic = gr.Textbox(label="๋ธ๋ก๊ทธ ์ฃผ์ (์์: ์ค์ง์ด ๋ฌด์นจํ(X), ์ค์ง์ด ๋ฌด์นจํ ๋ ์ํผ(O))", placeholder="์์: ์ฌํ์ง ์ถ์ฒ(X), 8์ ๊ตญ๋ด ์ฌํ์ง ์ถ์ฒ(O)")
    # Step 3: reference textboxes, created hidden (visible=False) so they can
    # serve as outputs of the fetch buttons and inputs of the generators.
    references1 = gr.Textbox(label="์ฐธ๊ณ ๊ธ 1", placeholder="์ฐธ๊ณ ํ ๋ธ๋ก๊ทธ ํฌ์คํ ๊ธ์ ๋ณต์ฌํ์ฌ ๋ถ์ฌ๋ฃ์ผ์ธ์", lines=10, visible=False)
    references2 = gr.Textbox(label="์ฐธ๊ณ ๊ธ 2", placeholder="์ฐธ๊ณ ํ ๋ธ๋ก๊ทธ ํฌ์คํ ๊ธ์ ๋ณต์ฌํ์ฌ ๋ถ์ฌ๋ฃ์ผ์ธ์", lines=10, visible=False)
    references3 = gr.Textbox(label="์ฐธ๊ณ ๊ธ 3", placeholder="์ฐธ๊ณ ํ ๋ธ๋ก๊ทธ ํฌ์คํ ๊ธ์ ๋ณต์ฌํ์ฌ ๋ถ์ฌ๋ฃ์ผ์ธ์", lines=10, visible=False)
    # Output textbox that receives the status string from fetch_references.
    progress_output = gr.Textbox(label="์งํ ์ํฉ", lines=2, visible=True)
    # Button that fetches the references.
    fetch_references_btn = gr.Button("์ฐธ๊ณ ๊ธ ์์ฑํ๊ธฐ")
    fetch_references_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])
    # Second button wired to the same handler, to fetch the references again.
    refill_btn = gr.Button("์ฐธ๊ณ ๊ธ ๋ค์ ๋ฃ๊ธฐ")
    refill_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])
    # Step 5: title suggestion and blog title input.
    gr.Markdown("### 5๋จ๊ณ : ๋ธ๋ก๊ทธ ์ ๋ชฉ์ ์ ๋ ฅํ์ธ์")
    with gr.Accordion("์ ๋ชฉ ์ค์ ", open=True):
        title_system_message = gr.Textbox(label="์์คํ ๋ฉ์์ง", value=get_title_prompt("์ผ๋ฐ"), lines=15)
        title_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=5000, step=1000)
        title_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        title_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    title_btn = gr.Button("์ ๋ชฉ ์ถ์ฒํ๊ธฐ")
    title_suggestions = gr.Textbox(label="์ ๋ชฉ ์ถ์ฒ", lines=10)
    title_btn.click(fn=suggest_title, inputs=[category, topic, references1, references2, references3, title_system_message, title_max_tokens, title_temperature, title_top_p], outputs=[title_suggestions])
    gr.HTML("<span style='color: grey;'>[์ ๋ชฉ ์ถ์ฒ ํญ๋ชฉ์ ์ฐธ๊ณ ํ์ฌ ํ๋๋ฅผ ๋ณต์ฌํ์ฌ ์ฌ์ฉํ์ ๋ ๋ฉ๋๋ค.]</span>")
    blog_title = gr.Textbox(label="๋ธ๋ก๊ทธ ์ ๋ชฉ", placeholder="๋ธ๋ก๊ทธ ์ ๋ชฉ์ ์ ๋ ฅํด์ฃผ์ธ์")
    # Step 6: outline generation.
    gr.Markdown("### 6๋จ๊ณ : ์์๋ผ์ธ์ ์์ฑํด์ฃผ์ธ์")
    gr.HTML("<span style='color: grey;'>[์์๋ผ์ธ์์ ๋์จ ๊ฒฐ๊ณผ๋ฅผ ์์ ํด์ ์ฌ์ฉํด์ฃผ์ธ์]</span>")
    with gr.Accordion("์์๋ผ์ธ ์ค์ ", open=True):
        outline_system_message = gr.Textbox(label="์์คํ ๋ฉ์์ง", value=get_outline_prompt("์ผ๋ฐ"), lines=20)
        outline_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=6000, step=1000)
        outline_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        outline_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    outline_generate_btn = gr.Button("์์๋ผ์ธ ์์ฑํ๊ธฐ")
    outline_result = gr.Textbox(label="์์๋ผ์ธ ๊ฒฐ๊ณผ", lines=15)
    # outline_input, not outline_result, is what the blog-post generator reads.
    outline_input = gr.Textbox(label="์์ฑํ ์์๋ผ์ธ์ ์ ๋ ฅํด์ฃผ์ธ์", placeholder="์์ฑ๋ ์์๋ผ์ธ ๋ณต์ฌ, ์์ ํด์ ์ฌ์ฉํ์ธ์", lines=10)
    outline_generate_btn.click(fn=generate_outline, inputs=[category, topic, references1, references2, references3, blog_title, outline_system_message, outline_max_tokens, outline_temperature, outline_top_p], outputs=[outline_result])
    # Step 7: blog post generation.
    gr.Markdown("### 7๋จ๊ณ : ๊ธ ์์ฑํ๊ธฐ")
    gr.HTML("<span style='color: grey;'>[์์๋ผ์ธ ๋ณ ํ ์คํธ๋์ ์ ํ๊ณ ๊ธ ์์ฑํ๊ธฐ ๋ฒํผ์ ์ ํํด์ฃผ์ธ์]</span>")
    with gr.Accordion("๋ธ๋ก๊ทธ ๊ธ ์ค์ ", open=True):
        blog_system_message = gr.Textbox(label="์์คํ ๋ฉ์์ง", value=get_blog_post_prompt("์ผ๋ฐ"), lines=20)
        blog_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=12000, value=8000, step=1000)
        blog_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        blog_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    generate_btn = gr.Button("๋ธ๋ก๊ทธ ๊ธ ์์ฑํ๊ธฐ")
    output = gr.Textbox(label="์์ฑ๋ ๋ธ๋ก๊ทธ ๊ธ", lines=30)
    generate_btn.click(fn=generate_blog_post, inputs=[category, topic, references1, references2, references3, blog_title, outline_input, blog_system_message, blog_max_tokens, blog_temperature, blog_top_p], outputs=[output])
    # Changing the category swaps all three system-message textboxes.
    category.change(fn=update_prompts, inputs=category, outputs=[title_system_message, outline_system_message, blog_system_message])
demo.launch()