import logging
import os
import random
import re
import time
from urllib.parse import urlencode

import gradio as gr
import requests
from bs4 import BeautifulSoup
from huggingface_hub import InferenceClient
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
def setup_session():
    """Build a requests session that retries HTTPS requests.

    The retry policy allows up to 5 retries with a backoff factor of 1 for
    502/503/504 responses and is mounted for the ``https://`` prefix only.

    Returns:
        The configured session, or ``None`` if anything raised while
        building it.
    """
    try:
        retry_policy = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
        adapter = HTTPAdapter(max_retries=retry_policy)
        http = requests.Session()
        http.mount('https://', adapter)
    except Exception:
        return None
    return http
def generate_naver_search_url(query):
    """Return the Naver blog-tab search URL for ``query``.

    The query string is percent-encoded so that spaces, ``&``, ``=``, ``#``
    and non-ASCII text in ``query`` cannot break or alter the URL.

    Args:
        query: Search keywords as typed by the user.

    Returns:
        The full search URL as a string.
    """
    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    return base_url + urlencode(params)
def crawl_blog_content(url, session, timeout=10):
    """Fetch one blog post and return the text of its main container.

    Sleeps a random 1-2 seconds before the request, then looks for the
    ``div`` with class ``se-main-container`` in the response.

    Args:
        url: Address of the blog post to fetch.
        session: Object exposing ``get(url, headers=..., timeout=...)``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        The whitespace-normalised post text, or ``""`` when the request
        fails, the status is not 200, or the container is missing.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        # Add a random delay before each request
        time.sleep(random.uniform(1, 2))
        response = session.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            logging.getLogger(__name__).warning(
                "Blog request to %s returned status %s", url, response.status_code
            )
            return ""
        soup = BeautifulSoup(response.content, "html.parser")
        content = soup.find("div", attrs={'class': 'se-main-container'})
        if not content:
            return ""
        return clean_text(content.get_text())
    except Exception:
        # Best-effort crawl: record the failure instead of hiding it, then
        # fall back to an empty result as before.
        logging.getLogger(__name__).exception("Failed to crawl blog content from %s", url)
        return ""
def crawl_naver_search_results(url, session, timeout=10):
    """Collect up to 10 Naver blog links from a search result page.

    Walks ``li`` elements whose class matches ``bx.*``, then their
    ``detail_box`` / ``title_area`` divs, and records every anchor whose
    href contains ``blog.naver``. Each recorded link has its leading
    ``https://`` rewritten to ``https://m.``.

    Args:
        url: Search result page address.
        session: Object exposing ``get(url, headers=..., timeout=...)``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        A list of ``{"제목": title, "링크": link}`` dicts (at most 10), or
        ``[]`` when the request fails or the status is not 200.
    """
    max_results = 10
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://search.naver.com/search.naver",
    }
    try:
        response = session.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            logging.getLogger(__name__).warning(
                "Search request to %s returned status %s", url, response.status_code
            )
            return []
        soup = BeautifulSoup(response.content, "html.parser")
        results = []
        for li in soup.find_all("li", class_=re.compile("bx.*")):
            for div in li.find_all("div", class_="detail_box"):
                for title_area in div.find_all("div", class_="title_area"):
                    title = title_area.text.strip()
                    for a in title_area.find_all("a", href=True):
                        link = a["href"]
                        if "blog.naver" not in link:
                            continue
                        # Rewrite only the scheme prefix; a second "https://"
                        # embedded later in the link must stay untouched.
                        link = link.replace("https://", "https://m.", 1)
                        results.append({"제목": title, "링크": link})
                        if len(results) >= max_results:
                            return results
        return results
    except Exception:
        # Best-effort crawl: record the failure instead of hiding it, then
        # fall back to an empty result as before.
        logging.getLogger(__name__).exception("Failed to crawl search results from %s", url)
        return []
def clean_text(text):
    """Collapse every whitespace run to one space and trim both ends."""
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()
def create_client(model_name):
    """Create an ``InferenceClient`` for ``model_name``.

    The access token is read from the ``HF_TOKEN`` environment variable and
    is ``None`` when that variable is unset.
    """
    hf_token = os.getenv("HF_TOKEN")
    return InferenceClient(model_name, token=hf_token)
# Module-level inference client, created once at import time and used by
# call_api for every chat completion request.
client = create_client("CohereForAI/c4ai-command-r-plus")
def call_api(content, system_message, max_tokens, temperature, top_p):
    """Send one system + user exchange to the module-level client.

    A fresh random seed in ``[0, 1000000]`` is drawn for every call.

    Args:
        content: Text sent as the user message.
        system_message: Text sent as the system message.
        max_tokens: Forwarded to ``chat_completion``.
        temperature: Forwarded to ``chat_completion``.
        top_p: Forwarded to ``chat_completion``.

    Returns:
        The message content of the first returned choice.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    seed = random.randint(0, 1000000)
    completion = client.chat_completion(
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        seed=seed,
    )
    return completion.choices[0].message.content
def analyze_info(category, topic, references1, references2, references3):
    """Format the category, topic and three reference texts as one block.

    Returns:
        Five labelled lines joined by newlines, in the order category,
        topic, reference 1, reference 2, reference 3.
    """
    labelled_lines = (
        f"선택한 카테고리: {category}",
        f"블로그 주제: {topic}",
        f"참고 글1: {references1}",
        f"참고 글2: {references2}",
        f"참고 글3: {references3}",
    )
    return "\n".join(labelled_lines)
def suggest_title(category, topic, references1, references2, references3, system_message, max_tokens, temperature, top_p):
    """Ask the model for title suggestions.

    The user message is the block produced by ``analyze_info``; the
    generation settings are passed straight through to ``call_api``.

    Returns:
        The text returned by ``call_api``.
    """
    prompt_body = analyze_info(category, topic, references1, references2, references3)
    return call_api(prompt_body, system_message, max_tokens, temperature, top_p)
def generate_outline(category, topic, references1, references2, references3, title, system_message, max_tokens, temperature, top_p):
    """Ask the model for an outline of the post.

    The user message is the ``analyze_info`` block followed by a
    ``Title:`` line carrying ``title``.

    Returns:
        The text returned by ``call_api``.
    """
    summary = analyze_info(category, topic, references1, references2, references3)
    prompt_body = f"{summary}\nTitle: {title}"
    return call_api(prompt_body, system_message, max_tokens, temperature, top_p)
def generate_blog_post(category, topic, references1, references2, references3, title, outline, system_message, max_tokens, temperature, top_p):
    """Ask the model for the full blog post.

    The user message is the ``analyze_info`` block followed by ``Title:``
    and ``Outline:`` lines.

    Returns:
        The text returned by ``call_api`` with every newline doubled.
    """
    summary = analyze_info(category, topic, references1, references2, references3)
    prompt_body = f"{summary}\nTitle: {title}\nOutline: {outline}"
    generated = call_api(prompt_body, system_message, max_tokens, temperature, top_p)
    # Splitting on each newline and re-joining with two doubles every newline.
    return "\n\n".join(generated.split("\n"))
def fetch_references(topic):
    """Search Naver blogs for ``topic`` and return up to three reference texts.

    Args:
        topic: Search keywords for the blog search.

    Returns:
        A 4-tuple ``(status, reference1, reference2, reference3)``. Each
        reference is a string holding the post title and crawled content.
        When fewer than three search results are available the remaining
        references are empty strings; on session or search failure all
        three are empty and ``status`` describes the problem.
    """
    search_url = generate_naver_search_url(topic)
    session = setup_session()
    if session is None:
        return "Failed to set up session.", "", "", ""
    try:
        results = crawl_naver_search_results(search_url, session)
        if not results:
            return "No results found.", "", "", ""
        # Randomly pick up to 3 of the collected blogs. Capping the sample
        # size avoids the ValueError random.sample raises when the search
        # yields fewer than 3 results.
        selected_results = random.sample(results, min(3, len(results)))
        references = [
            f"제목: {result['제목']}\n내용: {crawl_blog_content(result['링크'], session)}"
            for result in selected_results
        ]
    finally:
        # Release pooled connections whether or not crawling succeeded.
        session.close()
    # Pad so the caller always receives exactly three reference slots.
    references += [""] * (3 - len(references))
    return ("참고글 생성 완료", *references)
def get_title_prompt(category):
    """Return the title-generation system prompt for ``category``.

    Returns ``None`` when ``category`` matches neither known category.
    """
    known_prompts = (
        ("일반", "\n# 블로그 제목 생성 규칙(일반)\n"),
        ("건강정보", "\n# 블로그 제목 생성 규칙(건강정보)\n"),
    )
    for name, prompt in known_prompts:
        if category == name:
            return prompt
    return None
def get_outline_prompt(category):
    """Return the outline (subtopic) system prompt for ``category``.

    Returns ``None`` when ``category`` matches neither known category.
    """
    known_prompts = (
        ("일반", "\n# 블로그 소주제(Subtopic) 생성 규칙(일반)\n"),
        ("건강정보", "\n# 블로그 소주제(Subtopic) 생성 규칙(건강정보)\n"),
    )
    for name, prompt in known_prompts:
        if category == name:
            return prompt
    return None
def get_blog_post_prompt(category):
    """Return the post-body system prompt for ``category``.

    Returns ``None`` when ``category`` matches neither known category.
    """
    known_prompts = (
        ("일반", "\n# 블로그 텍스트 생성 규칙(일반)\n"),
        ("건강정보", "\n# 블로그 텍스트 생성 규칙(건강정보)\n"),
    )
    for name, prompt in known_prompts:
        if category == name:
            return prompt
    return None
# Gradio interface setup
# Page heading text; rendered as a Markdown H1 at the top of the UI.
title = "카테고리별 블로그 글 생성기(Play Ground)"
def update_prompts(category):
    """Return the (title, outline, blog post) system prompts for ``category``."""
    return (
        get_title_prompt(category),
        get_outline_prompt(category),
        get_blog_post_prompt(category),
    )
# Build the whole UI inside one Blocks context; components are laid out in
# the order they are created, so statement order here is significant.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    # Step 1: choose the posting category
    gr.Markdown("### 1단계 : 포스팅 카테고리를 지정해주세요")
    category = gr.Radio(choices=["일반", "건강정보"], label="포스팅 카테고리", value="일반")
    # Step 2: enter the blog topic or keywords
    gr.Markdown("### 2단계 : 블로그 주제, 또는 키워드를 상세히 입력하세요")
    topic = gr.Textbox(label="블로그 주제(예시: 오징어 무침회(X), 오징어 무침회 레시피(O))", placeholder="예시: 여행지 추천(X), 8월 국내 여행지 추천(O)")
    # Step 3: define the reference-text boxes up front (created hidden; they
    # are filled by fetch_references and read by the generation handlers)
    references1 = gr.Textbox(label="참고 글 1", placeholder="참고할 블로그 포스팅글을 복사하여 붙여넣으세요", lines=10, visible=False)
    references2 = gr.Textbox(label="참고 글 2", placeholder="참고할 블로그 포스팅글을 복사하여 붙여넣으세요", lines=10, visible=False)
    references3 = gr.Textbox(label="참고 글 3", placeholder="참고할 블로그 포스팅글을 복사하여 붙여넣으세요", lines=10, visible=False)
    # Output textbox for showing progress
    progress_output = gr.Textbox(label="진행 상황", lines=2, visible=True)
    # Button that fetches the reference posts
    fetch_references_btn = gr.Button("참고글 생성하기")
    fetch_references_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])
    # Button that re-fetches the reference posts (wired to the same handler)
    refill_btn = gr.Button("참고글 다시 넣기")
    refill_btn.click(fn=fetch_references, inputs=[topic], outputs=[progress_output, references1, references2, references3])
    # Step 5: enter the blog title
    gr.Markdown("### 5단계 : 블로그 제목을 입력하세요")
    with gr.Accordion("제목 설정", open=True):
        title_system_message = gr.Textbox(label="시스템 메시지", value=get_title_prompt("일반"), lines=15)
        title_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=5000, step=1000)
        title_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        title_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    title_btn = gr.Button("제목 추천하기")
    title_suggestions = gr.Textbox(label="제목 추천", lines=10)
    title_btn.click(fn=suggest_title, inputs=[category, topic, references1, references2, references3, title_system_message, title_max_tokens, title_temperature, title_top_p], outputs=[title_suggestions])
    gr.HTML("[제목 추천 항목을 참고하여 하나를 복사하여 사용하셔도 됩니다.]")
    blog_title = gr.Textbox(label="블로그 제목", placeholder="블로그 제목을 입력해주세요")
    # Step 6: decide on the outline
    gr.Markdown("### 6단계 : 아웃라인을 작성해주세요")
    gr.HTML("[아웃라인에서 나온 결과를 수정해서 사용해주세요]")
    with gr.Accordion("아웃라인 설정", open=True):
        outline_system_message = gr.Textbox(label="시스템 메시지", value=get_outline_prompt("일반"), lines=20)
        outline_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=6000, step=1000)
        outline_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        outline_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    outline_generate_btn = gr.Button("아웃라인 생성하기")
    outline_result = gr.Textbox(label="아웃라인 결과", lines=15)
    # The generated outline is shown in outline_result; the separately
    # editable outline_input is what the post-generation handler reads.
    outline_input = gr.Textbox(label="작성할 아웃라인을 입력해주세요", placeholder="생성된 아웃라인 복사, 수정해서 사용하세요", lines=10)
    outline_generate_btn.click(fn=generate_outline, inputs=[category, topic, references1, references2, references3, blog_title, outline_system_message, outline_max_tokens, outline_temperature, outline_top_p], outputs=[outline_result])
    # Step 7: generate the post
    gr.Markdown("### 7단계 : 글 생성하기")
    gr.HTML("[아웃라인 별 텍스트량을 정하고 글 생성하기 버튼을 선택해주세요]")
    with gr.Accordion("블로그 글 설정", open=True):
        blog_system_message = gr.Textbox(label="시스템 메시지", value=get_blog_post_prompt("일반"), lines=20)
        blog_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=12000, value=8000, step=1000)
        blog_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        blog_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    generate_btn = gr.Button("블로그 글 생성하기")
    output = gr.Textbox(label="생성된 블로그 글", lines=30)
    generate_btn.click(fn=generate_blog_post, inputs=[category, topic, references1, references2, references3, blog_title, outline_input, blog_system_message, blog_max_tokens, blog_temperature, blog_top_p], outputs=[output])
    # Changing the category replaces all three system-message boxes with the
    # prompts for the newly selected category.
    category.change(fn=update_prompts, inputs=category, outputs=[title_system_message, outline_system_message, blog_system_message])
# Start the Gradio server.
demo.launch()