"""Gradio playground that drafts blog posts per category.

The app searches Naver's blog tab for a topic, scrapes up to three of the
returned Naver blog posts as reference material, and then asks a hosted chat
model (through ``huggingface_hub.InferenceClient``) for title suggestions, an
outline, and finally the blog text.
"""

import logging
import os
import random
import re
import time
from itertools import islice
from urllib.parse import urlencode

import gradio as gr
import requests
from bs4 import BeautifulSoup
from huggingface_hub import InferenceClient
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

logger = logging.getLogger(__name__)

NAVER_SEARCH_URL = "https://search.naver.com/search.naver?"

# Seconds to wait for a response before giving up, so a stalled server
# cannot block a UI handler forever.
REQUEST_TIMEOUT = 10

# Upper bound on the number of search hits collected from one result page.
MAX_SEARCH_RESULTS = 10

# Number of reference posts handed to the UI (it has three reference boxes).
REFERENCE_COUNT = 3

# Browser-like headers sent with every scraping request.
# NOTE(review): advertising "br" presumably requires a Brotli decoder to be
# installed for such responses to be decoded — confirm in the deployment.
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
    "Accept-Encoding": "gzip, deflate, br",
    "Connection": "keep-alive",
    "Referer": "https://search.naver.com/search.naver",
}


def setup_session():
    """Create a ``requests.Session`` that retries HTTPS requests.

    Requests are retried up to 5 times with back-off on 502/503/504.

    Returns:
        The configured session, or ``None`` if it could not be created.
    """
    try:
        session = requests.Session()
        retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
        session.mount("https://", HTTPAdapter(max_retries=retries))
    except Exception:
        logger.exception("Failed to set up the HTTP session")
        return None
    return session


def generate_naver_search_url(query):
    """Build the Naver blog-tab search URL for *query*.

    The parameters are percent-encoded so characters such as ``&``, ``#`` or
    ``=`` inside the query cannot break the query string.
    """
    params = {"ssc": "tab.blog.all", "sm": "tab_jum", "query": query}
    return NAVER_SEARCH_URL + urlencode(params)


def clean_text(text):
    """Collapse every run of whitespace in *text* to one space and trim it."""
    return re.sub(r"\s+", " ", text).strip()


def crawl_blog_content(url, session):
    """Fetch a blog page and return the cleaned text of its main container.

    Args:
        url: Address of the blog post.
        session: ``requests.Session`` used for the request.

    Returns:
        The whitespace-normalised text of the ``div.se-main-container``
        element, or ``""`` when the status is not 200, the element is
        missing, or any error occurs (the error is logged).
    """
    try:
        # Random 1-2 second pause before each request.
        time.sleep(random.uniform(1, 2))
        response = session.get(url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            return ""
        soup = BeautifulSoup(response.content, "html.parser")
        container = soup.find("div", attrs={"class": "se-main-container"})
        if container is None:
            return ""
        return clean_text(container.get_text())
    except Exception:
        logger.exception("Failed to crawl blog content from %s", url)
        return ""


def _to_mobile_blog_url(link):
    """Point a desktop Naver blog link at the ``m.`` host.

    Only the canonical desktop prefix is rewritten, and only once, so links
    that already use the mobile host are left untouched.
    """
    return link.replace("https://blog.naver.com", "https://m.blog.naver.com", 1)


def _iter_blog_entries(soup):
    """Yield ``{"제목": title, "링크": link}`` for each Naver blog link in *soup*.

    Walks ``li`` elements whose class matches ``bx.*``, then their
    ``div.detail_box`` / ``div.title_area`` descendants, in document order.
    """
    for item in soup.find_all("li", class_=re.compile("bx.*")):
        for box in item.find_all("div", class_="detail_box"):
            for title_area in box.find_all("div", class_="title_area"):
                title = title_area.text.strip()
                for anchor in title_area.find_all("a", href=True):
                    link = anchor["href"]
                    # NOTE(review): the collapsed source does not show whether
                    # non-Naver links were also collected; they are skipped
                    # here because crawl_blog_content() only looks for the
                    # "se-main-container" element — confirm.
                    if "blog.naver" not in link:
                        continue
                    yield {"제목": title, "링크": _to_mobile_blog_url(link)}


def crawl_naver_search_results(url, session):
    """Scrape a Naver search result page for blog posts.

    Args:
        url: Search URL, e.g. from ``generate_naver_search_url``.
        session: ``requests.Session`` used for the request.

    Returns:
        A list of at most ``MAX_SEARCH_RESULTS`` dicts with the keys ``"제목"``
        (title) and ``"링크"`` (link). The list is empty when the status is
        not 200 or any error occurs (the error is logged).
    """
    try:
        response = session.get(url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT)
        if response.status_code != 200:
            return []
        soup = BeautifulSoup(response.content, "html.parser")
        return list(islice(_iter_blog_entries(soup), MAX_SEARCH_RESULTS))
    except Exception:
        logger.exception("Failed to crawl search results from %s", url)
        return []


def create_client(model_name):
    """Return an ``InferenceClient`` for *model_name* using the ``HF_TOKEN`` env var."""
    return InferenceClient(model_name, token=os.getenv("HF_TOKEN"))


client = create_client("CohereForAI/c4ai-command-r-plus")


def call_api(content, system_message, max_tokens, temperature, top_p):
    """Send one system + user message pair to the chat model.

    A fresh random seed in ``[0, 1000000]`` is passed with every call.

    Returns:
        The message content of the first choice in the response.
    """
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    response = client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        seed=random.randint(0, 1000000),
    )
    return response.choices[0].message.content


def analyze_info(category, topic, references1, references2, references3):
    """Format the category, topic and three references into one prompt text."""
    return f"선택한 카테고리: {category}\n블로그 주제: {topic}\n참고 글1: {references1}\n참고 글2: {references2}\n참고 글3: {references3}"


def suggest_title(category, topic, references1, references2, references3, system_message, max_tokens, temperature, top_p):
    """Ask the model for title suggestions based on the topic and references."""
    prompt = analyze_info(category, topic, references1, references2, references3)
    return call_api(prompt, system_message, max_tokens, temperature, top_p)


def generate_outline(category, topic, references1, references2, references3, title, system_message, max_tokens, temperature, top_p):
    """Ask the model for an outline for the given title."""
    info = analyze_info(category, topic, references1, references2, references3)
    prompt = f"{info}\nTitle: {title}"
    return call_api(prompt, system_message, max_tokens, temperature, top_p)


def generate_blog_post(category, topic, references1, references2, references3, title, outline, system_message, max_tokens, temperature, top_p):
    """Ask the model for the blog text for the given title and outline.

    Returns:
        The model output with every newline doubled.
    """
    info = analyze_info(category, topic, references1, references2, references3)
    prompt = f"{info}\nTitle: {title}\nOutline: {outline}"
    generated = call_api(prompt, system_message, max_tokens, temperature, top_p)
    return generated.replace("\n", "\n\n")


def fetch_references(topic):
    """Search Naver for *topic* and scrape up to three random blog posts.

    Returns:
        A 4-tuple ``(status, reference1, reference2, reference3)``. On
        failure the status explains the problem and the references are empty
        strings. When fewer than three posts were found, the remaining
        reference slots are empty strings.
    """
    search_url = generate_naver_search_url(topic)
    session = setup_session()
    if session is None:
        return "Failed to set up session.", "", "", ""

    # The context manager closes the session's connections when done.
    with session:
        results = crawl_naver_search_results(search_url, session)
        if not results:
            return "No results found.", "", "", ""

        # Randomly pick references among the collected results;
        # random.sample() raises ValueError if asked for more than exist.
        picked = random.sample(results, min(REFERENCE_COUNT, len(results)))
        references = [
            f"제목: {entry['제목']}\n내용: {crawl_blog_content(entry['링크'], session)}"
            for entry in picked
        ]

    references += [""] * (REFERENCE_COUNT - len(references))
    return ("참고글 생성 완료", *references)


# System prompts per category.
# NOTE(review): the whitespace inside these literals is reproduced as it
# appears in the reviewed source; the originals may have contained newlines.
_TITLE_PROMPTS = {
    "일반": """ # 블로그 제목 생성 규칙(일반) """,
    "건강정보": """ # 블로그 제목 생성 규칙(건강정보) """,
}

_OUTLINE_PROMPTS = {
    "일반": """ # 블로그 소주제(Subtopic) 생성 규칙(일반) """,
    "건강정보": """ # 블로그 소주제(Subtopic) 생성 규칙(건강정보) """,
}

_BLOG_POST_PROMPTS = {
    "일반": """ # 블로그 텍스트 생성 규칙(일반) """,
    "건강정보": """ # 블로그 텍스트 생성 규칙(건강정보) """,
}


def get_title_prompt(category):
    """Return the title system prompt for *category*, or ``None`` if unknown."""
    return _TITLE_PROMPTS.get(category)


def get_outline_prompt(category):
    """Return the outline system prompt for *category*, or ``None`` if unknown."""
    return _OUTLINE_PROMPTS.get(category)


def get_blog_post_prompt(category):
    """Return the blog-text system prompt for *category*, or ``None`` if unknown."""
    return _BLOG_POST_PROMPTS.get(category)


# Gradio interface
title = "카테고리별 블로그 글 생성기(Play Ground)"


def update_prompts(category):
    """Return the (title, outline, blog post) system prompts for *category*."""
    return (
        get_title_prompt(category),
        get_outline_prompt(category),
        get_blog_post_prompt(category),
    )


# NOTE(review): the source's indentation was lost, so which components sit
# inside each accordion is reconstructed (system message + sliders) — confirm.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")

    # Step 1
    gr.Markdown("### 1단계 : 포스팅 카테고리를 지정해주세요")
    category = gr.Radio(choices=["일반", "건강정보"], label="포스팅 카테고리", value="일반")

    # Step 2
    gr.Markdown("### 2단계 : 블로그 주제, 또는 키워드를 상세히 입력하세요")
    topic = gr.Textbox(label="블로그 주제(예시: 오징어 무침회(X), 오징어 무침회 레시피(O))", placeholder="예시: 여행지 추천(X), 8월 국내 여행지 추천(O)")

    # Step 3: reference boxes, defined up front (hidden) so later steps can use them
    references1 = gr.Textbox(label="참고 글 1", placeholder="참고할 블로그 포스팅글을 복사하여 붙여넣으세요", lines=10, visible=False)
    references2 = gr.Textbox(label="참고 글 2", placeholder="참고할 블로그 포스팅글을 복사하여 붙여넣으세요", lines=10, visible=False)
    references3 = gr.Textbox(label="참고 글 3", placeholder="참고할 블로그 포스팅글을 복사하여 붙여넣으세요", lines=10, visible=False)

    # Output textbox showing progress
    progress_output = gr.Textbox(label="진행 상황", lines=2, visible=True)

    # Button: fetch references
    fetch_references_btn = gr.Button("참고글 생성하기")
    fetch_references_btn.click(
        fn=fetch_references,
        inputs=[topic],
        outputs=[progress_output, references1, references2, references3],
    )

    # Button: fetch references again
    refill_btn = gr.Button("참고글 다시 넣기")
    refill_btn.click(
        fn=fetch_references,
        inputs=[topic],
        outputs=[progress_output, references1, references2, references3],
    )

    # Step 5: blog title
    gr.Markdown("### 5단계 : 블로그 제목을 입력하세요")
    with gr.Accordion("제목 설정", open=True):
        title_system_message = gr.Textbox(label="시스템 메시지", value=get_title_prompt("일반"), lines=15)
        title_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=5000, step=1000)
        title_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        title_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    title_btn = gr.Button("제목 추천하기")
    title_suggestions = gr.Textbox(label="제목 추천", lines=10)
    title_btn.click(
        fn=suggest_title,
        inputs=[
            category,
            topic,
            references1,
            references2,
            references3,
            title_system_message,
            title_max_tokens,
            title_temperature,
            title_top_p,
        ],
        outputs=[title_suggestions],
    )
    gr.HTML("[제목 추천 항목을 참고하여 하나를 복사하여 사용하셔도 됩니다.]")
    blog_title = gr.Textbox(label="블로그 제목", placeholder="블로그 제목을 입력해주세요")

    # Step 6: outline
    gr.Markdown("### 6단계 : 아웃라인을 작성해주세요")
    gr.HTML("[아웃라인에서 나온 결과를 수정해서 사용해주세요]")
    with gr.Accordion("아웃라인 설정", open=True):
        outline_system_message = gr.Textbox(label="시스템 메시지", value=get_outline_prompt("일반"), lines=20)
        outline_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=8000, value=6000, step=1000)
        outline_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        outline_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    outline_generate_btn = gr.Button("아웃라인 생성하기")
    outline_result = gr.Textbox(label="아웃라인 결과", lines=15)
    outline_input = gr.Textbox(label="작성할 아웃라인을 입력해주세요", placeholder="생성된 아웃라인 복사, 수정해서 사용하세요", lines=10)
    outline_generate_btn.click(
        fn=generate_outline,
        inputs=[
            category,
            topic,
            references1,
            references2,
            references3,
            blog_title,
            outline_system_message,
            outline_max_tokens,
            outline_temperature,
            outline_top_p,
        ],
        outputs=[outline_result],
    )

    # Step 7: generate the post
    gr.Markdown("### 7단계 : 글 생성하기")
    gr.HTML("[아웃라인 별 텍스트량을 정하고 글 생성하기 버튼을 선택해주세요]")
    with gr.Accordion("블로그 글 설정", open=True):
        blog_system_message = gr.Textbox(label="시스템 메시지", value=get_blog_post_prompt("일반"), lines=20)
        blog_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=12000, value=8000, step=1000)
        blog_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        blog_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    generate_btn = gr.Button("블로그 글 생성하기")
    output = gr.Textbox(label="생성된 블로그 글", lines=30)
    generate_btn.click(
        fn=generate_blog_post,
        inputs=[
            category,
            topic,
            references1,
            references2,
            references3,
            blog_title,
            outline_input,
            blog_system_message,
            blog_max_tokens,
            blog_temperature,
            blog_top_p,
        ],
        outputs=[output],
    )

    # Swap all three system prompts whenever the category changes.
    category.change(
        fn=update_prompts,
        inputs=category,
        outputs=[title_system_message, outline_system_message, blog_system_message],
    )

demo.launch()