Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import requests
|
3 |
+
from bs4 import BeautifulSoup
|
4 |
+
|
5 |
+
# Debug (logging) helper function
|
6 |
+
def debug_log(message: str) -> None:
    """Print a debug message to stdout with a ``[DEBUG]`` prefix.

    Args:
        message: Text to print after the prefix.
    """
    print(f"[DEBUG] {message}")
|
11 |
+
|
12 |
+
def scrape_naver_blog(url: str) -> str:
    """Fetch a Naver blog post and return its title and body as text.

    The page is downloaded with a browser-like User-Agent, parsed with
    BeautifulSoup, and the title / body are read from the
    ``.se-title-text`` and ``.se-main-container`` elements.

    Args:
        url: Address of the blog post to scrape.

    Returns:
        A string of the form ``"[제목]\\n<title>\\n\\n[본문]\\n<content>"``.
        If the HTTP status is not 200, or any exception is raised while
        fetching or parsing, a human-readable error message is returned
        instead (this function never raises to its caller).
    """
    # Local import so this block does not depend on a file-level import.
    from urllib.parse import urljoin

    # Without a timeout, requests waits indefinitely on a stalled server,
    # which would hang the UI callback.
    request_timeout = 10

    debug_log("scrape_naver_blog 함수 시작")
    debug_log(f"요청받은 URL: {url}")

    # Browser-like header; helps a little against crawler blocking.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/96.0.4664.110 Safari/537.36"
        )
    }

    try:
        response = requests.get(url, headers=headers, timeout=request_timeout)
        debug_log("HTTP GET 요청 완료")

        # Anything other than 200 is reported to the user as an error.
        if response.status_code != 200:
            debug_log(f"요청 실패, 상태코드: {response.status_code}")
            return f"오류가 발생했습니다. 상태코드: {response.status_code}"

        soup = BeautifulSoup(response.text, "html.parser")
        debug_log("HTML 파싱 완료")

        # NOTE(review): desktop blog.naver.com pages appear to wrap the
        # actual post in <iframe id="mainFrame">; when the body container
        # is missing, follow that frame once and parse it instead. If the
        # frame is absent or its fetch is not 200, the outer page is kept
        # and the "not found" placeholders below apply as before.
        if soup.select_one(".se-main-container") is None:
            frame = soup.select_one("iframe#mainFrame")
            frame_src = frame.get("src") if frame else None
            if frame_src:
                inner = requests.get(
                    urljoin(url, frame_src),
                    headers=headers,
                    timeout=request_timeout,
                )
                if inner.status_code == 200:
                    soup = BeautifulSoup(inner.text, "html.parser")

        # Title extraction
        title_div = soup.select_one('.se-module.se-module-text.se-title-text')
        title = title_div.get_text(strip=True) if title_div else "제목을 찾을 수 없습니다."
        debug_log(f"추출된 제목: {title}")

        # Body extraction; text nodes are joined with newlines.
        content_div = soup.select_one('.se-main-container')
        content = content_div.get_text("\n", strip=True) if content_div else "본문을 찾을 수 없습니다."
        debug_log("본문 추출 완료")

        # Combine title and body into the final result.
        result = f"[제목]\n{title}\n\n[본문]\n{content}"
        debug_log("제목과 본문을 합쳐 반환 준비 완료")

        return result

    except Exception as e:
        # UI-callback boundary: report the failure as text rather than
        # letting the exception propagate into the interface.
        debug_log(f"에러 발생: {str(e)}")
        return f"스크래핑 중 오류가 발생했습니다: {str(e)}"
|
61 |
+
|
62 |
+
|
63 |
+
# Gradio interface
|
64 |
+
def main_interface():
    """Build the Gradio interface that wraps ``scrape_naver_blog``.

    Input is a single-line textbox for the Naver blog link; output is a
    textbox showing the extracted title and body.

    Returns:
        The configured ``gr.Interface`` object (not yet launched).
    """
    # gr.Textbox replaces the legacy gr.inputs.Textbox / gr.outputs.Textbox
    # namespaces, which are deprecated in Gradio 3 and removed in Gradio 4.
    interface = gr.Interface(
        fn=scrape_naver_blog,
        inputs=gr.Textbox(
            lines=1,
            label="네이버 블로그 링크",
            placeholder="예: https://blog.naver.com/ssboost/222983068507",
        ),
        outputs=gr.Textbox(label="결과"),
        title="네이버 블로그 스크래퍼",
        description="네이버 블로그 링크를 입력하면 제목과 본문을 추출하여 표시합니다.",
    )
    return interface
|
79 |
+
|
80 |
+
if __name__ == "__main__":
    # Script entry point: build the interface and start the Gradio server.
    debug_log("Gradio 앱 실행 시작")
    demo = main_interface()
    demo.launch()
    # NOTE(review): launch() presumably blocks when run as a script, so this
    # line would only be reached after the server stops — confirm.
    debug_log("Gradio 앱 실행 종료")
|