Kims12 committed on
Commit
24b80cc
·
verified ·
1 Parent(s): 1415100

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
+ # 디버깅(로그)용 함수
6
def debug_log(message: str):
    """Write a debug line to standard output.

    Args:
        message: Text to emit; it is printed after a ``[DEBUG] `` prefix.
    """
    line = "[DEBUG] {}".format(message)
    print(line)
11
+
12
def scrape_naver_blog(url: str) -> str:
    """Download a Naver blog page and return its title and body as text.

    Args:
        url: Address of the blog post to fetch.

    Returns:
        A single string containing a title section followed by a body
        section. When the title or body element is not present in the
        page, a placeholder message is used for that section. When the
        HTTP status is not 200, or any exception is raised while fetching
        or parsing (including a request timeout), an error message string
        is returned instead; this function does not raise.
    """
    debug_log("scrape_naver_blog ํ•จ์ˆ˜ ์‹œ์ž‘")
    debug_log(f"์š”์ฒญ๋ฐ›์€ URL: {url}")

    # Browser-like User-Agent; helps somewhat against crawler blocking.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/96.0.4664.110 Safari/537.36"
        )
    }

    # Without a timeout, requests waits indefinitely on a stalled server,
    # which would hang the UI callback. Bound the wait (in seconds).
    request_timeout = 10

    try:
        response = requests.get(url, headers=headers, timeout=request_timeout)
        debug_log("HTTP GET ์š”์ฒญ ์™„๋ฃŒ")

        # Anything other than 200 is reported to the caller as an error.
        if response.status_code != 200:
            debug_log(f"์š”์ฒญ ์‹คํŒจ, ์ƒํƒœ์ฝ”๋“œ: {response.status_code}")
            return f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค. ์ƒํƒœ์ฝ”๋“œ: {response.status_code}"

        # Parse the HTML with the built-in parser.
        soup = BeautifulSoup(response.text, "html.parser")
        debug_log("HTML ํŒŒ์‹ฑ ์™„๋ฃŒ")

        # Title: first element matching the title-text module classes.
        title_div = soup.select_one('.se-module.se-module-text.se-title-text')
        title = title_div.get_text(strip=True) if title_div else "์ œ๋ชฉ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
        debug_log(f"์ถ”์ถœ๋œ ์ œ๋ชฉ: {title}")

        # Body: text of the main container, one text fragment per line.
        content_div = soup.select_one('.se-main-container')
        content = content_div.get_text("\n", strip=True) if content_div else "๋ณธ๋ฌธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
        debug_log("๋ณธ๋ฌธ ์ถ”์ถœ ์™„๋ฃŒ")

        # Combine both sections into the final result.
        result = f"[์ œ๋ชฉ]\n{title}\n\n[๋ณธ๋ฌธ]\n{content}"
        debug_log("์ œ๋ชฉ๊ณผ ๋ณธ๋ฌธ์„ ํ•ฉ์ณ ๋ฐ˜ํ™˜ ์ค€๋น„ ์™„๋ฃŒ")

        return result

    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        debug_log(f"์—๋Ÿฌ ๋ฐœ์ƒ: {str(e)}")
        return f"์Šคํฌ๋ž˜ํ•‘ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
61
+
62
+
63
+ # Gradio 인터페이스
64
def main_interface():
    """Build the Gradio interface that wraps ``scrape_naver_blog``.

    The interface takes one single-line text input (the blog link) and
    shows the scraper's returned string in one text output.

    Returns:
        The constructed ``gr.Interface`` object; it is not launched here.
    """
    # Use the top-level gr.Textbox component for both input and output.
    # The legacy gr.inputs / gr.outputs namespaces were deprecated in
    # Gradio 3 and are no longer available in Gradio 4.
    link_input = gr.Textbox(
        lines=1,
        label="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ",
        placeholder="์˜ˆ: https://blog.naver.com/ssboost/222983068507",
    )
    result_output = gr.Textbox(label="๊ฒฐ๊ณผ")

    interface = gr.Interface(
        fn=scrape_naver_blog,
        inputs=link_input,
        outputs=result_output,
        title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์Šคํฌ๋ž˜ํผ",
        description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด ์ œ๋ชฉ๊ณผ ๋ณธ๋ฌธ์„ ์ถ”์ถœํ•˜์—ฌ ํ‘œ์‹œํ•ฉ๋‹ˆ๋‹ค.",
    )
    return interface
79
+
80
if __name__ == "__main__":
    # Script entry point: log, build the interface, serve it, then log
    # again once launch() returns.
    debug_log("Gradio ์•ฑ ์‹คํ–‰ ์‹œ์ž‘")
    demo = main_interface()
    demo.launch()
    debug_log("Gradio ์•ฑ ์‹คํ–‰ ์ข…๋ฃŒ")