Update app.py
Browse files
app.py
CHANGED
@@ -27,14 +27,12 @@ def parse_comments(response: Response) -> Dict:
|
|
27 |
try:
|
28 |
data = json.loads(response.text)
|
29 |
except json.JSONDecodeError:
|
30 |
-
log.error(f"Failed to parse JSON response: {response.text}")
|
31 |
return {"comments": [], "total_comments": 0}
|
32 |
|
33 |
comments_data = data.get("comments", [])
|
34 |
total_comments = data.get("total", 0)
|
35 |
|
36 |
if not comments_data:
|
37 |
-
log.warning(f"No comments found in response: {response.text}")
|
38 |
return {"comments": [], "total_comments": total_comments}
|
39 |
|
40 |
parsed_comments = []
|
@@ -59,19 +57,16 @@ async def scrape_comments(post_id: int, comments_count: int = 20, max_comments:
|
|
59 |
}
|
60 |
return base_url + urlencode(params)
|
61 |
|
62 |
-
log.info(f"Scraping comments from post ID: {post_id}")
|
63 |
first_page = await client.get(form_api_url(0))
|
64 |
data = parse_comments(first_page)
|
65 |
comments_data = data["comments"]
|
66 |
total_comments = data["total_comments"]
|
67 |
|
68 |
if not comments_data:
|
69 |
-
log.warning(f"No comments found for post ID {post_id}")
|
70 |
return []
|
71 |
if max_comments and max_comments < total_comments:
|
72 |
total_comments = max_comments
|
73 |
|
74 |
-
log.info(f"Scraping comments pagination, remaining {total_comments // comments_count - 1} more pages")
|
75 |
_other_pages = [
|
76 |
client.get(form_api_url(cursor=cursor))
|
77 |
for cursor in range(comments_count, total_comments + comments_count, comments_count)
|
@@ -86,8 +81,6 @@ async def scrape_comments(post_id: int, comments_count: int = 20, max_comments:
|
|
86 |
if max_comments and len(comments_data) >= max_comments:
|
87 |
comments_data = comments_data[:max_comments]
|
88 |
break
|
89 |
-
|
90 |
-
log.success(f"Scraped {len(comments_data)} comments from post ID {post_id}")
|
91 |
return comments_data
|
92 |
|
93 |
class SentimentClassifier(nn.Module):
|
|
|
27 |
try:
|
28 |
data = json.loads(response.text)
|
29 |
except json.JSONDecodeError:
|
|
|
30 |
return {"comments": [], "total_comments": 0}
|
31 |
|
32 |
comments_data = data.get("comments", [])
|
33 |
total_comments = data.get("total", 0)
|
34 |
|
35 |
if not comments_data:
|
|
|
36 |
return {"comments": [], "total_comments": total_comments}
|
37 |
|
38 |
parsed_comments = []
|
|
|
57 |
}
|
58 |
return base_url + urlencode(params)
|
59 |
|
|
|
60 |
first_page = await client.get(form_api_url(0))
|
61 |
data = parse_comments(first_page)
|
62 |
comments_data = data["comments"]
|
63 |
total_comments = data["total_comments"]
|
64 |
|
65 |
if not comments_data:
|
|
|
66 |
return []
|
67 |
if max_comments and max_comments < total_comments:
|
68 |
total_comments = max_comments
|
69 |
|
|
|
70 |
_other_pages = [
|
71 |
client.get(form_api_url(cursor=cursor))
|
72 |
for cursor in range(comments_count, total_comments + comments_count, comments_count)
|
|
|
81 |
if max_comments and len(comments_data) >= max_comments:
|
82 |
comments_data = comments_data[:max_comments]
|
83 |
break
|
|
|
|
|
84 |
return comments_data
|
85 |
|
86 |
class SentimentClassifier(nn.Module):
|