Spaces:
Runtime error
Runtime error
Susanna Anil
committed on
Commit
·
7688b91
1
Parent(s):
239c0f3
cli added
Browse files- Dockerfile +8 -0
- app.py +56 -0
- requirements.txt +4 -1
- scrape_load.ipynb +389 -21
Dockerfile
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.8.13-slim-buster
|
2 |
+
RUN mkdir -p /app
|
3 |
+
COPY . main.py /app/
|
4 |
+
WORKDIR /app
|
5 |
+
RUN pip install -r requirements.txt
|
6 |
+
EXPOSE 8080
|
7 |
+
CMD [ "main.py" ]
|
8 |
+
ENTRYPOINT [ "python" ]
|
app.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
Run 'python3 main.py' in terminal
|
3 |
+
Follow local URL
|
4 |
+
'''
|
5 |
+
|
6 |
+
import praw
|
7 |
+
import pandas as pd
|
8 |
+
from praw.models import MoreComments
|
9 |
+
from transformers import pipeline
|
10 |
+
from transformers import DistilBertTokenizerFast
|
11 |
+
import gradio as gr
|
12 |
+
import numpy as np
|
13 |
+
|
14 |
+
reddit= praw.Reddit(client_id="Q1w42RHhLq2fgwljAk_k-Q", # your client id
|
15 |
+
client_secret="enUJfFthiZRynGfPQtoK1nCxRer2Dw", # your client secret
|
16 |
+
usernme = "xl395", #profile username
|
17 |
+
password = "12xiao34quanAria!", #profile password
|
18 |
+
user_agent="706_post") # your user agent
|
19 |
+
|
20 |
+
|
21 |
+
classifier = pipeline("sentiment-analysis", model="michellejieli/NSFW_text_classifier")
|
22 |
+
|
23 |
+
#input_url = "https://www.reddit.com/r/europe/comments/r0hthg/sweden_is_taking_the_lead_to_persuade_the_rest_of/"
|
24 |
+
|
25 |
+
def extract_comments(input_url):
|
26 |
+
submission = reddit.submission(url=input_url)
|
27 |
+
#posts_dict = {"Post text":[],}
|
28 |
+
posts_dict = {"Post text":[], "class": []}
|
29 |
+
for top_level_comment in submission.comments:
|
30 |
+
if isinstance(top_level_comment, MoreComments):
|
31 |
+
continue
|
32 |
+
|
33 |
+
posts_dict["Post text"].append(top_level_comment.body)
|
34 |
+
posts_dict["class"].append(classifier(top_level_comment.body)[0]['label'])
|
35 |
+
df = pd.DataFrame(posts_dict)
|
36 |
+
percent_exp = np.count_nonzero(np.array(df["class"]) == 'NSFW') / df.shape[0]
|
37 |
+
output_msg = "Reddit page is contains no explicit content. Page is safe for users under 18."
|
38 |
+
if percent_exp > 0:
|
39 |
+
output_msg = f"Reddit page contains some explicit content. Users under the age of 18 should proceed with caution. \n{percent_exp * 100:.0f}% explicit."
|
40 |
+
if percent_exp > 0.5:
|
41 |
+
output_msg = f"Reddit page contains major explicit content. Users be wary. \n{percent_exp * 100:.0f}% explicit."
|
42 |
+
return output_msg
|
43 |
+
|
44 |
+
# use gradio to create a web interface that takes a Reddit page link and reports how much explicit content its comments contain
|
45 |
+
iface = gr.Interface(
|
46 |
+
fn=extract_comments,
|
47 |
+
inputs=gr.Textbox(
|
48 |
+
lines=2,
|
49 |
+
placeholder="Enter Reddit page link",
|
50 |
+
),
|
51 |
+
outputs="text",
|
52 |
+
)
|
53 |
+
|
54 |
+
|
55 |
+
if __name__ == "__main__":
|
56 |
+
iface.launch()
|
requirements.txt
CHANGED
@@ -5,4 +5,7 @@ uvicorn[standard]
|
|
5 |
pandas
|
6 |
black
|
7 |
transformers
|
8 |
-
praw
|
|
|
|
|
|
|
|
5 |
pandas
|
6 |
black
|
7 |
transformers
|
8 |
+
praw
|
9 |
+
numpy
|
10 |
+
gradio
|
11 |
+
altair
|
scrape_load.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
@@ -18,18 +18,9 @@
|
|
18 |
},
|
19 |
{
|
20 |
"cell_type": "code",
|
21 |
-
"execution_count":
|
22 |
"metadata": {},
|
23 |
-
"outputs": [
|
24 |
-
{
|
25 |
-
"ename": "SyntaxError",
|
26 |
-
"evalue": "expected ':' (3580341109.py, line 8)",
|
27 |
-
"output_type": "error",
|
28 |
-
"traceback": [
|
29 |
-
"\u001b[0;36m Cell \u001b[0;32mIn[35], line 8\u001b[0;36m\u001b[0m\n\u001b[0;31m def extract_comments(input_url)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m expected ':'\n"
|
30 |
-
]
|
31 |
-
}
|
32 |
-
],
|
33 |
"source": [
|
34 |
"from praw.models import MoreComments\n",
|
35 |
"from transformers import pipeline\n",
|
@@ -40,28 +31,405 @@
|
|
40 |
"\n",
|
41 |
"def extract_comments(input_url):\n",
|
42 |
" submission = reddit.submission(url=input_url)\n",
|
43 |
-
" posts_dict = {\"Post text\":[],}\n",
|
44 |
-
"
|
45 |
" for top_level_comment in submission.comments:\n",
|
46 |
" if isinstance(top_level_comment, MoreComments):\n",
|
47 |
" continue\n",
|
48 |
" \n",
|
49 |
" posts_dict[\"Post text\"].append(top_level_comment.body)\n",
|
50 |
-
"
|
51 |
" df = pd.DataFrame(posts_dict)\n",
|
52 |
" return df "
|
53 |
]
|
54 |
},
|
55 |
{
|
56 |
"cell_type": "code",
|
57 |
-
"execution_count":
|
58 |
"metadata": {},
|
59 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
"source": [
|
61 |
-
"
|
62 |
-
"\n"
|
63 |
-
"# find how many are NSFW - output message"
|
64 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
}
|
66 |
],
|
67 |
"metadata": {
|
@@ -80,7 +448,7 @@
|
|
80 |
"name": "python",
|
81 |
"nbconvert_exporter": "python",
|
82 |
"pygments_lexer": "ipython3",
|
83 |
-
"version": "3.10.4
|
84 |
},
|
85 |
"orig_nbformat": 4,
|
86 |
"vscode": {
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
|
|
18 |
},
|
19 |
{
|
20 |
"cell_type": "code",
|
21 |
+
"execution_count": 8,
|
22 |
"metadata": {},
|
23 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
"source": [
|
25 |
"from praw.models import MoreComments\n",
|
26 |
"from transformers import pipeline\n",
|
|
|
31 |
"\n",
|
32 |
"def extract_comments(input_url):\n",
|
33 |
" submission = reddit.submission(url=input_url)\n",
|
34 |
+
" #posts_dict = {\"Post text\":[],}\n",
|
35 |
+
" posts_dict = {\"Post text\":[], \"class\": []}\n",
|
36 |
" for top_level_comment in submission.comments:\n",
|
37 |
" if isinstance(top_level_comment, MoreComments):\n",
|
38 |
" continue\n",
|
39 |
" \n",
|
40 |
" posts_dict[\"Post text\"].append(top_level_comment.body)\n",
|
41 |
+
" posts_dict[\"class\"].append(classifier(top_level_comment.body)[0]['label'])\n",
|
42 |
" df = pd.DataFrame(posts_dict)\n",
|
43 |
" return df "
|
44 |
]
|
45 |
},
|
46 |
{
|
47 |
"cell_type": "code",
|
48 |
+
"execution_count": 9,
|
49 |
"metadata": {},
|
50 |
+
"outputs": [
|
51 |
+
{
|
52 |
+
"data": {
|
53 |
+
"text/html": [
|
54 |
+
"<div>\n",
|
55 |
+
"<style scoped>\n",
|
56 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
57 |
+
" vertical-align: middle;\n",
|
58 |
+
" }\n",
|
59 |
+
"\n",
|
60 |
+
" .dataframe tbody tr th {\n",
|
61 |
+
" vertical-align: top;\n",
|
62 |
+
" }\n",
|
63 |
+
"\n",
|
64 |
+
" .dataframe thead th {\n",
|
65 |
+
" text-align: right;\n",
|
66 |
+
" }\n",
|
67 |
+
"</style>\n",
|
68 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
69 |
+
" <thead>\n",
|
70 |
+
" <tr style=\"text-align: right;\">\n",
|
71 |
+
" <th></th>\n",
|
72 |
+
" <th>Post text</th>\n",
|
73 |
+
" <th>class</th>\n",
|
74 |
+
" </tr>\n",
|
75 |
+
" </thead>\n",
|
76 |
+
" <tbody>\n",
|
77 |
+
" <tr>\n",
|
78 |
+
" <th>0</th>\n",
|
79 |
+
" <td>Does… does that mean GPU’s might become afford...</td>\n",
|
80 |
+
" <td>SFW</td>\n",
|
81 |
+
" </tr>\n",
|
82 |
+
" <tr>\n",
|
83 |
+
" <th>1</th>\n",
|
84 |
+
" <td>But I use my mining rig for interior heating. ...</td>\n",
|
85 |
+
" <td>SFW</td>\n",
|
86 |
+
" </tr>\n",
|
87 |
+
" <tr>\n",
|
88 |
+
" <th>2</th>\n",
|
89 |
+
" <td>I approve.\\n\\nAlthough have they suggested how...</td>\n",
|
90 |
+
" <td>SFW</td>\n",
|
91 |
+
" </tr>\n",
|
92 |
+
" <tr>\n",
|
93 |
+
" <th>3</th>\n",
|
94 |
+
" <td>You can't ban cryptocurrencies. Just like you ...</td>\n",
|
95 |
+
" <td>SFW</td>\n",
|
96 |
+
" </tr>\n",
|
97 |
+
" <tr>\n",
|
98 |
+
" <th>4</th>\n",
|
99 |
+
" <td>But still no nuclear plants</td>\n",
|
100 |
+
" <td>SFW</td>\n",
|
101 |
+
" </tr>\n",
|
102 |
+
" <tr>\n",
|
103 |
+
" <th>5</th>\n",
|
104 |
+
" <td>https://twitter.com/ercwl/status/1457114531314...</td>\n",
|
105 |
+
" <td>NSFW</td>\n",
|
106 |
+
" </tr>\n",
|
107 |
+
" <tr>\n",
|
108 |
+
" <th>6</th>\n",
|
109 |
+
" <td>Yes, \"we\" are missing the goal. Maybe stop sub...</td>\n",
|
110 |
+
" <td>NSFW</td>\n",
|
111 |
+
" </tr>\n",
|
112 |
+
" <tr>\n",
|
113 |
+
" <th>7</th>\n",
|
114 |
+
" <td>here is something complete insane to think abo...</td>\n",
|
115 |
+
" <td>NSFW</td>\n",
|
116 |
+
" </tr>\n",
|
117 |
+
" <tr>\n",
|
118 |
+
" <th>8</th>\n",
|
119 |
+
" <td>Put a tax carbon on it, it will elimanate the ...</td>\n",
|
120 |
+
" <td>SFW</td>\n",
|
121 |
+
" </tr>\n",
|
122 |
+
" <tr>\n",
|
123 |
+
" <th>9</th>\n",
|
124 |
+
" <td>Banning crypto mining will only cause it to mo...</td>\n",
|
125 |
+
" <td>SFW</td>\n",
|
126 |
+
" </tr>\n",
|
127 |
+
" <tr>\n",
|
128 |
+
" <th>10</th>\n",
|
129 |
+
" <td>Can someone explain how this is possible?</td>\n",
|
130 |
+
" <td>SFW</td>\n",
|
131 |
+
" </tr>\n",
|
132 |
+
" <tr>\n",
|
133 |
+
" <th>11</th>\n",
|
134 |
+
" <td>The average knowledge on crypto in here is ver...</td>\n",
|
135 |
+
" <td>NSFW</td>\n",
|
136 |
+
" </tr>\n",
|
137 |
+
" <tr>\n",
|
138 |
+
" <th>12</th>\n",
|
139 |
+
" <td>Howbout u ban Facebook instead?</td>\n",
|
140 |
+
" <td>NSFW</td>\n",
|
141 |
+
" </tr>\n",
|
142 |
+
" <tr>\n",
|
143 |
+
" <th>13</th>\n",
|
144 |
+
" <td>Good. Blockchain is useful but so many despera...</td>\n",
|
145 |
+
" <td>NSFW</td>\n",
|
146 |
+
" </tr>\n",
|
147 |
+
" <tr>\n",
|
148 |
+
" <th>14</th>\n",
|
149 |
+
" <td>I don't particularly like crypto or it's fans,...</td>\n",
|
150 |
+
" <td>SFW</td>\n",
|
151 |
+
" </tr>\n",
|
152 |
+
" <tr>\n",
|
153 |
+
" <th>15</th>\n",
|
154 |
+
" <td>I was thinking of getting rid of my RTX 2070s,...</td>\n",
|
155 |
+
" <td>SFW</td>\n",
|
156 |
+
" </tr>\n",
|
157 |
+
" <tr>\n",
|
158 |
+
" <th>16</th>\n",
|
159 |
+
" <td>Total waste of time. As seen following the Chi...</td>\n",
|
160 |
+
" <td>SFW</td>\n",
|
161 |
+
" </tr>\n",
|
162 |
+
" <tr>\n",
|
163 |
+
" <th>17</th>\n",
|
164 |
+
" <td>It's actually shocking how uneducated people a...</td>\n",
|
165 |
+
" <td>NSFW</td>\n",
|
166 |
+
" </tr>\n",
|
167 |
+
" <tr>\n",
|
168 |
+
" <th>18</th>\n",
|
169 |
+
" <td>Totally doesn’t have anything to do with money...</td>\n",
|
170 |
+
" <td>NSFW</td>\n",
|
171 |
+
" </tr>\n",
|
172 |
+
" <tr>\n",
|
173 |
+
" <th>19</th>\n",
|
174 |
+
" <td>Sweden's biggest electric company Vattenfall s...</td>\n",
|
175 |
+
" <td>NSFW</td>\n",
|
176 |
+
" </tr>\n",
|
177 |
+
" <tr>\n",
|
178 |
+
" <th>20</th>\n",
|
179 |
+
" <td>BASED</td>\n",
|
180 |
+
" <td>SFW</td>\n",
|
181 |
+
" </tr>\n",
|
182 |
+
" <tr>\n",
|
183 |
+
" <th>21</th>\n",
|
184 |
+
" <td>Excellent news.\\n\\nIt consumes vast quantities...</td>\n",
|
185 |
+
" <td>NSFW</td>\n",
|
186 |
+
" </tr>\n",
|
187 |
+
" <tr>\n",
|
188 |
+
" <th>22</th>\n",
|
189 |
+
" <td>sweden want to fuck with danemark and his cryp...</td>\n",
|
190 |
+
" <td>NSFW</td>\n",
|
191 |
+
" </tr>\n",
|
192 |
+
" <tr>\n",
|
193 |
+
" <th>23</th>\n",
|
194 |
+
" <td>They should ban gold too, mining gold uses 10x...</td>\n",
|
195 |
+
" <td>NSFW</td>\n",
|
196 |
+
" </tr>\n",
|
197 |
+
" <tr>\n",
|
198 |
+
" <th>24</th>\n",
|
199 |
+
" <td>Great news. Cryptocurrencies are an embarrassi...</td>\n",
|
200 |
+
" <td>NSFW</td>\n",
|
201 |
+
" </tr>\n",
|
202 |
+
" <tr>\n",
|
203 |
+
" <th>25</th>\n",
|
204 |
+
" <td>This is BS, it doesn't make sense.</td>\n",
|
205 |
+
" <td>NSFW</td>\n",
|
206 |
+
" </tr>\n",
|
207 |
+
" <tr>\n",
|
208 |
+
" <th>26</th>\n",
|
209 |
+
" <td>I see no problem with this, the amount of ener...</td>\n",
|
210 |
+
" <td>SFW</td>\n",
|
211 |
+
" </tr>\n",
|
212 |
+
" <tr>\n",
|
213 |
+
" <th>27</th>\n",
|
214 |
+
" <td>[deleted]</td>\n",
|
215 |
+
" <td>SFW</td>\n",
|
216 |
+
" </tr>\n",
|
217 |
+
" <tr>\n",
|
218 |
+
" <th>28</th>\n",
|
219 |
+
" <td>\"I'm running my graphics card at full load so ...</td>\n",
|
220 |
+
" <td>SFW</td>\n",
|
221 |
+
" </tr>\n",
|
222 |
+
" <tr>\n",
|
223 |
+
" <th>29</th>\n",
|
224 |
+
" <td>The market is moving from POW to POS which is ...</td>\n",
|
225 |
+
" <td>SFW</td>\n",
|
226 |
+
" </tr>\n",
|
227 |
+
" <tr>\n",
|
228 |
+
" <th>30</th>\n",
|
229 |
+
" <td>And exactly how are they gonna do that?\\n\\nPir...</td>\n",
|
230 |
+
" <td>SFW</td>\n",
|
231 |
+
" </tr>\n",
|
232 |
+
" <tr>\n",
|
233 |
+
" <th>31</th>\n",
|
234 |
+
" <td>Yeah because that's what's the main cause of o...</td>\n",
|
235 |
+
" <td>SFW</td>\n",
|
236 |
+
" </tr>\n",
|
237 |
+
" <tr>\n",
|
238 |
+
" <th>32</th>\n",
|
239 |
+
" <td>Banning mining without banning transactions an...</td>\n",
|
240 |
+
" <td>NSFW</td>\n",
|
241 |
+
" </tr>\n",
|
242 |
+
" <tr>\n",
|
243 |
+
" <th>33</th>\n",
|
244 |
+
" <td>Crypto seems to have some side effects, who wo...</td>\n",
|
245 |
+
" <td>SFW</td>\n",
|
246 |
+
" </tr>\n",
|
247 |
+
" <tr>\n",
|
248 |
+
" <th>34</th>\n",
|
249 |
+
" <td>Crypto currency is the new tulips. So hot righ...</td>\n",
|
250 |
+
" <td>NSFW</td>\n",
|
251 |
+
" </tr>\n",
|
252 |
+
" <tr>\n",
|
253 |
+
" <th>35</th>\n",
|
254 |
+
" <td>Fuck yeah, ban them all! Easiest climate check...</td>\n",
|
255 |
+
" <td>NSFW</td>\n",
|
256 |
+
" </tr>\n",
|
257 |
+
" <tr>\n",
|
258 |
+
" <th>36</th>\n",
|
259 |
+
" <td>Ban fossil fuels instead. What a useless measu...</td>\n",
|
260 |
+
" <td>NSFW</td>\n",
|
261 |
+
" </tr>\n",
|
262 |
+
" <tr>\n",
|
263 |
+
" <th>37</th>\n",
|
264 |
+
" <td>Please no. I earn 200 euros a month from it.</td>\n",
|
265 |
+
" <td>SFW</td>\n",
|
266 |
+
" </tr>\n",
|
267 |
+
" <tr>\n",
|
268 |
+
" <th>38</th>\n",
|
269 |
+
" <td>Swedens state owned power company rejects this...</td>\n",
|
270 |
+
" <td>NSFW</td>\n",
|
271 |
+
" </tr>\n",
|
272 |
+
" <tr>\n",
|
273 |
+
" <th>39</th>\n",
|
274 |
+
" <td>Great not only we had Chinese crypto miners se...</td>\n",
|
275 |
+
" <td>SFW</td>\n",
|
276 |
+
" </tr>\n",
|
277 |
+
" <tr>\n",
|
278 |
+
" <th>40</th>\n",
|
279 |
+
" <td>Let's ban the currency that is somewhat indepe...</td>\n",
|
280 |
+
" <td>NSFW</td>\n",
|
281 |
+
" </tr>\n",
|
282 |
+
" <tr>\n",
|
283 |
+
" <th>41</th>\n",
|
284 |
+
" <td>Is that really that much of a problem with ene...</td>\n",
|
285 |
+
" <td>NSFW</td>\n",
|
286 |
+
" </tr>\n",
|
287 |
+
" <tr>\n",
|
288 |
+
" <th>42</th>\n",
|
289 |
+
" <td>How about instead of banning it, we make it so...</td>\n",
|
290 |
+
" <td>SFW</td>\n",
|
291 |
+
" </tr>\n",
|
292 |
+
" <tr>\n",
|
293 |
+
" <th>43</th>\n",
|
294 |
+
" <td>The electric footprint of crypto won't go up. ...</td>\n",
|
295 |
+
" <td>SFW</td>\n",
|
296 |
+
" </tr>\n",
|
297 |
+
" <tr>\n",
|
298 |
+
" <th>44</th>\n",
|
299 |
+
" <td>Lets make playing a demanding game more than t...</td>\n",
|
300 |
+
" <td>NSFW</td>\n",
|
301 |
+
" </tr>\n",
|
302 |
+
" <tr>\n",
|
303 |
+
" <th>45</th>\n",
|
304 |
+
" <td>How does cryptocurrency change the climate? I ...</td>\n",
|
305 |
+
" <td>SFW</td>\n",
|
306 |
+
" </tr>\n",
|
307 |
+
" <tr>\n",
|
308 |
+
" <th>46</th>\n",
|
309 |
+
" <td>Load ze Sweden FUD</td>\n",
|
310 |
+
" <td>SFW</td>\n",
|
311 |
+
" </tr>\n",
|
312 |
+
" <tr>\n",
|
313 |
+
" <th>47</th>\n",
|
314 |
+
" <td>[removed]</td>\n",
|
315 |
+
" <td>NSFW</td>\n",
|
316 |
+
" </tr>\n",
|
317 |
+
" <tr>\n",
|
318 |
+
" <th>48</th>\n",
|
319 |
+
" <td>Such pathetic virtue-signaling.\\n\\nIts no diff...</td>\n",
|
320 |
+
" <td>NSFW</td>\n",
|
321 |
+
" </tr>\n",
|
322 |
+
" <tr>\n",
|
323 |
+
" <th>49</th>\n",
|
324 |
+
" <td>shame on sweden</td>\n",
|
325 |
+
" <td>NSFW</td>\n",
|
326 |
+
" </tr>\n",
|
327 |
+
" <tr>\n",
|
328 |
+
" <th>50</th>\n",
|
329 |
+
" <td>-And now, NEWS FOR DUMBASSES, Bob?\\n\\n-Yes Bri...</td>\n",
|
330 |
+
" <td>NSFW</td>\n",
|
331 |
+
" </tr>\n",
|
332 |
+
" <tr>\n",
|
333 |
+
" <th>51</th>\n",
|
334 |
+
" <td>dumb</td>\n",
|
335 |
+
" <td>NSFW</td>\n",
|
336 |
+
" </tr>\n",
|
337 |
+
" <tr>\n",
|
338 |
+
" <th>52</th>\n",
|
339 |
+
" <td>Wow, Sweden is such a dictatorial shithole cou...</td>\n",
|
340 |
+
" <td>NSFW</td>\n",
|
341 |
+
" </tr>\n",
|
342 |
+
" <tr>\n",
|
343 |
+
" <th>53</th>\n",
|
344 |
+
" <td>talking about that, a full ban might be unlike...</td>\n",
|
345 |
+
" <td>NSFW</td>\n",
|
346 |
+
" </tr>\n",
|
347 |
+
" </tbody>\n",
|
348 |
+
"</table>\n",
|
349 |
+
"</div>"
|
350 |
+
],
|
351 |
+
"text/plain": [
|
352 |
+
" Post text class\n",
|
353 |
+
"0 Does… does that mean GPU’s might become afford... SFW\n",
|
354 |
+
"1 But I use my mining rig for interior heating. ... SFW\n",
|
355 |
+
"2 I approve.\\n\\nAlthough have they suggested how... SFW\n",
|
356 |
+
"3 You can't ban cryptocurrencies. Just like you ... SFW\n",
|
357 |
+
"4 But still no nuclear plants SFW\n",
|
358 |
+
"5 https://twitter.com/ercwl/status/1457114531314... NSFW\n",
|
359 |
+
"6 Yes, \"we\" are missing the goal. Maybe stop sub... NSFW\n",
|
360 |
+
"7 here is something complete insane to think abo... NSFW\n",
|
361 |
+
"8 Put a tax carbon on it, it will elimanate the ... SFW\n",
|
362 |
+
"9 Banning crypto mining will only cause it to mo... SFW\n",
|
363 |
+
"10 Can someone explain how this is possible? SFW\n",
|
364 |
+
"11 The average knowledge on crypto in here is ver... NSFW\n",
|
365 |
+
"12 Howbout u ban Facebook instead? NSFW\n",
|
366 |
+
"13 Good. Blockchain is useful but so many despera... NSFW\n",
|
367 |
+
"14 I don't particularly like crypto or it's fans,... SFW\n",
|
368 |
+
"15 I was thinking of getting rid of my RTX 2070s,... SFW\n",
|
369 |
+
"16 Total waste of time. As seen following the Chi... SFW\n",
|
370 |
+
"17 It's actually shocking how uneducated people a... NSFW\n",
|
371 |
+
"18 Totally doesn’t have anything to do with money... NSFW\n",
|
372 |
+
"19 Sweden's biggest electric company Vattenfall s... NSFW\n",
|
373 |
+
"20 BASED SFW\n",
|
374 |
+
"21 Excellent news.\\n\\nIt consumes vast quantities... NSFW\n",
|
375 |
+
"22 sweden want to fuck with danemark and his cryp... NSFW\n",
|
376 |
+
"23 They should ban gold too, mining gold uses 10x... NSFW\n",
|
377 |
+
"24 Great news. Cryptocurrencies are an embarrassi... NSFW\n",
|
378 |
+
"25 This is BS, it doesn't make sense. NSFW\n",
|
379 |
+
"26 I see no problem with this, the amount of ener... SFW\n",
|
380 |
+
"27 [deleted] SFW\n",
|
381 |
+
"28 \"I'm running my graphics card at full load so ... SFW\n",
|
382 |
+
"29 The market is moving from POW to POS which is ... SFW\n",
|
383 |
+
"30 And exactly how are they gonna do that?\\n\\nPir... SFW\n",
|
384 |
+
"31 Yeah because that's what's the main cause of o... SFW\n",
|
385 |
+
"32 Banning mining without banning transactions an... NSFW\n",
|
386 |
+
"33 Crypto seems to have some side effects, who wo... SFW\n",
|
387 |
+
"34 Crypto currency is the new tulips. So hot righ... NSFW\n",
|
388 |
+
"35 Fuck yeah, ban them all! Easiest climate check... NSFW\n",
|
389 |
+
"36 Ban fossil fuels instead. What a useless measu... NSFW\n",
|
390 |
+
"37 Please no. I earn 200 euros a month from it. SFW\n",
|
391 |
+
"38 Swedens state owned power company rejects this... NSFW\n",
|
392 |
+
"39 Great not only we had Chinese crypto miners se... SFW\n",
|
393 |
+
"40 Let's ban the currency that is somewhat indepe... NSFW\n",
|
394 |
+
"41 Is that really that much of a problem with ene... NSFW\n",
|
395 |
+
"42 How about instead of banning it, we make it so... SFW\n",
|
396 |
+
"43 The electric footprint of crypto won't go up. ... SFW\n",
|
397 |
+
"44 Lets make playing a demanding game more than t... NSFW\n",
|
398 |
+
"45 How does cryptocurrency change the climate? I ... SFW\n",
|
399 |
+
"46 Load ze Sweden FUD SFW\n",
|
400 |
+
"47 [removed] NSFW\n",
|
401 |
+
"48 Such pathetic virtue-signaling.\\n\\nIts no diff... NSFW\n",
|
402 |
+
"49 shame on sweden NSFW\n",
|
403 |
+
"50 -And now, NEWS FOR DUMBASSES, Bob?\\n\\n-Yes Bri... NSFW\n",
|
404 |
+
"51 dumb NSFW\n",
|
405 |
+
"52 Wow, Sweden is such a dictatorial shithole cou... NSFW\n",
|
406 |
+
"53 talking about that, a full ban might be unlike... NSFW"
|
407 |
+
]
|
408 |
+
},
|
409 |
+
"execution_count": 9,
|
410 |
+
"metadata": {},
|
411 |
+
"output_type": "execute_result"
|
412 |
+
},
|
413 |
+
{
|
414 |
+
"ename": "",
|
415 |
+
"evalue": "",
|
416 |
+
"output_type": "error",
|
417 |
+
"traceback": [
|
418 |
+
"\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
|
419 |
+
]
|
420 |
+
}
|
421 |
+
],
|
422 |
"source": [
|
423 |
+
"extract_comments(input_url)\n",
|
424 |
+
"\n"
|
|
|
425 |
]
|
426 |
+
},
|
427 |
+
{
|
428 |
+
"cell_type": "code",
|
429 |
+
"execution_count": null,
|
430 |
+
"metadata": {},
|
431 |
+
"outputs": [],
|
432 |
+
"source": []
|
433 |
}
|
434 |
],
|
435 |
"metadata": {
|
|
|
448 |
"name": "python",
|
449 |
"nbconvert_exporter": "python",
|
450 |
"pygments_lexer": "ipython3",
|
451 |
+
"version": "3.10.4"
|
452 |
},
|
453 |
"orig_nbformat": 4,
|
454 |
"vscode": {
|