prasant.goswivt committed
Commit dabad06
1 Parent(s): fd334ba

added sentiment analysis

__pycache__/sentiment.cpython-310.pyc ADDED
Binary file (4.45 kB)
data/novel_list.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5af2f404aed4b4de777b9b2c0cb2fc1a744f9c95332f1a2b96ea4eb514d5a9aa
+ size 4886535
data/sentiment_analysis/Genius Seventh Prince_results.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74aafc6b657ba1e59d202e97046cd1699c102750e3fb02a60360bc9640ec6869
+ size 19
data/sentiment_analysis/Lord of the mysteries_results.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f18a626152311f0e9202975c342ddcfa9ecc343006ea7ea133b1fe3c708b235f
+ size 21735
data/sentiment_analysis/Mother of Learning_results.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:628f6afab594ad5d7a0f232c48f61c4d935dbea168fb6e166dc78ac680d31518
+ size 21848
data/sentiment_analysis/The Perfect Run_results.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c5a3c88f7c51bb3bb492cc7b10bfd9c4d670b30d8ae8a714319d71cf34b021ae
+ size 21740
data/similarity.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:96a69cd6b71b1342f594eabb095aab1b619c27d1004b4b34e2725ffbe838f1a1
+ size 1313383915
sentiment.py ADDED
@@ -0,0 +1,155 @@
+ from flask import Flask, request, render_template
+ import pickle
+ import os
+ import praw
+ import torch
+ from transformers import RobertaTokenizer, RobertaForSequenceClassification
+ import nltk
+ from nltk.stem.porter import PorterStemmer
+ from nltk.corpus import stopwords
+ import spacy
+ import string
+ import matplotlib.pyplot as plt
+ from wordcloud import WordCloud
+
+
+ def save_data(data, filename):
+     with open(filename, 'wb') as file:
+         pickle.dump(data, file)
+
+
+ def load_data(filename):
+     if os.path.exists(filename):
+         with open(filename, 'rb') as file:
+             return pickle.load(file)
+     else:
+         return None
+
+
+ # PRAW configs
+ REDDIT_CLIENT_ID = "lI0C_W9_eESoiS2mtUMNDg"
+ REDDIT_CLIENT_SECRET = "IK1Vn7s0EZGiNt6vMZ54sfT6pYvbHA"
+ REDDIT_USERNAME = "Tiger_in_the_Snow"
+
+ reddit = praw.Reddit(
+     client_id=REDDIT_CLIENT_ID,
+     client_secret=REDDIT_CLIENT_SECRET,
+     user_agent=f"script:sentiment-analysis:v0.0.1 (by {REDDIT_USERNAME})"
+ )
+
+ # NLP configs
+ stemmer = PorterStemmer()
+ nlp = spacy.load("en_core_web_sm")
+ nltk.download('punkt')
+ nltk.download('stopwords')  # needed by transform_text and the word-cloud stop-word list
+
+
+ # Model configs
+ tokenizer = RobertaTokenizer.from_pretrained('aychang/roberta-base-imdb')
+ model = RobertaForSequenceClassification.from_pretrained(
+     'aychang/roberta-base-imdb', num_labels=2)
+ model.classifier = torch.nn.Linear(768, 2)
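+ # Note: the line above swaps the model's fine-tuned classification head for a freshly
+ # initialized nn.Linear(768, 2), so the logits it emits are untrained and are produced
+ # per token position rather than once per sequence.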
+
+
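+ # get_sentiment caches each query's scores as a pickle on disk; on a cache hit it
+ # returns the stored percentages together with the previously rendered word-cloud image.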
+ def get_sentiment(query):
+     print(query)
+     filename = f"D:/projects/Recon/data/sentiment_analysis/{query}_results.pkl"
+     saved_data = load_data(filename)
+
+     if saved_data:
+         positive, negative, _ = saved_data
+         wordcloud = f'static/images/wordcloud/{query}_cloud.png'
+         return positive, negative, wordcloud
+     else:
+
+         results = get_reddit_results(query)
+         if not results:
+             error = "No results found for query"
+             return error
+
+         positive, negative, wordcloud = analyze_comments(
+             results, query=query)
+         print(f'positive:{positive}')
+         save_data((positive, negative, wordcloud), filename)
+         return positive, negative, f'static/images/wordcloud/{query}_cloud.png'
+
+
+ def get_reddit_results(query):
+     print(query)
+     sub = reddit.subreddit('noveltranslations+progressionfantasy')
+     results = sub.search(query, limit=1)
+
+     print(results)
+     return list(results)
+
+
+ def transform_text(text):
+     text = text.lower()
+     text = nltk.word_tokenize(text)
+     text = [i for i in text if i.isalnum()]
+     text = [i for i in text if i not in stopwords.words(
+         'english') and i not in string.punctuation]
+     text = [stemmer.stem(i) for i in text]
+     return ' '.join(text)
+
+
+ def tokenize(text):
+     doc = nlp(text)
+     return [token.text for token in doc]
+
+
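+ # analyze_comments pulls every comment from the matched submissions, cleans each one with
+ # transform_text, scores it with the RoBERTa model, and averages the positive/negative
+ # percentages; the raw comment text also feeds a word cloud for the query.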
+ def analyze_comments(results, query):
+     total_positive = 0
+     total_negative = 0
+     total_comments = 0
+     comments_for_cloud = []
+
+     for submission in results:
+         submission.comments.replace_more(limit=None)
+         all_comments = submission.comments.list()
+
+         for comment in all_comments:
+             comment_body = comment.body
+
+             text = transform_text(comment_body)
+             comments_for_cloud.append(comment_body)
+
+             if text:
+                 tokens = tokenize(text)
+
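+                 # tokens is a list of word strings, so the tokenizer below builds a batch
+                 # with one sequence per word rather than one sequence for the whole comment.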
+                 tokenized_input = tokenizer(
+                     tokens, return_tensors='pt', truncation=True, padding=True)
+
+                 outputs = model(**tokenized_input)
+
+                 probabilities = torch.softmax(outputs.logits, dim=-1)
+                 mean_probabilities = probabilities.mean(dim=1)
+
+                 positive_pct = mean_probabilities[0][1].item() * 100
+                 negative_pct = mean_probabilities[0][0].item() * 100
+
+                 total_positive += positive_pct
+                 total_negative += negative_pct
+                 total_comments += 1
+
+     if total_comments > 0:
+         avg_positive = total_positive / total_comments
+         avg_negative = total_negative / total_comments
+     else:
+         avg_positive = 0
+         avg_negative = 0
+
+     if total_comments > 0:
+         all_comments_string = ' '.join(comments_for_cloud)
+
+         wordcloud = WordCloud(width=400, height=400,
+                               background_color='white',
+                               max_words=30,
+                               stopwords=stopwords.words('english'),
+                               min_font_size=10).generate(all_comments_string)
+         # Save the WordCloud image as a static file
+         wordcloud.to_file(
+             f'D:/projects/Recon/static/images/wordcloud/{query}_cloud.png')
+     else:
+         wordcloud = None
+     print(f'positive:{avg_positive}')
+     return round(avg_positive), round(avg_negative), wordcloud
static/images/wordcloud/Lord of the mysteries_cloud.png ADDED
static/images/wordcloud/Mother of Learning_cloud.png ADDED
static/images/wordcloud/The Perfect Run_cloud.png ADDED
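
For reference, a minimal sketch of how the new get_sentiment function might be hooked into the
existing Flask app. The route name, query parameter, and sentiment.html template are assumptions
for illustration and are not part of this commit:

    from flask import Flask, render_template, request
    from sentiment import get_sentiment

    app = Flask(__name__)

    @app.route('/sentiment')
    def sentiment_page():
        # Assumed query-string parameter, e.g. /sentiment?query=Mother+of+Learning
        query = request.args.get('query', '')
        result = get_sentiment(query)
        if isinstance(result, tuple):
            positive, negative, wordcloud_path = result
            return render_template('sentiment.html', positive=positive,
                                   negative=negative, wordcloud=wordcloud_path)
        # get_sentiment returns a plain error string when the Reddit search finds nothing
        return result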