Sj8287 committed on
Commit
bd7db97
·
0 Parent(s):

Duplicate from Sj8287/Sentiment_Classification

Browse files
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ train_150k.txt filter=lfs diff=lfs merge=lfs -text
36
+ distilbert_model_weights.best.hdf5 filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9.16

WORKDIR /code

# Install dependencies first so this layer is reused when only app code changes
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the application directory in the user's home
WORKDIR $HOME/code

# Copy the current directory contents into the container at $HOME/code,
# setting the owner to the user. This is the only copy of the build context:
# the earlier un-chowned `COPY . ./code` duplicated everything (including the
# model weights) into a root-owned $HOME/code/code that nothing referenced.
COPY --chown=user . $HOME/code

EXPOSE 5000

CMD ["python", "/home/user/code/run.py"]
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Sentiment Classification
3
+ emoji: 🚀
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ app_port: 5000
10
+ duplicated_from: Sj8287/Sentiment_Classification
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
from flask import Flask, render_template

# Application object shared by the whole package.
app = Flask(__name__)

# Imported last, once `app` exists: app.routes itself does
# `from app import app` in order to register its view functions.
from app import routes
app/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (270 Bytes). View file
 
app/__pycache__/routes.cpython-39.pyc ADDED
Binary file (3.72 kB). View file
 
app/routes.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import render_template,redirect,url_for,flash,request
2
+ from wtforms.validators import ValidationError
3
+ from app import app
4
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
5
+ from keras.layers import Input, Dense, LSTM, GRU, Embedding
6
+ from keras.layers import Activation, Bidirectional, GlobalMaxPool1D, GlobalMaxPool2D, Dropout
7
+ from keras.models import Model
8
+ from keras.preprocessing import text, sequence
9
+ import transformers
10
+ from transformers import AutoTokenizer
11
+ from tokenizers import BertWordPieceTokenizer
12
+ from keras.initializers import Constant
13
+ import numpy as np
14
+ import re
15
+ import tensorflow as tf
16
+ import os
17
@app.route('/')
def home_page():
    """Render the index.html template for the site root."""
    return render_template('index.html')
20
+
21
+
22
# Tokenizer loaded by checkpoint name; it is not referenced again in the
# visible part of this module.
tokenizer = transformers.AutoTokenizer.from_pretrained("distilbert-base-uncased")

# WordPiece tokenizer built from the vocabulary file stored in the repository.
# This is the instance handed to fast_encode_sentence at prediction time.
# The path is relative, so it is resolved against the process's working
# directory.
fast_tokenizer = BertWordPieceTokenizer(
    'distilbert_base_uncased/vocab.txt',
    lowercase=True,
)
24
+
25
+
26
+
27
def fast_encode_sentence(text, tokenizer, maxlen=128):
    """Encode one sentence into a single-row array of token ids.

    Args:
        text: The sentence to encode.
        tokenizer: Object exposing ``enable_truncation(max_length=...)``,
            ``enable_padding(length=...)`` and ``encode(text)`` whose result
            has an ``ids`` attribute.
        maxlen: Value passed to both the truncation and the padding setting.

    Returns:
        A NumPy array containing exactly one row: the ``ids`` of the encoding.
    """
    # These calls reconfigure the tokenizer object that was passed in.
    tokenizer.enable_truncation(max_length=maxlen)
    tokenizer.enable_padding(length=maxlen)

    encoding = tokenizer.encode(text)
    # Wrap in a list so the result keeps a leading batch dimension of 1.
    return np.array([encoding.ids])
37
+
38
+
39
+
40
+
41
+
42
# Pretrained DistilBERT; only its first weight tensor is read below.
transformer_layer = transformers.TFDistilBertModel.from_pretrained('distilbert-base-uncased')

embedding_size = 128

# Input length of 128 matches the default maxlen of fast_encode_sentence.
inp = Input(shape=(128, ))

# NOTE(review): weights[0] is presumably the word-embedding table — confirm.
embedding_matrix = transformer_layer.weights[0].numpy()

# Frozen embedding layer initialised from that matrix.
x = Embedding(
    embedding_matrix.shape[0],
    embedding_matrix.shape[1],
    embeddings_initializer=Constant(embedding_matrix),
    trainable=False,
)(inp)
x = Bidirectional(
    LSTM(25, return_sequences=True, recurrent_regularizer='L1L2')
)(x)
x = GlobalMaxPool1D()(x)
x = Dropout(0.9)(x)
x = Dense(
    50,
    activation='relu',
    kernel_initializer='he_normal',
    kernel_regularizer="L1L2",
)(x)
x = Dropout(0.9)(x)
# Single sigmoid unit: one score per input row.
x = Dense(1, activation='sigmoid')(x)

model = Model(inputs=[inp], outputs=x)
# The layer stack above has to line up with the checkpoint being loaded here.
model.load_weights('distilbert_model_weights.best.hdf5')
57
+
58
+
59
# URL matcher, compiled once at import time instead of on every request.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)

# Contraction expansions, applied in this order to lower-cased text.
# NOTE(review): "didn't" and "haven't" were previously misspelled as "did't"
# and "have't" and therefore never expanded. If the training pipeline used
# the same misspelled patterns, confirm that expanding them at inference time
# is acceptable for the saved weights.
_CONTRACTIONS = (
    (r"i'm", "i am"),
    (r"he's", "he is"),
    (r"she's", "she is"),
    (r"that's", "that is"),
    (r"what's", "what is"),
    (r"where's", "where is"),
    (r"\'ll", " will"),
    (r"\'ve", " have"),
    (r"\'re", " are"),
    (r"\'d", " would"),
    (r"won't", "will not"),
    (r"don't", "do not"),
    (r"didn't", "did not"),
    (r"can't", "can not"),
    (r"it's", "it is"),
    (r"couldn't", "could not"),
    (r"haven't", "have not"),
)

# Removes @mentions, every character outside [0-9A-Za-z \t], scheme://
# links, a leading "rt", and "http" plus the character after it.
_NOISE_PATTERN = re.compile(
    r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?"
)

# Four or more repeats of the same character.
_REPEAT_PATTERN = re.compile(r'(.)\1{3,}')


def _normalize_text(text):
    """Lower-case *text*, drop URLs, expand contractions and strip noise."""
    text = text.lower()
    text = _URL_PATTERN.sub('', text)
    for pattern, replacement in _CONTRACTIONS:
        text = re.sub(pattern, replacement, text)
    # This pass already removes all punctuation, so the separate punctuation
    # substitution that used to follow it could never match and was dropped.
    text = _NOISE_PATTERN.sub("", text)
    # Collapse long character runs ("soooooo" -> "so").
    return _REPEAT_PATTERN.sub(r'\1', text)


def predict_on_sentence(model, text):
    """Clean one sentence, encode it and return the model's rounded score.

    Args:
        model: Object whose ``predict`` accepts the array produced by
            ``fast_encode_sentence``.
        text: Raw sentence as a string.

    Returns:
        The result of ``tf.squeeze(tf.round(model.predict(...)))`` for the
        encoded sentence.
    """
    cleaned = _normalize_text(text)
    encoded = fast_encode_sentence(cleaned, fast_tokenizer)
    prediction = model.predict(encoded)
    return tf.squeeze(tf.round(prediction))
88
+
89
+
90
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the submitted sentence and render the verdict on index.html.

    Reads the ``sentence`` form field, runs it through
    ``predict_on_sentence`` and renders ``index.html`` with a
    ``prediction_text`` message saying whether the comment is good or bad.
    A missing or blank field renders a prompt instead of being scored.
    """
    sentence = request.form.get("sentence") or ""
    if not sentence.strip():
        # Previously a missing field was converted with str() to the literal
        # text "None" and classified as if the user had typed it.
        return render_template(
            'index.html',
            prediction_text='Please enter a sentence to classify.',
        )

    final_result = predict_on_sentence(model, sentence)
    # A rounded score of 1 is reported as "good"; anything else as "bad".
    result = 'good' if final_result == 1 else 'bad'
    return render_template(
        'index.html',
        prediction_text='This is a {} comment'.format(result),
    )
99
+
app/templates/index.html ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
  <head>
    <!-- Required meta tags -->
    <meta charset="utf-8" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=1, shrink-to-fit=no"
    />
    <!-- Bootstrap CSS -->
    <link
      rel="stylesheet"
      href="https://cdn.jsdelivr.net/npm/bootstrap@4.5.3/dist/css/bootstrap.min.css"
      integrity="sha384-TX8t27EcRE3e/ihU7zmQxVncDAy5uIKz4rEkgIXeMed4M0jlfIDPvg6uqKI2xXr2"
      crossorigin="anonymous"
    />
    <title>Sentiment Classification</title>
    <!-- Page styles live in <head>; the heading rule is a sibling of the body
         rule rather than nested inside it. -->
    <style>
      body {
        background-color: #212121;
        color: white;
      }
      h1, h2, h3, h4, h5, h6 {
        font-family: 'Montserrat', sans-serif;
      }
    </style>
  </head>
  <body>
    <div class="login">
      <br><br><br>
      <center><h1>Sentiment Classification</h1></center>
      <br><br><br>
      <!-- Main Input For Receiving Query to our ML -->
      <!-- The form wraps both the input and the submit button -->
      <form action="{{ url_for('predict')}}" method="post">
        <div class="form-outline w-100">
          <center>
            <input
              type="text"
              name="sentence"
              placeholder="Insert your text"
              required="required"
            />
          </center>
        </div>
        <br>

        <center>
          <button type="submit" class="btn btn-primary btn-lg">
            Predict
          </button>
        </center>
      </form>

      <br />
      <br />
      <center>{{ prediction_text }}</center>
    </div>
    <!-- Future Content here -->

    <!-- Optional JavaScript -->
    <!-- jQuery first, then Popper.js, then Bootstrap JS -->
    <script src="https://kit.fontawesome.com/a076d05399.js"></script>
    <script
      src="https://code.jquery.com/jquery-3.5.1.slim.min.js"
      integrity="sha384-DfXdz2htPH0lsSSs5nCTpuj/zy4C+OGpamoFVy38MVBnE+IbbVYUew+OrCXaRkfj"
      crossorigin="anonymous"
    ></script>
    <script
      src="https://cdn.jsdelivr.net/npm/popper.js@1.16.1/dist/umd/popper.min.js"
      integrity="sha384-9/reFTGAW83EW2RDu2S0VKaIzap3H66lZH81PoYlFhbGU+6BZp6G7niu735Sk7lN"
      crossorigin="anonymous"
    ></script>
    <script
      src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/js/bootstrap.min.js"
      integrity="sha384-B4gt1jrGC7Jh4AgTPSdUtOBvfO8shuf57BaghqFfPlYxofvL8/KUEfYiJOMMV+rV"
      crossorigin="anonymous"
    ></script>
  </body>
</html>
distilbert_base_uncased/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
distilbert_base_uncased/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
distilbert_base_uncased/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": true,
4
+ "mask_token": "[MASK]",
5
+ "model_max_length": 512,
6
+ "name_or_path": "distilbert-base-uncased",
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "special_tokens_map_file": null,
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "DistilBertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
distilbert_base_uncased/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
distilbert_model_weights.best.hdf5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc6fc767307f67255cb77c5e42722c3465d189dc7ee004e0d6a67fbe1b4e1e7
3
+ size 94436392
requirements.txt ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ charset-normalizer==3.1.0
2
+ click==8.1.3
3
+ Flask==2.2.3
4
+ Flask-WTF==1.1.1
5
+ flatbuffers==23.3.3
6
+ fonttools==4.25.0
7
+ gast==0.4.0
8
+ google-auth==2.16.2
9
+ google-auth-oauthlib==0.4.6
10
+ grpcio==1.51.3
11
+ h5py==3.8.0
12
+ keras==2.10.0
13
+ Keras-Preprocessing==1.1.2
14
+ libclang==15.0.6.1
15
+ Markdown==3.4.1
16
+ matplotlib==3.7.1
17
+ numpy==1.24.2
18
+ oauthlib==3.2.2
19
+ opt-einsum==3.3.0
20
+ packaging==23.0
21
+ pandas==1.5.3
22
+ requests==2.28.2
23
+ requests-oauthlib==1.3.1
24
+ rsa==4.9
25
+ scikit-learn==1.2.2
26
+ scipy==1.10.1
27
+ tensorboard-data-server==0.6.1
28
+ tensorboard-plugin-wit==1.8.1
29
+ tensorflow==2.10.0
30
+ tensorflow-estimator==2.10.0
31
+ tensorflow-io-gcs-filesystem==0.31.0
32
+ termcolor==2.2.0
33
+ tokenizers
34
+ transformers
35
+ typing_extensions==4.5.0
36
+ WTForms==3.0.1
run.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
import os

from app import app

if __name__ == '__main__':
    # The server binds to every interface, so the interactive debugger must
    # not be on by default: it lets anyone who can reach the port execute
    # code. Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug, host='0.0.0.0', port=5000)
train_150k.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75d03b31023bcccf255ae7228992a1626ac01d29cb3ebb588344b8a0d0ea7ccc
3
+ size 11696248