jeffreykthomas committed on
Commit
7029b6b
·
1 Parent(s): 2f62cb3

Add app.py and weights

Browse files
Files changed (7) hide show
  1. app.py +92 -0
  2. bert_classifier.h5 +3 -0
  3. countvect.pkl +3 -0
  4. logistic_model.pkl +3 -0
  5. lstm_model.h5 +3 -0
  6. tokenizer.pkl +3 -0
  7. tv_layer.pkl +3 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tensorflow as tf
3
+ import re
4
+ from tensorflow import keras
5
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
6
+ from tensorflow.keras.layers import TextVectorization
7
+ import pickle
8
+ import os
9
+
10
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
11
+
12
+
13
def custom_standardization(input_data):
    """Lowercase text, replace ``<br />`` with a space and strip punctuation.

    Args:
        input_data: String tensor (anything ``tf.strings.lower`` accepts).

    Returns:
        The standardized string tensor.
    """
    lowercase = tf.strings.lower(input_data)
    stripped_html = tf.strings.regex_replace(lowercase, "<br />", " ")
    # Raw string: the previous "\^" was an invalid escape sequence in a normal
    # string literal. The character set is unchanged (it still contains a
    # backslash followed by a caret).
    # NOTE(review): the set omits the double quote and square brackets; left
    # as-is because the saved vectorizer was presumably built with exactly
    # this set -- confirm before changing.
    punctuation = r"!#$%&'()*+,-./:;<=>?@\^_`{|}~"
    return tf.strings.regex_replace(
        stripped_html, "[%s]" % re.escape(punctuation), ""
    )
19
+
20
+
21
def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file."""
    # NOTE: pickle can execute arbitrary code while loading; these files are
    # expected to be the trusted artifacts shipped alongside this script.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Preprocessing objects used by get_lr_results / get_lstm_results.
count_vect = _load_pickle('countvect.pkl')
tokenizer = _load_pickle('tokenizer.pkl')

# tv_layer.pkl holds a dict with the layer's 'config' and 'weights', from
# which the TextVectorization layer is rebuilt.
from_disk = _load_pickle('tv_layer.pkl')
text_vectorization = TextVectorization.from_config(from_disk['config'])
text_vectorization.set_weights(from_disk['weights'])

# The three classifiers whose outputs are reported by decide().
lr_model = _load_pickle('logistic_model.pkl')
lstm_model = keras.models.load_model('lstm_model.h5')
bert_classifier_model = keras.models.load_model('bert_classifier.h5')
31
+
32
+
33
def get_bert_end_to_end(model):
    """Wrap *model* so that it accepts raw strings.

    The returned Keras model feeds a string input through the module-level
    ``text_vectorization`` layer and passes the result to *model*. It is
    compiled with Adam (learning rate 0.001), binary cross-entropy loss and
    an accuracy metric.

    Args:
        model: Keras model that is called on the output of
            ``text_vectorization``.

    Returns:
        The compiled wrapper model, named ``"end_to_end_model"``.
    """
    raw_text = keras.Input(shape=(1,), dtype="string")
    predictions = model(text_vectorization(raw_text))
    wrapped = keras.Model(raw_text, predictions, name="end_to_end_model")
    wrapped.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return wrapped


# String-in wrapper around the loaded classifier, used by get_bert_results.
bert_end_model = get_bert_end_to_end(bert_classifier_model)
46
+
47
+
48
def get_lr_results(text):
    """Return ``lr_model``'s prediction for a single piece of text.

    The text is converted with ``count_vect`` before being passed to the
    model; the first (and only) prediction is returned.
    """
    features = count_vect.transform([text])
    predictions = lr_model.predict(features)
    return predictions[0]
51
+
52
+
53
def get_lstm_results(text):
    """Return ``lstm_model``'s output for a single piece of text.

    The text is converted to a token sequence with ``tokenizer``, padded to
    a length of 200, and fed to the model; the first value of the first
    output row is returned.
    """
    sequences = tokenizer.texts_to_sequences([text])
    model_input = pad_sequences(sequences, maxlen=200)
    scores = lstm_model.predict(model_input)
    return scores[0][0]
57
+
58
+
59
def get_bert_results(text):
    """Return ``bert_end_model``'s output for a single piece of raw text.

    The first value of the first output row is returned.
    """
    scores = bert_end_model.predict([text])
    return scores[0][0]
61
+
62
+
63
def decide(text):
    """Classify a review and report the output of all three models.

    Only the LSTM output decides the label: ``>= 0.6`` is "Positive",
    ``<= 0.4`` is "Negative", anything in between is "Neutral". The LR and
    BERT outputs are included in the message for reference only.

    Args:
        text: The review text to analyse.

    Returns:
        A string such as
        ``"Positive review (LR: 1, LSTM: 0.93, BERT: 0.88)"``, with each
        model output rounded to two decimals.
    """
    lr_result = get_lr_results(text)
    lstm_result = get_lstm_results(text)
    bert_result = get_bert_results(text)

    if lstm_result >= 0.6:
        label = "Positive"
    elif lstm_result <= 0.4:
        label = "Negative"
    else:
        label = "Neutral"

    # One shared template; the closing parenthesis was previously missing
    # from all three result strings.
    return "{} review (LR: {}, LSTM: {}, BERT: {})".format(
        label,
        lr_result.round(2),
        lstm_result.round(2),
        bert_result.round(2),
    )
77
+
78
+
79
# Sample reviews offered as ready-made examples in the UI.
example_sentence_1 = "I hate this toaster, they made no effort in making it. So cheap, it almost immediately broke!"
example_sentence_2 = "Great toaster! We love the way it toasted my bread so quickly. Very high quality components too."
# Each example is a one-element list holding a single sentence.
examples = [[sentence] for sentence in (example_sentence_1, example_sentence_2)]

description = "Write out a product review to know the underlying sentiment."

# Build the text-in / text-out interface around decide() and start serving.
# NOTE(review): gr.inputs.Textbox, theme="grass" and launch(enable_queue=...)
# look tied to an older Gradio API -- confirm against the pinned version
# before upgrading.
gr.Interface(
    decide,
    inputs=gr.inputs.Textbox(
        lines=1,
        placeholder=None,
        default="",
        label=None,
    ),
    outputs='text',
    examples=examples,
    title="Sentiment analysis of product reviews",
    theme="grass",
    description=description,
    allow_flagging="auto",
    flagging_dir='flagging records',
).launch(
    enable_queue=True,
    inline=False,
    share=True,
)
bert_classifier.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40773586c34fe1c3197640db0267716bedb5f76e1ffebb6b7232806741452178
3
+ size 16501864
countvect.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:117ac083bd1587e1cba48feb9669bb7e5e0871846a497c414e51e9610de8d946
3
+ size 14439392
logistic_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93bf191cb6320e2081ee5cf4ee695a497b4165f398e3ef877401d5787f55576d
3
+ size 7016090
lstm_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee86ce5eac1a091e8997adf34c2b056769ae482f99cffa82c4274e5f4179b193
3
+ size 11066736
tokenizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4de6458d9a8021512a065ed2d64d182289fcb1333aa17e34f8c12a5c5f7cb222
3
+ size 49477377
tv_layer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe6f5c5513bb7515866c9bf81c4a51438e9da90963414f7eb56f68a701e50ab4
3
+ size 298869