File size: 3,207 Bytes
cdb159e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import flask
import torch
from flask import Flask, render_template, request
from utils import label_full_decoder
import sys
import config
import dataset
import engine
from model import BERTBaseUncased
from tokenizer import tokenizer
from werkzeug.serving import run_simple
# from werkzeug.wsgi import DispatcherMiddleware


# Shared tweet tokenizer used by preprocess(). The preserve_* flags are
# interpreted by the project's tokenizer module (presumably: keep @handles
# and #hashtags, lowercase text, drop URLs -- confirm against tokenizer.py).
T = tokenizer.TweetTokenizer(
    preserve_handles=True,
    preserve_hashes=True,
    preserve_case=False,
    preserve_url=False,
)

# Flask application object. Static assets live in app_resources/static and
# are exposed under the /sentimentanalyzer URL prefix; HTML templates are
# looked up in app_resources/templates/public.
app = Flask(
    __name__,
    static_folder='app_resources/static',
    static_url_path='/sentimentanalyzer',
    instance_relative_config=True,
    template_folder='app_resources/templates/public',
)


# Device identifier taken from the project configuration.
DEVICE = config.device
# Placeholder for the model; assigned when this module is run as a script.
MODEL = None


def preprocess(text):
    """Tokenize *text* and anonymize user mentions.

    Every token containing "@" is treated as a mention. A run of
    consecutive mention tokens is collapsed into a single "mention_0"
    placeholder; all other tokens are kept unchanged. Both the raw and the
    processed token lists are echoed to stderr for debugging.

    Returns the processed tokens joined by single spaces.
    """
    raw_tokens = T.tokenize(text)
    print(raw_tokens, file=sys.stderr)

    cleaned = []
    prev_was_mention = False
    for tok in raw_tokens:
        is_mention = "@" in tok
        if not is_mention:
            cleaned.append(tok)
        elif not prev_was_mention:
            # First mention of a run: emit one placeholder for the whole run.
            cleaned.append("mention_0")
        prev_was_mention = is_mention

    print(cleaned, file=sys.stderr)
    return " ".join(cleaned)


def sentence_prediction(sentence):
    """Run the sentiment model on a single raw sentence.

    The sentence is normalized with preprocess(), wrapped in a one-item
    BERTDataset (with a dummy target of 0) and fed through
    engine.predict_fn.

    The model is taken from the module-level MODEL when it has already been
    loaded; otherwise it is built from config.MODEL_PATH once and cached in
    MODEL, so the checkpoint is not re-read from disk on every call.

    Returns the first element of the predictions produced by
    engine.predict_fn (presumably the predicted class id for the sentence;
    the caller passes it to label_full_decoder).
    """
    global MODEL

    sentence = preprocess(sentence)
    device = config.device

    if MODEL is None:
        # Lazy one-time load for the case where the module was imported
        # rather than run as a script.
        loaded = BERTBaseUncased()
        loaded.load_state_dict(torch.load(
            config.MODEL_PATH, map_location=torch.device(device)))
        MODEL = loaded

    model = MODEL
    model.to(device)
    # nn.Module instances start in training mode; switch to inference mode
    # so any dropout layers are disabled and predictions are deterministic.
    model.eval()

    test_dataset = dataset.BERTDataset(
        review=[sentence],
        target=[0]
    )

    # NOTE(review): three worker processes for a single-item dataset looks
    # like unnecessary overhead; kept as-is to preserve loading behavior.
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=3
    )

    # The second return value is not needed here; `_` accepts whatever
    # predict_fn returns instead of requiring an empty iterable.
    outputs, _ = engine.predict_fn(test_data_loader, model, device)
    print(outputs)
    return outputs[0]


@app.route("/sentimentanalyzer/predict", methods=['POST'])
def predict():
    """Handle a POSTed form and return the sentiment prediction as JSON.

    Reads the 'sentence' form field. A non-empty sentence yields
    {"response": {"sentence": ..., "prediction": ...}}, where the prediction
    is the output of sentence_prediction() passed through
    label_full_decoder(). An empty sentence yields {"error": "empty text"}.
    The form and the sentence are echoed to stderr for debugging.
    """
    print(request.form, file=sys.stderr)
    sentence = request.form['sentence']

    # Guard clause: nothing to classify.
    if not sentence:
        return flask.jsonify({"error": "empty text"})

    print(sentence, file=sys.stderr)
    label = label_full_decoder(sentence_prediction(sentence))
    payload = {
        "response": {
            'sentence': sentence,
            'prediction': label,
        },
    }
    return flask.jsonify(payload)


@app.route("/sentimentanalyzer/")
def index():
    """Render the landing page template."""
    template_name = "index.html"
    return render_template(template_name)


@app.route("/sentimentanalyzer/demo")
def demo():
    """Render the demo page template."""
    template_name = "demo.html"
    return render_template(template_name)


@app.route("/sentimentanalyzer/models")
def models():
    """Render the models page template."""
    template_name = "models.html"
    return render_template(template_name)


@app.route("/sentimentanalyzer/about")
def about():
    """Render the about page template."""
    template_name = "about.html"
    return render_template(template_name)


if __name__ == "__main__":
    # Script entry point: build the model, restore its weights from the
    # configured checkpoint, put it in inference mode, then start the
    # Flask development server on localhost.
    MODEL = BERTBaseUncased()
    weights = torch.load(
        config.MODEL_PATH, map_location=torch.device(DEVICE))
    MODEL.load_state_dict(weights)
    MODEL.eval()

    app.run(host="127.0.0.1", port=1095, debug=True)
# host="http://cleopatra.ijs.si/sentimentanalyzer"