Tanor committed on
Commit
3b960c7
1 Parent(s): 90b710b
Files changed (3) hide show
  1. app.py +41 -0
  2. requirements.txt +4 -0
  3. sentiwordnet_calculator.py +64 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentiwordnet_calculator import SentimentPipeline
3
+
4
# Shared pipeline instance, created once at import time and reused by every
# request: the first model id is the positive classifier, the second the
# negative classifier.
pipe = SentimentPipeline(
    model_path_positive="Tanor/SRGPTSENTPOS4",
    model_path_negative="Tanor/SRGPTSENTNEG4",
)
5
+
6
+
7
def calculate(text):
    """Score *text* with the module-level ``pipe`` and format it for the UI.

    Returns a 2-tuple:
      * the score mapping exactly as returned by ``pipe`` (fed to the label
        output), and
      * a single "KEY: value, KEY: value, ..." string with every score
        rounded to two decimals (fed to the text output).
    """
    scores = pipe(text)

    # Build the human-readable summary one entry at a time, in the mapping's
    # own iteration order.
    formatted_parts = []
    for label, score in scores.items():
        formatted_parts.append(f"{label}: {round(score, 2)}")

    return scores, ", ".join(formatted_parts)
16
+
17
# Gradio front end: one multi-line text input, two outputs (a label widget
# showing the three scores and a plain-text summary), both produced by
# `calculate`.
#
# The components are taken from the top-level `gr` namespace. The older
# `gr.inputs.*` / `gr.outputs.*` namespaces are deprecated in Gradio 3 and
# removed in Gradio 4; the top-level classes accept the same arguments used here.
iface = gr.Interface(
    fn=calculate,
    inputs=gr.Textbox(lines=5, placeholder="Enter your text here..."),
    outputs=[gr.Label(num_top_classes=3), "text"],
    title="Sentiment Analysis for Serbian",
    description="""
This tool performs sentiment analysis on the input text using a model trained on Serbian dictionary definitions.
The pretrained model [sr-gpt2-large model by Mihailo Škorić](https://huggingface.co/JeRTeh/sr-gpt2-large),
was fine-tuned on selected definitions from the Serbian WordNet. Please limit the input to 300 tokens.
The outputs represent the Positive (POS), Negative (NEG), and Objective (OBJ) sentiment scores.
""",
    # Each example is a one-element list because the interface has a single input.
    examples=[
        ["osoba koja ne prihvata nove ideje"],
        ["intenzivna ojađenost"],
        ["uopštenih osećanja tuge"],
        ["žalostan zbog gubitka ili uskraćenosti"],
        ["činjenje dobra; osećaj dobrotvornosti"],
        ["Jako pozitivno osećanje poštovanja i privrežen..."],
        ["usrećiti ili zadovoljiti"],
        ["Korisna ili vredna osobina"],
    ],
)

iface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers==4.28.1
2
+ #git+https://github.com/huggingface/transformers
3
+ transformers[torch]
4
+ xformers
sentiwordnet_calculator.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+
3
class SentimentPipeline:
    """
    Custom sentiment analysis pipeline built on Hugging Face's Transformers.

    Two separate text-classification models are used: one predicting
    positive/non-positive and one predicting negative/non-negative sentiment.
    Their outputs are combined into POS, NEG and OBJ scores.

    Inputs:
        A single text string, or an iterable (e.g. a list) of text strings.

    Returns:
        For a single text string, one dictionary with "POS", "NEG" and "OBJ" scores.
        For an iterable of strings, a list with one such dictionary per text, in
        input order. The list form is returned for ANY iterable input, including
        a one-element list (-> one-element list) and an empty list (-> []).

    Usage:
        sentiment_pipeline = SentimentPipeline(YOUR_POS_MODEL, YOUR_NEG_MODEL)
        result = sentiment_pipeline("Your glossed text here")
        results = sentiment_pipeline(["Your first glossed text here", "Your second glossed text here"])
    """

    def __init__(self, model_path_positive, model_path_negative):
        """
        Create the two underlying 'text-classification' pipelines.

        Args:
            model_path_positive: model identifier/path for the positive classifier.
            model_path_negative: model identifier/path for the negative classifier.
        """
        self.pos_pipeline = pipeline('text-classification', model=model_path_positive)
        self.neg_pipeline = pipeline('text-classification', model=model_path_negative)

    @staticmethod
    def _split_probability(result, target_label):
        """
        Turn one classifier result into (P(target), P(not target)).

        `result` is a mapping with 'label' and 'score'. If the predicted label
        equals `target_label`, the score is the target probability; for any
        other label the score is taken as the probability of the complement.
        """
        score = result['score']
        if result['label'] == target_label:
            return score, 1 - score
        return 1 - score, score

    def __call__(self, texts):
        """
        Compute sentiment scores for the given text(s).

        Args:
            texts: a single string or an iterable of strings.

        Returns:
            A dict {"POS", "NEG", "OBJ"} for a single string, otherwise a list
            of such dicts (one per input text).
        """
        # Remember the input shape up front: the return shape must follow the
        # INPUT type, not the number of results, so that a one-element list
        # still yields a list and an empty list yields [] instead of failing.
        single_input = isinstance(texts, str)
        if single_input:
            texts = [texts]

        results = []
        for text in texts:
            # Each pipeline returns a list of predictions; use the first one.
            pos_result = self.pos_pipeline(text)[0]
            neg_result = self.neg_pipeline(text)[0]

            # Pt/Pn: probability of positive / non-positive.
            # Nt/Nn: probability of negative / non-negative.
            Pt, Pn = self._split_probability(pos_result, 'POSITIVE')
            Nt, Nn = self._split_probability(neg_result, 'NEGATIVE')

            # POS requires "positive and not negative", NEG the converse;
            # OBJ is whatever probability mass remains.
            POS = Pt * Nn
            NEG = Nt * Pn
            OBJ = 1 - POS - NEG

            results.append({"POS": POS, "NEG": NEG, "OBJ": OBJ})

        return results[0] if single_input else results