nebiyu29 committed on
Commit
1d32f14
1 Parent(s): 4347542

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+
4
# Hub repository that holds both the tokenizer files and the fine-tuned weights.
MODEL_ID = "nebiyu29/fintunned-v2-roberta_GA"


@st.cache_resource
def _load_tokenizer_and_model(model_id):
    """Load the tokenizer and the classification model for *model_id*.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loaded objects as a shared resource means the
    checkpoint is loaded once instead of on every rerun.

    Args:
        model_id: Hugging Face Hub repository id of the checkpoint.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_id),
        AutoModelForSequenceClassification.from_pretrained(model_id),
    )


# Module-level names used by split_text() and classify_text().
tokenizer, model = _load_tokenizer_and_model(MODEL_ID)
6
+
7
def split_text(text, max_tokens=510):
    """Split *text* into consecutive segments of at most *max_tokens* tokens.

    The text is tokenized with the module-level ``tokenizer``, cut into
    fixed-size runs of tokens, and each run is converted back to a string.

    The chunk boundary is decided by position only. Comparing each token's
    *value* with the last token would end a segment early at every token
    that happens to equal the final one (for example every "." in a text
    that ends with a period).

    Args:
        text: The raw input string.
        max_tokens: Maximum number of tokens per segment. The default leaves
            room for the special tokens the tokenizer adds when a segment is
            encoded again for the model.
            NOTE(review): 510 assumes a 512-token model limit and two special
            tokens per sequence (RoBERTa-style) - confirm against the
            checkpoint's configuration.

    Returns:
        A list of segment strings in original order; empty when *text*
        produces no tokens.

    Raises:
        ValueError: If *max_tokens* is smaller than 1.
    """
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer")

    tokens = tokenizer.tokenize(text)
    return [
        tokenizer.convert_tokens_to_string(tokens[start:start + max_tokens])
        for start in range(0, len(tokens), max_tokens)
    ]
32
+
33
def extract_predictions(outputs):
    """Convert raw model outputs into class probabilities and predicted ids.

    Only tensor methods are used, so this function does not depend on the
    ``torch`` module name being in scope.

    Args:
        outputs: Any object exposing a ``logits`` tensor whose last dimension
            indexes the classes (for sequence-classification heads this is
            expected to be ``(batch, num_labels)``).

    Returns:
        A ``(probs, preds)`` tuple: ``probs`` has the same shape as the
        logits and holds the softmax probabilities over the last dimension;
        ``preds`` holds the index of the most probable class for each row.
    """
    probs = outputs.logits.softmax(dim=-1)
    preds = probs.argmax(dim=-1)
    return probs, preds
40
+
41
def classify_text(text):
    """Classify *text* one segment at a time.

    The text is cut into segments with ``split_text``; each segment is
    encoded, run through the module-level ``model`` without gradient
    tracking, and reduced to its most probable label.

    Args:
        text: The raw input string.

    Returns:
        A list with one dict per segment, in order, with the keys
        ``"segment_text"`` (the segment string), ``"label"`` (looked up in
        ``model.config.id2label``) and ``"probability"`` (probability of the
        predicted label as a float). Empty when ``split_text`` yields no
        segments.
    """
    segments = split_text(text)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Do not rebind ``model`` here: assigning to it would make the name local
    # to this function and the read on the same line would raise
    # UnboundLocalError. Module.to() moves the parameters in place.
    model.to(device)

    predictions = []
    for segment in segments:
        # truncation=True is a safety net in case a decoded segment encodes
        # to more tokens than the tokenizer's configured maximum length.
        inputs = tokenizer(
            [segment], padding=True, truncation=True, return_tensors="pt"
        )
        input_ids = inputs["input_ids"].to(device)
        attention_mask = inputs["attention_mask"].to(device)

        with torch.no_grad():
            outputs = model(input_ids, attention_mask=attention_mask)

        probs, preds = extract_predictions(outputs)
        # Each call carries exactly one segment, so row 0 is the result.
        label_id = preds[0].item()

        predictions.append({
            "segment_text": segment,
            "label": model.config.id2label[label_id],
            "probability": probs[0][label_id].item(),
        })

    return predictions
74
+
75
# Streamlit app: one text box and a button; each classified segment is
# rendered as three lines (text, label, probability).
st.title("Text Classification Demo")
st.write("Enter some text, and the model will classify it.")

text_input = st.text_input("Text Input")
if st.button("Classify"):
    if not text_input.strip():
        # Without this guard an empty submission produces no segments and
        # the page would show nothing at all after the click.
        st.warning("Please enter some text to classify.")
    else:
        for prediction in classify_text(text_input):
            st.write(f"Segment Text: {prediction['segment_text']}")
            st.write(f"Label: {prediction['label']}")
            st.write(f"Probability: {prediction['probability']}")