Spaces:

nebiyu29
/

good_acc_v2

Sleeping

App Files Files Community

nebiyu29 committed on Feb 15, 2024

Commit

1d32f14

verified ·

1 Parent(s): 4347542

Create app.py

Browse files

Files changed (1) hide show

app.py +85 -0

app.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import streamlit as st
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+tokenizer = AutoTokenizer.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
+model = AutoModelForSequenceClassification.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
+# Define a function to split a text into segments of 512 tokens
+def split_text(text):
+    # Tokenize the text
+    tokens = tokenizer.tokenize(text)
+    # Initialize an empty list for segments
+    segments = []
+    # Initialize an empty list for current segment
+    current_segment = []
+    # Initialize a counter for tokens
+    token_count = 0
+    # Loop through the tokens
+    for token in tokens:
+        # Add the token to the current segment
+        current_segment.append(token)
+        # Increment the token count
+        token_count += 1
+        # If the token count reaches 512 or the end of the text, add the current segment to the segments list
+        if token_count == 512 or token == tokens[-1]:
+            # Convert the current segment to a string and add it to the segments list
+            segments.append(tokenizer.convert_tokens_to_string(current_segment))
+            # Reset the current segment and the token count
+            current_segment = []
+            token_count = 0
+    # Return the segments list
+    return segments
+# Define a function to extract predictions from model output (adjust as needed)
+def extract_predictions(outputs):
+    # Assuming outputs contain logits and labels (adapt based on your model's output format)
+    logits = outputs.logits
+    probs = logits.softmax(dim=1)
+    preds = torch.argmax(probs, dim=1)
+    return probs, preds  # Return all probabilities and predicted labels
+# a function that classifies text
+def classify_text(text):
+    # Split text into segments using split_text
+    segments = split_text(text)
+    # Initialize empty list for predictions
+    predictions = []
+    # Move device to GPU if available
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = model.to(device)
+    # Loop through segments, process, and store predictions
+    for segment in segments:
+        inputs = tokenizer([segment], padding=True, return_tensors="pt")
+        input_ids = inputs["input_ids"].to(device)
+        attention_mask = inputs["attention_mask"].to(device)
+        with torch.no_grad():
+            outputs = model(input_ids, attention_mask=attention_mask)
+        # Extract predictions for each segment
+        probs, preds = extract_predictions(outputs)  # Define this function based on your model's output
+        pred_label = model.config.id2label[preds[0].item()]
+        # Append predictions for this segment
+        predictions.append({
+            "segment_text": segment,
+            "label": pred_label,  # Assuming single label prediction
+            "probability": probs[0][preds[0]].item()  # Access probability for the predicted label
+        })
+    return predictions
+# Streamlit app
+st.title("Text Classification Demo")
+st.write("Enter some text, and the model will classify it.")
+text_input = st.text_input("Text Input")
+if st.button("Classify"):
+    predictions = classify_text(text_input)
+    for prediction in predictions:
+        st.write(f"Segment Text: {prediction['segment_text']}")
+        st.write(f"Label: {prediction['label']}")
+        st.write(f"Probability: {prediction['probability']}")