Spaces:

nebiyu29
/

good_acc_v3

Sleeping

App Files Files Community

nebiyu29 committed on Feb 15

Commit

bd1016c

•

1 Parent(s): 0516563

initial commit

Browse files

Files changed (1) hide show

app.py +110 -0

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import re
+# Load the model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
+model = AutoModelForSequenceClassification.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
+# Define a function to split a text into segments of 512 tokens
+def split_text(text):
+    text=re.sub(r'[^a-zA-Z\s]','',text)
+    text=str(text)
+    # Tokenize the text
+    tokens = tokenizer.tokenize(text)
+    # Initialize an empty list for segments
+    segments = []
+    # Initialize an empty list for current segment
+    current_segment = []
+    # Initialize a counter for tokens
+    token_count = 0
+    # Loop through the tokens
+    for token in tokens:
+        # Add the token to the current segment
+        current_segment.append(token)
+        # Increment the token count
+        token_count += 1
+        # If the token count reaches 512 or the end of the text, add the current segment to the segments list
+        if token_count == 512 or token == tokens[-1]:
+            # Convert the current segment to a string and add it to the segments list
+            segments.append(tokenizer.convert_tokens_to_string(current_segment))
+            # Reset the current segment and the token count
+            current_segment = []
+            token_count = 0
+    # Return the segments list
+    return segments
+def classify(text):
+    # Define the labels
+    labels = ["depression", "anxiety", "bipolar disorder", "schizophrenia", "PTSD", "OCD", "ADHD", "autism", "eating disorder", "personality disorder", "phobia"]
+    #labels=list(model.config.id2label)
+    # Encode the labels
+    label_encodings = tokenizer(labels, padding=True, return_tensors="pt")
+    # Split the text into segments
+    segments = split_text(text)
+    # Initialize an empty list for logits
+    logits_list = []
+    # Loop through the segments
+    for segment in segments:
+        # Encode the segment and the labels
+        inputs = tokenizer([segment] + labels, padding=True, return_tensors="pt")
+        # Get the input ids and attention mask
+        input_ids = inputs["input_ids"]
+        attention_mask = inputs["attention_mask"]
+        # Move the input ids and attention mask to the device
+        input_ids = input_ids.to(device)
+        attention_mask = attention_mask.to(device)
+        # Get the model outputs for each segment
+        with torch.no_grad():
+            outputs = model(
+                input_ids,
+                attention_mask=attention_mask,
+            )
+        # Get the logits for each segment and append them to the logits list
+        logits = outputs.logits
+        logits_list.append(logits)
+    # Average the logits across the segments
+    avg_logits = torch.mean(torch.stack(logits_list), dim=0)
+    # Apply softmax to convert logits to probabilities
+    probabilities = torch.softmax(avg_logits, dim=1)
+    # Get the probabilities for each label
+    label_probabilities = probabilities[:, :len(labels)].tolist()
+    # Get the top 3 most likely labels and their probabilities
+   # Get the top 3 most likely labels and their probabilities
+    top_labels = []
+    top_probabilities = []
+    label_probabilities = label_probabilities[0]  # Extract the list of probabilities for the first (and only) example
+    for _ in range(3):
+        max_prob_index = label_probabilities.index(max(label_probabilities))
+        top_labels.append(labels[max_prob_index])
+        top_probabilities.append(max(label_probabilities))
+        label_probabilities[max_prob_index] = 0  # Set the max probability to 0 to get the next highest probability
+    # Create a dictionary to store the results
+    results = {
+        "sequence": text,
+        "top_labels": top_labels,
+        "top_probabilities": top_probabilities
+    }
+    return results
+# Streamlit app
+st.title("Text Classification Demo")
+st.write("Enter some text, and the model will classify it.")
+text_input = st.text_input("Text Input")
+if st.button("Classify"):
+    predictions = classify(text_input)
+    for prediction in predictions:
+       # st.write(f"Segment Text: {prediction['segment_text']}")
+        st.write(f"Label: {prediction['top_labels']}")
+        st.write(f"Probability: {prediction['top_probabilities']}")