Spaces:

nebiyu29
/

good_acc

Sleeping

App Files Files Community

nebiyu29 committed on Feb 12

Commit

2f6ade8

•

1 Parent(s): 1f8178b

added more text capability

Browse files

Files changed (1) hide show

app.py +107 -10

app.py CHANGED Viewed

@@ -4,19 +4,116 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 # Load model directly
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 tokenizer = AutoTokenizer.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
 model = AutoModelForSequenceClassification.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
-def classify_text(text):
-  """
-  This function preprocesses, feeds text to the model, and outputs the predicted class.
-  """
-  inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
-  outputs = model(**inputs)
-  logits = outputs.logits  # Access logits instead of pipeline output
-  predictions = torch.argmax(logits, dim=-1)  # Apply argmax for prediction
-  return model.config.id2label[predictions.item()]  # Map index to class label
 interface = gr.Interface(
     fn=classify_text,
@@ -24,7 +121,7 @@ interface = gr.Interface(
     outputs="text",
     title="Text Classification Demo",
     description="Enter some text, and the model will classify it.",
-    choices=["positive", "negative", "neutral"]  # Adjust class names
 )
 interface.launch()

 # Load model directly
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import transformers
 tokenizer = AutoTokenizer.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
 model = AutoModelForSequenceClassification.from_pretrained("nebiyu29/fintunned-v2-roberta_GA")
+# Load the model and tokenizer
+# model = transformers.AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli")
+# tokenizer = transformers.AutoTokenizer.from_pretrained("facebook/bart-large-mnli")
+# Split a text into segments that each fit in one model input
+def split_text(text, max_tokens=None):
+    """Split ``text`` into consecutive segments of at most ``max_tokens`` tokens.
+
+    The text is tokenized with the module-level ``tokenizer``, the token list
+    is cut into fixed-size windows by position, and every window is converted
+    back to a string.
+
+    Args:
+        text: The text to split.
+        max_tokens: Maximum number of tokens per segment. When ``None``, the
+            limit is 512 (or the tokenizer's own ``model_max_length`` if that
+            is smaller) minus the special tokens the tokenizer adds around a
+            single sequence, so a re-encoded segment still fits in 512 tokens.
+
+    Returns:
+        A list of segment strings in document order; empty when ``text``
+        produces no tokens.
+
+    Raises:
+        ValueError: If the resulting segment size is smaller than 1.
+    """
+    if max_tokens is None:
+        # Reserve room for the special tokens added when a segment is encoded
+        # again for the model; a full 512-token window would overflow.
+        special_tokens = tokenizer.num_special_tokens_to_add(pair=False)
+        max_tokens = min(512, tokenizer.model_max_length) - special_tokens
+    if max_tokens < 1:
+        raise ValueError("max_tokens must be at least 1")
+    # Tokenize the text
+    tokens = tokenizer.tokenize(text)
+    # Cut by position. Comparing each token with the value of the last token
+    # would end a segment early whenever that value occurs earlier in the text.
+    return [
+        tokenizer.convert_tokens_to_string(tokens[start:start + max_tokens])
+        for start in range(0, len(tokens), max_tokens)
+    ]
+def classify(text, model):
+    """Classify ``text`` and return its three most probable labels.
+
+    The text is split into segments with ``split_text``, each segment is run
+    through ``model``, the per-segment logits are averaged, and a softmax over
+    the averaged logits gives the class probabilities.
+
+    Args:
+        text: The text to classify.
+        model: A sequence-classification model whose output exposes
+            ``.logits``. It is moved to the GPU when one is available.
+
+    Returns:
+        A dict with the keys ``"sequence"`` (the input text), ``"top_labels"``
+        and ``"top_probabilities"``. The two lists are parallel, sorted by
+        descending probability, and hold at most three entries; they are empty
+        when ``text`` produces no tokens.
+    """
+    # Output index i is reported as labels[i].
+    # NOTE(review): assumes this order matches the fine-tuned classification
+    # head - confirm against model.config.id2label.
+    labels = ["depression", "anxiety", "bipolar disorder", "schizophrenia", "PTSD", "OCD", "ADHD", "autism", "eating disorder", "personality disorder", "phobia"]
+    results = {
+        "sequence": text,
+        "top_labels": [],
+        "top_probabilities": []
+    }
+    # Split the text into segments
+    segments = split_text(text)
+    if not segments:
+        # Nothing to score; averaging an empty list of logits would raise.
+        return results
+    # Use the GPU when available
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = model.to(device)
+    # A decoded segment can re-tokenize slightly longer than it was cut, so
+    # truncate as a safety net.
+    max_length = min(512, tokenizer.model_max_length)
+    logits_list = []
+    for segment in segments:
+        # Encode the segment on its own. The label strings are not model
+        # inputs; batching them only added rows that were never read.
+        inputs = tokenizer(segment, truncation=True, max_length=max_length, return_tensors="pt")
+        input_ids = inputs["input_ids"].to(device)
+        attention_mask = inputs["attention_mask"].to(device)
+        with torch.no_grad():
+            outputs = model(
+                input_ids,
+                attention_mask=attention_mask,
+            )
+        logits_list.append(outputs.logits)
+    # Average the logits across the segments: (segments, classes) -> (classes,)
+    avg_logits = torch.mean(torch.cat(logits_list, dim=0), dim=0)
+    # Softmax over all classes, then keep the entries that have a label
+    probabilities = torch.softmax(avg_logits, dim=0)
+    label_probabilities = probabilities[:len(labels)].tolist()
+    # Rank label indices by probability. sorted() is stable, so ties keep the
+    # lower index first, and no label can be reported twice.
+    ranked = sorted(
+        range(len(label_probabilities)),
+        key=label_probabilities.__getitem__,
+        reverse=True,
+    )[:3]
+    results["top_labels"] = [labels[index] for index in ranked]
+    results["top_probabilities"] = [label_probabilities[index] for index in ranked]
+    return results
+def classify_text(text):
+    """Classify ``text`` with the module-level ``model`` and format the result.
+
+    The ``gr.Interface`` at the bottom of this file is wired to
+    ``fn=classify_text`` with a text output, so this wrapper keeps that entry
+    point defined.
+
+    Args:
+        text: The text entered by the user.
+
+    Returns:
+        One ``"label: probability"`` line per top label, or an empty string
+        when there is nothing to classify.
+    """
+    results = classify(text, model)
+    return "\n".join(
+        f"{label}: {probability:.4f}"
+        for label, probability in zip(results["top_labels"], results["top_probabilities"])
+    )
+# def classify_text(text):
+#   """
+#   This function preprocesses, feeds text to the model, and outputs the predicted class.
+#   """
+#   inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
+#   outputs = model(**inputs)
+#   logits = outputs.logits  # Access logits instead of pipeline output
+#   predictions = torch.argmax(logits, dim=-1)  # Apply argmax for prediction
+#   return model.config.id2label[predictions.item()]  # Map index to class label
 interface = gr.Interface(
     fn=classify_text,
     outputs="text",
     title="Text Classification Demo",
     description="Enter some text, and the model will classify it.",
+    choices=["depression", "anxiety", "bipolar disorder", "schizophrenia", "PTSD", "OCD", "ADHD", "autism", "eating disorder", "personality disorder", "phobia"]  # Adjust class names
 )
 interface.launch()