|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import gradio as gr |
|
|
|
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
import torch |
|
import transformers |
|
|
|
# Hugging Face Hub repo id, defined once so the tokenizer and the model
# are guaranteed to come from the same checkpoint.
MODEL_NAME = "nebiyu29/fintunned-v2-roberta_GA"

# Load (downloading on first run — network I/O) the tokenizer and the
# fine-tuned sequence-classification model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def split_text(text, max_tokens=512):
    """Split *text* into segments of at most ``max_tokens`` tokens each.

    The text is tokenized with the module-level ``tokenizer``, the token
    stream is cut into consecutive fixed-size chunks, and each chunk is
    converted back to a string.

    Args:
        text: Input string to split.
        max_tokens: Maximum tokens per segment (default 512, the usual
            transformer context limit).

    Returns:
        list[str]: Detokenized segments in order; empty list for input
        that tokenizes to no tokens.
    """
    tokens = tokenizer.tokenize(text)
    # Slice in fixed-size steps rather than comparing each token to
    # tokens[-1]: the previous equality test ended a segment early any
    # time a token *equal* to the final token occurred mid-text.
    return [
        tokenizer.convert_tokens_to_string(tokens[start:start + max_tokens])
        for start in range(0, len(tokens), max_tokens)
    ]
|
|
|
def classify(text, model):
    """Classify *text* against a fixed set of mental-health labels.

    Long inputs are split into 512-token segments via ``split_text``;
    the model's logits are averaged across segments before softmax.

    NOTE(review): each forward pass feeds ``[segment] + labels`` as a
    batch and then softmaxes row 0 of the averaged logits over the first
    ``len(labels)`` output classes — this presumes the model's class
    dimension aligns with the label list; verify against the checkpoint.

    Args:
        text: Input string to classify.
        model: A sequence-classification model returning an object with
            a ``.logits`` attribute.

    Returns:
        dict: ``{"sequence": text, "top_labels": [...3 labels...],
        "top_probabilities": [...3 floats...]}`` — empty lists when the
        input tokenizes to nothing.
    """
    labels = ["depression", "anxiety", "bipolar disorder", "schizophrenia", "PTSD", "OCD", "ADHD", "autism", "eating disorder", "personality disorder", "phobia"]

    segments = split_text(text)
    # Guard: empty input used to crash torch.stack on an empty list.
    if not segments:
        return {"sequence": text, "top_labels": [], "top_probabilities": []}

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    logits_list = []
    for segment in segments:
        # Batch the segment together with every label string.
        inputs = tokenizer([segment] + labels, padding=True, return_tensors="pt")
        input_ids = inputs["input_ids"].to(device)
        attention_mask = inputs["attention_mask"].to(device)

        # Inference only — no gradients needed.
        with torch.no_grad():
            outputs = model(input_ids, attention_mask=attention_mask)
        logits_list.append(outputs.logits)

    # Average logits over segments, then softmax across classes.
    avg_logits = torch.mean(torch.stack(logits_list), dim=0)
    probabilities = torch.softmax(avg_logits, dim=1)
    label_probabilities = probabilities[:, :len(labels)].tolist()[0]

    # Top-3 labels by probability. A stable descending sort keeps the
    # same tie-breaking (first index wins) as the old repeated
    # index(max(...))-then-zero loop, without mutating the list.
    ranked = sorted(enumerate(label_probabilities), key=lambda pair: pair[1], reverse=True)[:3]
    top_labels = [labels[i] for i, _ in ranked]
    top_probabilities = [p for _, p in ranked]

    return {
        "sequence": text,
        "top_labels": top_labels,
        "top_probabilities": top_probabilities
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: a single text box in, the classification result rendered as
# text out. The original passed fn=classify_text (undefined — NameError)
# and an invalid `choices` kwarg; the lambda adapts classify's
# (text, model) signature to Gradio's single-input callback and closes
# over the module-level model.
interface = gr.Interface(
    fn=lambda text: classify(text, model),
    inputs="text",
    outputs="text",
    title="Text Classification Demo",
    description="Enter some text, and the model will classify it.",
)

# Start the local web server (blocking call).
interface.launch()
|
|