Spaces:

nebiyu29
/

good_acc

Sleeping

App Files Files Community

good_acc / app.py

nebiyu29

Update app.py

855ff50 verified 7 months ago

raw

history blame

No virus

5.03 kB

	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import gradio as gr

	# Load model directly
	from transformers import AutoTokenizer, AutoModelForSequenceClassification

	import torch
	import transformers

	# Hub checkpoint shared by the tokenizer and the classifier, so the two
	# can never be loaded from different repositories by accident.
	MODEL_ID = "nebiyu29/fintunned-v2-roberta_GA"

	# Both objects are loaded once at import time and reused for every request.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)


	# Load the model and tokenizer
	# model = transformers.AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli")

	# tokenizer = transformers.AutoTokenizer.from_pretrained("facebook/bart-large-mnli")

	# Define a function to split a text into segments of 512 tokens
	def split_text(text, max_tokens=512):
	    """Split *text* into consecutive segments of at most *max_tokens* tokens.

	    The text is tokenized with the module-level ``tokenizer``, the token
	    list is cut into fixed-size slices, and each slice is converted back
	    to a string.

	    Args:
	        text: The raw input string.
	        max_tokens: Maximum number of tokens per segment. The count does
	            not include any special tokens the tokenizer may add when a
	            segment is encoded again later.

	    Returns:
	        A list of segment strings in their original order; an empty list
	        when the text produces no tokens.

	    Raises:
	        ValueError: If *max_tokens* is smaller than 1.
	    """
	    if max_tokens < 1:
	        raise ValueError("max_tokens must be a positive integer")

	    # Progress message for the application log.
	    print("going to split the text")

	    tokens = tokenizer.tokenize(text)

	    # Slice by position. The previous implementation detected the end of
	    # the text with `token == tokens[-1]`, which compares token *values*:
	    # any earlier token equal to the final one (for example a repeated
	    # ".") closed the current segment prematurely.
	    return [
	        tokenizer.convert_tokens_to_string(tokens[start:start + max_tokens])
	        for start in range(0, len(tokens), max_tokens)
	    ]

	# Define a function to extract predictions from model output (adjust as needed)
	def extract_predictions(outputs):
	    """Turn a model output into class probabilities and predicted class ids.

	    Args:
	        outputs: An object exposing a ``logits`` tensor; the class scores
	            are read along dimension 1.

	    Returns:
	        A ``(probs, preds)`` tuple: the softmax of the logits over
	        dimension 1, and the index of the largest probability along that
	        same dimension.
	    """
	    class_probs = torch.softmax(outputs.logits, dim=1)
	    top_classes = class_probs.argmax(dim=1)
	    return class_probs, top_classes

	# Earlier draft of classify_text, kept commented out for reference; the active definition follows it.

	# def classify_text(text):
	# # Define labels
	# labels = ["depression", "anxiety", "bipolar disorder", "schizophrenia", "PTSD", "OCD", "ADHD", "autism", "eating disorder", "personality disorder", "phobia"]

	# # Split text into segments using split_text
	# segments = split_text(text)

	# # Initialize empty list for predictions
	# predictions = []

	# # Move device to GPU if available
	# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	# model = model.to(device)

	# # Loop through segments, process, and store predictions
	# for segment in segments:
	# inputs = tokenizer([segment], padding=True, return_tensors="pt")
	# input_ids = inputs["input_ids"].to(device)
	# attention_mask = inputs["attention_mask"].to(device)

	# with torch.no_grad():
	# outputs = model(input_ids, attention_mask=attention_mask)

	# # Extract predictions for each segment
	# probs, preds = extract_predictions(outputs) # Define this function based on your model's output

	# # Append predictions for this segment
	# predictions.append({
	# "segment_text": segment,
	# "label": preds[0], # Assuming single label prediction
	# "probability": probs[preds[0]] # Access probability for the predicted label
	# })

	def classify_text(text):
	    """Classify every segment of *text* and report the top label for each.

	    The text is cut into segments with ``split_text``; each segment is
	    encoded, run through the module-level ``model`` without gradient
	    tracking, and reduced to its most probable class.

	    Args:
	        text: The raw input string.

	    Returns:
	        A list with one dict per segment, in order, with the keys
	        ``"segment_text"`` (the segment string), ``"label"`` (the entry of
	        ``model.config.id2label`` for the predicted class) and
	        ``"probability"`` (the predicted class probability as a float).
	        The list is empty when ``split_text`` yields no segments.
	    """
	    # The original body used a global `device` that is never defined in
	    # this file; use the device the model's weights already live on.
	    device = next(model.parameters()).device

	    predictions = []
	    for segment in split_text(text):
	        # truncation=True guards against a segment that, once special
	        # tokens are added, is longer than the tokenizer's configured
	        # maximum length.
	        inputs = tokenizer(
	            [segment], padding=True, truncation=True, return_tensors="pt"
	        )
	        input_ids = inputs["input_ids"].to(device)
	        attention_mask = inputs["attention_mask"].to(device)

	        with torch.no_grad():
	            outputs = model(input_ids, attention_mask=attention_mask)

	        probs, preds = extract_predictions(outputs)

	        # One segment per batch, so row 0 holds its result. Convert the
	        # predicted id to a plain int: id2label is keyed by integers and
	        # a tensor key would not match. Index probs as [row, class]; the
	        # original `probs[preds[0]]` indexed the batch axis instead.
	        label_id = int(preds[0])
	        predictions.append({
	            "segment_text": segment,
	            "label": model.config.id2label[label_id],
	            "probability": float(probs[0, label_id]),
	        })

	    return predictions



	# def classify_text(text):
	# """
	# This function preprocesses, feeds text to the model, and outputs the predicted class.
	# """
	# inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
	# outputs = model(**inputs)
	# logits = outputs.logits # Access logits instead of pipeline output
	# predictions = torch.argmax(logits, dim=-1) # Apply argmax for prediction
	# return model.config.id2label[predictions.item()] # Map index to class label

	# Gradio UI: a single free-text input whose classify_text result is
	# rendered through a text output component.
	interface = gr.Interface(
	    title="Text Classification Demo",
	    description="Enter some text, and the model will classify it.",
	    fn=classify_text,
	    inputs="text",
	    outputs="text",
	)

	# Start serving the demo.
	interface.launch()