Spaces:

nebiyu29
/

good_acc

Sleeping

App Files Files Community

good_acc / app.py

nebiyu29

Update app.py

7768e0f verified 9 months ago

raw

history blame

3.52 kB

	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import gradio as gr

	# Load model directly
	from transformers import AutoTokenizer, AutoModelForSequenceClassification

	import torch
	import transformers

	# The tokenizer and the sequence-classification model are both loaded from
	# the same checkpoint identifier, so it is named once here.
	_CHECKPOINT = "nebiyu29/fintunned-v2-roberta_GA"

	tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
	model = AutoModelForSequenceClassification.from_pretrained(_CHECKPOINT)


	# Load the model and tokenizer
	# model = transformers.AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli")

	# tokenizer = transformers.AutoTokenizer.from_pretrained("facebook/bart-large-mnli")

	# Define a function to split a text into segments of 512 tokens
	def split_text(text, max_tokens=512):
	    """Split ``text`` into consecutive segments that fit the model input.

	    The text is tokenized with the module-level ``tokenizer``, cut into
	    fixed-size runs of tokens, and each run is converted back to a string.

	    Args:
	        text: The raw text to split.
	        max_tokens: Upper bound on the length of one encoded segment,
	            *including* the special tokens the tokenizer adds when the
	            segment is encoded again. Defaults to 512.

	    Returns:
	        A list of segment strings in their original order. The list is
	        empty when ``text`` produces no tokens.

	    Raises:
	        ValueError: If ``max_tokens`` leaves no room for content tokens.
	    """
	    # this prints progress
	    print("going to split the text")

	    tokens = tokenizer.tokenize(text)

	    # Each segment is encoded again later, and that step adds special tokens.
	    # Reserve room for them so the encoded segment stays within max_tokens.
	    budget = max_tokens - tokenizer.num_special_tokens_to_add(pair=False)
	    if budget <= 0:
	        raise ValueError(
	            "max_tokens must be larger than the number of special tokens"
	        )

	    # Slice by position rather than comparing each token with the last one:
	    # a value comparison also matches earlier tokens that merely look like
	    # the final token (for example a repeated "."), cutting segments short.
	    # NOTE(review): re-tokenizing a decoded segment may not reproduce exactly
	    # the same tokens, so callers may still want truncation when encoding.
	    return [
	        tokenizer.convert_tokens_to_string(tokens[start:start + budget])
	        for start in range(0, len(tokens), budget)
	    ]

	# Define a function to extract predictions from model output (adjust as needed)
	def extract_predictions(outputs):
	    """Turn a model output into class probabilities and predicted indices.

	    Args:
	        outputs: An object exposing a ``logits`` tensor; the softmax and the
	            argmax are both taken along dimension 1.

	    Returns:
	        A ``(probs, preds)`` tuple: the softmax of the logits, and the index
	        of the largest probability along dimension 1.
	    """
	    class_probs = torch.softmax(outputs.logits, dim=1)
	    best_indices = class_probs.argmax(dim=1)
	    return class_probs, best_indices

	# a function that classifies text

	def classify_text(text):
	    """Classify ``text`` segment by segment and return a readable summary.

	    The text is split with ``split_text``; every segment is encoded with the
	    module-level ``tokenizer``, run through the module-level ``model`` with
	    gradients disabled, and reduced to its most probable label.

	    Args:
	        text: The raw text entered by the user.

	    Returns:
	        A string with one line per segment giving the predicted label and
	        its probability, or a short notice when there is nothing to
	        classify. A string is returned because the Gradio interface
	        declares a text output.
	    """
	    segments = split_text(text)
	    if not segments:
	        return "No text to classify."

	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	    # Module.to() moves the parameters in place. Assigning the result back
	    # to `model` would make the name local to this function and raise
	    # UnboundLocalError before the global model is ever read.
	    model.to(device)

	    predictions = []
	    for segment in segments:
	        # truncation=True is a safety net in case re-tokenizing the decoded
	        # segment yields more tokens than the tokenizer's configured maximum.
	        inputs = tokenizer(
	            [segment], padding=True, truncation=True, return_tensors="pt"
	        )
	        input_ids = inputs["input_ids"].to(device)
	        attention_mask = inputs["attention_mask"].to(device)

	        with torch.no_grad():
	            outputs = model(input_ids, attention_mask=attention_mask)

	        probs, preds = extract_predictions(outputs)

	        # The batch holds exactly one segment, so row 0 is the only row.
	        # Index the class axis explicitly; probs[preds[0]] would index the
	        # batch axis instead.
	        label_id = preds[0].item()
	        predictions.append({
	            "segment_text": segment,
	            # Fall back to the numeric id when the config has no label name.
	            "label": model.config.id2label.get(label_id, str(label_id)),
	            "probability": probs[0, label_id].item(),
	        })

	    return "\n".join(
	        f"Segment {index}: {item['label']} ({item['probability']:.2%})"
	        for index, item in enumerate(predictions, start=1)
	    )


	# Gradio UI: one text box in, one text box out, backed by classify_text.
	interface = gr.Interface(
	    classify_text,
	    "text",
	    "text",
	    title="Text Classification Demo",
	    description="Enter some text, and the model will classify it.",
	)

	#interface.launch(server_port=8080)