import pickle

import gradio as gr
import torch
from transformers import AutoTokenizer, BertForSequenceClassification

# Load the fine-tuned BERT classifier and its tokenizer from the bundled model directory
model_path = "modelbert2"
model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the label encoder that maps class indices back to COFOG level 1 labels
with open("peterkros/cofogv1-bert/label_encoder.pkl", "rb") as file:
    label_encoder = pickle.load(file)
def predict(text):
    # Tokenize the budget line and run it through the classifier
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    # Convert logits to probabilities and take the most likely class index
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted_class = torch.argmax(probs, dim=-1).item()
    # Map the class index back to its COFOG level 1 label
    predicted_label = label_encoder.inverse_transform([predicted_class])[0]
    return predicted_label
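# Quick sanity check when experimenting outside the Gradio UI. The input string is a
# hypothetical example; the returned label depends on the trained model and label encoder.
# print(predict("Maintenance of local roads and bridges"))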
# Define the markdown text with bullet points
markdown_text = """
- Trained on ~1,500 rows of data with bert-base-uncased (110M parameters, English).
- Enter one budget line at a time.
- Model accuracy is ~72%.
"""
# Define the interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=1, placeholder="Enter Budget line here..."),
    outputs="text",
    title="COFOG Level 1 Classification",
    description=markdown_text,  # Markdown bullet points shown under the title
)

# Run the interface
if __name__ == "__main__":
    iface.launch()
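# To run locally (assuming the model directory and the label encoder pickle exist at
# the paths used above): `python app.py`. Gradio serves the UI at
# http://127.0.0.1:7860 by default; on Hugging Face Spaces the app is launched the
# same way when the Space starts.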