|
import gradio as gr |
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer |
|
import torch |
|
import pickle |
|
|
|
|
|
# Identifier handed to both from_pretrained() calls so the classifier and its
# tokenizer are loaded from the same checkpoint.
model_name = "peterkros/cofogv1-bert"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# The label encoder is read from a relative path, i.e. resolved against the
# current working directory. predict() uses it to turn class indices into labels.
# NOTE: pickle.load can execute arbitrary code; only ship a trusted file here.
with open("label_encoder.pkl", "rb") as encoder_file:
    label_encoder = pickle.load(encoder_file)
|
|
|
def predict(text: str):
    """Classify one budget line and return its decoded label.

    The text is tokenized (truncated to at most 512 tokens), passed through
    the module-level ``model`` with gradient tracking disabled, and the index
    of the highest-scoring class is mapped back to a label through
    ``label_encoder.inverse_transform``.

    Args:
        text: The budget line to classify.

    Returns:
        The decoded label of the top-scoring class, or an empty string when
        ``text`` is ``None``, empty, or whitespace only.
    """
    # A blank submission carries nothing to classify; without this guard the
    # model would still be run and an arbitrary label shown to the user.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""

    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )

    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax is monotonic, so the argmax of the raw logits picks the same
    # class as the argmax of the probabilities would.
    class_index = torch.argmax(logits, dim=-1).item()
    return label_encoder.inverse_transform([class_index])[0]
|
|
|
|
|
# Bullet list passed to gr.Interface as its description.
markdown_text = """
- Trained with ~1500 rows of data on bert-base-uncased, 110M, English.
- Input one budget line per time.
- Accuracy of the model is ~72%.
"""
|
|
|
|
|
# Single-textbox UI around predict(). The input component is the top-level
# gr.Textbox: the legacy gr.inputs namespace was deprecated in Gradio 3 and
# removed in Gradio 4, where gr.inputs.Textbox raises AttributeError.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter Budget line here..."),
    outputs="text",
    title="COFOG Level 1 Classification",
    description=markdown_text,
)


# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
|
|