File size: 1,409 Bytes
dffac30
1d4525b
dffac30
1d4525b
dffac30
1d4525b
047df6f
1d4525b
 
dffac30
 
1d4525b
dffac30
 
 
 
 
 
 
 
 
 
 
 
 
1d4525b
 
79860af
dffac30
 
 
 
 
1d4525b
dffac30
 
 
 
 
 
 
1d4525b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import pickle

# Load the fine-tuned sequence-classification model and its tokenizer from
# the Hugging Face Hub. Both downloads happen at import time, so the first
# launch may take a while and requires network access (or a warm HF cache).
model_name = "peterkros/cofogv1-bert"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the fitted label encoder used to map class indices back to COFOG
# category names. NOTE(review): pickle.load executes arbitrary code from the
# file — safe only because 'label_encoder.pkl' ships with this app; never
# point this at untrusted input. Presumably a sklearn LabelEncoder — confirm.
with open('label_encoder.pkl', 'rb') as file:
    label_encoder = pickle.load(file)

def predict(text):
    """Classify a single budget line and return its COFOG level-1 label.

    The input is tokenized (truncated to the model's 512-token limit),
    scored by the BERT classifier, and the highest-probability class index
    is decoded back to a human-readable label via the label encoder.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        logits = model(**encoded).logits
    # Softmax is monotonic, so argmax over probabilities equals argmax over
    # logits; kept for parity with the original implementation.
    probabilities = torch.nn.functional.softmax(logits, dim=-1)
    class_idx = int(torch.argmax(probabilities, dim=-1))
    return label_encoder.inverse_transform([class_idx])[0]

# Markdown shown in the Gradio interface description: brief model card
# (training data size, base model, usage note, reported accuracy).
markdown_text = """
- Trained with ~1500 rows of data on bert-base-uncased, 110M, English.
- Input one budget line per time.
- Accuracy of the model is ~72%.
"""

# Define the Gradio interface.
# Fix: gr.inputs.Textbox is the legacy Gradio 1.x/2.x namespace — it was
# deprecated in Gradio 3.x and removed entirely in 4.x, which makes this
# script crash with AttributeError on current Gradio. The modern top-level
# gr.Textbox component takes the same arguments.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Enter Budget line here..."),
    outputs="text",
    title="COFOG Level 1 Classification",
    description=markdown_text,  # markdown bullet list rendered under the title
)

# Launch the web UI only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()