File size: 4,308 Bytes
ba33d46
 
 
91191df
ba33d46
 
 
9ea2a9b
 
ba33d46
 
 
 
 
 
 
 
 
 
 
 
 
9ea2a9b
ba33d46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ea2a9b
ba33d46
9ea2a9b
 
 
ba33d46
 
 
 
 
9ea2a9b
 
ba33d46
 
9ea2a9b
ba33d46
 
9ea2a9b
 
 
 
 
ba33d46
 
 
 
9ea2a9b
 
 
 
 
ba33d46
 
 
91191df
 
 
 
 
 
 
 
 
 
 
 
 
 
388fe6c
64e7d7e
62cba7e
 
91191df
 
 
da9327b
91191df
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import json
from functools import lru_cache

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Model/tokenizer identifiers for the wine-variety classifier.
MODEL_ID = "spawn99/modernbert-wine-classification"
TOKENIZER_ID = "answerdotai/ModernBERT-base"


@lru_cache(maxsize=1)
def _load_model_and_tokenizer():
    """
    Load the tokenizer and ModernBERT classifier exactly once per process.

    The original implementation reloaded both from the Hugging Face hub on
    every call to run_inference, paying the full model-load cost per request.
    lru_cache memoizes the (tokenizer, model) pair so subsequent inferences
    reuse the already-loaded weights.

    Returns:
        tuple: (tokenizer, model) with the model already in eval mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID)
    # Encoder-only ModernBERT sequence classifier.
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
    model.eval()  # inference-only; set once at load time
    return tokenizer, model


def run_inference(review_text: str) -> str:
    """
    Perform inference on the given wine review text and return the predicted wine variety
    using ModernBERT, an encoder-only classifier from "spawn99/modernbert-wine-classification".

    Args:
        review_text (str): Wine review text in the format "country [SEP] description".

    Returns:
        str: The predicted wine variety using the model's id2label mapping if available,
             otherwise the raw class index as a string.
    """
    tokenizer, model = _load_model_and_tokenizer()

    # Fixed-length encoding matching the model's training setup.
    inputs = tokenizer(
        review_text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=256,
    )

    with torch.no_grad():
        logits = model(**inputs).logits

    # Map the argmax class index to a human-readable label when the config
    # provides one; fall back to the numeric index as a string.
    pred = int(torch.argmax(logits, dim=-1).item())
    id2label = getattr(model.config, "id2label", None) or {}
    return id2label.get(pred, str(pred))


def predict_wine_variety(country: str, description: str, output_format: str) -> str:
    """
    Combine the provided country and description, perform inference, and format the output
    based on the selected output format.

    Enforces a maximum character limit of 750 on the description.

    Args:
        country (str): The country of wine origin.
        description (str): The wine review description.
        output_format (str): Either "JSON" to return output as a JSON-formatted string,
                             or "Text" for plain text output.

    Returns:
        str: The predicted wine variety formatted as JSON (if selected) or as plain text.
    """
    # Decide the output encoding once; the format flag is case-insensitive.
    as_json = output_format.lower() == "json"

    # Guard clause: reject over-long descriptions before running the model.
    if len(description) > 750:
        message = "Description exceeds 750 character limit. Please shorten your input."
        return json.dumps({"error": message}, indent=2) if as_json else message

    # Normalize casing and assemble the "country [SEP] description" input.
    review = f"{country.capitalize()} [SEP] {description.capitalize()}"
    variety = run_inference(review)

    return json.dumps({"Variety": variety}, indent=2) if as_json else variety


if __name__ == "__main__":
    iface = gr.Interface(
        fn=predict_wine_variety,
        inputs=[
            gr.Textbox(label="Country", placeholder="Enter country of origin..."),
            gr.Textbox(label="Description", placeholder="Enter wine review description..."),
            gr.Radio(choices=["JSON", "Text"], value="JSON", label="Output Format")
        ],
        outputs=gr.Textbox(label="Prediction"),
        title="Wine Variety Predictor",
        description=(
            "Predict the wine variety based on the country and wine review.\n\n"
            "This tool uses [ModernBERT](https://huggingface.co/answerdotai/ModernBERT-base), "
            "an encoder-only classifier, trained on the [wine reviews dataset]"
            "(https://huggingface.co/datasets/spawn99/wine-reviews)\n\n"
            "---- You can try a few examples from this dataset, which the model hasn't seen:\n"
            "(https://huggingface.co/datasets/psiyum/winemag-feb-2019?row=27)\n\n"
            "** Resources:**\n"
            "- Repository: [GitHub:cavit99/wine-classifier-modernbert](https://github.com/cavit99/wine-classifier-modernbert)\n"
            "- Model: [spawn99/modernbert-wine-classification](https://huggingface.co/spawn99/modernbert-wine-classification)\n"
            "- Dataset: [spawn99/wine-reviews](https://huggingface.co/datasets/spawn99/wine-reviews)\n\n"
            "*Cavit Erginsoy, 2025*\n"
        )
    )
    iface.launch()