Spaces:
Runtime error
Runtime error
File size: 4,096 Bytes
704dc9c 7360456 48b5e6e 7360456 9609874 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 704dc9c 7360456 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import gradio as gr
import os
# https://huggingface.co/docs/hub/spaces-gpus
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM
from torch.nn.functional import softmax
# import logging
# import pandas as pd
# Save your HF API token from https://hf.co/settings/tokens as the
# "auth_token" environment variable to avoid rate limiting.
auth_token = os.getenv("auth_token")
print("========================================================================")
print("Starting ... gradio_demo_nlp_autocomplete/app.py")
# Never write the secret itself to the logs (Space logs can be read by
# collaborators); only report whether a token is configured.
print("AUTH TOKEN:", "set" if auth_token else "not set")

# The tokenizer and the masked-LM weights are loaded once, at start-up, from
# the Hugging Face Hub (https://hf.co/models) and reused for every request.
model_ref = "projecte-aina/roberta-base-ca-v2"
tokenizer = AutoTokenizer.from_pretrained(model_ref)
model = AutoModelForMaskedLM.from_pretrained(model_ref)
def get_topk(text, tokenizer, model, k):
    """Return the ``k`` most probable vocabulary entries for each mask token.

    Args:
        text: Input passed unchanged to ``tokenizer``.
        tokenizer: Callable that tokenizes ``text`` into PyTorch tensors and
            exposes ``mask_token_id``.
        model: Masked language model; its output must expose ``logits``.
        k: Number of candidates to keep per mask position.

    Returns:
        A pair ``(topk, mask_idx)``. ``topk`` is the result of
        ``Tensor.topk`` (``values`` and ``indices``) with one row per mask
        token found in the input; ``mask_idx`` holds the token position of
        each of those masks. Both are empty when the input has no mask token.
    """
    print("Get top K,", text)

    # Tokenize
    # ==========================================================================================
    tokenizer_kwargs = dict(padding="longest", return_token_type_ids=False, return_tensors="pt")
    inputs = tokenizer(text, **tokenizer_kwargs).to("cpu")
    input_ids = inputs.input_ids

    # Get model outputs and probabilities
    # ==========================================================================================
    # Inference only: disable autograd so no graph is built or kept alive by
    # the returned tensors.
    with torch.no_grad():
        logits = model.to("cpu")(**inputs).logits
        probs = softmax(logits, dim=2)

    # Locate the <mask> positions (one result row per mask token)
    # ==========================================================================================
    row_idx, mask_idx = torch.where(input_ids == tokenizer.mask_token_id)
    return probs[row_idx, mask_idx].topk(k), mask_idx
def generate_output(text, k):
    """Build the candidate -> probability mapping shown in the results widget.

    Args:
        text: Sentence containing a mask token.
        k: Requested number of candidates; converted with ``int`` because the
            numeric input widget may deliver a float.

    Returns:
        A dict mapping each decoded candidate token to its probability. If the
        text contains several mask tokens, only the predictions for the last
        one are returned.

    Raises:
        gr.Error: If ``k`` is not a positive whole number, or if ``text``
            contains no mask token.
    """
    try:
        top_k = int(k)
    except (TypeError, ValueError, OverflowError):
        # e.g. the numeric field was cleared and arrives as None.
        raise gr.Error("Num. results must be a positive whole number.") from None
    if top_k < 1:
        raise gr.Error("Num. results must be a positive whole number.")

    (values, indices), _mask_positions = get_topk(text, tokenizer, model, top_k)

    # Without a mask there is no row to report; fail with a readable message
    # instead of falling through with nothing to return.
    if len(values) == 0:
        raise gr.Error('The text must contain the mask token, written as "<mask>".')

    mask_vals, mask_indices = values[-1], indices[-1]
    return {
        tokenizer.decode(ind): val.item()
        for val, ind in zip(mask_vals, mask_indices)
    }
# Markdown shown at the top of the demo page. The backslashes in front of the
# angle brackets are written as "\\" so they reach the Markdown renderer as
# literal backslashes without relying on an invalid Python escape sequence.
md_text = """
# Masked Language Modeling Example
by [nurasaki](https://huggingface.co/spaces/nurasaki)
* Space : [https://huggingface.co/spaces/nurasaki/gradio_nlp_berta_masked_example](https://huggingface.co/spaces/nurasaki/gradio_nlp_berta_masked_example)
* Model used: Catalan BERTa-v2 (roberta-base-ca-v2) base model
* Hugginface link: [https://huggingface.co/projecte-aina/roberta-base-ca-v2](https://huggingface.co/projecte-aina/roberta-base-ca-v2)
<br>
## Model description
The **roberta-base-ca-v2** is a transformer-based masked language model for the Catalan language.
It is based on the [RoBERTA](https://github.com/pytorch/fairseq/tree/master/examples/roberta) base model and has been trained on a medium-size corpus collected from publicly available corpora and crawlers.
<br>
## Usage
The model accepts an input text with a *mask* (for example, "La meva mare es diu \\<mask\\>.") and generates the *k* most probable words that could fill the *mask* position in the sentence.
Choose one of the provided examples or enter your own masked text.
<br>
"""
# Ready-made prompts offered below the form; every sentence ends with the
# mask token followed by a full stop.
examples = [
    f"{stem} <mask>."
    for stem in (
        "La meva mare es diu",
        "La meva mare treballa de",
        "El meu fill es diu",
        "El teu pare treballa de",
    )
]
# Page layout: the description on top, then a row with the inputs on the left
# and the predictions on the right.
with gr.Blocks() as demo:
    gr.Markdown(value=md_text)

    with gr.Row():
        # Left column: masked sentence, number of candidates, trigger button.
        with gr.Column():
            text = gr.Textbox(value="La meva mare es diu <mask>.", label="Masked text")
            k = gr.Number(label="Num. results", value=10)
            btn = gr.Button(value="Generate")

        # Right column: candidate tokens with their probabilities.
        with gr.Column():
            out_label = gr.Label(label="Results")

    # NOTE(review): the source indentation was lost; the click handler and the
    # examples are assumed to sit directly under the Blocks context - confirm.
    btn.click(fn=generate_output, inputs=[text, k], outputs=[out_label])
    gr.Examples(examples=examples, inputs=[text])

# The `if __name__ == "__main__":` guard is disabled in this file, so the
# server is started whenever the module is executed or imported.
demo.launch(favicon_path="favicon.png")
|