File size: 952 Bytes
d3a1f2f
ee679e3
5124fa1
ee679e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import gradio as gr 
from transformers import pipeline, AutoModelForSeq2SeqLM, MBart50Tokenizer, AutoTokenizer

# Hugging Face Hub repository that provides both the tokenizer and the
# sequence-to-sequence weights used by this app.
MODEL_ID = "jafrilalam/bangla_sentence_correction_01"

# Fast tokenizer; source and target language codes are both set to ``bn_IN``.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    src_lang="bn_IN",
    tgt_lang="bn_IN",
    use_fast=True,
)

# Seq2seq model loaded from safetensors weights.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID, use_safetensors=True)

# Fallback cap on sequence length (in tokens), used only when the model config
# does not expose ``max_position_embeddings``.
_FALLBACK_MAX_TOKENS = 512


def correct_text(given_sentence):
    """Return the model's corrected version of ``given_sentence``.

    Args:
        given_sentence: Text to correct. ``None``, empty, or whitespace-only
            input short-circuits to an empty string without calling the model.

    Returns:
        The first generated sequence decoded to a string, with special tokens
        skipped, or ``""`` when there is nothing to correct.
    """
    # Nothing to correct; also avoids asking ``generate`` for zero new tokens.
    if not given_sentence or not given_sentence.strip():
        return ""

    # Token budgets must be expressed in tokens, not characters. Bound them by
    # whatever limit the model/tokenizer advertise so that long inputs are
    # truncated instead of overflowing the model.
    position_limit = getattr(model.config, "max_position_embeddings", None)
    max_tokens = min(
        position_limit or _FALLBACK_MAX_TOKENS,
        tokenizer.model_max_length,
    )

    inputs = tokenizer.encode(
        given_sentence,
        truncation=True,
        return_tensors="pt",
        max_length=max_tokens,
    )

    # Keep the original character-count budget for ordinary inputs, but never
    # go below the encoded input length (so very short inputs still get room
    # to be generated) and never above the sequence-length cap.
    encoded_length = inputs.shape[1]
    new_token_budget = min(max_tokens, max(len(given_sentence), encoded_length))

    output_ids = model.generate(
        inputs,
        max_new_tokens=new_token_budget,
        early_stopping=True,
    )

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Multi-line box where the user types the sentence to be corrected.
sentence_input = gr.Textbox(lines=4, label="Incorrect Bangla Sentence")

# Box that displays the text returned by ``correct_text``.
sentence_output = gr.Textbox(label="Corrected Bangla Sentence")

# Wire the correction function to a single-input, single-output web UI.
iface = gr.Interface(
    fn=correct_text,
    inputs=sentence_input,
    outputs=sentence_output,
)

# Start the app and request a public share link.
iface.launch(share=True)