# Revision metadata: author "yash", commit message "add print", commit ab3b5e0.
import torch
import torch.onnx
from transformer import Transformer
import torch
from huggingface_hub import hf_hub_download
import torch
import numpy as np
import gradio as gr
# Character-level vocabularies for the English and Gujarati text.
# Original author's note: generated by filtering the "Appendix code".
#
# NOTE: list order is significant. The lookup tables further down are built by
# enumerating these lists, so inserting, removing or reordering an entry
# changes every token id after it. Presumably the ids must stay in sync with
# the ones the downloaded checkpoint was trained with -- confirm before editing.

# Special marker tokens; each is stored as a single vocabulary entry.
START_TOKEN = '<START>'
PADDING_TOKEN = '<PADDING>'
END_TOKEN = '<END>'
# English entries: ASCII punctuation, digits and lower-case letters only.
# There are no upper-case letters and no ';' in this table.
english_vocabulary = [START_TOKEN, ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
':', '<', '=', '>', '?', '@',
'[', '\\', ']', '^', '_', '`',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
'y', 'z',
'{', '|', '}', '~', PADDING_TOKEN, END_TOKEN]
# Gujarati entries: the same ASCII punctuation, Gujarati digits, independent
# vowels, consonants, dependent vowel signs and other marks. Note that the
# conjunct entries 'ક્ષ' and 'જ્ઞ' are strings of more than one code point,
# unlike every other non-special entry.
gujarati_vocabulary = [
START_TOKEN, ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/',
'૦', '૧', '૨', '૩', '૪', '૫', '૬', '૭', '૮', '૯',
':', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`',
'અ', 'આ', 'ઇ', 'ઈ', 'ઉ', 'ઊ', 'ઋ', 'એ', 'ઐ', 'ઓ', 'ઔ',
'ક', 'ખ', 'ગ', 'ઘ', 'ઙ', 'ચ', 'છ', 'જ', 'ઝ', 'ઞ',
'ટ', 'ઠ', 'ડ', 'ઢ', 'ણ', 'ત', 'થ', 'દ', 'ધ', 'ન',
'પ', 'ફ', 'બ', 'ભ', 'મ', 'ય', 'ર', 'લ', 'વ', 'શ',
'ષ', 'સ', 'હ', 'ળ', 'ક્ષ', 'જ્ઞ', 'ં', 'ઃ', 'ઁ', 'ા',
'િ', 'ી', 'ુ', 'ૂ', 'ે', 'ૈ', 'ો', 'ૌ', '્', 'ૐ',
'{', '|', '}', '~', PADDING_TOKEN, END_TOKEN
]
# Lookup tables between tokens and integer ids. An id is simply the token's
# position in its vocabulary list; each "*_to_index" table is the inverse of
# the matching "index_to_*" table.
index_to_gujarati = dict(enumerate(gujarati_vocabulary))
gujarati_to_index = {token: idx for idx, token in index_to_gujarati.items()}
index_to_english = dict(enumerate(english_vocabulary))
english_to_index = {token: idx for idx, token in index_to_english.items()}
# Model hyper-parameters passed to the Transformer constructor below.
# Presumably they must match the configuration the downloaded checkpoint was
# trained with -- confirm before changing any of them.
# (A training-time batch size of 64 was left disabled by the original author;
# it is not used here.)
d_model = 512
ffn_hidden = 2048
num_heads = 8
drop_prob = 0.1
num_layers = 6
max_sequence_length = 200
kn_vocab_size = len(gujarati_vocabulary)

# Inference is pinned to the CPU; automatic CUDA selection was disabled by the
# original author.
device = torch.device('cpu')

transformer = Transformer(
    d_model,
    ffn_hidden,
    num_heads,
    drop_prob,
    num_layers,
    max_sequence_length,
    kn_vocab_size,
    english_to_index,
    gujarati_to_index,
    START_TOKEN,
    END_TOKEN,
    PADDING_TOKEN,
)

# Fetch the checkpoint file from the Hugging Face Hub.
model_file = hf_hub_download(
    repo_id="yashAI007/English_to_Gujarati_Translation",
    filename="model.pth",
)

# NOTE(review): torch.load unpickles the downloaded file, so the repository
# above must be trusted. `model` is the checkpoint dictionary, not a module.
model = torch.load(model_file, map_location=device)
transformer.load_state_dict(model['model_state_dict'])
transformer.to(device)
# Switch to evaluation mode for inference.
transformer.eval()
# Value written into blocked positions of the masks built by create_masks.
NEG_INFTY = -1e9


def create_masks(eng_batch, kn_batch, max_length=None):
    """Build the three attention masks for a batch of sentence pairs.

    Sentence positions are measured with ``len()``. For every pair, positions
    from ``len(sentence) + 1`` up to ``max_length`` are treated as padding.

    Args:
        eng_batch: Sequence of source sentences (anything supporting ``len``).
        kn_batch: Sequence of target sentences; must have at least as many
            entries as ``eng_batch``.
        max_length: Side length of the square masks. Defaults to the
            module-level ``max_sequence_length`` when omitted.

    Returns:
        A tuple ``(encoder_self_attention_mask, decoder_self_attention_mask,
        decoder_cross_attention_mask)``. Each tensor has shape
        ``(len(eng_batch), max_length, max_length)`` and holds ``NEG_INFTY``
        at blocked positions and ``0`` everywhere else.
    """
    if max_length is None:
        max_length = max_sequence_length
    num_sentences = len(eng_batch)

    # True strictly above the diagonal: a position may not see later ones.
    look_ahead_mask = torch.triu(
        torch.full([max_length, max_length], True), diagonal=1
    )

    mask_shape = [num_sentences, max_length, max_length]
    encoder_padding_mask = torch.full(mask_shape, False)
    decoder_padding_mask_self_attention = torch.full(mask_shape, False)
    decoder_padding_mask_cross_attention = torch.full(mask_shape, False)

    for idx, eng_sentence in enumerate(eng_batch):
        # First padded position for each side. A slice that starts past the
        # end is simply empty, so over-long sentences mask nothing.
        eng_pad_start = len(eng_sentence) + 1
        kn_pad_start = len(kn_batch[idx]) + 1

        # Encoder self-attention: block padded columns and padded rows.
        encoder_padding_mask[idx, :, eng_pad_start:] = True
        encoder_padding_mask[idx, eng_pad_start:, :] = True
        # Decoder self-attention: the same, using the target-side length.
        decoder_padding_mask_self_attention[idx, :, kn_pad_start:] = True
        decoder_padding_mask_self_attention[idx, kn_pad_start:, :] = True
        # Cross-attention: columns follow the source length, rows the target.
        decoder_padding_mask_cross_attention[idx, :, eng_pad_start:] = True
        decoder_padding_mask_cross_attention[idx, kn_pad_start:, :] = True

    encoder_self_attention_mask = torch.where(encoder_padding_mask, NEG_INFTY, 0)
    # Blocked if either the look-ahead rule or the padding rule applies; the
    # 2-D look-ahead mask broadcasts over the batch dimension.
    decoder_self_attention_mask = torch.where(
        look_ahead_mask | decoder_padding_mask_self_attention, NEG_INFTY, 0
    )
    decoder_cross_attention_mask = torch.where(
        decoder_padding_mask_cross_attention, NEG_INFTY, 0
    )
    return (
        encoder_self_attention_mask,
        decoder_self_attention_mask,
        decoder_cross_attention_mask,
    )
def translate(eng_sentence):
    """Translate an English sentence into Gujarati with greedy decoding.

    The input is lower-cased (the English vocabulary has no upper-case
    letters) and the output is grown one predicted token at a time until the
    model emits ``END_TOKEN`` or the text reaches ``max_sequence_length``
    characters.

    Args:
        eng_sentence: English text to translate.

    Returns:
        The decoded Gujarati text without the end marker. ``None``, empty
        or whitespace-only input yields an empty string.

    NOTE(review): how ``transformer`` handles characters outside the
    vocabulary, or inputs longer than ``max_sequence_length``, is not visible
    from this file -- confirm against the Transformer implementation.
    """
    print("English Sentence:", eng_sentence)
    if not eng_sentence or not eng_sentence.strip():
        # Nothing to translate; skip the model instead of decoding noise.
        print("Gujarati Sentence:", "", '\n')
        return ""

    eng_batch = (eng_sentence.lower(),)
    translated = ""

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        while len(translated) < max_sequence_length:
            kn_batch = (translated,)
            (encoder_self_attention_mask,
             decoder_self_attention_mask,
             decoder_cross_attention_mask) = create_masks(eng_batch, kn_batch)
            predictions = transformer(eng_batch,
                                      kn_batch,
                                      encoder_self_attention_mask.to(device),
                                      decoder_self_attention_mask.to(device),
                                      decoder_cross_attention_mask.to(device),
                                      enc_start_token=False,
                                      enc_end_token=False,
                                      dec_start_token=True,
                                      dec_end_token=False)
            # Read the prediction at the position right after the text decoded
            # so far. create_masks measures positions with len(), so the
            # decoded length -- not the iteration count -- is the matching
            # index; the two differ once a multi-code-point vocabulary entry
            # has been emitted.
            next_position = len(translated)
            next_token_index = torch.argmax(predictions[0][next_position]).item()
            next_token = index_to_gujarati[next_token_index]
            if next_token == END_TOKEN:
                # Stop without appending the marker, so nothing has to be
                # sliced off afterwards (and nothing is lost when the loop
                # ends on the length limit instead).
                break
            translated += next_token

    print("Gujarati Sentence:", translated, '\n')
    return translated
# Sample inputs shown in the UI. Each example is wrapped in its own
# single-element list, one entry per input component.
examples = [
    [sentence]
    for sentence in (
        "Hello, how are you?",
        "What is your name?",
        "I love programming.",
        "This is a beautiful day.",
        "Can you help me with this?",
        "What time is it?",
        "I am learning data science.",
        "Where is the nearest bus stop?",
        "I enjoy reading books.",
        "Thank you for your help.",
    )
]

description = (
    "This tool translates English sentences into Gujarati. "
    "Please enter your text above to get started!"
)

# Text-in / text-out web UI around translate().
iface = gr.Interface(
    fn=translate,
    inputs="text",
    outputs="text",
    title="English to Gujarati Translation",
    description=description,
    examples=examples,
)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()