Spaces:

yashAI007
/

English-To-Gujarati-Language-Translation

Running

English-To-Gujarati-Language-Translation / app.py

yash

add print

ab3b5e0 7 months ago

6.69 kB

	import torch
	import torch.onnx
	from transformer import Transformer
	import torch
	from huggingface_hub import hf_hub_download
	import torch
	import numpy as np
	import gradio as gr


	# Character vocabularies for the source (English) and target (Gujarati) side.
	# Originally generated by filtering the Appendix code.
	# The position of every entry is its token id, so entries must never be
	# reordered, inserted or removed.
	START_TOKEN = '<START>'
	PADDING_TOKEN = '<PADDING>'
	END_TOKEN = '<END>'


	english_vocabulary = [
	    START_TOKEN, ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/',
	    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	    ':', '<', '=', '>', '?', '@',
	    '[', '\\', ']', '^', '_', '`',
	    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
	    'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
	    'y', 'z',
	    # '|' is a plain pipe: the former '\|' was an invalid escape sequence that
	    # produced a two-character entry and left '|' out of the vocabulary.
	    '{', '|', '}', '~', PADDING_TOKEN, END_TOKEN,
	]


	gujarati_vocabulary = [
	    START_TOKEN, ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/',
	    '૦', '૧', '૨', '૩', '૪', '૫', '૬', '૭', '૮', '૯',
	    ':', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`',
	    'અ', 'આ', 'ઇ', 'ઈ', 'ઉ', 'ઊ', 'ઋ', 'એ', 'ઐ', 'ઓ', 'ઔ',
	    'ક', 'ખ', 'ગ', 'ઘ', 'ઙ', 'ચ', 'છ', 'જ', 'ઝ', 'ઞ',
	    'ટ', 'ઠ', 'ડ', 'ઢ', 'ણ', 'ત', 'થ', 'દ', 'ધ', 'ન',
	    'પ', 'ફ', 'બ', 'ભ', 'મ', 'ય', 'ર', 'લ', 'વ', 'શ',
	    'ષ', 'સ', 'હ', 'ળ', 'ક્ષ', 'જ્ઞ', 'ં', 'ઃ', 'ઁ', 'ા',
	    'િ', 'ી', 'ુ', 'ૂ', 'ે', 'ૈ', 'ો', 'ૌ', '્', 'ૐ',
	    # Same '\|' -> '|' correction as in the English vocabulary.
	    '{', '|', '}', '~', PADDING_TOKEN, END_TOKEN,
	]

	# Lookup tables in both directions: token id -> token and token -> token id.
	index_to_gujarati = dict(enumerate(gujarati_vocabulary))
	gujarati_to_index = {token: index for index, token in enumerate(gujarati_vocabulary)}
	index_to_english = dict(enumerate(english_vocabulary))
	english_to_index = {token: index for index, token in enumerate(english_vocabulary)}

	# Hyper-parameters handed positionally to Transformer below.
	# NOTE(review): presumably these must match the values the checkpoint was
	# trained with, otherwise load_state_dict would fail - confirm.
	d_model = 512
	# batch_size = 64
	ffn_hidden = 2048
	num_heads = 8
	drop_prob = 0.1
	num_layers = 6
	max_sequence_length = 200
	kn_vocab_size = len(gujarati_vocabulary)
	# Inference is pinned to the CPU; the CUDA-aware alternative is kept for reference.
	# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
	device = torch.device('cpu')

	transformer = Transformer(
	    d_model,
	    ffn_hidden,
	    num_heads,
	    drop_prob,
	    num_layers,
	    max_sequence_length,
	    kn_vocab_size,
	    english_to_index,
	    gujarati_to_index,
	    START_TOKEN,
	    END_TOKEN,
	    PADDING_TOKEN,
	)

	# Fetch the trained checkpoint from the Hugging Face Hub and restore the weights.
	model_file = hf_hub_download(repo_id="yashAI007/English_to_Gujarati_Translation", filename="model.pth")
	# NOTE(review): torch.load unpickles the downloaded file; consider passing
	# weights_only=True once the minimum supported torch version allows it.
	# Map tensors onto `device` (rather than a hard-coded 'cpu') so the checkpoint
	# and the model cannot end up on different devices if `device` is changed.
	model = torch.load(model_file, map_location=device)
	transformer.load_state_dict(model['model_state_dict'])
	transformer.to(device)
	# Switch to evaluation mode for inference.
	transformer.eval()


	# Large negative value placed at masked attention positions.
	NEG_INFTY = -1e9


	def create_masks(eng_batch, kn_batch):
	    """Build the three attention masks for a batch of sentence pairs.

	    Each mask has shape
	    [len(eng_batch), max_sequence_length, max_sequence_length] and holds
	    NEG_INFTY at masked positions and 0 everywhere else.

	    Args:
	        eng_batch: sequence of source sentences; only their lengths are used.
	        kn_batch: sequence of target sentences, parallel to ``eng_batch``;
	            only their lengths are used.

	    Returns:
	        A tuple ``(encoder_self_attention_mask, decoder_self_attention_mask,
	        decoder_cross_attention_mask)``.
	    """
	    batch_size = len(eng_batch)
	    seq_len = max_sequence_length
	    mask_shape = [batch_size, seq_len, seq_len]

	    # True strictly above the diagonal: position i may not attend to j > i.
	    causal = torch.triu(torch.full([seq_len, seq_len], True), diagonal=1)

	    enc_pad = torch.full(mask_shape, False)
	    dec_self_pad = torch.full(mask_shape, False)
	    dec_cross_pad = torch.full(mask_shape, False)

	    for row in range(batch_size):
	        # Everything from index (sentence length + 1) onwards is padding.
	        eng_from = len(eng_batch[row]) + 1
	        kn_from = len(kn_batch[row]) + 1

	        # Encoder self-attention: padded source columns and rows.
	        enc_pad[row, :, eng_from:] = True
	        enc_pad[row, eng_from:, :] = True

	        # Decoder self-attention: padded target columns and rows.
	        dec_self_pad[row, :, kn_from:] = True
	        dec_self_pad[row, kn_from:, :] = True

	        # Cross-attention: padded source columns, padded target rows.
	        dec_cross_pad[row, :, eng_from:] = True
	        dec_cross_pad[row, kn_from:, :] = True

	    # The decoder self-attention mask combines the causal and padding masks.
	    dec_self_blocked = torch.logical_or(causal, dec_self_pad)

	    return (
	        torch.where(enc_pad, NEG_INFTY, 0),
	        torch.where(dec_self_blocked, NEG_INFTY, 0),
	        torch.where(dec_cross_pad, NEG_INFTY, 0),
	    )

	# Keep the model in evaluation mode for inference.
	transformer.eval()


	def translate(eng_sentence):
	    """Translate an English sentence to Gujarati by greedy decoding.

	    The input is lower-cased and the output is grown one token at a time:
	    each step re-runs the model on the text produced so far and appends the
	    highest-scoring token, until END_TOKEN is produced or
	    max_sequence_length steps have run.

	    Args:
	        eng_sentence: the English text to translate.

	    Returns:
	        The generated Gujarati text without the trailing END_TOKEN.
	    """
	    print("English Sentence:", eng_sentence)
	    eng_sentence = (eng_sentence.lower(),)
	    kn_sentence = ("",)

	    # Pure inference: skip autograd bookkeeping on every decoding step.
	    with torch.no_grad():
	        for word_counter in range(max_sequence_length):
	            (encoder_self_attention_mask,
	             decoder_self_attention_mask,
	             decoder_cross_attention_mask) = create_masks(eng_sentence, kn_sentence)
	            predictions = transformer(
	                eng_sentence,
	                kn_sentence,
	                encoder_self_attention_mask.to(device),
	                decoder_self_attention_mask.to(device),
	                decoder_cross_attention_mask.to(device),
	                enc_start_token=False,
	                enc_end_token=False,
	                dec_start_token=True,
	                dec_end_token=False,
	            )
	            # Scores for the position being generated in this step.
	            next_token_prob_distribution = predictions[0][word_counter]
	            next_token_index = torch.argmax(next_token_prob_distribution).item()
	            next_token = index_to_gujarati[next_token_index]
	            kn_sentence = (kn_sentence[0] + next_token,)
	            if next_token == END_TOKEN:
	                break

	    translation = kn_sentence[0]
	    # Strip END_TOKEN only when it is really there: if the loop ran out of
	    # steps without emitting it, a blind [:-5] would cut off real output.
	    if translation.endswith(END_TOKEN):
	        translation = translation[:-len(END_TOKEN)]

	    print("Gujarati Sentence:", translation, '\n')
	    return translation

	# Text shown under the title of the web UI.
	description = "This tool translates English sentences into Gujarati. Please enter your text above to get started!"

	# Sample inputs offered in the UI; each example is a one-element row
	# because the interface has a single input.
	examples = [
	    [sentence]
	    for sentence in (
	        "Hello, how are you?",
	        "What is your name?",
	        "I love programming.",
	        "This is a beautiful day.",
	        "Can you help me with this?",
	        "What time is it?",
	        "I am learning data science.",
	        "Where is the nearest bus stop?",
	        "I enjoy reading books.",
	        "Thank you for your help.",
	    )
	]

	# Text-in / text-out Gradio interface wrapped around translate().
	iface = gr.Interface(
	    fn=translate,
	    inputs="text",
	    outputs="text",
	    title="English to Gujarati Translation",
	    description=description,
	    examples=examples,
	)

	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    iface.launch()