Spaces:
Runtime error
Runtime error
File size: 6,683 Bytes
4beca16 c4c201c bbc0f33 c4c201c ad573e3 c4c201c 5cf02ff c4c201c 5cf02ff c4c201c 5cf02ff c4c201c 5cf02ff c4c201c 5cf02ff c4c201c 5cf02ff c4c201c 5cf02ff c4c201c 5cf02ff c4c201c 5cf02ff c4c201c da7d7ed c4c201c da7d7ed 4beca16 c4c201c 4beca16 c4c201c 3f4170c c4c201c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
#Import libraries
import pytesseract
from PIL import Image, ImageFont, ImageDraw
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import matplotlib.pyplot as plt
import keras_ocr
import cv2
import math
import numpy as np
import gradio as gr
import numpy as np
# Supported target languages: Hindi, Spanish, German, French, Turkish, Arabic,
# and Irish (Gaelic). Each checkpoint below is an English -> target
# "opus-mt-en-*" model, and all of them are loaded once at import time.
def _load_translation_pair(checkpoint):
    """Return the (tokenizer, model) pair loaded from *checkpoint*."""
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    return loaded_tokenizer, loaded_model


# Hindi
tokenizerhi, modelhi = _load_translation_pair("Helsinki-NLP/opus-mt-en-hi")
# Spanish
tokenizeres, modeles = _load_translation_pair("Helsinki-NLP/opus-mt-en-es")
# German
tokenizerde, modelde = _load_translation_pair("Helsinki-NLP/opus-mt-en-de")
# French
tokenizerfr, modelfr = _load_translation_pair("Helsinki-NLP/opus-mt-en-fr")
# Turkish
tokenizertrk, modeltrk = _load_translation_pair("Helsinki-NLP/opus-mt-en-trk")
# Arabic
tokenizerar, modelar = _load_translation_pair("Helsinki-NLP/opus-mt-en-ar")
# Irish (Gaelic)
tokenizerga, modelga = _load_translation_pair("Helsinki-NLP/opus-mt-en-ga")
# Select the translation resources for the requested language
def choose_language(language):
    """Return the preloaded (tokenizer, model) pair for *language*.

    The names 'hindi', 'spanish', 'german', 'french', 'turkish' and 'arabic'
    map to their own pairs. Any other value (including 'irish') falls back
    to the Irish pair.
    """
    known_pairs = (
        ('hindi', (tokenizerhi, modelhi)),
        ('spanish', (tokenizeres, modeles)),
        ('german', (tokenizerde, modelde)),
        ('french', (tokenizerfr, modelfr)),
        ('turkish', (tokenizertrk, modeltrk)),
        ('arabic', (tokenizerar, modelar)),
    )
    for name, pair in known_pairs:
        if language == name:
            return pair
    # Fallback for everything else: the Irish tokenizer and model.
    return tokenizerga, modelga
# Translate English text into the requested language
def translator(text, lang):
    """Translate English *text* into the language named by *lang*.

    Args:
        text: English text; it may span several lines.
        lang: Language name understood by choose_language().

    Returns:
        The decoded translation as a single string, with special tokens
        removed.
    """
    # The text is translated as one sequence, so fold any line breaks into
    # spaces first.
    if '\n' in text:
        text = ' '.join(text.splitlines())
    tokenizer, model = choose_language(lang)
    # truncation=True caps the input at the tokenizer's maximum length, so a
    # very long OCR result is shortened instead of making generate() fail.
    # NOTE(review): the en-trk and en-ar checkpoints look like multi-target
    # models; their model cards may require a leading ">>lang<<" token in the
    # input text -- confirm before relying on Turkish/Arabic output.
    input_ids = tokenizer.encode(
        text, return_tensors="pt", padding=True, truncation=True
    )
    outputs = model.generate(input_ids)
    # generate() returns a batch; there is exactly one input sequence here.
    decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded_text
# Integer midpoint between two points
def midpoint(x1, y1, x2, y2):
    """Return the midpoint of (x1, y1) and (x2, y2) as an (x, y) tuple of ints.

    Each coordinate sum is halved with true division and then passed through
    int(), so the result is truncated toward zero.
    """
    return tuple(int(total / 2) for total in (x1 + x2, y1 + y2))
# Build the keras-ocr pipeline once at import time; img_text_cords() reads
# this module-level object when it calls pipeline.recognize().
pipeline = keras_ocr.pipeline.Pipeline()
# Locate the text inside an image and paint over it
# The cleaned area is later reused as background for the translated text
def img_text_cords(im): #, pipeline):
    """Return the image at *im* with its detected text regions inpainted.

    Args:
        im: Image source accepted by keras_ocr.tools.read (the app passes a
            file path).

    Returns:
        The image array after cv2.inpaint has filled the masked text areas,
        or the unmodified array when no text box was detected.
    """
    img = keras_ocr.tools.read(im)
    # recognize() takes a batch of images; entry 0 holds the (word, box)
    # tuples for the single image passed in.
    prediction_groups = pipeline.recognize([img])
    detections = prediction_groups[0]
    if not detections:
        # Nothing to erase, so skip the inpainting pass entirely.
        return img

    mask = np.zeros(img.shape[:2], dtype="uint8")
    for detection in detections:
        corners = detection[1]
        x0, y0 = corners[0]
        x1, y1 = corners[1]
        x2, y2 = corners[2]
        x3, y3 = corners[3]
        # Draw one thick stroke between the midpoints of two opposite box
        # edges; the stroke width is the length of the edge it starts from.
        x_mid0, y_mid0 = midpoint(x1, y1, x2, y2)
        x_mid1, y_mid1 = midpoint(x0, y0, x3, y3)
        # cv2.line requires a positive thickness, so a box whose edge is
        # shorter than one pixel is clamped to 1 instead of raising.
        thickness = max(1, int(math.hypot(x2 - x1, y2 - y1)))
        cv2.line(mask, (x_mid0, y_mid0), (x_mid1, y_mid1), 255, thickness)

    # Inpaint once, after the mask covers every detected box.
    img = cv2.inpaint(img, mask, 7, cv2.INPAINT_NS)
    return img
# Read the text out of an image with OCR
def text_extract(im):
    """Return the string pytesseract.image_to_string produces for *im*."""
    return pytesseract.image_to_string(im)
# Translate the text and lay it out over several lines
# Short lines help the translated text fit on the image
def format_text(language,extracted_text):
    """Translate *extracted_text* into *language* and wrap it five words per line.

    The first word of every group of five is prefixed with a newline and the
    remaining words with a space, so the returned string begins with a
    newline whenever the translation contains at least one word.
    """
    translated_words = translator(extracted_text, language).split()
    pieces = [
        ('\n' if position % 5 == 0 else ' ') + word
        for position, word in enumerate(translated_words)
    ]
    return ''.join(pieces)
def translate_image(im, language):
    """Replace the English text in an image with its translation.

    Args:
        im: Image to process; it is handed to text_extract() and
            img_text_cords() (the Gradio input supplies a file path).
        language: Target language name, passed on to format_text().

    Returns:
        A PIL image with the original text inpainted away and the translated
        text drawn from the top-left corner.
    """
    # Extract the text, then translate it and wrap it into short lines.
    extracted_text = text_extract(im)
    # A Unicode-capable font is needed for non-Latin scripts such as Hindi.
    title_font = ImageFont.truetype('./arial-unicode-ms.ttf',30)
    txt = format_text(language,extracted_text)
    # Remove the original text from the picture.
    img_returned = img_text_cords(im)
    # Convert the array straight to a PIL image instead of writing a JPEG with
    # a fixed file name and reopening it: that round trip re-compressed the
    # picture and let concurrent requests overwrite each other's file.
    # The array is in RGB order here (keras_ocr.tools.read converts to RGB),
    # which is the channel order PIL expects.
    new_image = Image.fromarray(img_returned)
    # Draw the translated text onto the cleaned image.
    image_editable = ImageDraw.Draw(new_image)
    image_editable.multiline_text(
        (10,10), txt, spacing=2, font=title_font, fill=(237, 230, 211)
    )  # fill is the text colour, e.g. (0, 0, 0) for black
    return new_image
# Texts shown on the Gradio demo page.
title = "Translate English Text to Your Regional Language In Your Forwarded Images"
description = "This fun Gradio demo is for translating English quote in an image (usually whatsapp forwards :) ) to your local or preferred language. To use it, simply upload your image, select one of the language choices given (hindi, spanish, german, french, arabic, irish, and turkish) from radio buttons provided. You can alternately click one of the examples to load them and select the language choice along with it."
article = "<div style='text-align: center;'>Image Text Translate by <a href='https://twitter.com/yvrjsharma' target='_blank'>Yuvraj S</a> | <a href='https://github.com/yvrjsharma/HugginFace_Gradio' target='_blank'>Github Repo</a> | <center><img src='https://visitor-badge.glitch.me/badge?page_id=ysharma/TranslateQuotesInImageForwards' alt='visitor badge'></center></div>"

# The keras-ocr pipeline is already built once earlier in this module, so it
# is not constructed a second time here.

# Build and launch the UI: an image plus a language choice go in, the
# translated image comes out.
# NOTE(review): gr.inputs / gr.outputs and enable_queue belong to an older
# Gradio API -- confirm the Gradio version pinned for this app supports them.
gr.Interface(
    translate_image,
    [
        gr.inputs.Image(type="filepath", label="Input"),
        gr.inputs.Radio(
            choices=['hindi','spanish','french','turkish','german','irish', 'arabic'],
            type="value",
            default='hindi',
            label='Choose A Language',
        ),
    ],
    gr.outputs.Image(type="pil", label="Output"),
    title=title,
    description=description,
    article=article,
    examples=[['quote1.jpg','german'], ['en2.jpg','hindi'],['gm1.jpg','french'],['quotes6.jpg','spanish']],
    enable_queue=True
).launch(debug=True)
|