Spaces:

ahmad4raza
/

Flying-Shakespeare

Sleeping

App Files Files Community

ahmad4raza committed on Sep 1, 2023

Commit

8acab6d

1 Parent(s): bab4993

Create main.py

Browse files

Files changed (1) hide show

main.py +91 -0

main.py ADDED Viewed

	@@ -0,0 +1,91 @@

+from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+from langchain import PromptTemplate, LLMChain, OpenAI
+import requests
+import os
+import io
+from datasets import load_dataset
+import torch
+import soundfile as sf
+import gradio as gr
+from PIL import Image
+import numpy as np
+from dotenv import load_dotenv, find_dotenv
+load_dotenv(find_dotenv())  # read a .env file, if one is found, into the process environment
+def handwriting_to_text(url):
+    model_1 = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
+    output_1 = model_1(url)
+    return output_1
+def generate_story(scenario):
+    template = """
+    Consider yourself as the famous poet "William Shakespere";
+    You can generate a poem in Shakespeare's tone based on a single word, the poem should be no more than 4 lines in length;
+    CONTEXT: {scenario}
+    POEM:
+    """
+    prompt = PromptTemplate(template=template, input_variables=["scenario"])
+    story_llm = LLMChain(llm=OpenAI(model_name="gpt-3.5-turbo", temperature=1), prompt=prompt, verbose=True)
+    story = story_llm.predict(scenario=scenario)
+    print(story)
+    return story
+def recite_the_poem(content):
+    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+    inputs = processor(text=content, return_tensors="pt")
+    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+    speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+    speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+    sf.write("speech.wav", speech.numpy(), samplerate=16000)
+    return "speech.wav"
+def recite_the_poem(content):
+    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+    inputs = processor(text=content, return_tensors="pt")
+    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+    speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+    speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+    sf.write("speech.wav", speech.numpy(), samplerate=16000)
+    with open("speech.wav", "rb") as audio_file:
+        audio_data = audio_file.read()
+    return audio_data
+def main_model(image):
+    image = Image.fromarray(np.uint8(image))
+    image_path = "temp_image.png"
+    image.save(image_path)
+    text = handwriting_to_text(image_path)
+    poem = generate_story(text)
+    audio_data = recite_the_poem(poem)
+    return poem, audio_data
+iface = gr.Interface(
+    fn=main_model,
+    inputs="image",
+    outputs=["text", "audio"],
+    title="Handwriting to Shakespearean Poem",
+    description="Upload an image containing handwritten text to generate a Shakespearean poem and play the recited poem.",
+)
+if __name__ == "__main__":
+    iface.launch(share=True)