Spaces:

iohanngrig
/

image2textapp

Sleeping

App Files Files Community

iohanngrig committed on Jan 25, 2024

Commit

1793f90

verified ·

1 Parent(s): 23e5d9e

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -19

app.py CHANGED Viewed

@@ -8,12 +8,22 @@ from langchain.chains import LLMChain
 from langchain.chat_models import ChatOpenAI
 from langchain.prompts import PromptTemplate
 from transformers import pipeline
-from utils import css_code
 HUGGINGFACE_API_TOKEN = st.secrets["HUGGINGFACE_API_TOKEN"]
 OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
 MODEL = st.secrets["MODEL2"]
 def progress_bar(amount_of_time: int) -> Any:
     """
@@ -31,7 +41,6 @@ def progress_bar(amount_of_time: int) -> Any:
     time.sleep(1)
     my_bar.empty()
 def generate_text_from_image(url: str) -> str:
     """
     A function that uses the blip model to generate text from an image.
@@ -39,14 +48,11 @@ def generate_text_from_image(url: str) -> str:
     :return: text: generated text from the image
     """
     image_to_text: Any = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
     generated_text: str = image_to_text(url)[0]["generated_text"]
     print(f"IMAGE INPUT: {url}")
     print(f"GENERATED TEXT OUTPUT: {generated_text}")
     return generated_text
 def generate_story_from_text(scenario: str) -> str:
     """
     A function using a prompt template and GPT to generate a short story. LangChain is also
@@ -57,24 +63,18 @@ def generate_story_from_text(scenario: str) -> str:
     prompt_template: str = f"""
     You are a story teller;
     You can generate a long story based on a simple narrative, the story should be no more than 100 words and have more than 30 words;
     CONTEXT: {scenario}
     STORY:
     """
     prompt: PromptTemplate = PromptTemplate(template=prompt_template, input_variables=["scenario"])
     llm: Any = ChatOpenAI(model_name=MODEL, temperature=1)
     story_llm: Any = LLMChain(llm=llm, prompt=prompt, verbose=True)
     generated_story: str = story_llm.predict(scenario=scenario)
     print(f"TEXT INPUT: {scenario}")
     print(f"GENERATED STORY OUTPUT: {generated_story}")
     return generated_story
 def generate_speech_from_text(message: str) -> Any:
     """
     A function using the ESPnet text to speech model from HuggingFace
@@ -96,16 +96,13 @@ def generate_speech_from_text(message: str) -> Any:
                 mime='flac',
             )
 def main() -> None:
     """
     Main function
     :return: None
     """
     st.set_page_config(page_title="Image to audio story", page_icon="img/logo.png", layout="wide")
     st.markdown(css_code, unsafe_allow_html=True)
     with st.sidebar:
         st.image("img/kandinsky.jpg")
         #st.write("---")
@@ -124,14 +121,12 @@ def main() -> None:
         progress_bar(100)
         scenario: str = generate_text_from_image(uploaded_file.name)
         story: str = generate_story_from_text(scenario)
-        #generate_speech_from_text(story)
         with st.expander("Generated scenario"):
             st.write(scenario)
         with st.expander("Generated story"):
             st.write(story)
-        #st.audio("generated_audio.flac")
 if __name__ == "__main__":

 from langchain.chat_models import ChatOpenAI
 from langchain.prompts import PromptTemplate
 from transformers import pipeline
 HUGGINGFACE_API_TOKEN = st.secrets["HUGGINGFACE_API_TOKEN"]
 OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
 MODEL = st.secrets["MODEL2"]
+css_code = """
+    <style>
+    section[data-testid="stSidebar"] > div > div:nth-child(2) {
+        padding-top: 0.75rem !important;
+    }
+    section.main > div {
+        padding-top: 64px;
+    }
+    </style>
+"""
 def progress_bar(amount_of_time: int) -> Any:
     """
     time.sleep(1)
     my_bar.empty()
 def generate_text_from_image(url: str) -> str:
     """
     Caption an image with a HuggingFace image-to-text pipeline.

     The pipeline is built on every call from the
     "nlpconnect/vit-gpt2-image-captioning" checkpoint, and both the input
     and the resulting caption are echoed to stdout.

     :param url: image reference handed unchanged to the pipeline
     :return: text: generated text from the image
     """
     captioner: Any = pipeline(
         "image-to-text",
         model="nlpconnect/vit-gpt2-image-captioning",
     )
     # The pipeline result is indexed as a sequence; only the first entry is used.
     predictions: Any = captioner(url)
     caption: str = predictions[0]["generated_text"]
     print(f"IMAGE INPUT: {url}")
     print(f"GENERATED TEXT OUTPUT: {caption}")
     return caption
 def generate_story_from_text(scenario: str) -> str:
     """
     Generate a short story from a scenario with a LangChain prompt template and a chat model.

     The template is deliberately a plain string, not an f-string, so that
     ``{scenario}`` remains a real template placeholder matching
     ``input_variables``. LangChain substitutes the value when the chain runs,
     which keeps any braces inside the scenario text from being parsed as
     template syntax.

     :param scenario: text the story should be based on
     :return: generated_story: the story text returned by the chain
     """
     prompt_template: str = """
     You are a story teller;
     You can generate a long story based on a simple narrative, the story should be no more than 100 words and have more than 30 words;
     CONTEXT: {scenario}
     STORY:
     """
     prompt: PromptTemplate = PromptTemplate(template=prompt_template, input_variables=["scenario"])
     # MODEL is the module-level model name read from the Streamlit secrets.
     llm: Any = ChatOpenAI(model_name=MODEL, temperature=1)
     story_llm: Any = LLMChain(llm=llm, prompt=prompt, verbose=True)
     # The placeholder is filled here, by the chain, rather than by Python string formatting.
     generated_story: str = story_llm.predict(scenario=scenario)
     print(f"TEXT INPUT: {scenario}")
     print(f"GENERATED STORY OUTPUT: {generated_story}")
     return generated_story
 def generate_speech_from_text(message: str) -> Any:
     """
     A function using the ESPnet text to speech model from HuggingFace
                 mime='flac',
             )
 def main() -> None:
     """
     Main function
     :return: None
     """
     st.set_page_config(page_title="Image to audio story", page_icon="img/logo.png", layout="wide")
     st.markdown(css_code, unsafe_allow_html=True)
     with st.sidebar:
         st.image("img/kandinsky.jpg")
         #st.write("---")
         progress_bar(100)
         scenario: str = generate_text_from_image(uploaded_file.name)
         story: str = generate_story_from_text(scenario)
+        generate_speech_from_text(story)
         with st.expander("Generated scenario"):
             st.write(scenario)
         with st.expander("Generated story"):
             st.write(story)
+        st.audio("generated_audio.flac")
 if __name__ == "__main__":