import gradio as gr
import os

from share_btn import community_icon_html, loading_icon_html, share_js
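# Load two public Hugging Face Spaces as remote inference endpoints:
# SRDdev/Image-Caption provides GPT2-based image captioning, and the
# AudioLDM clone provides text-to-audio generation (it needs the HF_TOKEN
# environment variable, passed as api_key below).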
token = os.environ.get('HF_TOKEN')
caption = gr.Blocks.load(name="spaces/SRDdev/Image-Caption")
audio_gen = gr.Blocks.load(name="spaces/fffiloni/audioldm-text-to-audio-generation-clone", api_key=token)
ph_message="If you're not happy with sound result, you can manually describe the scene depicted in your image :)"
def input_changes(input_img):
    if input_img is None:
        return input_clear()
    cap = caption(input_img, fn_index=0)
    print("gpt2 caption: '" + cap + "' •")
    ph_update = "gpt2 caption: '" + cap + "' • "
    return manual_cap.update(value="", placeholder=f"{ph_update}{ph_message}"), caption_output.update(value=cap), sound_output.update(value=None)

def input_clear():
    return manual_cap.update(value="", placeholder=ph_message), caption_output.update(value=None), sound_output.update(value=None)
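# Generate the sound effect: prefer the manual description when one is given,
# otherwise fall back to the auto-generated caption, then call the AudioLDM
# endpoint (the extra arguments 2.5 and 3 are presumably the guidance scale
# and candidate count expected by that Space) and reveal the share button.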
def infer(image_input, manual_caption, duration_in, seed, caption_output):
    print(duration_in)
    if manual_caption == "":
        cap = caption_output
    else:
        cap = manual_caption
        print("manual caption: " + cap)
    sound = audio_gen(cap, duration_in, 2.5, seed, 3, fn_index=0)
    return cap, sound[1], gr.Group.update(visible=True)
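# Static HTML for the page header; the footer and "You may also like"
# links follow in `article` below.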
title = """
<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
Image to Sound Effect
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Convert an image to a corresponding sound effect generated through GPT2 Image Captioning & AudioLDM
</p>
</div>
"""
article = """
<div class="footer">
<p>
Follow <a href="https://twitter.com/fffiloni" target="_blank">Sylvain Filoni</a> for future updates 🤗
</p>
</div>
<div id="may-like-container" style="display: flex;justify-content: center;flex-direction: column;align-items: center;margin-bottom: 30px;">
<p>You may also like: </p>
<div id="may-like-content" style="display:flex;flex-wrap: wrap;align-items:center;height:20px;">
<svg height="20" width="208" style="margin-left:4px;margin-bottom: 6px;">
<a href="https://huggingface.co/spaces/haoheliu/audioldm-text-to-audio-generation" target="_blank">
<image href="https://img.shields.io/badge/π€ Spaces-AudioLDM_Text_to_Audio-blue" src="https://img.shields.io/badge/π€ Spaces-AudioLDM_Text_to_Audio-blue.png" height="20"/>
</a>
</svg>
<svg height="20" width="122" style="margin-left:4px;margin-bottom: 6px;">
<a href="https://huggingface.co/spaces/fffiloni/spectrogram-to-music" target="_blank">
<image href="https://img.shields.io/badge/π€ Spaces-Riffusion-blue" src="https://img.shields.io/badge/π€ Spaces-Riffusion-blue.png" height="20"/>
</a>
</svg>
</div>
</div>
"""
with gr.Blocks(css="style.css") as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)
        input_img = gr.Image(type="filepath", elem_id="input-img")
        with gr.Column():
            manual_cap = gr.Textbox(label="Manual image description (optional)", lines=3, placeholder=ph_message)
            with gr.Row():
                duration_in = gr.Slider(minimum=5, maximum=10, step=5, value=5, label="Duration")
                seed_in = gr.Slider(label="Seed", value=440, minimum=45, maximum=10000, step=1)
        caption_output = gr.Textbox(label="Caption", visible=False, elem_id="text-caption")
        sound_output = gr.Audio(label="Result", elem_id="sound-output")
        generate = gr.Button("Generate SFX from Image")
        with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
            community_icon = gr.HTML(community_icon_html)
            loading_icon = gr.HTML(loading_icon_html)
            share_button = gr.Button("Share to community", elem_id="share-btn")
        gr.HTML(article)
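    # Wire up the events: re-caption on image change, reset on clear,
    # generate on button click (also exposed as the "i2fx" API endpoint),
    # and trigger the community-share JavaScript from share_btn.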
    change_out = [manual_cap, caption_output, sound_output]
    input_img.change(input_changes, input_img, change_out)
    clear_out = [manual_cap, caption_output, sound_output]
    input_img.clear(input_clear, [], clear_out)
    generate.click(infer, inputs=[input_img, manual_cap, duration_in, seed_in, caption_output], outputs=[caption_output, sound_output, share_group], api_name="i2fx")
    share_button.click(None, [], [], _js=share_js)

demo.queue(max_size=32).launch(debug=True)
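# A minimal sketch of calling the exposed "i2fx" endpoint from another
# process, assuming the app runs locally on the default port and that the
# installed gradio_client version is compatible with this gradio release
# (the filename "cat.jpg" is a hypothetical example input):
#
#   from gradio_client import Client
#   client = Client("http://localhost:7860/")
#   cap, audio, _ = client.predict("cat.jpg", "", 5, 440, "", api_name="/i2fx")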