"""Gradio front end for the JoyCaption Pre-Alpha Mod image-captioning demo.

Builds a two-column page: an image input with advanced generation settings
on the left, and the generated caption on the right.
"""

# Not referenced directly in this file; kept (and kept first) for its import
# side effects -- presumably Hugging Face ZeroGPU setup; confirm before moving.
import spaces

import gradio as gr

from joycaption import stream_chat_mod, get_text_model, change_text_model

# NOTE(review): this title is rendered through gr.HTML; any markup that once
# wrapped the text appears to have been lost -- restore it if available.
JC_TITLE_MD = "\n\nJoyCaption Pre-Alpha Mod\n\n"

JC_DESC_MD = """This space is mod of [fancyfeast/joy-caption-pre-alpha](https://huggingface.co/spaces/fancyfeast/joy-caption-pre-alpha), [Wi-zz/joy-caption-pre-alpha](https://huggingface.co/Wi-zz/joy-caption-pre-alpha)"""

# Styles the description block, which is tagged with elem_classes="info".
css = """ .info {text-align:center; display:inline-flex; align-items:center !important} """

# Query the selectable text models once and reuse the result for both the
# dropdown choices and its default, tolerating an empty list.
_text_models = get_text_model()
_default_text_model = _text_models[0] if _text_models else None

# Pass the stylesheet to Blocks; without it the ".info" rule is never applied.
with gr.Blocks(css=css) as demo:
    gr.HTML(JC_TITLE_MD)
    with gr.Row():
        # Left column: image input, advanced settings, run button.
        with gr.Column():
            with gr.Group():
                jc_input_image = gr.Image(
                    type="pil",
                    label="Input Image",
                    sources=["upload", "clipboard"],
                    height=384,
                )
                with gr.Accordion("Advanced", open=False):
                    jc_text_model = gr.Dropdown(
                        label="LLM Model",
                        info="You can enter a huggingface model repo_id to want to use.",
                        choices=_text_models,
                        value=_default_text_model,
                        allow_custom_value=True,
                        interactive=True,
                        min_width=320,
                    )
                    # Hidden from the page, but still wired into
                    # change_text_model below.
                    jc_use_inference_client = gr.Checkbox(
                        label="Use Inference Client",
                        value=False,
                        visible=False,
                    )
                    with gr.Row():
                        jc_tokens = gr.Slider(
                            minimum=1, maximum=4096, value=300, step=1,
                            label="Max tokens",
                        )
                        jc_temperature = gr.Slider(
                            minimum=0.1, maximum=4.0, value=0.5, step=0.1,
                            label="Temperature",
                        )
                        jc_topk = gr.Slider(
                            minimum=0, maximum=100, value=40, step=10,
                            label="Top-k",
                        )
                jc_run_button = gr.Button("Caption", variant="primary")
        # Right column: caption output and attribution text.
        with gr.Column():
            jc_output_caption = gr.Textbox(label="Caption", show_copy_button=True)
            gr.Markdown(JC_DESC_MD, elem_classes="info")

    # Inputs are passed positionally: image, max tokens, top-k, temperature.
    jc_run_button.click(
        fn=stream_chat_mod,
        inputs=[jc_input_image, jc_tokens, jc_topk, jc_temperature],
        outputs=[jc_output_caption],
    )

    # Changing either the model dropdown or the inference-client checkbox
    # calls change_text_model; its result is written back to the dropdown.
    jc_text_model.change(
        change_text_model,
        [jc_text_model, jc_use_inference_client],
        [jc_text_model],
        show_api=False,
    )
    jc_use_inference_client.change(
        change_text_model,
        [jc_text_model, jc_use_inference_client],
        [jc_text_model],
        show_api=False,
    )

if __name__ == "__main__":
    demo.queue()
    demo.launch()