from lmdeploy.serve.gradio.turbomind_coupled import *
from lmdeploy.messages import TurbomindEngineConfig

# Model identifier handed to the engine as ``model_path``.
model_path = 'internlm/internlm2-chat-20b-4bits'

# TurboMind engine settings for this demo: a batch size of one and a
# ``cache_max_entry_count`` of 0.05.
# NOTE(review): 0.05 is presumably the share of GPU memory reserved for the
# k/v cache -- confirm against the lmdeploy TurbomindEngineConfig docs.
backend_config = TurbomindEngineConfig(
    max_batch_size=1,
    cache_max_entry_count=0.05,
)

# Build the engine once at import time and publish it on ``InterFace`` so the
# callbacks wired up below can reach it.
InterFace.async_engine = AsyncEngine(model_path=model_path,
                                     backend='turbomind',
                                     backend_config=backend_config,
                                     tp=1)

with gr.Blocks(css=CSS, theme=THEME) as demo:
    # State holders: the chat history starts as an empty list and the
    # session id starts at 0.
    state_chatbot = gr.State([])
    state_session_id = gr.State(0)

    with gr.Column(elem_id='container'):
        gr.Markdown('## LMDeploy Playground')

        # The chat window is labelled with the served model's name.
        chatbot = gr.Chatbot(elem_id='chatbot',
                             label=InterFace.async_engine.engine.model_name)
        instruction_txtbox = gr.Textbox(
            label='Instruction',
            placeholder='Please input the instruction')

        with gr.Row():
            # Cancel starts out disabled.
            cancel_btn = gr.Button(value='Cancel', interactive=False)
            reset_btn = gr.Button(value='Reset')

        # Generation controls.
        with gr.Row():
            request_output_len = gr.Slider(minimum=1,
                                           maximum=2048,
                                           value=512,
                                           step=1,
                                           label='Maximum new tokens')
            top_p = gr.Slider(minimum=0.01,
                              maximum=1,
                              value=0.8,
                              step=0.01,
                              label='Top_p')
            temperature = gr.Slider(minimum=0.01,
                                    maximum=1.5,
                                    value=0.7,
                                    step=0.01,
                                    label='Temperature')

    # Submitting the textbox runs the chat callback; the returned event handle
    # is kept so the Cancel and Reset buttons can cancel it.
    send_event = instruction_txtbox.submit(
        chat_stream_local,
        inputs=[
            instruction_txtbox,
            state_chatbot,
            cancel_btn,
            reset_btn,
            state_session_id,
            top_p,
            temperature,
            request_output_len,
        ],
        outputs=[state_chatbot, chatbot, cancel_btn, reset_btn],
    )

    # A second listener on the same submit event empties the textbox.
    instruction_txtbox.submit(
        lambda: gr.Textbox.update(value=''),
        inputs=[],
        outputs=[instruction_txtbox],
    )

    # Cancel: run the cancel callback and cancel the pending send event.
    cancel_btn.click(
        cancel_local_func,
        inputs=[state_chatbot, cancel_btn, reset_btn, state_session_id],
        outputs=[state_chatbot, cancel_btn, reset_btn],
        cancels=[send_event],
    )

    # Reset: run the reset callback and cancel the pending send event.
    reset_btn.click(
        reset_local_func,
        inputs=[instruction_txtbox, state_chatbot, state_session_id],
        outputs=[state_chatbot, chatbot, instruction_txtbox],
        cancels=[send_event],
    )

    def init():
        """Allocate a new session id.

        Increments ``InterFace.global_session_id`` and returns the
        incremented value.

        Returns:
            The value of ``InterFace.global_session_id`` immediately after
            this call's increment.
        """
        # Increment and read under the same lock acquisition. Reading the
        # counter after the lock has been released could observe another
        # caller's increment, handing the same id to two sessions.
        with InterFace.lock:
            InterFace.global_session_id += 1
            return InterFace.global_session_id

    # Register ``init`` as the demo's load handler; its return value is
    # written to ``state_session_id``.
    demo.load(init, inputs=None, outputs=[state_session_id])

# Enable request queuing, with the queue's concurrency set to the engine's
# instance count and its size capped at 100, then start the web server.
_queue_workers = InterFace.async_engine.instance_num
demo.queue(concurrency_count=_queue_workers, max_size=100).launch()