# LMDeploy Playground — Gradio chat demo (HuggingFace Spaces app).
# NOTE(review): the original capture carried Spaces page status text
# ("Spaces:" / "Runtime error") here; replaced with a comment header so
# the file is valid Python.
from lmdeploy.serve.gradio.turbomind_coupled import *
from lmdeploy.messages import TurbomindEngineConfig

# Conservative engine settings for a shared demo GPU: a single-request
# batch and only 5% of free memory reserved for the KV cache.
backend_config = TurbomindEngineConfig(max_batch_size=1,
                                       cache_max_entry_count=0.05)
# 4-bit quantized InternLM2 20B chat checkpoint (fits a small demo GPU).
model_path = 'internlm/internlm2-chat-20b-4bits'

# Shared engine instance used by the gradio callbacks imported above.
InterFace.async_engine = AsyncEngine(model_path=model_path,
                                     backend='turbomind',
                                     backend_config=backend_config,
                                     tp=1)
with gr.Blocks(css=CSS, theme=THEME) as demo:
    # Per-browser-session state: chat history and a unique session id.
    state_chatbot = gr.State([])
    state_session_id = gr.State(0)

    with gr.Column(elem_id='container'):
        gr.Markdown('## LMDeploy Playground')

        chatbot = gr.Chatbot(
            elem_id='chatbot',
            label=InterFace.async_engine.engine.model_name)
        instruction_txtbox = gr.Textbox(
            placeholder='Please input the instruction',
            label='Instruction')
        with gr.Row():
            # Cancel starts disabled; the stream callback enables it.
            cancel_btn = gr.Button(value='Cancel', interactive=False)
            reset_btn = gr.Button(value='Reset')
        with gr.Row():
            request_output_len = gr.Slider(1,
                                           2048,
                                           value=512,
                                           step=1,
                                           label='Maximum new tokens')
            top_p = gr.Slider(0.01, 1, value=0.8, step=0.01, label='Top_p')
            temperature = gr.Slider(0.01,
                                    1.5,
                                    value=0.7,
                                    step=0.01,
                                    label='Temperature')

    # Submit streams tokens into the chatbot via chat_stream_local.
    send_event = instruction_txtbox.submit(chat_stream_local, [
        instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
        state_session_id, top_p, temperature, request_output_len
    ], [state_chatbot, chatbot, cancel_btn, reset_btn])
    # Clear the input box right after submission (gradio 3.x update API).
    instruction_txtbox.submit(
        lambda: gr.Textbox.update(value=''),
        [],
        [instruction_txtbox],
    )
    # Both cancel and reset abort the in-flight streaming event.
    cancel_btn.click(cancel_local_func,
                     [state_chatbot, cancel_btn, reset_btn, state_session_id],
                     [state_chatbot, cancel_btn, reset_btn],
                     cancels=[send_event])
    reset_btn.click(reset_local_func,
                    [instruction_txtbox, state_chatbot, state_session_id],
                    [state_chatbot, chatbot, instruction_txtbox],
                    cancels=[send_event])

    def init():
        """Allocate a fresh, unique session id for a newly loaded page.

        The counter read happens inside the lock: reading it after
        releasing the lock (as the original did) could hand the same id
        to two pages loading concurrently.
        """
        with InterFace.lock:
            InterFace.global_session_id += 1
            new_session_id = InterFace.global_session_id
        return new_session_id

    # Assign a session id every time the page (re)loads.
    demo.load(init, inputs=None, outputs=[state_session_id])

# NOTE(review): `concurrency_count` is the gradio 3.x queue API; gradio 4
# removed it — confirm the pinned gradio version before upgrading.
demo.queue(concurrency_count=InterFace.async_engine.instance_num,
           max_size=100).launch()