"""Small Gradio chat UI backed by Hugging Face hosted text-generation models."""

import gradio as gr
from huggingface_hub import InferenceClient

# Display name (as shown in the dropdown) -> Hugging Face Hub repository id.
MODEL_MAP = {
    "Nous Hermes Mixtral 8x7B DPO": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
    "StarChat2 15b": "HuggingFaceH4/starchat2-15b-v0.1",
    "Mistral 7B v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
    "Phi 3 mini": "microsoft/Phi-3-mini-4k-instruct",
    "Mixtral 8x7B": "mistralai/Mixtral-8x7B-Instruct-v0.1",
}

# Repository id used when the requested display name is not in MODEL_MAP.
DEFAULT_MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# End-of-sequence marker that may appear as literal text at the end of a
# streamed completion; it is removed before the reply is shown to the user.
EOS_TOKEN = "</s>"


def client_fn(model: str) -> InferenceClient:
    """Return an ``InferenceClient`` for the model selected in the UI.

    Args:
        model: Display name of the model (a key of ``MODEL_MAP``).

    Returns:
        A client bound to the matching repository id, or to
        ``DEFAULT_MODEL_ID`` when ``model`` is not a known display name.
    """
    return InferenceClient(MODEL_MAP.get(model, DEFAULT_MODEL_ID))


# Prompt prefix sent before every user message. The fragments are joined by
# implicit string concatenation exactly as written (no separator is inserted
# between them).
# NOTE(review): the source had a raw line break inside the "Begin with a
# greeting..." fragment; it is written here as "\n" -- confirm that matches
# the intended prompt.
system_instructions = (
    "[SYSTEM] You are a chat bot named 'NITHIYASRI'S CHATBOT'."
    "Your task is to Answer the question."
    "Keep conversation very short, clear and concise."
    "Respond naturally and concisely to the user's queries. "
    "The expectation is that you will avoid introductions and start answering the query directly, Only answer the question asked by user, Do not say unnecessary things."
    "Begin with a greeting if the user initiates the conversation. \n"
    "Here is the user's query:[QUESTION] "
)


def models(text: str, model: str = "Mixtral 8x7B") -> str:
    """Generate a reply to ``text`` using the selected model.

    Args:
        text: The user's message.
        model: Display name of the model to query (see ``MODEL_MAP``).

    Returns:
        The generated reply, with a trailing end-of-sequence marker removed
        if the model emitted one.
    """
    client = client_fn(model)

    generate_kwargs = {
        "max_new_tokens": 100,
        "do_sample": True,
    }

    formatted_prompt = f"{system_instructions} {text} [ANSWER]"
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )

    # With stream=True and details=True each item exposes the generated
    # token via ``.token.text``; concatenate them into the full reply.
    output = "".join(response.token.text for response in stream)

    # Strip the marker only when it is actually present. The previous check,
    # ``output.endswith("")``, is always true and therefore chopped the last
    # four characters off every reply.
    if output.endswith(EOS_TOKEN):
        output = output[: -len(EOS_TOKEN)]

    return output


# Markdown header for the page; the two headings must be on separate lines
# to render as separate headings.
description = """# H GO
### Inspired from Google Go"""

with gr.Blocks() as demo:
    gr.Markdown(description)

    text_input = gr.Textbox(label="Enter your message here:")
    dropdown = gr.Dropdown(
        [
            "Mixtral 8x7B",
            "Nous Hermes Mixtral 8x7B DPO",
            "StarChat2 15b",
            "Mistral 7B v0.3",
            "Phi 3 mini",
        ],
        value="Mistral 7B v0.3",
        label="Select Model",
    )
    submit_btn = gr.Button("Send")
    output_text = gr.Textbox(label="Response")

    # The dropdown value is passed as the ``model`` argument of ``models``.
    submit_btn.click(fn=models, inputs=[text_input, dropdown], outputs=output_text)

# Queue and launch configuration for Gradio.
demo.queue(max_size=300000)
demo.launch()