Spaces:
Runtime error
Runtime error
File size: 2,359 Bytes
9551276 3be135a 5e72808 2bcefc7 14f07e7 2bcefc7 3be135a 14f07e7 5e72808 3be135a 5e72808 2bcefc7 110c323 bb25d5e 5e72808 e42b84a 5d31a12 5e72808 110c323 415dd0a 2bcefc7 415dd0a 3be135a 110c323 5e72808 110c323 5e72808 110c323 2bcefc7 110c323 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import os
import random
import gradio as gr
from groq import Groq
client = Groq(
api_key = os.environ.get("Groq_Api_Key")
)
def create_history_messages(history):
history_messages = [{"role": "user", "content": m[0]} for m in history]
history_messages.extend([{"role": "assistant", "content": m[1]} for m in history])
return history_messages
def generate_response(prompt, history, model, temperature, max_tokens, top_p, seed):
messages = create_history_messages(history)
messages.append({"role": "user", "content": prompt})
print(messages)
if seed == 0:
seed = random.randint(1, 100000)
stream = client.chat.completions.create(
messages=messages,
model=model,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
seed=seed,
stop=None,
stream=True,
)
response = ""
for chunk in stream:
delta_content = chunk.choices[0].delta.content
if delta_content is not None:
response += delta_content
yield response
return response
additional_inputs = [
gr.Dropdown(choices=["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"], value="llama3-70b-8192", label="Model"),
gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
gr.Slider(minimum=1, maximum=32192, step=1, value=4096, label="Max Tokens", info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b, llama 7b & 70b, 32k for mixtral 8x7b."),
gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p."),
gr.Number(precision=0, value=42, label="Seed", info="A starting point to initiate generation, use 0 for random")
]
gr.ChatInterface(
fn=generate_response,
chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
additional_inputs=additional_inputs,
title="Groq API UI",
description="Inference by Groq. Hugging Face Space by [Nick088](https://linktr.ee/Nick088)",
).launch() |