Spaces:

Sugamdeol
/

Sug-gpt

Sleeping

App Files Files Community

Sug-gpt / app.py

Sugamdeol

Update app.py

a0876f1 verified 4 months ago

raw

history blame contribute delete

3.05 kB

	from huggingface_hub import InferenceClient
	import gradio as gr

	# Shared inference client; every chat request in this module goes through it
	# and targets the Mistral 7B Instruct v0.3 checkpoint named here.
	client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3")

	# Build the Mistral-instruct prompt string from the conversation so far
	def format_prompt(message, history):
	    """Return the full instruct-style prompt for the next model turn.

	    Args:
	        message: The new user message that the model should answer.
	        history: Iterable of ``(user_prompt, bot_response)`` pairs for the
	            turns that have already been completed, oldest first.

	    Returns:
	        A single string: ``<s>``, then every past turn rendered as
	        ``[INST] user [/INST] answer</s> ``, then the new message as
	        ``[INST] message [/INST]``.
	    """
	    # Each *completed* assistant answer is closed with the end-of-sequence
	    # marker; the final instruction is left open so the model generates the
	    # reply instead of seeing an already-terminated sequence.
	    past_turns = [
	        f"[INST] {user_prompt} [/INST] {bot_response}</s> "
	        for user_prompt, bot_response in history
	    ]
	    return "<s>" + "".join(past_turns) + f"[INST] {message} [/INST]"

	# Streaming text-generation callback used by the chat interface
	def generate(
	    prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
	):
	    """Stream the model's reply to ``prompt`` as a growing string.

	    Args:
	        prompt: The new user message.
	        history: Past ``(user, bot)`` turns, forwarded to ``format_prompt``.
	        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
	        max_new_tokens: Upper bound on generated tokens; coerced to an int
	            and raised to at least 1.
	        top_p: Nucleus-sampling threshold. A value of 1.0 or more disables
	            nucleus truncation (sent as ``None``); values at or below 0 are
	            raised to 0.01.
	        repetition_penalty: Forwarded unchanged to the endpoint.

	    Yields:
	        The reply accumulated so far, once per received non-special token.

	    Returns:
	        The complete reply (available as ``StopIteration.value``).
	    """
	    # Keep every sampling parameter inside the range the endpoint accepts.
	    # NOTE(review): text-generation-inference style backends reject
	    # temperature <= 0, max_new_tokens == 0 and top_p outside (0, 1);
	    # confirm against the deployed backend.
	    temperature = max(float(temperature), 1e-2)
	    max_new_tokens = max(int(max_new_tokens), 1)  # sliders may deliver floats
	    top_p = float(top_p)
	    if top_p >= 1.0:
	        top_p = None  # "keep the whole distribution" == no nucleus truncation
	    elif top_p <= 0.0:
	        top_p = 1e-2

	    # Keyword arguments for generation configuration
	    generate_kwargs = dict(
	        temperature=temperature,
	        max_new_tokens=max_new_tokens,
	        top_p=top_p,
	        repetition_penalty=repetition_penalty,
	        do_sample=True,
	        # Fixed seed: identical prompt + parameters give identical samples.
	        seed=42,
	    )

	    # Format the prompt with the user's message and history
	    formatted_prompt = format_prompt(prompt, history)

	    # Call the text generation endpoint in streaming mode
	    stream = client.text_generation(
	        formatted_prompt,
	        **generate_kwargs,
	        stream=True,
	        details=True,
	        return_full_text=False,
	    )
	    output = ""

	    # Stream the response token by token
	    for response in stream:
	        # Control tokens (e.g. the closing "</s>") are not part of the
	        # visible answer, so they are left out of the displayed text.
	        if response.token.special:
	            continue
	        output += response.token.text
	        yield output  # partial output for real-time display
	    return output

	# Additional inputs (sliders) for controlling generation parameters.
	# They are passed to `generate` positionally after (prompt, history), so the
	# order here must stay: temperature, max_new_tokens, top_p, repetition_penalty.
	additional_inputs = [
	    gr.Slider(
	        label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05,
	        interactive=True, info="Higher values produce more diverse outputs",
	    ),
	    # Range is 64..1024 so that every position is a multiple of the step and
	    # a zero-token budget (which cannot produce a reply) is not selectable.
	    gr.Slider(
	        label="Max new tokens", value=256, minimum=64, maximum=1024, step=64,
	        interactive=True, info="The maximum numbers of new tokens",
	    ),
	    gr.Slider(
	        label="Top-p (nucleus sampling)", value=0.90, minimum=0.0, maximum=1.0, step=0.05,
	        interactive=True, info="Higher values sample more low-probability tokens",
	    ),
	    gr.Slider(
	        label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05,
	        interactive=True, info="Penalize repeated tokens",
	    ),
	]

	# Assemble the chat UI and start serving it.
	# The conversation panel is configured first, then handed to the interface.
	chat_window = gr.Chatbot(
	    show_label=False,
	    show_share_button=False,
	    show_copy_button=True,
	    likeable=True,
	    layout="panel",
	)
	demo = gr.ChatInterface(
	    fn=generate,  # called for every submitted user message
	    chatbot=chat_window,
	    additional_inputs=additional_inputs,  # generation-parameter sliders
	    title="Mistral 7B v0.3 ChatGPT Clone",
	    description="A ChatGPT clone using Mistral 7B model. Adjust parameters to fine-tune the generation.",
	)
	# show_api=False turns off Gradio's API documentation view for this app;
	# it has nothing to do with API keys.
	demo.launch(show_api=False)