import gradio as gr
from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs
import anthropic
from pyht import Client as PyHtClient, TTSOptions
import dataclasses
import os
import numpy as np
from huggingface_hub import InferenceClient
import io
from pydub import AudioSegment
from dotenv import load_dotenv
import sambanova_gradio
from elevenlabs import ElevenLabs, VoiceSettings
## added by AL on 111124 to get SambaNova
import openai
## added by AL on 111424 to get Pinecone
from pinecone import Pinecone
## To get the semantic piece
from openai import OpenAI
# ADDED BY AL ON 111824 TO GET PYPDF FOR ANNIE
import requests
import json
import PyPDF2
# added by al on 120224 to clean the response
import re
# added by al 120224 to improve speed of responses
import random
# needed by auto_reset_state and transcribe_function below
import time
import threading
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
#css_code='body{background-image:url("https://picsum.photos/seed/picsum/200/300");}'
#css_code='body{background-image:url("https://i.postimg.cc/4Nfwd1GZ/wzzk.jpg");background-size: cover; background-position: center;background-repeat: no-repeat;}'
css_code = 'body{background-image:url("https://i.postimg.cc/YqTrRKF8/WZZKWeb-soundbar.jpg");background-size: cover; background-position: center;background-repeat: no-repeat;}'

# ADDED BY AL ON 112624 TO GET GRADIO CLIENT FOR STREAMING CHATBOT
from gradio_client import Client as gcClient
from huggingface_hub import login
# client = gcClient("BACKENDAPI2024/radarbackend11262024v11")
# result = client.predict(
#     messages=[],
#     user_message="Hello!!",
#     api_name="/api_get_response_on_enter"
# )
# print(result)
import whisper
load_dotenv()

account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

# GET API KEYS Added by AL on 111124
SAMBA_NOVA_API_KEY = os.environ.get("SAMBA_NOVA_API_KEY", None)
ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY", None)
ELEVEN_DEFAULT_VOICE_ID = "ogvfya0XETMq7tFy4TO2"  # Replace with your desired voice ID
CLAUDE_CLIENT_API_KEY = os.environ.get("ANTHROPIC_KEY", None)
PLAYHT_SECRET_KEY = os.environ.get("PLAYHT_SECRET_KEY", None)
PLAYHT_USER_ID = os.environ.get("PLAYHT_USER_ID", None)
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", None)
# GET API KEYS Added by AL on 111524
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", None)

# wip added by al to make the OpenAI embedding work
openai_client2 = OpenAI(
    api_key=OPENAI_API_KEY,
)

# set the Pinecone index name
index_name = "radardataclean11122024"
# index = pc.Index(host="INDEX_HOST")
# added by Al on 111424
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(index_name)

# Added by al on 112624 to get the hf token for the gradio client
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    print("Please set your Hugging Face token in the environment variables.")
else:
    login(token=HF_TOKEN)

print(CLAUDE_CLIENT_API_KEY)
print(PLAYHT_SECRET_KEY)
print(PLAYHT_USER_ID)
print(ELEVEN_LABS_API_KEY)
print(PINECONE_API_KEY)
print(OPENAI_API_KEY)
print(HF_TOKEN)
# Whisper Speech-to-Text
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)
pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True
)
def auto_reset_state():
    time.sleep(5)
    return None, ""


def transcribe_function(stream, new_chunk):
    try:
        sr, y = new_chunk[0], new_chunk[1]
    except TypeError:
        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
        return stream, "", None
    if y is None or len(y) == 0:
        return stream, "", None
    y = y.astype(np.float32)
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y
    if stream is not None and len(stream) > 0:
        stream = np.concatenate([stream, y])
    else:
        stream = y
    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    full_text = result.get("text", "")
    threading.Thread(target=auto_reset_state).start()
    return stream, full_text, full_text


def clear_transcription_state():
    return None, ""
if account_sid and auth_token:
    from twilio.rest import Client
    client = Client(account_sid, auth_token)
    token = client.tokens.create()
    rtc_configuration = {
        "iceServers": token.ice_servers,
        "iceTransportPolicy": "relay",
    }
else:
    rtc_configuration = None
@dataclasses.dataclass
class Clients:
    claude: anthropic.Anthropic
    play_ht: PyHtClient
    hf: InferenceClient
    eleven: ElevenLabs
    sambanova: openai.OpenAI
    pc: Pinecone
    openai: OpenAI
    gc: gcClient
    grState: gr.State
    # whisper: whisper
## added by al for annie voice on 111124
# tts_options = TTSOptions(voice="s3://voice-cloning-zero-shot/544e26e9-64b4-4243-aed7-8f8891212ada/original/manifest.json",
#                          sample_rate=24000)
tts_options = TTSOptions(voice="s3://voice-cloning-zero-shot/544e26e9-64b4-4243-aed7-8f8891212ada/original/manifest.json",
                         sample_rate=48000)
# From the Play HT APIs: https://docs.play.ht/reference/api-list-cloned-voices
def aggregate_chunks(chunks_iterator):
    leftover = b''  # Store incomplete bytes between chunks
    for chunk in chunks_iterator:
        # Combine with any leftover bytes from the previous chunk
        current_bytes = leftover + chunk
        # Calculate complete samples
        n_complete_samples = len(current_bytes) // 2  # int16 = 2 bytes
        bytes_to_process = n_complete_samples * 2
        # Split into complete samples and leftover
        to_process = current_bytes[:bytes_to_process]
        leftover = current_bytes[bytes_to_process:]
        if to_process:  # Only yield if we have complete samples
            audio_array = np.frombuffer(to_process, dtype=np.int16).reshape(1, -1)
            yield audio_array


def audio_to_bytes(audio: tuple[int, np.ndarray]) -> bytes:
    audio_buffer = io.BytesIO()
    segment = AudioSegment(
        audio[1].tobytes(),
        frame_rate=audio[0],
        sample_width=audio[1].dtype.itemsize,
        channels=1,
    )
    segment.export(audio_buffer, format="mp3")
    return audio_buffer.getvalue()
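

# A minimal illustrative sketch (not called anywhere in this app) of the input shape
# audio_to_bytes expects: the WebRTC component hands callbacks a (sample_rate, int16
# ndarray) tuple. The values below are hypothetical examples, not app state.
def _example_audio_to_bytes() -> bytes:
    example_audio = (48000, np.zeros(48000, dtype=np.int16))  # one second of silence
    return audio_to_bytes(example_audio)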
# AL MIGHT NOT BE USING THIS APPROACH ANYMORE.
def semantic_search(query):
    # Generate an embedding for the query using OpenAI
    # (text-embedding-ada-002, since the chat models do not provide embeddings directly)
    # response = client_state.openai.Embedding.create(
    # response = client_state.openai.embeddings.create(
    response = openai_client2.embeddings.create(
        input=query,
        model="text-embedding-ada-002"
    )
    # The v1 OpenAI client returns an object, not a dict
    query_embedding = response.data[0].embedding
    # Search the Pinecone index
    result = index.query(vector=query_embedding, top_k=100, include_metadata=True)
    # Format the results as a string
    results_str = ""
    for match in result.matches:
        metadata = match.metadata
        score = match.score
        results_str += f"ID: {match.id}, Score: {score}, Metadata: {metadata}\n"
    return results_str
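

# A minimal sketch of how semantic_search could be folded into the LLM prompt; it mirrors
# the commented-out `prompt += "\n\n" + semantic_search(query)` idea in response() below.
# augment_prompt_with_context is illustrative only and is not called anywhere in this app.
def augment_prompt_with_context(prompt: str, max_context_chars: int = 2000) -> str:
    context_str = semantic_search(prompt)
    return prompt + "\n\nContext:\n" + context_str[:max_context_chars]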
## Updated version without the key selections
def set_api_key():
    try:
        claude_client = anthropic.Anthropic(api_key=CLAUDE_CLIENT_API_KEY)
        play_ht_client = PyHtClient(user_id=PLAYHT_USER_ID, api_key=PLAYHT_SECRET_KEY)
        # added by al on 111124 (use the key variable, not the literal string)
        eleven_client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
        sambanova_client = openai.OpenAI(
            # api_key=os.environ.get("SAMBANOVA_API_KEY"),
            api_key=SAMBA_NOVA_API_KEY,
            base_url="https://api.sambanova.ai/v1",
        )
        # added by al on 111424 for Pinecone
        pc_client = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
        # added by al on 111524 for openai embeddings
        openai_client = OpenAI(
            api_key=OPENAI_API_KEY,
        )
        # added by al on 112524 for gradio client for RAG
        # gradio_c = gcClient("BACKENDAPI2024/radarbackend11262024v11")
        # gradio_c = gcClient("BACKENDAPI2024/radarbackend11262024v11", hf_token=HF_TOKEN)
        # gradio_c = gcClient("on1onmangoes/radarheyanniebackend112724", hf_token=HF_TOKEN)
        gradio_c = gcClient("on1onmangoes/radarheyanniebackend112724", hf_token=HF_TOKEN)
        gradio_state = gr.State()
        # whisper_model = whisper.load_model("turbo")
        # inf_client = InferenceClient(headers={"Authorization": "Bearer <hf_token>"})
        inf_client = InferenceClient(headers={"Content-Type": "audio/mpeg", "Authorization": f"Bearer {HF_TOKEN}"})
    except Exception:
        raise gr.Error("Invalid API keys. Please try again.")
    gr.Info("Successfully set Radar.", duration=3)
    return Clients(claude=claude_client, play_ht=play_ht_client,
                   hf=inf_client, eleven=eleven_client, sambanova=sambanova_client,
                   pc=pc_client, openai=openai_client, gc=gradio_c, grState=gradio_state), gr.skip()
# ------------------------------------- END OF ANTHROPIC STYLE
client_prompt = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of short, simple sentences. NO MORE THAN 2 SENTENCES.
DO NOT PRODUCE UNNECESSARY PUNCTUATION, AS THIS TEXT WILL BE FED TO A TEXT-TO-SPEECH SYSTEM. KEEP IT SIMPLE, SHORT AND SWEET. Today is December 2, 2024; use this date for reference.
Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama, and WZZK 104.7.
First check for our preferred vendors in the context. FOR RADIO AND MUSIC QUESTIONS, REFER TO WZZK 104.7 FOR THE MOST PART.
Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
If no information is found, request additional information; do not give negative responses or describe your limitations, so that the reply can be used in downstream tasks.
Always aim to provide clear and relevant information that directly addresses the user's query, and feel free to sprinkle in a dash of humor.
Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative, like a delightful dessert after a hearty meal!"""
## ADDED BY AL ON 120224 TO CONVEY THE SYSTEMIC RESPONSE.
voicebot_responses = [
    "Hang tight, I'm working on that for you!",
    "Give me a sec, I'll have it ready in no time!",
    "Just a moment, let me check that for you.",
    "Working on it, sit tight!",
    "Got it! Let me pull that up for you.",
    "On it! This won't take long.",
    "Hold on, I'm finding that for you.",
    "Let me grab that info for you real quick.",
    "One sec, I'm putting it all together!",
    "I’m on it! This should just take a moment."
]

# Predefined lists for random voicebot responses
processing_responses = [
    "Hang tight, I'm working on that for you!",
    "Give me a sec, I'll have it ready in no time!",
    "Just a moment, let me check that for you.",
    "Working on it, sit tight!",
    "Got it! Let me pull that up for you.",
    "On it! This won't take long.",
    "Hold on, I'm finding that for you.",
    "Let me grab that info for you real quick.",
    "One sec, I'm putting it all together!",
    "I’m on it! This should just take a moment."
]

greeting_responses = [
    "Hey there! Great to hear from you!",
    "Hi! How’s it going?",
    "Hello! What’s on your mind today?",
    "Hey! What can I help you with?",
    "Hi there! Always good to chat with you."
]
## -------------------------- Added by AL on 111724 to get the country music synopsis and client synopsis
# from datasets import load_dataset
# client_dataset_name = "on1onmangoes/SAMLONEv4_20241001145542"
# # Load the dataset
# dataset = load_dataset(client_dataset_name)
# # Initialize the context string
# context = ''
# # Assuming the dataset has a 'train' split
# # You can adjust this if there are other splits like 'validation' or 'test'
# data_split = dataset['train']
# # Inspect the column names to adjust the field names accordingly
# print("Column names:", data_split.column_names)
# These are the field names for the client data:
#   Name        string
#   Category    string
#   Address     string
#   Phone       string
#   Description string
# Build the context string
# for example in data_split:
#     # Replace 'Title', 'Source', etc., with the actual field names from your dataset
#     name = example.get('Name', '')
#     category = example.get('Category', '')
#     address = example.get('Address', '')
#     phone = example.get('Phone', '')
#     description = example.get('Description', '')
#     # Concatenate the fields into the context string
#     context += f"Name: {name}\n"
#     context += f"Category: {category}\n"
#     context += f"Address: {address}\n"
#     context += f"Phone: {phone}\n"
#     context += f"Description: {description}\n\n"
# # Optionally, print a portion of the context to verify
# print(context[:1000])  # Print the first 1000 characters
## ---------------------------------------------------------------------------------------------------------------
# Added by Al on 111724 to add the client prompt
system_message = client_prompt
# system_message += "\n\n" + context
# ------------- For PDF reading, added by AL on 111824
def pdf_to_text(pdf_location):
    # Check if the location is a URL or a file path
    if pdf_location.startswith('http://') or pdf_location.startswith('https://'):
        # Fetch the PDF from the URL
        try:
            response = requests.get(pdf_location)
            response.raise_for_status()
            pdf_bytes = io.BytesIO(response.content)
        except requests.exceptions.RequestException as e:
            return f"Error fetching the PDF file from the URL: {e}", None
    else:
        # Check if the file exists at the given path
        if not os.path.exists(pdf_location):
            return "The file does not exist at the specified location.", None
        # Open the PDF file
        try:
            pdf_bytes = open(pdf_location, 'rb')
        except Exception as e:
            return f"Error opening the PDF file: {e}", None
    # Read the PDF file
    try:
        reader = PyPDF2.PdfReader(pdf_bytes)
        text = ""
        for page_num in range(len(reader.pages)):
            page = reader.pages[page_num]
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
        # Convert the text to JSON format
        text_json = json.dumps({"text": text})
        return text, text_json
    except Exception as e:
        return f"An error occurred while reading the PDF: {e}", None
    finally:
        # Close the file if it's a local file
        if not pdf_location.startswith('http://') and not pdf_location.startswith('https://'):
            pdf_bytes.close()


# FILE IS TOO BIG
# content_file_path = "./content/ANNIE111824.pdf"
# Causes hallucinations
# content_file_path_short = "ANNIE30TO57SHORT111824.pdf"
# content_file_path_clientartists = "./content/ANNIECLIENTSARTISTS111824.pdf"
# content, content_json = pdf_to_text(content_file_path_clientartists)
# ANNIECLIENTARTISTSUPERSHORT111824.pdf
content_file_supershort = "./content/ANNIECLIENTARTISTSUPERSHORT111824.pdf"
content, content_json = pdf_to_text(content_file_supershort)
print("Annie Content is -->")
print(content)
# ------------------
#------------------ | |
# added by al on 112724 to clean the response from the gradio client api | |
def clean_response(response, user_message): | |
""" | |
Cleans the response text by removing unwanted symbols, formatting issues, | |
and ensures the response does not repeat the question. | |
""" | |
if isinstance(response, (list, tuple)): # Handle nested lists/tuples | |
response = " ".join(map(str, response)) | |
# Remove backslashes, newline characters, and specified unwanted symbols | |
response = re.sub(r"[\\\n\(\)\[\]\"']", " ", response) | |
# Normalize punctuation spacing | |
response = re.sub(r"\s([?.!,'](?:\s|$))", r"\1", response) | |
# Remove question repetition from the response | |
if response.lower().startswith(user_message.lower().strip()): | |
response = response[len(user_message):].strip(",. ") | |
# Replace multiple spaces with a single space | |
response = re.sub(r"\s+", " ", response).strip() | |
return response | |
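

# A small illustrative call (not executed anywhere in this app): clean_response strips
# brackets, quotes, and newlines, drops a leading repeat of the user's question, and
# collapses whitespace. The strings below are hypothetical examples, not real API output.
def _example_clean_response() -> str:
    raw = ["Hey Annie what is playing", "WZZK 104.7 is playing new country."]
    return clean_response(raw, "Hey Annie what is playing")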
# Added by Al on 111824 to add the content
# system_message += "\n\n" + content
print("the system message is -->")
print(system_message)
## Added by al on 12 02 24 to do the transcription locally
# New method uses the system message to summarize the client history upfront
def response(audio: tuple[int, np.ndarray], conversation_llm_format: list[dict],
             chatbot: list[dict], client_state: Clients):
    if not client_state:
        raise gr.Error("Please set your API keys first.")

    # THIS IS THE ORIGINAL PROMPT, UPDATED FOR BETTER RESULTS WITH CLAUDE HAIKU
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio)).text
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="distil-whisper/distil-large-v3").text
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="openai/whisper-large-v3-turbo").text
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="nyrahealth/CrisperWhisper").text
    # prompt = "Hey Annie how are you"
    # prompt = transcribe_function(client_state.grState, audio)
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="https://kttcvz41e0htmjpp.us-east-1.aws.endpoints.huggingface.cloud").text
    prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="https://u3fcydn2o5vvwyd0.us-east-1.aws.endpoints.huggingface.cloud").text

    # ADDED BY AL TO USE THE SPEECH-TO-TEXT GRADIO CLIENT - not working
    # client = gcClient("on1onmangoes/radarheyanniebackend112724")
    # result = client.predict(
    #     new_chunk=handle_file('https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'),
    #     api_name="/api_voice_to_text"
    # )
    # print(result)
    # prompt = client_state.gc.predict(
    #     new_chunk=audio,
    #     api_name="/api_voice_to_text"
    # ).text

    print("the prompt is-->")
    print(prompt)
    # added by al on 111524
    query = prompt  # Use the transcribed text as the query for semantic search

    # added by al on 120424 for the "hey annie" test
    # if "hey annie" not in prompt.lower():
    #     return
    # # Normalize the prompt by removing punctuation and converting to lowercase
    # clean_prompt = re.sub(r'[^\w\s]', '', prompt.lower())
    # # Check if "hey" or "annie" appears in the prompt
    # if "hey" not in clean_prompt and "annie" not in clean_prompt:
    #     return

    # prompt += "\n\n" + semantic_search(query)
    ## added by al on 111724 to add the context from the dataset directly to the query
    print("the prompt+context is-->")
    print(prompt)

    conversation_llm_format.append({"role": "user", "content": prompt})

    # added by al on 12 02 24 to remove dead space
    # Determine if the prompt is a greeting or requires processing
    if any(greeting in prompt.lower() for greeting in ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"]):
        # Random greeting response for greeting-only prompts
        processing_message = random.choice(greeting_responses)
    else:
        # Random processing message for general prompts
        processing_message = random.choice(processing_responses)
    print("processing message -->", processing_message)

    # Generate audio for the processing response
    processing_audio_iterator = client_state.play_ht.tts(
        processing_message, options=tts_options, voice_engine="Play3.0-mini-http"
    )
    for chunk in aggregate_chunks(processing_audio_iterator):
        audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
        yield (48000, audio_array, "mono")  # Send processing audio immediately
    # if llm == "claude_haiku":
    #     response = client_state.claude.messages.create(
    #         model="claude-3-5-haiku-20241022",
    #         max_tokens=512,
    #         system="You are Annie, a friendly and intelligent voice assistant specializing in New Country Music, Alabama, and WZZK 104.7. Your responses should be brief, engaging, and informative while maintaining a positive and upbeat tone.",
    #         messages=conversation_llm_format,
    #     )
    #     response_text = " ".join(block.text for block in response.content if getattr(block, "type", None) == "text")
    # elif llm == "meta8b_samba":
    #     response = client_state.sambanova.chat.completions.create(
    #         model='Meta-Llama-3.1-8B-Instruct',
    #         # ADDED BY AL ON 111824 TO INCREASE CONTEXT LENGTH
    #         # model='Meta-Llama-3.1-70B-Instruct',
    #         # model='Meta-Llama-3.1-405B-Instruct',
    #         # ADDED BY AL ON 111824 TO REDUCE LATENCY WITH 3.2
    #         # model='Meta-Llama-3.2-1B-Instruct',
    #         # model='Meta-Llama-3.2-3B-Instruct',
    #         # model='Llama-3.2-11B-Vision-Instruct',
    #         # model='Llama-3.2-90B-Vision-Instruct',
    #         messages=[{"role": "system", "content": system_message}, {"role": "user", "content": conversation_llm_format}],
    #         # messages=[{"role": "system", "content": "You are a helpful assistant"}, {"role": "user", "content": "Hello"}],
    #         temperature=0.1,
    #         top_p=0.1
    #     )
    #     print("the response is-->")
    #     print(response)
    #     response_text = response.choices[0].message.content
    #     print("the response_text is-->")
    #     print(response_text)

    # this piece does not need to be uncommented, noted on 112624
    # response_text = " ".join(block.text for block in response.content if getattr(block, "type", None) == "text")
    # response_text = response_text.replace("WZZK", "W Zee Zee Kay")
    # response_text = get_sambanova_response(prompt)
    # added by al on 112624 for the gradio client output
    response = client_state.gc.predict(
        messages=[],
        # messages=[{"role": "system", "content": system_message}, {"role": "user", "content": conversation_llm_format}],
        # user_message="Hello!!",
        user_message=prompt,
        api_name="/api_get_response_on_enter"
    )
    # response = client.predict(
    #     messages=[],
    #     user_message=user_message,
    #     api_name="/api_get_response_on_enter"
    # )
    print("gradio client response -->")
    print(response)

    assistant_response = response[0][0][1]
    print("assistant response -->")
    print(assistant_response)

    # Sanitize and clean the response
    # assistant_response = clean_response(assistant_response, prompt)
    response_text = assistant_response

    # Update conversation histories
    conversation_llm_format.append({"role": "assistant", "content": response_text})
    chatbot.append({"role": "user", "content": prompt})
    chatbot.append({"role": "assistant", "content": response_text})
    yield AdditionalOutputs(conversation_llm_format, chatbot)
    # added by al on 112624 for the gradio client output
    # response_text = response
    # Update conversation histories
    # conversation_llm_format.append({"role": "assistant", "content": response_text})
    # Convert dict format to tuple format for Gradio chatbot
    # chatbot.append((prompt, response_text))  # Changed from dict to tuple
    # This version commented out on 112724 though it works with Samba
    # conversation_llm_format.append({"role": "assistant", "content": response_text})
    # chatbot.append({"role": "user", "content": prompt})
    # chatbot.append({"role": "assistant", "content": response_text})
    # yield AdditionalOutputs(conversation_llm_format, chatbot)

    # This version works for Play HT
    # iterator = client_state.play_ht.tts(response_text, options=tts_options, voice_engine="Play3.0")
    # voice_engine: the voice engine to use for the TTS request.
    #   Play3.0-mini-http (default): the latest multilingual model, streaming audio over HTTP (NOTE that it is Play, not PlayHT like previous voice engines)
    #   Play3.0-mini-ws: the latest multilingual model, streaming audio over WebSockets (NOTE that it is Play, not PlayHT like previous voice engines)
    #   PlayHT2.0-turbo: the legacy English-only model, streaming audio over gRPC
    iterator = client_state.play_ht.tts(response_text, options=tts_options, voice_engine="Play3.0-mini-http")
    for chunk in aggregate_chunks(iterator):
        audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
        # yield (24000, audio_array, "mono")
        yield (48000, audio_array, "mono")

    # this version is for Eleven Labs
    # yield client_state.eleven.text_to_speech.convert_as_stream(
    #     voice_id="pMsXgVXv3BLzUgSXRplE",
    #     optimize_streaming_latency="0",
    #     output_format="mp3_22050_32",
    #     text=response_text,
    #     voice_settings=VoiceSettings(
    #         stability=0.1,
    #         similarity_boost=0.3,
    #         style=0.2,
    #     ),
    # )
## -------------- Added by AL based on feedback from Claude -----------
with gr.Blocks(css=css_code) as demo:
    # with gr.Group():
    #     with gr.Row(variant="compact", height="10px"):
    #         chatbot = gr.Chatbot(label="Conversation", type="messages", visible=False, render=False)
    #     with gr.Row():
    #         with gr.Column():
    #             with gr.Row():
    #                 set_key_button = gr.Button("Ask ZZK", variant="primary")
    #         with gr.Column():
    #             with gr.Row():
    #                 audio = WebRTC(modality="audio", mode="send-receive",
    #                                label="Audio Stream",
    #                                rtc_configuration=rtc_configuration)
    #     client_state = gr.State(None)
    #     conversation_llm_format = gr.State([])
    #     set_key_button.click(set_api_key, inputs=[], outputs=[client_state, set_key_button])
    #     audio.stream(
    #         ReplyOnPause(response),
    #         inputs=[audio, conversation_llm_format, chatbot, client_state],
    #         outputs=[audio]
    #     )
    #     audio.on_additional_outputs(lambda l, g: (l, g), outputs=[conversation_llm_format, chatbot])

    with gr.Group():
        with gr.Row():
            chatbot = gr.Chatbot(label="Conversation", type="messages")
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                with gr.Row():
                    set_key_button = gr.Button("Set Radar", variant="primary")
            with gr.Column(scale=5):
                audio = WebRTC(modality="audio", mode="send-receive",
                               label="Audio Stream",
                               rtc_configuration=rtc_configuration)
        client_state = gr.State(None)
        conversation_llm_format = gr.State([])
        set_key_button.click(set_api_key, inputs=[],
                             outputs=[client_state, set_key_button])
        audio.stream(
            ReplyOnPause(response),
            inputs=[audio, conversation_llm_format, chatbot, client_state],
            outputs=[audio]
        )
        audio.on_additional_outputs(lambda l, g: (l, g), outputs=[conversation_llm_format, chatbot])
if __name__ == "__main__":
    demo.launch(show_error=True)