# server.py remains the same as before # Updated client.py import asyncio import websockets import sounddevice as sd import numpy as np import base64 import queue import argparse import requests import time class AudioClient: def __init__(self, server_url="ws://localhost:8000", token_temp=None, categorical_temp=None, gaussian_temp=None): # Convert ws:// to http:// for the base URL self.base_url = server_url.replace("ws://", "http://") self.server_url = f"{server_url}/audio" # Set temperatures if provided if any(t is not None for t in [token_temp, categorical_temp, gaussian_temp]): self.set_temperature_and_echo(token_temp, categorical_temp, gaussian_temp) # Initialize queues self.audio_queue = queue.Queue() self.output_queue = queue.Queue() def set_temperature_and_echo(self, token_temp=None, categorical_temp=None, gaussian_temp=None, echo_testing = False): """Send temperature settings to server""" params = {} if token_temp is not None: params['token_temp'] = token_temp if categorical_temp is not None: params['categorical_temp'] = categorical_temp if gaussian_temp is not None: params['gaussian_temp'] = gaussian_temp response = requests.post(f"{self.base_url}/set_temperature", params=params) print(response.json()['message']) def audio_callback(self, indata, frames, time, status): """This is called for each audio block""" if status: print(status) # if np.isclose(indata, 0).all(): # raise Exception('Audio input is not working - received all zeros') # Convert float32 to int16 for efficient transmission indata_int16 = (indata.copy() * 32767).astype(np.int16) # indata_int16 = np.zeros_like(indata_int16) self.audio_queue.put(indata_int16) def output_stream_callback(self, outdata, frames, time, status): """Callback for output stream to get audio data""" if status: print(status) try: data = self.output_queue.get_nowait() data = data.astype(np.float32) / 32767.0 if len(data) < len(outdata): outdata[:len(data)] = data outdata[len(data):] = 0 else: outdata[:] = data[:len(outdata)] except queue.Empty: outdata.fill(0) async def process_audio(self): async with websockets.connect(self.server_url) as ws: while self.running: if not self.audio_queue.empty(): # Get recorded audio audio_data = self.audio_queue.get() print(f'Data from microphone:{audio_data.shape, audio_data.dtype, audio_data.min(), audio_data.max()}') # Convert to base64 audio_b64 = base64.b64encode(audio_data.tobytes()).decode('utf-8') # Send to server time_sent = time.time() await ws.send(f"data:audio/raw;base64,{audio_b64}") # Receive processed audio response = await ws.recv() response = response.split(",")[1] time_received = time.time() print(f"Data sent: {audio_b64[:10]}. Data received: {response[:10]}. Received in {(time_received - time_sent) * 1000:.2f} ms") processed_audio = np.frombuffer( base64.b64decode(response), dtype=np.int16 ).reshape(-1, CHANNELS) print(f'Data from model:{processed_audio.shape, processed_audio.dtype, processed_audio.min(), processed_audio.max()}') self.output_queue.put(processed_audio) def start(self): self.running = True # Print audio device information devices = sd.query_devices() default_input = sd.query_devices(kind='input') default_output = sd.query_devices(kind='output') print("\nAudio Device Configuration:") print("-" * 50) print(f"Default Input Device:\n{default_input}\n") print(f"Default Output Device:\n{default_output}\n") print("\nAll Available Devices:") print("-" * 50) for i, device in enumerate(devices): print(f"Device {i}:") print(f"Name: {device['name']}") print(f"Channels (in/out): {device['max_input_channels']}/{device['max_output_channels']}") print(f"Sample Rates: {device['default_samplerate']}") print() input_device = input("Enter the index of the input device or press enter for default: ") output_device = input("Enter the index of the output device or press enter for default: ") if input_device == "": input_device = default_input['index'] if output_device == "": output_device = default_output['index'] with sd.InputStream(callback=self.audio_callback, channels=CHANNELS, samplerate=SAMPLE_RATE, device=int(input_device), blocksize=2000), \ sd.OutputStream(callback=self.output_stream_callback, channels=CHANNELS, samplerate=SAMPLE_RATE, blocksize=2000, device=int(output_device)): asyncio.run(self.process_audio()) def stop(self): self.running = False if __name__ == "__main__": parser = argparse.ArgumentParser(description='Audio Client with Temperature Control') parser.add_argument('--token_temp', '-t1', type=float, help='Token (LM) temperature parameter') parser.add_argument('--categorical_temp', '-t2', type=float, help='Categorical (VAE) temperature parameter') parser.add_argument('--gaussian_temp', '-t3', type=float, help='Gaussian (VAE) temperature parameter') parser.add_argument('--server', '-s', default="ws://localhost:8000", help='Server URL (default: ws://localhost:8000)') args = parser.parse_args() # Audio settings SAMPLE_RATE = 16000 CHANNELS = 1 client = AudioClient( server_url=args.server, token_temp=args.token_temp, categorical_temp=args.categorical_temp, gaussian_temp=args.gaussian_temp ) try: client.start() except KeyboardInterrupt: client.stop()