khurrameycon committed on
Commit
d0ae17f
·
verified ·
1 Parent(s): 5c1c2d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -3,6 +3,7 @@ from fastapi.responses import FileResponse
3
  from kokoro import KPipeline
4
  import soundfile as sf
5
  import os
 
6
 
7
  app = FastAPI()
8
 
@@ -34,25 +35,24 @@ async def generate_audio(text: str, voice: str = "af_heart", speed: float = 1.0)
34
  # Process only the first segment for demo
35
  for i, (gs, ps, audio) in enumerate(generator):
36
  # Convert to 16-bit PCM
37
- # Ensure the audio is in the range [-1, 1] first
38
  audio = np.clip(audio, -1, 1)
39
  # Convert to 16-bit signed integers
40
  pcm_data = (audio * 32767).astype(np.int16)
41
 
42
- # Convert to bytes, ensuring little-endian
43
- raw_audio = pcm_data.tobytes(order='C')
44
 
45
- # Return PCM data with appropriate headers
46
  return Response(
47
  content=raw_audio,
48
- media_type="audio/l16", # Linear PCM
49
  headers={
50
  "Content-Disposition": f'attachment; filename="output.pcm"',
51
  "X-Sample-Rate": "24000",
52
  "X-Bits-Per-Sample": "16",
53
- "X-Channels": "1",
54
- "X-Encoding": "signed-integer",
55
- "X-Endian": "little"
56
  }
57
  )
 
58
  return Response("No audio generated", status_code=400)
 
3
  from kokoro import KPipeline
4
  import soundfile as sf
5
  import os
6
+ import numpy as np
7
 
8
  app = FastAPI()
9
 
 
35
  # Process only the first segment for demo
36
  for i, (gs, ps, audio) in enumerate(generator):
37
  # Convert to 16-bit PCM
38
+ # Ensure the audio is in the range [-1, 1]
39
  audio = np.clip(audio, -1, 1)
40
  # Convert to 16-bit signed integers
41
  pcm_data = (audio * 32767).astype(np.int16)
42
 
43
+ # Convert to bytes (automatically uses row-major order)
44
+ raw_audio = pcm_data.tobytes()
45
 
46
+ # Return PCM data with minimal necessary headers
47
  return Response(
48
  content=raw_audio,
49
+ media_type="application/octet-stream",
50
  headers={
51
  "Content-Disposition": f'attachment; filename="output.pcm"',
52
  "X-Sample-Rate": "24000",
53
  "X-Bits-Per-Sample": "16",
54
+ "X-Endianness": "little"
 
 
55
  }
56
  )
57
+
58
  return Response("No audio generated", status_code=400)