empowerus committed on
Commit
8e68530
·
verified ·
1 Parent(s): b007d91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -0
app.py CHANGED
@@ -23,6 +23,60 @@ def generate_response(audio):
23
  return response, None, None
24
 
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  with gr.Blocks() as block:
28
  gr.HTML(
 
23
  return response, None, None
24
 
25
 
26
# FIX: ParlerTTSForConditionalGeneration was referenced below but never
# imported, which raises NameError at import time. It lives in the
# parler_tts package (same project that provides the streamer).
from parler_tts import ParlerTTSForConditionalGeneration
from streamer import ParlerTTSStreamer
from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
import numpy as np
import spaces
import torch
from threading import Thread


# Prefer CUDA, then Apple MPS, then CPU.
device = "cuda:0" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
# Half precision is only safe on an accelerator; CPU inference needs float32.
torch_dtype = torch.float16 if device != "cpu" else torch.float32

# Base checkpoint: supplies the tokenizer and feature extractor.
repo_id = "parler-tts/parler_tts_mini_v0.1"

# Fine-tuned single-voice ("Jenny") checkpoint: supplies the model weights.
jenny_repo_id = "ylacombe/parler-tts-mini-jenny-30H"

model = ParlerTTSForConditionalGeneration.from_pretrained(
    jenny_repo_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
).to(device)

tokenizer = AutoTokenizer.from_pretrained(repo_id)
feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)

# Audio codec parameters, read off the loaded model's audio encoder config;
# used below to size streaming chunks and to timestamp/encode output audio.
sampling_rate = model.audio_encoder.config.sampling_rate
frame_rate = model.audio_encoder.config.frame_rate
51
@spaces.GPU
def read_response(answer):
    """Synthesize *answer* as speech and stream it back incrementally.

    Runs ``model.generate`` on a background thread with a
    ``ParlerTTSStreamer`` attached, then yields ``(answer, mp3_bytes)``
    tuples as each ~2 s chunk of audio becomes available.
    """
    # Emit a chunk roughly every 2 seconds of generated audio.
    chunk_seconds = 2.0
    steps_per_chunk = int(frame_rate * chunk_seconds)

    # Fixed voice/style prompt for the Jenny checkpoint.
    description = "Jenny speaks at an average pace with a calm delivery in a very confined sounding environment with clear audio quality."
    voice_inputs = tokenizer(description, return_tensors="pt").to(device)
    text_inputs = tokenizer(answer, return_tensors="pt").to(device)

    audio_streamer = ParlerTTSStreamer(model, device=device, play_steps=steps_per_chunk)

    set_seed(42)  # deterministic sampling across calls
    worker = Thread(
        target=model.generate,
        kwargs={
            "input_ids": voice_inputs.input_ids,
            "prompt_input_ids": text_inputs.input_ids,
            "streamer": audio_streamer,
            "do_sample": True,
            "temperature": 1.0,
            "min_new_tokens": 10,
        },
    )
    worker.start()

    # Consume chunks as the background generation produces them.
    for new_audio in audio_streamer:
        print(f"Sample of length: {round(new_audio.shape[0] / sampling_rate, 2)} seconds")
        yield answer, numpy_to_mp3(new_audio, sampling_rate=sampling_rate)
80
 
81
  with gr.Blocks() as block:
82
  gr.HTML(