jdana committed on
Commit
b021279
·
verified ·
1 Parent(s): aaad0fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -32,6 +32,13 @@ from f5_tts.infer.utils_infer import (
32
 
33
  import torch # Added missing import
34
 
 
 
 
 
 
 
 
35
  try:
36
  import spaces
37
  USING_SPACES = True
@@ -51,7 +58,7 @@ vocoder = load_vocoder()
51
 
52
  def load_f5tts(ckpt_path=None):
53
  if ckpt_path is None:
54
- ckpt_path = str(cached_path("hf://SWivid/F5-TTS/F5TTS_Base/model_1200000.safetensors"))
55
  model_cfg = {
56
  "dim": 1024,
57
  "depth": 22,
@@ -62,7 +69,8 @@ def load_f5tts(ckpt_path=None):
62
  }
63
  model = load_model(DiT, model_cfg, ckpt_path)
64
  model.eval() # Ensure the model is in evaluation mode
65
- model.to('cuda') # Move model to GPU
 
66
  return model
67
 
68
  F5TTS_ema_model = load_f5tts()
@@ -80,7 +88,7 @@ def generate_response(messages, model, tokenizer):
80
  )
81
 
82
  # Tokenizer and model input preparation
83
- model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
84
 
85
  # Use full precision for higher audio quality
86
  with torch.no_grad():
@@ -244,7 +252,7 @@ def infer(ref_audio_orig, ref_text, gen_text, cross_fade_duration=0.0, speed=1,
244
  raise ValueError("Generated text is empty. Please provide valid text content.")
245
 
246
  try:
247
- # Ensure inference is in full precision
248
  with torch.no_grad():
249
  final_wave, final_sample_rate, _ = infer_process(
250
  ref_audio,
@@ -279,7 +287,7 @@ def basic_tts(ref_audio_input, ref_text_input, gen_file_input, cross_fade_durati
279
  if file_type != 'application/epub+zip':
280
  sanitized_base = sanitize_filename(os.path.splitext(os.path.basename(epub_path))[0])
281
  temp_epub = os.path.join("Working_files", "temp_converted", f"{sanitized_base}.epub")
282
- convert_to_epub(epub_path, temp_epub)
283
  epub_path = temp_epub
284
 
285
  progress(0.1, desc="Extracting text and title from EPUB")
 
32
 
33
  import torch # Added missing import
34
 
35
+ # Determine the available device (GPU or CPU)
36
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
37
+ if torch.cuda.is_available():
38
+ print("CUDA is available. Using GPU.")
39
+ else:
40
+ print("CUDA is not available. Using CPU.")
41
+
42
  try:
43
  import spaces
44
  USING_SPACES = True
 
58
 
59
  def load_f5tts(ckpt_path=None):
60
  if ckpt_path is None:
61
+ ckpt_path = str(cached_path("hf://SWivid/F5-TTS/F5-TTS/F5TTS_Base/model_1200000.safetensors"))
62
  model_cfg = {
63
  "dim": 1024,
64
  "depth": 22,
 
69
  }
70
  model = load_model(DiT, model_cfg, ckpt_path)
71
  model.eval() # Ensure the model is in evaluation mode
72
+ model = model.to(device) # Move model to the selected device
73
+ print(f"Model loaded on {device}.")
74
  return model
75
 
76
  F5TTS_ema_model = load_f5tts()
 
88
  )
89
 
90
  # Tokenizer and model input preparation
91
+ model_inputs = tokenizer([text], return_tensors="pt").to(device)
92
 
93
  # Use full precision for higher audio quality
94
  with torch.no_grad():
 
252
  raise ValueError("Generated text is empty. Please provide valid text content.")
253
 
254
  try:
255
+ # Ensure inference is on the correct device
256
  with torch.no_grad():
257
  final_wave, final_sample_rate, _ = infer_process(
258
  ref_audio,
 
287
  if file_type != 'application/epub+zip':
288
  sanitized_base = sanitize_filename(os.path.splitext(os.path.basename(epub_path))[0])
289
  temp_epub = os.path.join("Working_files", "temp_converted", f"{sanitized_base}.epub")
290
+ convert_to_epub(ebook, temp_epub)
291
  epub_path = temp_epub
292
 
293
  progress(0.1, desc="Extracting text and title from EPUB")