Dhahlan2000 committed (verified)
Commit 4e06e40
1 Parent(s): 9604a21

Update app.py

Files changed (1): app.py (+30, -1)
app.py CHANGED
@@ -3,6 +3,7 @@ from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import os
+import pyttsx3  # Importing pyttsx3 for text-to-speech
 
 # Replace 'your_huggingface_token' with your actual Hugging Face access token
 access_token = os.getenv('token')
@@ -19,6 +20,29 @@ model.eval() # Set the model to evaluation mode
 # Initialize the inference client (if needed for other API-based tasks)
 client = InferenceClient(token=access_token)
 
+# Initialize the text-to-speech engine
+tts_engine = pyttsx3.init()
+
+# Import required modules for E2-F5-TTS
+from huggingface_hub import Client
+
+# Initialize the E2-F5-TTS client
+client_tts = Client("mrfakename/E2-F5-TTS")
+
+def text_to_speech(text, sample):
+    result = client_tts.predict(
+        ref_audio_input=handle_file(f'input/{sample}.mp3'),
+        ref_text_input="",
+        gen_text_input=text,
+        remove_silence=False,
+        cross_fade_duration_slider=0.15,
+        speed_slider=1,
+        api_name="/basic_tts"
+    )
+    audio_file = open(result[0], "rb")
+    audio_bytes = audio_file.read()
+    return audio_bytes
+
 def conversation_predict(input_text):
     """Generate a response for single-turn input using the model."""
     # Tokenize the input text
@@ -28,7 +52,12 @@ def conversation_predict(input_text):
     outputs = model.generate(input_ids, max_new_tokens=2048)
 
     # Decode and return the generated response
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Convert the text response to speech using E2-F5-TTS
+    audio_bytes = text_to_speech(response, sample="input")
+
+    return response, audio_bytes
 
 def respond(
     message: str,
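
Note: as committed, the new code likely won't import cleanly. `handle_file` is called but never imported, and `huggingface_hub` does not export a `Client` class (its client is `InferenceClient`); both `Client` and `handle_file` normally come from the `gradio_client` package, which is the usual way to call a Gradio Space such as mrfakename/E2-F5-TTS. The `pyttsx3` import and `tts_engine` are also added but never used in the code shown. A minimal corrected sketch of the helper, assuming `gradio_client` is the intended package and keeping the `/basic_tts` parameters exactly as they appear in the diff:

from gradio_client import Client, handle_file  # assumption: gradio_client, not huggingface_hub

client_tts = Client("mrfakename/E2-F5-TTS")

def text_to_speech(text: str, sample: str = "input") -> bytes:
    """Synthesize `text` with the E2-F5-TTS Space, using input/<sample>.mp3 as the reference voice."""
    result = client_tts.predict(
        ref_audio_input=handle_file(f"input/{sample}.mp3"),  # reference voice clip, must exist locally
        ref_text_input="",             # empty: let the Space transcribe the reference audio itself
        gen_text_input=text,           # text to synthesize
        remove_silence=False,
        cross_fade_duration_slider=0.15,
        speed_slider=1,
        api_name="/basic_tts",
    )
    # The commit assumes the first element of the result is a path to the generated audio file.
    with open(result[0], "rb") as audio_file:
        return audio_file.read()

With that helper in place, the revised `conversation_predict` returns a (text, audio bytes) pair instead of a plain string, so any existing caller has to unpack two values. A hypothetical caller might look like:

# Hypothetical usage; the file name and prompt are illustrative only.
reply_text, reply_audio = conversation_predict("Hello, how are you?")
with open("reply.wav", "wb") as f:  # assumption: the Space produces WAV audio
    f.write(reply_audio)
print(reply_text)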