navidved committed (verified)
Commit: d2d0553 · Parent: 759964f

connect hf to luxi asr api

Files changed (1):
  1. app.py +25 -44
app.py CHANGED
@@ -1,49 +1,30 @@
 import gradio as gr
-from transformers import pipeline
-from faster_whisper import WhisperModel
-import librosa
+import requests
 
-# Load the model
-model = WhisperModel("navidved/faster-gooya-v1", device="cpu", compute_type="int8", local_files_only=False)
+def transcribe_audio(file):
+    # Prepare headers and data
+    headers = {
+        'accept': 'application/json',
+        'Authorization': AUTH_TOKEN,
+    }
+    files = {
+        'file': (file.name, file, 'audio/mpeg'),
+    }
 
-# Define the maximum audio length in seconds
-MAX_AUDIO_LENGTH = 30 # seconds
+    # Send POST request
+    response = requests.post(ASR_API_URL, headers=headers, files=files)
 
-# Define the inference function
-def transcribe_audio(audio):
-    if audio is None:
-        return "No audio file uploaded. Please try again."
-    results = ""
-    try:
-        audio_data, sr = librosa.load(audio, sr=None)
-        duration = librosa.get_duration(y=audio_data, sr=sr)
+    # Check if response is successful
+    if response.status_code == 200:
+        return response.json().get("transcription", "No transcription returned.")
+    else:
+        return f"Error: {response.status_code}, {response.text}"
 
-        # Check if the audio is longer than the allowed duration
-        if duration > MAX_AUDIO_LENGTH:
-            return f"Audio is too long. Please upload an audio file shorter than {MAX_AUDIO_LENGTH} seconds."
-
-        # Perform transcription
-        segments, _ = model.transcribe(audio, vad_filter=True)
-        for seg in segments:
-            results += seg.text
-        return results
-    except Exception as e:
-        return f"Error during transcription: {str(e)}"
-
-# Create a Gradio interface for uploading audio or using the microphone
-with gr.Blocks() as interface:
-    gr.Markdown("# Gooya v1 Persian Speech Recognition")
-    gr.Markdown("Upload an audio file or use your microphone to transcribe speech to text.")
-
-    # Create the input and output components
-    audio_input = gr.Audio(type="filepath", label="Input Audio")
-    output_text = gr.Textbox(label="Transcription")
-
-    # Add a button to trigger the transcription
-    transcribe_button = gr.Button("Transcribe")
-
-    # Bind the transcribe_audio function to the button click
-    transcribe_button.click(fn=transcribe_audio, inputs=audio_input, outputs=output_text)
-
-# Launch the Gradio app
-interface.launch()
+# Set up the Gradio interface
+gr.Interface(
+    fn=transcribe_audio,
+    inputs=gr.Audio(source="upload", type="file"),
+    outputs="text",
+    title="Gooya v1 Persian Speech Recognition",
+    description="Upload an audio file in Persian, and this model will transcribe it."
+).launch()
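
Note: the new `app.py` references `AUTH_TOKEN` and `ASR_API_URL` but does not define them anywhere in the committed file, so the Space will raise a `NameError` at transcription time unless they are supplied elsewhere (for example, injected before this code or added in a later commit). Below is a minimal sketch of one way to provide them, assuming they come from environment variables such as Hugging Face Space secrets; the variable names and the placeholder endpoint are hypothetical, not taken from this repository.

```python
import os

# Hypothetical configuration for the committed app.py above: read the Luxi ASR
# endpoint and bearer token from environment variables (e.g., Space secrets).
# Both variable names and the fallback URL are illustrative only.
ASR_API_URL = os.environ.get("LUXI_ASR_API_URL", "https://api.example.com/v1/transcribe")
AUTH_TOKEN = os.environ.get("LUXI_ASR_AUTH_TOKEN", "")

if not AUTH_TOKEN:
    # Fail early with a clear message instead of a NameError inside the request handler.
    raise RuntimeError("LUXI_ASR_AUTH_TOKEN is not set; add it as a Space secret.")
```

Note also that `gr.Audio(source="upload", type="file")` follows the older Gradio 3.x API, where `type="file"` passes a temporary file object to the handler (hence `file.name` and the raw `file` handle in the multipart upload); recent Gradio releases replace `source=` with `sources=[...]` and no longer accept `type="file"`, so the Space presumably pins an older Gradio version.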