Aekanun committed
Commit 526e16c · 1 Parent(s): 4fceee3
Files changed (1)
  1. app.py +10 -23
app.py CHANGED
@@ -1,49 +1,36 @@
+import spaces
 import gradio as gr
 from transformers import pipeline
-import torch
-import spaces
 
-# Initialize model on CPU
+# Initialize model and move to GPU
 model = pipeline(
     "automatic-speech-recognition",
     model="Aekanun/whisper-small-hi",
-    device="cpu"
+    device="cuda"  # เปลี่ยนเป็น cuda เลย
 )
 
-@spaces.GPU
+@spaces.GPU  # GPU function with default 60s duration
 def transcribe_speech(audio):
-    """Speech transcription with GPU support"""
+    """Speech transcription function"""
     try:
         if audio is None:
             return "กรุณาบันทึกเสียงก่อน"
 
-        # Move model to GPU
-        model.model = model.model.to("cuda")
+        # Process audio (model is already on GPU)
+        result = model(audio, batch_size=1)
 
-        # Make sure input is on the same device as model
-        with torch.cuda.amp.autocast():
-            # Process audio
-            result = model(audio, batch_size=1)
-
-            # Get text result
-            text = result["text"] if isinstance(result, dict) else result
-
-        # Move model back to CPU
-        model.model = model.model.to("cpu")
-        torch.cuda.empty_cache()
+        # Get text result
+        text = result["text"] if isinstance(result, dict) else result
 
         return text
 
     except Exception as e:
-        # Make sure model is back on CPU in case of error
-        model.model = model.model.to("cpu")
-        torch.cuda.empty_cache()
        return f"เกิดข้อผิดพลาด: {str(e)}"
 
 # Create Gradio interface
 demo = gr.Interface(
     fn=transcribe_speech,
-    inputs=gr.Audio(type="filepath"),  # Simplified Audio component
+    inputs=gr.Audio(type="filepath"),
     outputs=gr.Textbox(label="ข้อความ"),
     title="Thai Speech Transcription",
     description="บันทึกเสียงเพื่อแปลงเป็นข้อความภาษาไทย",
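
The hunk is truncated inside the gr.Interface(...) call, so the tail of app.py is not part of this diff. (The Thai comment on the new device line reads "switch straight to cuda".) For orientation only, a minimal sketch of how the new version would typically be completed and launched on a ZeroGPU Space follows; the closing of the Interface call, the demo.launch() call, and the explicit duration argument to spaces.GPU are illustrative assumptions, not content of this commit.

    import spaces
    import gradio as gr
    from transformers import pipeline

    # Build the pipeline once at import time and keep it on the GPU,
    # as the new version of app.py does.
    model = pipeline(
        "automatic-speech-recognition",
        model="Aekanun/whisper-small-hi",
        device="cuda",
    )

    # Assumption: an explicit duration is shown for illustration; the commit
    # itself uses the bare @spaces.GPU decorator, which defaults to 60 s per call.
    @spaces.GPU(duration=60)
    def transcribe_speech(audio):
        """Speech transcription function (model is already on the GPU)."""
        try:
            if audio is None:
                return "กรุณาบันทึกเสียงก่อน"  # "Please record audio first"
            result = model(audio, batch_size=1)
            return result["text"] if isinstance(result, dict) else result
        except Exception as e:
            return f"เกิดข้อผิดพลาด: {str(e)}"  # "An error occurred: ..."

    demo = gr.Interface(
        fn=transcribe_speech,
        inputs=gr.Audio(type="filepath"),
        outputs=gr.Textbox(label="ข้อความ"),  # "Text"
        title="Thai Speech Transcription",
        description="บันทึกเสียงเพื่อแปลงเป็นข้อความภาษาไทย",  # "Record audio to convert it to Thai text"
    )

    # Assumption: the truncated file presumably ends by launching the app.
    demo.launch()

Compared with the previous version, the pipeline now stays on the GPU device instead of being moved between CPU and GPU (with torch.cuda.empty_cache() calls) on every request.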