Spaces:

Aekanun
/

ThaiSpeech-to-Text-v1.0

Running on Zero

Aekanun committed on Dec 1, 2024

Commit

24fa852

1 Parent(s): 2eb0277

rev app

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,11 +3,12 @@ from transformers import pipeline
 import torch
 import spaces
-# Initialize model on CPU
 model = pipeline(
     "automatic-speech-recognition",
     model="Aekanun/whisper-small-hi",
-    device="cpu"
 )
 @spaces.GPU
@@ -17,17 +18,12 @@ def transcribe_speech(audio):
         if audio is None:
             return "กรุณาบันทึกเสียงก่อน"
-        # Move model to GPU
-        model.model = model.model.to("cuda")
         with torch.amp.autocast('cuda'):
-            # Process audio with chunk length to handle long audio
-            result = model(
-                audio,
-                batch_size=1,
-                return_timestamps=True,  # แก้ error เรื่อง timestamps
-                chunk_length_s=30        # แก้ error เรื่องความยาวเสียง
-            )
             # Get text result
             text = result["text"] if isinstance(result, dict) else result

 import torch
 import spaces
+# Initialize model on CPU with float16
 model = pipeline(
     "automatic-speech-recognition",
     model="Aekanun/whisper-small-hi",
+    device="cpu",
+    torch_dtype=torch.float16  # กำหนด data type เป็น float16
 )
 @spaces.GPU
         if audio is None:
             return "กรุณาบันทึกเสียงก่อน"
+        # Move model to GPU with float16
+        model.model = model.model.to("cuda").half()  # ใช้ .half() เพื่อแปลงเป็น float16
         with torch.amp.autocast('cuda'):
+            # Process audio
+            result = model(audio, batch_size=1)
             # Get text result
             text = result["text"] if isinstance(result, dict) else result