"""FastAPI service exposing a Whisper speech-recognition pipeline over HTTP."""

import time

import torch
from fastapi import FastAPI, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from transformers import pipeline

app = FastAPI(docs_url="/api/docs")

# NOTE(review): wildcard origins together with allow_credentials=True is a
# very permissive CORS policy -- confirm this is intended outside development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)

# Use the first CUDA device with half precision when available; otherwise
# fall back to CPU with full precision.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

BATCH_SIZE = 8

# The pipeline is built once at import time and shared by every request.
pipe = pipeline(
    "automatic-speech-recognition",
    "openai/whisper-large-v3",
    torch_dtype=torch_dtype,
    device=device,
)

_NO_AUDIO_MESSAGE = (
    "No audio file submitted! Please upload or record an audio file "
    "before submitting your request."
)


def _log_elapsed(start_time: float) -> None:
    """Print the wall-clock seconds elapsed since *start_time*."""
    print("Time took to process the request and return response is {} sec".format(
        time.time() - start_time))


@app.get("/device")
def getDevice() -> str:
    """Return the device string the pipeline was created with.

    Returns:
        ``"cuda:0"`` when CUDA was available at startup, otherwise ``"cpu"``.
    """
    start_time = time.time()
    _log_elapsed(start_time)
    return device


@app.post("/transcribe")
def transcribe(soundFile: UploadFile, task: str = "transcribe"):
    """Run the speech-recognition pipeline on an uploaded audio file.

    Args:
        soundFile: The uploaded audio file; its full content is read into
            memory and handed to the pipeline as raw bytes.
        task: Forwarded to the model as ``generate_kwargs["task"]``.

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        HTTPException: 400 when no file, or an empty file, was submitted.
    """
    start_time = time.time()

    # The original code raised a plain string here, which is itself a
    # TypeError in Python 3; raise a proper HTTP error so the client sees
    # the intended message.
    if soundFile is None:
        raise HTTPException(status_code=400, detail=_NO_AUDIO_MESSAGE)

    audio_bytes = soundFile.file.read()
    if not audio_bytes:
        # A zero-byte upload carries no audio to decode; reject it up front.
        raise HTTPException(status_code=400, detail=_NO_AUDIO_MESSAGE)

    result = pipe(
        audio_bytes,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    text = result["text"]

    _log_elapsed(start_time)
    return text