Spaces:

hellos
/

Speech-to-Text

Runtime error

File size: 4,440 Bytes

43b6df2

import whisper
# Name of the Whisper checkpoint handed to whisper.load_model below.
select_model ="base" # ['tiny', 'base']
# Loaded once at import time; convert_to_text reuses this module-level model
# for every transcription request.
model = whisper.load_model(select_model)


import yt_dlp
import ffmpeg
import sys
import uuid
import re

def extract_video_id(url):
    """Return the YouTube video ID found in ``url``, or ``None`` when absent.

    Recognised shapes are ``youtu.be/<id>`` and ``youtube.com`` /
    ``youtube-nocookie.com`` URLs using ``embed/``, ``v/``, ``shorts/`` or a
    ``watch`` query string carrying ``v=<id>`` (first or later parameter).
    """
    id_pattern = r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/(?:embed/|v/|shorts/|watch\?v=|watch\?.+&v=))([\w-]+)"
    found = re.search(id_pattern, url)
    # The single capture group holds the ID characters (word chars and '-').
    return found.group(1) if found else None
    
def download_audio(Youtube_Video_Link):
    """Download the audio of a YouTube video and return the MP3 file name.

    The link is first normalised to a canonical ``watch?v=<id>`` URL using
    ``extract_video_id``; the download itself is delegated to yt-dlp with an
    ``FFmpegExtractAudio`` post-processor targeting MP3.

    Args:
        Youtube_Video_Link: A YouTube URL in any form ``extract_video_id``
            understands.

    Returns:
        The file name ``"<8 hex chars>.mp3"``, relative to the current working
        directory (the output template carries no directory component).

    Raises:
        ValueError: If no video ID can be extracted from the link. Previously
            this case silently built a ``watch?v=None`` URL and only failed
            inside the downloader.
    """
    video_id = extract_video_id(Youtube_Video_Link)
    if video_id is None:
        raise ValueError(
            f"Could not extract a YouTube video ID from: {Youtube_Video_Link!r}"
        )

    yt_url = f"https://www.youtube.com/watch?v={video_id}"
    # Short random stem so concurrent requests do not overwrite each other.
    random_uuid = str(uuid.uuid4())[:8]
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }],
        # No extension here: this function assumes the post-processor writes
        # "<stem>.mp3", which is the name returned below.
        'outtmpl': random_uuid,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])
    return f"{random_uuid}.mp3"

def store_path_in_json(path, json_file_path="stored_paths.json"):
    """Append ``path`` with the current timestamp to a JSON list on disk.

    The JSON file holds a list of ``{"path", "timestamp"}`` objects, with the
    timestamp formatted as ``%Y-%m-%d %H:%M:%S``. A missing file is treated as
    an empty list and is created by the final write.

    Args:
        path: File path to record.
        json_file_path: Location of the JSON registry file.

    Raises:
        json.JSONDecodeError: If the existing file does not contain valid
            JSON. The error and the raw file content are printed first.
    """
    entry = {
        "path": path,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }

    if os.path.exists(json_file_path):
        # Read the raw text before parsing so it is still available for the
        # diagnostic below; the file handle is already closed once the
        # ``with`` block ends and cannot be re-read from there.
        with open(json_file_path, 'r') as json_file:
            raw_content = json_file.read()
        try:
            data = json.loads(raw_content)
        except json.decoder.JSONDecodeError as e:
            print(f"Error decoding JSON file: {e}")
            print(f"Content of JSON file: {raw_content}")
            raise  # Reraise the exception after printing for further analysis
    else:
        data = []

    data.append(entry)

    # Write the updated list back to the JSON file
    with open(json_file_path, 'w') as json_file:
        json.dump(data, json_file, indent=2)

    # print(f"Path '{path}' stored in '{json_file_path}' with timestamp '{entry['timestamp']}'.")

import os
import json
from datetime import datetime, timedelta

def delete_old_files(json_filename, max_age_hours):
    """Remove files whose registry entry is older than ``max_age_hours``.

    Reads the list of ``{"path", "timestamp"}`` entries from ``json_filename``,
    deletes every referenced file whose timestamp is more than
    ``max_age_hours`` hours in the past (if the file still exists), and
    rewrites the JSON file with only the remaining entries. Does nothing when
    the JSON file is missing.
    """
    if not os.path.exists(json_filename):
        # No registry yet, so there is nothing to expire.
        return

    with open(json_filename, 'r') as handle:
        records = json.load(handle)

    reference_time = datetime.now()
    survivors = []

    for record in records:
        file_path = record["path"]
        recorded_at = datetime.strptime(record["timestamp"], '%Y-%m-%d %H:%M:%S')
        age_hours = (reference_time - recorded_at).total_seconds() / 3600

        if age_hours > max_age_hours:
            # Expired: drop the entry, and the file too if it is still there.
            if os.path.exists(file_path):
                os.remove(file_path)
        else:
            survivors.append(record)

    with open(json_filename, 'w') as handle:
        json.dump(survivors, handle, indent=2)
def convert_to_text(audio_path):
    """Transcribe the audio file at ``audio_path`` and return the text.

    Before transcribing, entries in ``stored_paths.json`` older than one hour
    are purged and ``audio_path`` is registered there. Transcription uses the
    module-level Whisper ``model`` with ``fp16`` disabled.
    """
    # Housekeeping first: expire old entries, then register this file.
    delete_old_files("stored_paths.json", 1)
    store_path_in_json(audio_path)

    transcription = model.transcribe(audio_path, fp16=False)
    return transcription["text"]
import os
def audio_to_text(youtube_link, audio_path):
    """Transcribe either a YouTube video or a local audio file.

    A usable YouTube link takes precedence: its audio is downloaded into the
    current working directory and transcribed. Otherwise ``audio_path`` is
    transcribed if it points to an existing file.

    Args:
        youtube_link: YouTube URL. ``None``, an empty string, the placeholder
            string ``"None"`` or anything of 3 characters or fewer is treated
            as "no link".
        audio_path: Path to a local audio file; ``None``/empty means "no file".

    Returns:
        The transcribed text, or ``None`` when neither input is usable.
    """
    # The placeholder "None" used by callers in this file is 4 characters
    # long, so the length test alone would mistake it for a link and attempt
    # a download instead of transcribing the uploaded file.
    has_link = (
        bool(youtube_link)
        and youtube_link != "None"
        and len(youtube_link) > 3
    )
    if has_link:
        # download_audio returns a name relative to the working directory.
        audio_file_path = os.path.join(os.getcwd(), download_audio(youtube_link))
        return convert_to_text(audio_file_path)

    if audio_path and os.path.exists(audio_path):
        return convert_to_text(audio_path)

    return None


import gradio as gr
import os


def transcribe_audio(youtube_link, audio_file):
    """Gradio callback: transcribe a YouTube link or an uploaded audio file.

    Args:
        youtube_link: Text from the link box; takes precedence when non-empty.
        audio_file: The uploaded file, either as a path string or as an object
            exposing the path via ``.name``.
            NOTE(review): which of the two Gradio delivers depends on the
            installed version / ``gr.File`` ``type`` setting; confirm.

    Returns:
        The transcription, or a prompt asking for input when nothing usable
        was supplied (no input, a missing file, or a link that
        ``audio_to_text`` does not accept).
    """
    prompt = "Please provide a YouTube link or upload an audio file."
    # Start from "no result" so every branch leaves ``result`` bound; the old
    # code raised UnboundLocalError when the uploaded file did not exist.
    result = None

    if youtube_link:
        result = audio_to_text(youtube_link, "")
    elif audio_file:
        audio_path = getattr(audio_file, "name", audio_file)
        if os.path.exists(audio_path):
            # Pass an empty link: the former "None" placeholder is longer
            # than 3 characters and was treated as a link to download.
            result = audio_to_text("", audio_path)

    return prompt if result is None else result

# Gradio UI definition: a text box and a file upload are passed, in that
# order, to transcribe_audio(youtube_link, audio_file); the string it returns
# is rendered as text output.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Textbox(),
        gr.File()
    ],
    outputs="text",
    # NOTE(review): live=True presumably re-runs fn whenever an input changes,
    # so partially typed links may trigger calls — confirm this is intended.
    live=True
)

# Starts the web app as a module-level side effect when this file is executed.
iface.launch()