Spaces:
Runtime error
Runtime error
# Standard library
import re
import sys
import uuid

# Third-party
import ffmpeg
import whisper
import yt_dlp

# Whisper checkpoint to load once at import time.
select_model = "base"  # ['tiny', 'base']
model = whisper.load_model(select_model)
def extract_video_id(url):
    """Return the YouTube video ID embedded in *url*, or ``None``.

    Recognised forms are ``youtu.be/<id>`` and, on ``youtube.com`` or
    ``youtube-nocookie.com``, the ``embed/``, ``v/``, ``shorts/``, ``live/``
    and ``watch?...v=<id>`` paths.

    Args:
        url: Candidate URL. ``None`` or an empty string yields ``None``.

    Returns:
        The ID (a run of word characters and hyphens), or ``None`` when no
        supported pattern is found anywhere in *url*.
    """
    if not url:
        return None
    # The second ``watch`` alternative covers links where ``v`` is not the
    # first query parameter (e.g. ``watch?feature=share&v=<id>``).
    pattern = (
        r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/"
        r"(?:embed/|v/|shorts/|live/|watch\?v=|watch\?.+&v=))([\w-]+)"
    )
    match = re.search(pattern, url)
    return match.group(1) if match else None
def download_audio(Youtube_Video_Link):
    """Download a YouTube video's audio via yt-dlp and return the MP3 file name.

    The link is normalised to a canonical ``watch?v=<id>`` URL before the
    download. The output template is a random 8-character name with no
    directory part, so the file is expected to land relative to the process
    working directory.

    Args:
        Youtube_Video_Link: Any URL form understood by ``extract_video_id``.

    Returns:
        ``"<random8>.mp3"`` -- the name the FFmpegExtractAudio post-processor
        is expected to produce from the output template.

    Raises:
        ValueError: If no video ID can be extracted from the link.
    """
    video_id = extract_video_id(Youtube_Video_Link)
    if video_id is None:
        # Fail before touching the network instead of requesting "watch?v=None".
        raise ValueError(
            f"Could not find a YouTube video ID in: {Youtube_Video_Link!r}"
        )
    yt_url = f"https://www.youtube.com/watch?v={video_id}"

    # Short random base name so separate downloads do not overwrite each other.
    random_uuid = str(uuid.uuid4())[:8]
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }],
        # No extension here: the post-processor appends ".mp3".
        'outtmpl': f'{random_uuid}',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])
    return f"{random_uuid}.mp3"
def store_path_in_json(path, json_file_path="stored_paths.json"):
    """Append ``path`` with the current timestamp to a JSON list on disk.

    The JSON file holds a list of ``{"path": ..., "timestamp": ...}`` objects.
    It is created with an empty list when it does not exist yet. Timestamps
    use the ``%Y-%m-%d %H:%M:%S`` format.

    Args:
        path: File path to record.
        json_file_path: Location of the JSON index file.

    Raises:
        json.JSONDecodeError: If the existing file is not valid JSON. The
            error and the raw file content are printed before re-raising.
    """
    entry = {
        "path": path,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }

    # If the JSON file doesn't exist, create it with an empty list.
    if not os.path.exists(json_file_path):
        with open(json_file_path, 'w') as json_file:
            json.dump([], json_file)

    # Read the raw text first so it is still available for the diagnostic
    # message; reading from the file handle inside the except clause would
    # fail because the handle is already closed by then.
    with open(json_file_path, 'r') as json_file:
        raw_content = json_file.read()
    try:
        data = json.loads(raw_content)
    except json.decoder.JSONDecodeError as e:
        print(f"Error decoding JSON file: {e}")
        print(f"Content of JSON file: {raw_content}")
        raise  # Re-raise after printing so the caller sees the real error.

    data.append(entry)

    # Write the updated list back to the JSON file.
    with open(json_file_path, 'w') as json_file:
        json.dump(data, json_file, indent=2)
import os | |
import json | |
from datetime import datetime, timedelta | |
def delete_old_files(json_filename, max_age_hours):
    """Delete tracked files older than ``max_age_hours`` and prune the index.

    The index is a JSON list of ``{"path": ..., "timestamp": ...}`` objects
    with timestamps in ``%Y-%m-%d %H:%M:%S`` format. Expired entries are
    dropped from the index whether or not their file still exists.

    Args:
        json_filename: Path of the JSON index. If it does not exist the
            function returns without doing anything.
        max_age_hours: Age threshold in hours; entries strictly older than
            this are removed.
    """
    if not os.path.exists(json_filename):
        # No index file, so there is nothing to delete.
        return
    with open(json_filename, 'r') as json_file:
        data = json.load(json_file)

    now = datetime.now()
    updated_data = []
    for entry in data:
        path = entry["path"]
        creation_date = datetime.strptime(entry["timestamp"], '%Y-%m-%d %H:%M:%S')
        age_hours = (now - creation_date).total_seconds() / 3600
        if age_hours > max_age_hours:
            # Remove directly rather than checking existence first, so a file
            # that disappears in between does not abort the whole cleanup.
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
            # Expired entries are not carried over into the new index.
            continue
        updated_data.append(entry)

    # Save the pruned index.
    with open(json_filename, 'w') as json_file:
        json.dump(updated_data, json_file, indent=2)
def convert_to_text(audio_path):
    """Transcribe ``audio_path`` with the module-level Whisper model.

    Before transcribing, files tracked in ``stored_paths.json`` that are more
    than one hour old are purged, and ``audio_path`` is added to that index.

    Returns:
        The ``"text"`` field of the transcription result.
    """
    # Housekeeping first: purge expired files, then track this one.
    delete_old_files("stored_paths.json", 1)
    store_path_in_json(audio_path)
    return model.transcribe(audio_path, fp16=False)["text"]
import os | |
def audio_to_text(youtube_link, audio_path):
    """Transcribe either a YouTube video's audio or a local audio file.

    The YouTube link takes priority. The literal string ``"None"`` (which the
    caller in this file uses as a placeholder) and ``None`` are treated as
    "not provided" for both arguments.

    Args:
        youtube_link: YouTube URL. Used only when it is longer than three
            characters after stripping whitespace.
        audio_path: Path to a local audio file, used when no usable link is
            given and the file exists.

    Returns:
        The transcribed text, or ``None`` when neither input is usable.
    """
    link = youtube_link.strip() if isinstance(youtube_link, str) else ""
    # "None" is four characters long, so without this explicit exclusion the
    # placeholder would pass the length check and be sent to the downloader.
    if len(link) > 3 and link != "None":
        downloaded_name = download_audio(link)
        return convert_to_text(os.path.join(os.getcwd(), downloaded_name))

    if audio_path and audio_path != "None" and os.path.exists(audio_path):
        return convert_to_text(audio_path)

    return None
import gradio as gr | |
import os | |
def transcribe_audio(youtube_link, audio_file):
    """Gradio callback: transcribe a YouTube link or an uploaded audio file.

    A non-blank YouTube link takes priority over the uploaded file.

    Args:
        youtube_link: Text from the link box; may be empty or ``None``.
        audio_file: Uploaded file, either as a path string or as an object
            exposing the path via ``.name``; may be ``None``.

    Returns:
        The transcribed text, or a prompt asking for input when nothing
        usable was supplied (blank link, missing file, or no transcription).
    """
    no_input_message = "Please provide a YouTube link or upload an audio file."

    link = youtube_link.strip() if isinstance(youtube_link, str) else ""
    result = None
    if link:
        result = audio_to_text(link, "None")
    elif audio_file:
        # Accept both a plain path and a file-like object carrying ``.name``.
        audio_path = getattr(audio_file, "name", audio_file)
        if os.path.exists(audio_path):
            # Pass an empty link: the "None" placeholder is long enough to be
            # mistaken for a real link by audio_to_text's length check.
            result = audio_to_text("", audio_path)

    # Always hand the UI a string, never None or an unbound name.
    return result if result is not None else no_input_message
# Web UI: a text box for the YouTube link and a file upload, both passed to
# transcribe_audio; the returned string is shown as plain text.
# NOTE(review): live=True presumably re-runs the callback on every input
# change, including partially typed links -- confirm this is intended.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[gr.Textbox(), gr.File()],
    outputs="text",
    live=True,
)

iface.launch()