Spaces:
Runtime error
Runtime error
File size: 4,440 Bytes
43b6df2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import whisper
# Name of the Whisper checkpoint to load; the original note lists 'tiny' and
# 'base' as the options considered.
select_model ="base" # ['tiny', 'base']
# Loaded once at import time; `model` is the module-level object that
# convert_to_text() calls .transcribe() on.
model = whisper.load_model(select_model)
import yt_dlp
import ffmpeg
import sys
import uuid
import re
# Compiled once at import time. Matches the video ID that follows any of the
# supported YouTube URL prefixes (short links, embed, /v/, shorts, live, watch).
_VIDEO_ID_RE = re.compile(
    r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/"
    r"(?:embed/|v/|shorts/|live/|watch\?v=|watch\?.+&v=))([\w-]+)"
)


def extract_video_id(url):
    """Return the YouTube video ID found in *url*, or ``None``.

    Args:
        url: Text that may contain a YouTube URL (``youtu.be/<id>``,
            ``youtube.com/watch?v=<id>``, ``/embed/<id>``, ``/v/<id>``,
            ``/shorts/<id>``, ``/live/<id>``, or the ``youtube-nocookie.com``
            equivalents).

    Returns:
        The ID as a string (word characters and hyphens), or ``None`` when
        *url* is not a string or no supported URL form is present.
    """
    # Non-string input (e.g. None from an empty form field) used to raise
    # TypeError inside re.search; treat it as "no ID found" instead.
    if not isinstance(url, str):
        return None
    match = _VIDEO_ID_RE.search(url)
    return match.group(1) if match else None
def download_audio(Youtube_Video_Link):
    """Download the audio of a YouTube video and return the MP3 file name.

    The link is normalised to a canonical ``watch?v=<id>`` URL before it is
    handed to yt-dlp, and the output is named with a random 8-character
    prefix of a UUID4.

    Args:
        Youtube_Video_Link: Any YouTube URL form understood by
            ``extract_video_id``.

    Returns:
        ``"<random8>.mp3"`` - a bare file name (no directory component).

    Raises:
        ValueError: If no video ID can be extracted from the link.
    """
    video_id = extract_video_id(Youtube_Video_Link)
    if video_id is None:
        # Previously this fell through and tried to download "watch?v=None".
        raise ValueError(
            f"Could not find a YouTube video ID in: {Youtube_Video_Link!r}"
        )

    yt_url = f"https://www.youtube.com/watch?v={video_id}"
    random_uuid = str(uuid.uuid4())[:8]
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }],
        # Output name without an extension; the returned name assumes the
        # audio-extraction step produces "<name>.mp3" - TODO confirm against
        # the yt-dlp version in use.
        'outtmpl': f'{random_uuid}',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])
    return f"{random_uuid}.mp3"
def store_path_in_json(path, json_file_path="stored_paths.json"):
    """Append *path* and the current timestamp to a JSON log file.

    The log is a JSON list of ``{"path": ..., "timestamp": ...}`` objects,
    with the timestamp formatted as ``%Y-%m-%d %H:%M:%S`` (local, naive
    time). The file is created with an empty list if it does not exist.

    Args:
        path: Value stored under the ``"path"`` key.
        json_file_path: Location of the JSON log file.

    Raises:
        json.JSONDecodeError: If the existing file does not contain valid
            JSON. The error and the raw file content are printed first.
    """
    entry = {
        "path": path,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }

    # Make sure there is always a file holding a list to read back.
    if not os.path.exists(json_file_path):
        with open(json_file_path, 'w') as json_file:
            json.dump([], json_file)

    # Read the raw text first so it is still available for the diagnostic
    # below; the old handler read from an already-closed file object, which
    # raised "I/O operation on closed file" and hid the real decode error.
    with open(json_file_path, 'r') as json_file:
        raw_content = json_file.read()
    try:
        data = json.loads(raw_content)
    except json.decoder.JSONDecodeError as e:
        print(f"Error decoding JSON file: {e}")
        print(f"Content of JSON file: {raw_content}")
        raise  # Re-raise after printing so the caller still sees the failure

    data.append(entry)

    with open(json_file_path, 'w') as json_file:
        json.dump(data, json_file, indent=2)
import os
import json
from datetime import datetime, timedelta
def delete_old_files(json_filename, max_age_hours):
    """Delete logged files older than *max_age_hours* and prune the log.

    Reads the JSON list written by ``store_path_in_json``. Every entry whose
    ``"timestamp"`` is more than *max_age_hours* in the past has its file
    removed (if it still exists) and is dropped from the log. The remaining
    entries are written back to the same file.

    Args:
        json_filename: Path of the JSON log file. If it does not exist the
            function returns without doing anything.
        max_age_hours: Age threshold in hours; entries strictly older than
            this are purged.
    """
    if not os.path.exists(json_filename):
        # No log yet, so there is nothing to delete.
        return

    with open(json_filename, 'r') as json_file:
        data = json.load(json_file)

    now = datetime.now()
    updated_data = []
    for entry in data:
        path = entry["path"]
        creation_date = datetime.strptime(entry["timestamp"], '%Y-%m-%d %H:%M:%S')
        age_hours = (now - creation_date).total_seconds() / 3600

        if age_hours > max_age_hours:
            # Remove directly and tolerate a missing file: checking for
            # existence first leaves a window in which another request can
            # delete the same file and make os.remove raise.
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
            continue  # expired entries are not written back

        updated_data.append(entry)

    with open(json_filename, 'w') as json_file:
        json.dump(updated_data, json_file, indent=2)
def convert_to_text(audio_path):
    """Transcribe the audio file at *audio_path* and return its text.

    Before transcribing, files logged in ``stored_paths.json`` more than one
    hour ago are purged and *audio_path* is added to that log.
    """
    # Housekeeping first: drop expired files, then register this one.
    delete_old_files("stored_paths.json", 1)
    store_path_in_json(audio_path)

    transcription = model.transcribe(audio_path, fp16=False)
    return transcription["text"]
import os
def audio_to_text(youtube_link,audio_path):
    """Transcribe either a YouTube video or a local audio file.

    The YouTube link takes precedence. A link is used only when it is a
    string longer than three characters (after trimming whitespace) and is
    not the literal placeholder ``"None"``.

    Args:
        youtube_link: YouTube URL, or an empty value / ``"None"`` placeholder
            when no link was supplied.
        audio_path: Path of a local audio file, used when no link is given.

    Returns:
        The transcribed text, or ``None`` when there is neither a usable
        link nor an existing audio file.
    """
    link = youtube_link.strip() if isinstance(youtube_link, str) else ""

    # "None" is the placeholder a caller in this file passes for "no link".
    # It is four characters long, so the bare length check used to mistake
    # it for a URL and skip the uploaded file entirely.
    if len(link) > 3 and link != "None":
        audio_file_path = os.path.join(os.getcwd(), download_audio(link))
        return convert_to_text(audio_file_path)

    if isinstance(audio_path, (str, os.PathLike)) and os.path.exists(audio_path):
        return convert_to_text(audio_path)

    # Nothing to transcribe; made explicit rather than falling off the end.
    return None
import gradio as gr
import os
def transcribe_audio(youtube_link, audio_file):
    """Gradio callback: transcribe a YouTube link or an uploaded audio file.

    Args:
        youtube_link: Text from the link box; takes precedence when non-empty.
        audio_file: The uploaded file, either as a path or as an object
            exposing the path in a ``name`` attribute.

    Returns:
        The result of ``audio_to_text`` for the chosen input, or a short
        message when no input was given or the uploaded file is missing.
    """
    if youtube_link:
        return audio_to_text(youtube_link, "None")

    if audio_file:
        # NOTE(review): depending on the Gradio version the File component
        # appears to pass either a path string or a temp-file wrapper with
        # a .name attribute - confirm; both are accepted here.
        upload_path = getattr(audio_file, "name", audio_file)
        if isinstance(upload_path, (str, os.PathLike)) and os.path.exists(upload_path):
            # Pass an empty link instead of the "None" placeholder: that
            # placeholder is longer than three characters, so
            # audio_to_text's length check would treat it as a URL.
            return audio_to_text("", upload_path)
        # Previously this path left `result` unassigned and the final
        # return raised UnboundLocalError.
        return "The uploaded audio file could not be found."

    return "Please provide a YouTube link or upload an audio file."
# Gradio UI: a text box for the YouTube link and a file upload, wired to
# transcribe_audio, with the result shown as plain text. live=True is kept
# exactly as in the original configuration.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[gr.Textbox(), gr.File()],
    outputs="text",
    live=True,
)

# Start the web app when this script is executed.
iface.launch()
|