Speech-to-Text / app.py
hellos's picture
Create app.py
43b6df2 verified
raw
history blame
4.44 kB
import whisper
# Name of the Whisper checkpoint passed to whisper.load_model below.
select_model = "base"  # ['tiny', 'base']
# Loaded once at import time; convert_to_text reads this module-level object.
model = whisper.load_model(select_model)
import yt_dlp
import ffmpeg
import sys
import uuid
import re
def extract_video_id(url):
    """Pull the video ID out of a YouTube URL.

    Recognises ``youtu.be/<id>`` short links as well as ``youtube.com`` and
    ``youtube-nocookie.com`` URLs of the ``embed/``, ``v/``, ``shorts/`` and
    ``watch?v=`` forms, including watch URLs where ``v=`` is not the first
    query parameter.

    Returns the ID (a run of word characters and hyphens), or None when the
    URL matches none of those forms.
    """
    id_pattern = r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/(?:embed/|v/|shorts/|watch\?v=|watch\?.+&v=))([\w-]+)"
    found = re.search(id_pattern, url)
    return found.group(1) if found else None
def download_audio(Youtube_Video_Link):
    """Download the audio of a YouTube video and convert it to MP3.

    Args:
        Youtube_Video_Link: A YouTube URL in any form that
            ``extract_video_id`` recognises.

    Returns:
        The file name ``"<stem>.mp3"``, where ``<stem>`` is the first eight
        characters of a random UUID. The name is relative, not absolute.

    Raises:
        ValueError: If no video ID can be extracted from the link.
    """
    video_id = extract_video_id(Youtube_Video_Link)
    if video_id is None:
        # Without this guard the URL below would become ".../watch?v=None"
        # and a pointless download attempt would be made.
        raise ValueError(
            f"Not a recognised YouTube link: {Youtube_Video_Link!r}"
        )

    # Rebuild a canonical watch URL so only the video ID reaches yt-dlp.
    yt_url = f"https://www.youtube.com/watch?v={video_id}"
    # Random stem; presumably so that separate requests get separate files.
    random_uuid = str(uuid.uuid4())[:8]
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }],
        # Output template is the bare stem; the return value below expects
        # the post-processed file to end up as "<stem>.mp3".
        "outtmpl": f'{random_uuid}',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])
    return f"{random_uuid}.mp3"
def store_path_in_json(path, json_file_path="stored_paths.json"):
    """Append ``path`` and the current local time to a JSON log file.

    The log is a JSON list of ``{"path": ..., "timestamp": ...}`` objects,
    with timestamps formatted as ``%Y-%m-%d %H:%M:%S``. If the log file does
    not exist it is first created containing an empty list.

    Args:
        path: Value stored under the ``"path"`` key of the new entry.
        json_file_path: Location of the JSON log file.

    Raises:
        json.JSONDecodeError: If the existing log is not valid JSON. The
            error and the raw file content are printed before re-raising.
    """
    entry = {
        "path": path,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }

    # If the JSON file doesn't exist, create it with an empty list
    if not os.path.exists(json_file_path):
        with open(json_file_path, 'w') as json_file:
            json.dump([], json_file)

    # Read the raw text up front so it is still available for the diagnostic
    # message; reading from the handle inside the except block would fail
    # because the with-block has already closed it by then.
    with open(json_file_path, 'r') as json_file:
        raw_content = json_file.read()

    try:
        data = json.loads(raw_content)
    except json.decoder.JSONDecodeError as e:
        print(f"Error decoding JSON file: {e}")
        print(f"Content of JSON file: {raw_content}")
        raise  # Reraise the exception after printing for further analysis

    data.append(entry)

    # Write the updated list back to the JSON file
    with open(json_file_path, 'w') as json_file:
        json.dump(data, json_file, indent=2)
import os
import json
from datetime import datetime, timedelta
def delete_old_files(json_filename, max_age_hours):
    """Delete logged files older than ``max_age_hours`` and prune the log.

    Reads the JSON list of ``{"path", "timestamp"}`` entries from
    ``json_filename``. Every entry whose timestamp (``%Y-%m-%d %H:%M:%S``,
    compared against the current local time) is more than ``max_age_hours``
    old is dropped from the list, and its file is removed if it still exists.
    The remaining entries are written back to the same file.

    Does nothing when ``json_filename`` does not exist.
    """
    if not os.path.exists(json_filename):
        # No log file, hence nothing to clean up.
        return

    with open(json_filename, 'r') as log_file:
        entries = json.load(log_file)

    now = datetime.now()
    kept = []
    for record in entries:
        tracked_path = record["path"]
        recorded_at = datetime.strptime(record["timestamp"], '%Y-%m-%d %H:%M:%S')
        age_hours = (now - recorded_at).total_seconds() / 3600

        if age_hours > max_age_hours:
            # Expired: the entry is dropped whether or not the file is
            # still on disk.
            if os.path.exists(tracked_path):
                os.remove(tracked_path)
        else:
            kept.append(record)

    with open(json_filename, 'w') as log_file:
        json.dump(kept, log_file, indent=2)
def convert_to_text(audio_path):
    """Transcribe the audio file at ``audio_path`` and return the text.

    Before transcribing, files logged in ``stored_paths.json`` more than one
    hour ago are cleaned up via ``delete_old_files``, and ``audio_path`` is
    added to that log via ``store_path_in_json``.
    """
    # Housekeeping first: purge stale files, then log the new one.
    delete_old_files("stored_paths.json", 1)
    store_path_in_json(audio_path)

    transcription = model.transcribe(audio_path, fp16=False)
    return transcription["text"]
import os
def audio_to_text(youtube_link,audio_path):
    """Transcribe a YouTube video's audio or, failing that, a local file.

    The YouTube link takes priority over the local file. A link is treated
    as absent when it is not a string, is three characters or shorter after
    stripping whitespace, or is the literal placeholder ``"None"``.

    Args:
        youtube_link: YouTube URL, or an empty/placeholder value.
        audio_path: Path of a local audio file, or an empty/placeholder value.

    Returns:
        The transcribed text, or None when neither input is usable.
    """
    link = youtube_link.strip() if isinstance(youtube_link, str) else ""
    # "None" is the placeholder callers pass for "no link". It is four
    # characters long, so the length check alone would wrongly send it to
    # the downloader and the local file would never be transcribed.
    if len(link) > 3 and link != "None":
        downloaded_name = download_audio(link)
        # download_audio returns a name relative to the working directory.
        return convert_to_text(os.path.join(os.getcwd(), downloaded_name))

    if audio_path and os.path.exists(audio_path):
        return convert_to_text(audio_path)

    return None
import gradio as gr
import os
def transcribe_audio(youtube_link, audio_file):
    """Interface callback: transcribe a YouTube link or an uploaded file.

    A non-blank YouTube link takes priority over the upload. When neither
    input yields a transcription, a prompt asking for input is returned
    instead.

    Args:
        youtube_link: Text from the link box; may be empty or None.
        audio_file: Upload value from the file input. Presumably a
            filesystem path string -- verify against the installed Gradio
            version.

    Returns:
        The transcribed text, or the prompt message.
    """
    prompt = "Please provide a YouTube link or upload an audio file."
    link = youtube_link.strip() if isinstance(youtube_link, str) else ""

    # Empty strings mark the unused input. The former "None" placeholder is
    # four characters long and passed audio_to_text's length check, so
    # uploads were routed to the YouTube downloader.
    if link:
        result = audio_to_text(link, "")
    elif audio_file and os.path.exists(audio_file):
        result = audio_to_text("", audio_file)
    else:
        # Covers "nothing supplied" and "upload path does not exist"; the
        # latter previously left `result` unassigned.
        result = None

    return prompt if result is None else result
# Web UI: a text box for the YouTube link and a file upload, wired to
# transcribe_audio, with the transcription shown as plain text.
# NOTE(review): live=True makes Gradio re-run the function when an input
# changes -- confirm that is wanted for a download-and-transcribe call.
iface = gr.Interface(
    transcribe_audio,
    [gr.Textbox(), gr.File()],
    "text",
    live=True,
)
iface.launch()