"""Transcribe YouTube videos or local audio files to text with OpenAI Whisper.

Pipeline: download audio via yt-dlp -> transcribe with Whisper -> serve
through a Gradio web UI.  Every downloaded file is recorded in a JSON
ledger (stored_paths.json) and deleted once it is older than a configured
maximum age, so temporary MP3s do not accumulate.
"""

import json
import os
import re
import sys
import uuid
from datetime import datetime, timedelta

import ffmpeg  # imported to fail fast if the ffmpeg binding is missing (yt-dlp needs ffmpeg)
import gradio as gr
import whisper
import yt_dlp

# Whisper model size trades accuracy for speed; options used here: 'tiny', 'base'.
select_model = "base"
model = whisper.load_model(select_model)


def extract_video_id(url):
    """Return the YouTube video ID embedded in *url*, or None if absent.

    Handles youtu.be short links, embed/, v/, shorts/ and watch?v= forms,
    including the youtube-nocookie.com domain.
    """
    pattern = r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/(?:embed/|v/|shorts/|watch\?v=|watch\?.+&v=))([\w-]+)"
    match = re.search(pattern, url)
    return match.group(1) if match else None


def download_audio(Youtube_Video_Link):
    """Download the audio track of a YouTube video as an MP3 file.

    The file is written to the current working directory under a random
    8-character name (to avoid collisions between concurrent requests).
    Returns that filename, e.g. "1a2b3c4d.mp3".
    """
    video_id = extract_video_id(Youtube_Video_Link)
    # Rebuild a canonical watch URL so any supported input form works.
    yt_url = f"https://www.youtube.com/watch?v={video_id}"
    random_uuid = str(uuid.uuid4())[:8]
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }],
        # yt-dlp appends the codec extension, so the final name is <uuid>.mp3.
        "outtmpl": f'{random_uuid}',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])
    return f"{random_uuid}.mp3"


def store_path_in_json(path, json_file_path="stored_paths.json"):
    """Append a {path, timestamp} record to the JSON ledger.

    Creates the ledger as an empty list if it does not exist yet.
    Raises json.decoder.JSONDecodeError (after printing diagnostics) when
    the existing ledger is not valid JSON.
    """
    entry = {
        "path": path,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    # Bootstrap the ledger with an empty list on first use.
    if not os.path.exists(json_file_path):
        with open(json_file_path, 'w') as json_file:
            json.dump([], json_file)
    try:
        with open(json_file_path, 'r') as json_file:
            data = json.load(json_file)
    except json.decoder.JSONDecodeError as e:
        print(f"Error decoding JSON file: {e}")
        # BUG FIX: the original read from the already-closed handle here,
        # which raised ValueError and masked the decode error. Re-open the
        # file to show its raw content instead.
        with open(json_file_path, 'r') as json_file:
            print(f"Content of JSON file: {json_file.read()}")
        raise  # Reraise the exception after printing for further analysis
    data.append(entry)
    with open(json_file_path, 'w') as json_file:
        json.dump(data, json_file, indent=2)


def delete_old_files(json_filename, max_age_hours):
    """Delete files recorded in the ledger that are older than *max_age_hours*.

    Rewrites the ledger with only the entries that were kept. Missing
    ledger or missing files are tolerated silently (best-effort cleanup).
    """
    if os.path.exists(json_filename):
        with open(json_filename, 'r') as json_file:
            data = json.load(json_file)
    else:
        # No ledger yet: nothing to delete.
        return
    now = datetime.now()
    updated_data = []
    for entry in data:
        path = entry["path"]
        creation_date = datetime.strptime(entry["timestamp"], '%Y-%m-%d %H:%M:%S')
        age_hours = (now - creation_date).total_seconds() / 3600
        if age_hours > max_age_hours:
            # Expired: remove the file (if still present) and drop the entry.
            if os.path.exists(path):
                os.remove(path)
            continue
        updated_data.append(entry)
    with open(json_filename, 'w') as json_file:
        json.dump(updated_data, json_file, indent=2)


def convert_to_text(audio_path):
    """Transcribe the audio file at *audio_path* and return the text.

    Side effects: purges ledger entries older than 1 hour, then records
    *audio_path* in the ledger so it is cleaned up later.
    """
    delete_old_files("stored_paths.json", 1)
    store_path_in_json(audio_path)
    # fp16=False forces CPU-safe float32 inference.
    result = model.transcribe(audio_path, fp16=False)
    return result["text"]


def audio_to_text(youtube_link, audio_path):
    """Transcribe either a YouTube link or a local audio file.

    The unused argument is passed as the sentinel string "None" by callers;
    len(...) > 3 filters that sentinel out. Returns the transcript, or
    None when neither input is usable.
    """
    if len(youtube_link) > 3:
        audio_file_path = download_audio(youtube_link)
        audio_file_path = os.path.join(os.getcwd(), audio_file_path)
        return convert_to_text(audio_file_path)
    if os.path.exists(audio_path):
        return convert_to_text(audio_path)
    # BUG FIX: made the "no usable input" fall-through explicit.
    return None


def transcribe_audio(youtube_link, audio_file):
    """Gradio handler: transcribe whichever input the user provided.

    Prefers the YouTube link over the uploaded file. Returns the
    transcript text, or an instruction message when no input was given.
    """
    if youtube_link:
        return audio_to_text(youtube_link, "None")
    # BUG FIX: the original left `result` unbound (UnboundLocalError) when
    # audio_file was set but the path did not exist; fold the existence
    # check into the guard so every path returns a value.
    if audio_file and os.path.exists(audio_file):
        return audio_to_text("None", audio_file)
    return "Please provide a YouTube link or upload an audio file."


iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Textbox(),
        gr.File(),
    ],
    outputs="text",
    live=True,
)

if __name__ == "__main__":
    iface.launch()