File size: 4,440 Bytes
43b6df2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import whisper
# Name of the Whisper checkpoint handed to whisper.load_model below.
select_model ="base" # ['tiny', 'base']
# The model is loaded once, at import time, and reused by convert_to_text
# through this module-level name.
model = whisper.load_model(select_model)


import yt_dlp
import ffmpeg
import sys
import uuid
import re

def extract_video_id(url):
    """Return the YouTube video ID found in *url*, or None when there is none.

    Matches youtu.be short links as well as youtube.com and
    youtube-nocookie.com URLs in the embed/, v/, shorts/ and watch?v= forms,
    including watch URLs where v= is not the first query parameter.
    """
    id_pattern = r"(?:youtu\.be/|youtube(?:-nocookie)?\.com/(?:embed/|v/|shorts/|watch\?v=|watch\?.+&v=))([\w-]+)"
    found = re.search(id_pattern, url)
    # Group 1 is the run of word characters / hyphens right after the prefix.
    return found.group(1) if found else None
    
def download_audio(Youtube_Video_Link):
    """Download the audio of a YouTube video and return the MP3 file name.

    Args:
        Youtube_Video_Link: A YouTube URL in any form that extract_video_id
            recognises.

    Returns:
        ``"<id>.mp3"``, where ``<id>`` is the first 8 characters of a random
        UUID. The name carries no directory component; the caller is
        expected to resolve it against the current working directory.

    Raises:
        ValueError: If no video ID can be extracted from the link.
    """
    video_id = extract_video_id(Youtube_Video_Link)
    if video_id is None:
        # Without this check the URL below would become ".../watch?v=None"
        # and the failure would only surface deep inside yt-dlp.
        raise ValueError(
            f"Could not extract a YouTube video ID from: {Youtube_Video_Link!r}"
        )

    # Rebuild a canonical watch URL so extra query parameters are dropped.
    yt_url = f"https://www.youtube.com/watch?v={video_id}"
    random_uuid = str(uuid.uuid4())[:8]
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }],
        # Output name without extension; the returned name assumes the
        # audio-extraction post-processor appends ".mp3".
        'outtmpl': f'{random_uuid}',
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])
    return f"{random_uuid}.mp3"

def store_path_in_json(path, json_file_path="stored_paths.json"):
    """Append *path* and the current timestamp to a JSON list stored on disk.

    Args:
        path: File path to record.
        json_file_path: JSON file holding a list of
            ``{"path": ..., "timestamp": ...}`` entries. It is created when
            it does not exist yet.

    Raises:
        json.JSONDecodeError: If the existing file does not contain valid
            JSON. The decode error and the file content are printed before
            the exception is re-raised.
    """
    entry = {
        "path": path,
        # Same format string that delete_old_files parses with strptime.
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }

    try:
        with open(json_file_path, 'r') as json_file:
            raw_content = json_file.read()
    except FileNotFoundError:
        # First use: start from an empty list; the file is created below.
        data = []
    else:
        try:
            data = json.loads(raw_content)
        except json.JSONDecodeError as e:
            # Print the text captured while the file was open. Reading from
            # the handle here would fail because it is already closed, and
            # that failure would hide the real decode error.
            print(f"Error decoding JSON file: {e}")
            print(f"Content of JSON file: {raw_content}")
            raise  # Re-raise after printing for further analysis

    data.append(entry)

    # Write the updated list back to the JSON file
    with open(json_file_path, 'w') as json_file:
        json.dump(data, json_file, indent=2)

import os
import json
from datetime import datetime, timedelta

def delete_old_files(json_filename, max_age_hours):
    """Remove tracked files older than *max_age_hours* and prune their entries.

    *json_filename* holds a list of ``{"path", "timestamp"}`` entries with
    timestamps in ``%Y-%m-%d %H:%M:%S`` format. Entries past the age limit
    are dropped from the list and their file is deleted if it still exists;
    the remaining entries are written back. Does nothing when the JSON file
    is absent.
    """
    if not os.path.exists(json_filename):
        # Nothing has been tracked yet, so there is nothing to clean up.
        return

    with open(json_filename, 'r') as handle:
        records = json.load(handle)

    reference_time = datetime.now()
    survivors = []

    for record in records:
        tracked_path = record["path"]
        created_at = datetime.strptime(record["timestamp"], '%Y-%m-%d %H:%M:%S')
        age_in_hours = (reference_time - created_at).total_seconds() / 3600

        if age_in_hours > max_age_hours:
            # Expired: delete the file when present and forget the entry.
            if os.path.exists(tracked_path):
                os.remove(tracked_path)
        else:
            survivors.append(record)

    with open(json_filename, 'w') as handle:
        json.dump(survivors, handle, indent=2)
def convert_to_text(audio_path):
    """Transcribe the audio file at *audio_path* and return the text.

    Before transcribing, files tracked in ``stored_paths.json`` for more
    than one hour are purged and *audio_path* is added to that list.
    """
    # Housekeeping first: purge expired files, then track the new one.
    delete_old_files("stored_paths.json", 1)
    store_path_in_json(audio_path)

    transcription = model.transcribe(audio_path, fp16=False)
    return transcription["text"]
import os
def audio_to_text(youtube_link, audio_path):
    """Transcribe either a YouTube video's audio or a local audio file.

    The YouTube link takes precedence, but only when extract_video_id
    recognises it. Placeholder strings such as ``"None"`` therefore no
    longer send the call down the download branch, as the earlier
    length-based check did.

    Args:
        youtube_link: YouTube URL, or an empty/None/placeholder value when
            no link was supplied.
        audio_path: Path to a local audio file, or an empty/None/placeholder
            value when no file was supplied.

    Returns:
        The transcribed text, or None when neither argument is usable.
    """
    if youtube_link and extract_video_id(youtube_link) is not None:
        downloaded_name = download_audio(youtube_link)
        # download_audio returns a bare file name; resolve it against the
        # current working directory to get a full path.
        audio_file_path = os.path.join(os.getcwd(), downloaded_name)
        return convert_to_text(audio_file_path)

    if audio_path and os.path.exists(audio_path):
        return convert_to_text(audio_path)

    return None


import gradio as gr
import os


def transcribe_audio(youtube_link, audio_file):
    """Gradio callback: transcribe a YouTube link or an uploaded audio file.

    Args:
        youtube_link: Text entered by the user; takes precedence when
            non-empty.
        audio_file: The uploaded file, either a path string or an object
            exposing the path as ``.name``. NOTE(review): which of the two
            Gradio passes depends on its version and the File component's
            ``type`` setting; confirm against the installed version.

    Returns:
        Whatever audio_to_text returns for the chosen input, or a short
        message when no input was given or the uploaded file is missing.
    """
    if youtube_link:
        # Pass an empty string for the unused argument. A "None" placeholder
        # is a non-empty string that audio_to_text can mistake for real input.
        return audio_to_text(youtube_link, "")

    if audio_file:
        audio_path = getattr(audio_file, "name", audio_file)
        if os.path.exists(audio_path):
            return audio_to_text("", audio_path)
        # This case used to fall through with `result` unbound and raise
        # UnboundLocalError.
        return f"Audio file not found: {audio_path}"

    return "Please provide a YouTube link or upload an audio file."

# Gradio UI wired to transcribe_audio: the two inputs are listed in the same
# order as its parameters (youtube_link, audio_file) and the return value is
# displayed as text.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Textbox(),
        gr.File()
    ],
    outputs="text",
    # NOTE(review): live=True presumably re-runs transcribe_audio whenever an
    # input changes, which here means a download plus a Whisper run per
    # change -- confirm this is intended.
    live=True
)

# Launched unconditionally at module level, so importing this file also
# starts the app.
iface.launch()