pawipa committed on
Commit a6bff07 · 1 Parent(s): e76c7d1

added AI functionality

Files changed (1)
  1. app.py +112 -23
app.py CHANGED
@@ -2,27 +2,105 @@ import gradio as gr
 import time
 import os
 import zipfile
+import torch
+import librosa
+import soundfile as sf
+from transformers import pipeline
 from typing import List, Tuple, Generator
+import datetime
+from pydub import AudioSegment
+
+# Initial model name
+MODEL_NAME = "primeline/whisper-tiny-german-1224"
+speech_to_text = pipeline("automatic-speech-recognition", model=MODEL_NAME)
 
 # Initial status message
 STANDARD_OUTPUT_TEXT = "**Status:**<br>"
 
+def get_file_creation_date(file_path: str) -> str:
+    """
+    Returns the creation date of a file.
+
+    Args:
+        file_path (str): The path to the file.
+
+    Returns:
+        str: The creation date in a human-readable format.
+    """
+    try:
+        # Get file statistics
+        file_stats = os.stat(file_path)
+
+        # Retrieve and format creation time
+        creation_time = datetime.datetime.fromtimestamp(file_stats.st_ctime)
+        return creation_time.strftime("%Y-%m-%d %H:%M:%S")
+    except FileNotFoundError:
+        return "File not found."
+
+def load_model(model_name: str):
+    """
+    Loads the selected Hugging Face model.
+
+    Args:
+        model_name (str): The name of the Hugging Face model to load.
+
+    Returns:
+        pipeline: The loaded model pipeline.
+    """
+    return pipeline("automatic-speech-recognition", model=model_name)
+
+def convert_to_wav(file_path: str) -> str:
+    """
+    Converts audio files to WAV format if necessary.
+
+    Args:
+        file_path (str): Path to the uploaded audio file.
+
+    Returns:
+        str: Path to the converted WAV file.
+    """
+    if file_path.endswith(".m4a"):
+        audio = AudioSegment.from_file(file_path, format="m4a")
+        wav_path = file_path.replace(".m4a", ".wav")
+        audio.export(wav_path, format="wav")
+        return wav_path
+    return file_path
+
+def preprocess_audio(file_path: str) -> str:
+    """
+    Preprocesses the audio file to ensure compatibility with the AI model.
+
+    Args:
+        file_path (str): Path to the uploaded audio file.
+
+    Returns:
+        str: Path to the preprocessed audio file.
+    """
+    file_path = convert_to_wav(file_path)  # Convert to WAV if necessary
+    y, sr = librosa.load(file_path, sr=16000)  # Resample audio to 16kHz
+    processed_path = file_path.replace(".mp3", "_processed.wav").replace(".wav", "_processed.wav")
+    sf.write(processed_path, y, sr)  # Save the resampled audio
+    return processed_path
+
 def process_files_with_live_updates(
     files: List[gr.File],
-    dropdown_option: str,
-    dropdown_option_2: str
+    model_option: str,
+    output_format: str
 ) -> Generator[Tuple[str, List[str]], None, None]:
     """
-    Processes a list of uploaded files and provides live updates with progress.
+    Processes a list of uploaded files, transcribes audio, and provides live updates.
 
     Args:
         files (List[gr.File]): List of files uploaded by the user.
-        dropdown_option (str): Selected option from the first dropdown.
-        dropdown_option_2 (str): Selected option from the second dropdown.
+        model_option (str): Selected model option.
+        output_format (str): Selected output format option.
 
     Yields:
         Tuple[str, List[str]]: Updated status message and list of processed file paths.
     """
+    global speech_to_text
+    speech_to_text = load_model(model_option)
+
     file_details = []
     total_files = len(files)
     output_files = []
@@ -32,21 +110,28 @@ def process_files_with_live_updates(
     os.makedirs(output_dir, exist_ok=True)
 
     for idx, file in enumerate(files):
-        # Simulate file processing
-        time.sleep(1)
+        # Preprocess audio file
+        preprocessed_path = preprocess_audio(file.name)
+
+        # Transcribe audio using the AI model with timestamp support
+        transcription_result = speech_to_text(preprocessed_path, return_timestamps=True)
+        transcription = transcription_result["text"]
+
+        # Save transcription to file
+        txt_filename = os.path.join(output_dir, f"transcription_{file.name.split('/')[-1].split('.')[0]}.txt")
+        with open(txt_filename, "w", encoding="utf-8") as txt_file:
+            txt_file.write(transcription)
+        output_files.append(txt_filename)
 
         # Add to file details
         detail = (
-            f"**File Name**: {file.name} - {dropdown_option} - {dropdown_option_2}<br>"
+            f"**File Name**: {file.name.split('/')[-1]}<br>"
+            f"**File Date**: {get_file_creation_date(file)}<br>"
+            f"**Options**: {model_option} - {output_format}<br>"
+            f"**Transcription**: {transcription}<br><br>"
        )
         file_details.append(detail)
 
-        # Generate a .txt file
-        txt_filename = os.path.join(output_dir, f"output_file_{idx + 1}.txt")
-        with open(txt_filename, "w") as txt_file:
-            txt_file.write(f"Original File Name: {file.name}")
-        output_files.append(txt_filename)
-
         # Update progress bar and yield the updated Markdown
         yield (
             f"**Status: {int(((idx + 1) / total_files) * 100)}%**<br>" + "".join(file_details),
@@ -81,15 +166,18 @@ with gr.Blocks() as demo:
     # Input section
     with gr.Row():
         with gr.Column():
-            file_input = gr.Files(file_types=[".wav", ".mp3"], label="Upload your audio files")
+            file_input = gr.Files(file_types=[".wav", ".mp3", ".m4a"], label="Upload your audio files")
         with gr.Column():
-            dropdown = gr.Dropdown(
-                choices=["Language: English", "Language: German", "Language: French"],
-                label="Select Language",
-                value="Language: English",
+            model_dropdown = gr.Dropdown(
+                choices=[
+                    "primeline/whisper-tiny-german-1224",
+                    "primeline/whisper-tiny-german",
+                    "primeline/whisper-large-v3-german"],
+                label="Select Model",
+                value="primeline/whisper-tiny-german-1224",
             )
             dropdown_2 = gr.Dropdown(
-                choices=["Format: Plain Text", "Format: JSON", "Format: SRT"],
+                choices=["Format: Plain Text"],
                 label="Select Output Format",
                 value="Format: Plain Text",
             )
@@ -106,14 +194,14 @@ with gr.Blocks() as demo:
     # Button actions
     submit_button.click(
         process_files_with_live_updates,
-        inputs=[file_input, dropdown, dropdown_2],
+        inputs=[file_input, model_dropdown, dropdown_2],
         outputs=[output_md, output_files],
     )
 
     clear_button.click(
-        lambda: (None, "Language: English", "Format: Plain Text", STANDARD_OUTPUT_TEXT, None),
+        lambda: (None, "primeline/whisper-tiny-german-1224", "Format: Plain Text", STANDARD_OUTPUT_TEXT, None),
         inputs=[],  # No inputs
-        outputs=[file_input, dropdown, dropdown_2, output_md, output_files],
+        outputs=[file_input, model_dropdown, dropdown_2, output_md, output_files],
     )
 
     gr.Textbox(os.getcwd(), label="Current Working Directory")
@@ -143,3 +231,4 @@ demo.css = """
 
 # Launch app
 demo.launch()
+
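
The snippet below is a minimal standalone sketch of the transcription path this commit wires into the Gradio callback, useful for exercising the model outside the app. It only reuses calls that appear in the diff (pydub conversion, librosa resampling to 16 kHz, the transformers ASR pipeline with return_timestamps); the input file name "sample.m4a" is a placeholder, not something the commit defines.

# Standalone sketch of the transcription path added in this commit.
# Assumption: "sample.m4a" is a placeholder local file; all API calls mirror app.py.
import librosa
import soundfile as sf
from pydub import AudioSegment
from transformers import pipeline

MODEL_NAME = "primeline/whisper-tiny-german-1224"
speech_to_text = pipeline("automatic-speech-recognition", model=MODEL_NAME)

# Convert m4a to WAV, then resample to the 16 kHz rate the model expects.
audio = AudioSegment.from_file("sample.m4a", format="m4a")
audio.export("sample.wav", format="wav")
y, sr = librosa.load("sample.wav", sr=16000)
sf.write("sample_processed.wav", y, sr)

# Transcribe with timestamps, as the app does for each uploaded file.
result = speech_to_text("sample_processed.wav", return_timestamps=True)
print(result["text"])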