awsaf49 committed on
Commit be3ce29 · 1 Parent(s): ad9713a

example added + colors

Files changed (4)
  1. .gitattributes +1 -0
  2. app.py +202 -48
  3. example/fake_song.mp3 +3 -0
  4. example/real_song.mp3 +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -18,6 +18,7 @@ MODEL_IDS = {
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model_cache = {}

+
 def load_model(model_name):
     """Load model if not already cached"""
     if model_name not in model_cache:
@@ -28,109 +29,262 @@ def load_model(model_name):
         model_cache[model_name] = model
     return model_cache[model_name]

+
 def process_audio(audio_path, model_name):
     """Process audio file and return prediction"""
     try:
         model = load_model(model_name)
         max_time = model.config.audio.max_time
-
+
         # Load and process audio
         audio, sr = librosa.load(audio_path, sr=16000)
         chunk_samples = int(max_time * sr)
         total_chunks = len(audio) // chunk_samples
         middle_chunk_idx = total_chunks // 2
-
+
         # Extract middle chunk
         start = middle_chunk_idx * chunk_samples
         end = start + chunk_samples
         chunk = audio[start:end]
-
+
         if len(chunk) < chunk_samples:
             chunk = np.pad(chunk, (0, chunk_samples - len(chunk)))
-
+
         # Get prediction
         with torch.no_grad():
             chunk = torch.from_numpy(chunk).float().to(device)
             pred = model(chunk.unsqueeze(0))
             prob = torch.sigmoid(pred).cpu().numpy()[0]
-
-        return {"Real": 1 - prob, "Fake": prob}
+
+        real_prob = 1 - prob
+        fake_prob = prob
+
+        # Return formatted results with emojis
+        return {
+            "🎵 Real": float(real_prob),
+            "🤖 Fake": float(fake_prob)
+        }

     except Exception as e:
-        return {"Error": str(e)}
+        return {"❌ Error": str(e)}
+

 def predict(audio_file, model_name):
     """Gradio interface function"""
     if audio_file is None:
-        return {"Message": "Please upload an audio file"}
+        return {"⚠️ Message": "Please upload an audio file"}
     return process_audio(audio_file, model_name)

+
+# Custom CSS for styling
+css = """
+:root {
+    --primary-color: #6366f1;
+    --secondary-color: #8b5cf6;
+    --accent-color: #ec4899;
+    --background-color: #f8fafc;
+    --text-color: #1e293b;
+    --border-radius: 10px;
+}
+
+.gradio-container {
+    background-color: var(--background-color);
+}
+
+.gr-button {
+    background: linear-gradient(90deg, var(--primary-color), var(--secondary-color));
+    border: none !important;
+    color: white !important;
+    border-radius: var(--border-radius) !important;
+}
+
+.gr-button:hover {
+    background: linear-gradient(90deg, var(--secondary-color), var(--accent-color));
+    transform: translateY(-2px);
+    box-shadow: 0 10px 20px rgba(0,0,0,0.1);
+    transition: all 0.3s ease;
+}
+
+.gr-form {
+    border-radius: var(--border-radius) !important;
+    border: 1px solid #e2e8f0 !important;
+    box-shadow: 0 4px 12px rgba(0,0,0,0.05) !important;
+}
+
+.footer {
+    margin-top: 20px;
+    text-align: center;
+    font-size: 0.9em;
+    color: #64748b;
+}
+
+.gradient-text {
+    background: linear-gradient(90deg, var(--primary-color), var(--accent-color));
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    background-clip: text;
+    text-fill-color: transparent;
+}
+
+.logo-container {
+    display: flex;
+    justify-content: center;
+    margin-bottom: 1rem;
+}
+
+.header-container {
+    text-align: center;
+    margin-bottom: 2rem;
+    padding: 1.5rem;
+    background: rgba(255, 255, 255, 0.8);
+    border-radius: var(--border-radius);
+    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.05);
+}
+
+.resource-links {
+    display: flex;
+    justify-content: center;
+    gap: 1rem;
+    flex-wrap: wrap;
+    margin-bottom: 1.5rem;
+}
+
+.resource-link {
+    display: inline-block;
+    padding: 0.5rem 1rem;
+    background: white;
+    border-radius: var(--border-radius);
+    color: var(--primary-color);
+    text-decoration: none;
+    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
+    transition: all 0.2s ease;
+}
+
+.resource-link:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
+}
+
+.label-container {
+    border-radius: var(--border-radius);
+    overflow: hidden;
+    box-shadow: 0 4px 12px rgba(0,0,0,0.05);
+}
+"""
+
 # Create Gradio interface
-with gr.Blocks() as demo:
+with gr.Blocks(css=css) as demo:
     # Title, Subtitle, and Logo
     gr.HTML(
         """
-        <div style="text-align: center;">
-            <img src="https://i.postimg.cc/3Jx3yZ5b/real-vs-fake-sonics-w-logo.jpg"
-                 style="max-width: 150px; margin: 0 auto;">
-            <h1>SONICS: Synthetic Or Not - Identifying Counterfeit Songs</h1>
+        <div class="header-container">
+            <div class="logo-container">
+                <img src="https://i.postimg.cc/3Jx3yZ5b/real-vs-fake-sonics-w-logo.jpg"
+                     style="max-width: 180px; border-radius: 15px; box-shadow: 0 4px 12px rgba(0,0,0,0.1);">
+            </div>
+            <h1 class="gradient-text">🎵 SONICS: Synthetic Or Not - Identifying Counterfeit Songs 🤖</h1>
             <h3>ICLR 2025 [Poster]</h3>
-        <p style="font-size: 1.1em; color: #666; margin: 10px 0;">
-            Detect if a song is real or AI-generated (created using text-to-song models).
-            Upload any audio file to check its authenticity!
+            <p style="font-size: 1.1em; color: #64748b; margin: 15px 0;">
+                Detect if a song is real or AI-generated with our state-of-the-art models.
+                Simply upload an audio file to verify its authenticity!
             </p>
         </div>
         """
     )
-
-    # # Resource Links
-    # with gr.Row():
-    #     paper_radio = gr.Radio(
-    #         choices=["Paper", "Dataset", "ArXiv", "GitHub"],
-    #         label="Resources",
-    #         info="Click to visit respective links"
-    #     )
-
+
+    # Resource Links
     gr.HTML(
         """
-        <div style="text-align: center; margin-bottom: 1rem;">
-            <p>
-                <a href="https://openreview.net/forum?id=PY7KSh29Z8" target="_blank">📄 Paper</a> |
-                <a href="https://huggingface.co/datasets/awsaf49/sonics" target="_blank">🎵 Dataset</a> |
-                <a href="https://huggingface.co/collections/awsaf49/sonics-spectttra-67bb6517b3920fd18e409013" target="_blank">🤖 Models</a> |
-                <a href="https://arxiv.org/abs/2408.14080" target="_blank">🔬 ArXiv</a> |
-                <a href="https://github.com/awsaf49/sonics" target="_blank">💻 GitHub</a>
-            </p>
+        <div class="resource-links">
+            <a href="https://openreview.net/forum?id=PY7KSh29Z8" target="_blank" class="resource-link">
+                📄 Paper
+            </a>
+            <a href="https://huggingface.co/datasets/awsaf49/sonics" target="_blank" class="resource-link">
+                🎵 Dataset
+            </a>
+            <a href="https://huggingface.co/collections/awsaf49/sonics-spectttra-67bb6517b3920fd18e409013" target="_blank" class="resource-link">
+                🤖 Models
+            </a>
+            <a href="https://arxiv.org/abs/2408.14080" target="_blank" class="resource-link">
+                🔬 ArXiv
+            </a>
+            <a href="https://github.com/awsaf49/sonics" target="_blank" class="resource-link">
+                💻 GitHub
+            </a>
         </div>
         """
     )
-
+
     # Main Interface
-    with gr.Row():
+    with gr.Row(equal_height=True):
         with gr.Column():
             audio_input = gr.Audio(
-                label="Upload Audio File",
-                type="filepath"
+                label="🎧 Upload Audio File",
+                type="filepath",
+                elem_id="audio_input"
             )
+
             model_dropdown = gr.Dropdown(
                 choices=list(MODEL_IDS.keys()),
                 value="SpecTTTra-γ (5s)",
-                label="Select Model"
+                label="🔍 Select Model",
+                elem_id="model_dropdown"
             )
-            submit_btn = gr.Button("Analyze Audio")
-
+
+            submit_btn = gr.Button(
+                "✨ Analyze Audio",
+                elem_id="submit_btn"
+            )
+
         with gr.Column():
+            # Define output before using it in Examples
            output = gr.Label(
-                label="Analysis Result",
-                num_top_classes=2
+                label="📊 Analysis Result",
+                num_top_classes=2,
+                elem_id="output",
+                elem_classes="label-container"
             )
-
-    # Prediction handling
-    submit_btn.click(
-        fn=predict,
-        inputs=[audio_input, model_dropdown],
-        outputs=[output]
+
+            with gr.Accordion("ℹ️ How It Works", open=False):
+                gr.Markdown("""
+                The SONICS classifier analyzes your audio to determine if it's an authentic song (Human created) or
+                generated by AI. Our models are trained on a diverse dataset of real and AI-generated songs from Suno and Udio.
+
+                **Models available:**
+                - **SpecTTTra-γ**: Optimized for speed
+                - **SpecTTTra-β**: Balanced performance
+                - **SpecTTTra-α**: Highest accuracy
+
+                **Duration variants:**
+                - **5s**: Analyzes a 5-second clip (faster)
+                - **120s**: Analyzes up to 2 minutes (more accurate)
+                """)
+
+    # Add Examples section after output is defined
+    with gr.Accordion("🎬 Example Audio Files", open=True):
+        gr.Examples(
+            examples=[
+                ["example/real_song.mp3", "SpecTTTra-γ (5s)"],
+                ["example/fake_song.mp3", "SpecTTTra-γ (5s)"],
+            ],
+            inputs=[audio_input, model_dropdown],
+            outputs=[output],
+            fn=predict,
+            cache_examples=True,
+        )
+
+    # Footer
+    gr.HTML(
+        """
+        <div class="footer">
+            <p>SONICS: Synthetic Or Not - Identifying Counterfeit Songs | Created by SONICS Team</p>
+            <p>© 2025 - For research purposes only</p>
+        </div>
+        """
     )

+    # Prediction handling
+    submit_btn.click(fn=predict, inputs=[audio_input, model_dropdown], outputs=[output])
+
 if __name__ == "__main__":
     demo.launch()
example/fake_song.mp3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ba0ad7b7a7104a29ddf18c3ba3e04fb5045cdc1eb530f62fa611a08228eb30e
+size 4410477
example/real_song.mp3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c54d0a4d79601bdc739970ed8c22b6f5199527b79592146ebecf180e94f37529
+size 1922782