modify examples
Browse files- .gradio/cached_examples/13/log.csv +5 -0
- app.py +31 -48
.gradio/cached_examples/13/log.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Predicted Dialect,timestamp
|
2 |
+
"{""label"": ""Gulf Arabic"", ""confidences"": [{""label"": ""Gulf Arabic"", ""confidence"": 0.9943048357963562}, {""label"": ""Levantine Arabic"", ""confidence"": 0.004683974664658308}, {""label"": ""Maghrebi Arabic"", ""confidence"": 0.0003852946974802762}, {""label"": ""Modern Standard Arabic"", ""confidence"": 0.0003597271570470184}, {""label"": ""Egyptian Arabic"", ""confidence"": 0.0002661938196979463}]}",2025-03-04 14:57:07.478940
|
3 |
+
"{""label"": ""Levantine Arabic"", ""confidences"": [{""label"": ""Levantine Arabic"", ""confidence"": 0.8999205827713013}, {""label"": ""Gulf Arabic"", ""confidence"": 0.09826569259166718}, {""label"": ""Maghrebi Arabic"", ""confidence"": 0.001049569109454751}, {""label"": ""Modern Standard Arabic"", ""confidence"": 0.0004323236644268036}, {""label"": ""Egyptian Arabic"", ""confidence"": 0.0003318020317237824}]}",2025-03-04 14:57:32.843399
|
4 |
+
"{""label"": ""Gulf Arabic"", ""confidences"": [{""label"": ""Gulf Arabic"", ""confidence"": 0.9867829084396362}, {""label"": ""Levantine Arabic"", ""confidence"": 0.011104526929557323}, {""label"": ""Maghrebi Arabic"", ""confidence"": 0.0016229108441621065}, {""label"": ""Modern Standard Arabic"", ""confidence"": 0.0003496674180496484}, {""label"": ""Egyptian Arabic"", ""confidence"": 0.00014002238458488137}]}",2025-03-04 14:57:54.273625
|
5 |
+
"{""label"": ""Levantine Arabic"", ""confidences"": [{""label"": ""Levantine Arabic"", ""confidence"": 0.9568566083908081}, {""label"": ""Gulf Arabic"", ""confidence"": 0.03988657519221306}, {""label"": ""Modern Standard Arabic"", ""confidence"": 0.002475168788805604}, {""label"": ""Egyptian Arabic"", ""confidence"": 0.0006239291978999972}, {""label"": ""Maghrebi Arabic"", ""confidence"": 0.00015768631419632584}]}",2025-03-04 14:58:14.103717
|
app.py
CHANGED
@@ -1,17 +1,13 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
-
import numpy as np
|
4 |
import os
|
5 |
|
6 |
# Load the model
|
7 |
print("Loading model...")
|
8 |
model_id = "badrex/mms-300m-arabic-dialect-identifier"
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
except Exception as e:
|
13 |
-
print(f"Error loading model: {e}")
|
14 |
-
|
15 |
# Define dialect mapping
|
16 |
dialect_mapping = {
|
17 |
"MSA": "Modern Standard Arabic",
|
@@ -22,54 +18,41 @@ dialect_mapping = {
|
|
22 |
}
|
23 |
|
24 |
def predict_dialect(audio):
|
25 |
-
|
26 |
-
# The audio input from Gradio is a tuple of (sample_rate, audio_array)
|
27 |
-
if audio is None:
|
28 |
-
return {"Error": 1.0}
|
29 |
-
|
30 |
-
sr, audio_array = audio
|
31 |
-
|
32 |
-
# Process the audio input
|
33 |
-
if len(audio_array.shape) > 1:
|
34 |
-
audio_array = audio_array.mean(axis=1) # Convert stereo to mono
|
35 |
-
|
36 |
-
print(f"Processing audio: sample rate={sr}, shape={audio_array.shape}")
|
37 |
-
|
38 |
-
# Classify the dialect
|
39 |
-
predictions = classifier({"sampling_rate": sr, "raw": audio_array})
|
40 |
-
|
41 |
-
# Format results for display
|
42 |
-
results = {}
|
43 |
-
for pred in predictions:
|
44 |
-
dialect_name = dialect_mapping.get(pred['label'], pred['label'])
|
45 |
-
results[dialect_name] = float(pred['score'])
|
46 |
-
|
47 |
-
return results
|
48 |
-
except Exception as e:
|
49 |
-
print(f"Error in prediction: {e}")
|
50 |
return {"Error": 1.0}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
-
#
|
53 |
-
|
54 |
examples_dir = "examples"
|
55 |
if os.path.exists(examples_dir):
|
56 |
for filename in os.listdir(examples_dir):
|
57 |
if filename.endswith((".wav", ".mp3", ".ogg")):
|
58 |
-
|
59 |
-
|
60 |
-
print(f"Found {len(
|
61 |
else:
|
62 |
print("Examples directory not found")
|
63 |
|
64 |
-
# Examples with labels
|
65 |
-
examples = []
|
66 |
-
if example_files:
|
67 |
-
for file in example_files:
|
68 |
-
basename = os.path.basename(file)
|
69 |
-
dialect = basename.split("_")[0] if "_" in basename else basename.split(".")[0]
|
70 |
-
label = dialect_mapping.get(dialect, dialect.capitalize())
|
71 |
-
examples.append([file, f"{label} Sample"])
|
72 |
-
|
73 |
# Create the Gradio interface
|
74 |
demo = gr.Interface(
|
75 |
fn=predict_dialect,
|
@@ -80,8 +63,8 @@ demo = gr.Interface(
|
|
80 |
Upload an audio file or record your voice speaking Arabic to see which dialect it matches.
|
81 |
The model identifies: Modern Standard Arabic (MSA), Egyptian, Gulf, Levantine, and Maghrebi dialects.""",
|
82 |
examples=examples if examples else None,
|
83 |
-
|
84 |
-
flagging_mode=None
|
85 |
)
|
86 |
|
87 |
# Launch the app
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
|
|
3 |
import os
|
4 |
|
5 |
# Load the model
|
6 |
print("Loading model...")
|
7 |
model_id = "badrex/mms-300m-arabic-dialect-identifier"
|
8 |
+
classifier = pipeline("audio-classification", model=model_id)
|
9 |
+
print("Model loaded successfully")
|
10 |
+
|
|
|
|
|
|
|
11 |
# Define dialect mapping
|
12 |
dialect_mapping = {
|
13 |
"MSA": "Modern Standard Arabic",
|
|
|
18 |
}
|
19 |
|
20 |
def predict_dialect(audio):
    """Classify the Arabic dialect spoken in a Gradio audio input.

    Args:
        audio: ``None`` when nothing was uploaded or recorded, otherwise the
            ``(sample_rate, audio_array)`` tuple handed over by Gradio.

    Returns:
        A dict mapping each predicted dialect name to its score as a float.
        ``{"Error": 1.0}`` is returned when there is no audio to classify
        (``None`` input or a zero-length recording).
    """
    if audio is None:
        return {"Error": 1.0}

    # The audio input from Gradio is a tuple of (sample_rate, audio_array)
    sr, audio_array = audio

    # A zero-length recording has nothing to classify; reuse the same
    # sentinel as the "no input" case instead of failing inside the model.
    if audio_array.size == 0:
        return {"Error": 1.0}

    # Normalise samples to float32 in [-1, 1). Gradio's numpy audio is
    # typically integer PCM (int16) -- see the Gradio Audio docs -- whereas
    # the transformers audio pipeline is documented for floating-point
    # waveforms (NOTE(review): confirm against the pinned library versions).
    dtype = audio_array.dtype
    if dtype.kind == "i":
        full_scale = float(2 ** (8 * dtype.itemsize - 1))
        audio_array = audio_array.astype("float32") / full_scale
    elif dtype.kind == "u":
        # Unsigned PCM is offset-binary: re-centre around zero before scaling.
        midpoint = float(2 ** (8 * dtype.itemsize - 1))
        audio_array = (audio_array.astype("float32") - midpoint) / midpoint
    else:
        audio_array = audio_array.astype("float32")

    # Process the audio input
    if audio_array.ndim > 1:
        audio_array = audio_array.mean(axis=1)  # Convert stereo to mono

    print(f"Processing audio: sample rate={sr}, shape={audio_array.shape}")

    # Classify the dialect
    predictions = classifier({"sampling_rate": sr, "raw": audio_array})

    # Format results for display: translate model labels to readable dialect
    # names, falling back to the raw label when no mapping exists.
    return {
        dialect_mapping.get(pred['label'], pred['label']): float(pred['score'])
        for pred in predictions
    }
|
43 |
|
44 |
+
# Manually prepare example file paths without metadata.
# Each entry is a one-element list holding the path of one audio file.
examples = []
examples_dir = "examples"
if os.path.isdir(examples_dir):
    # isdir (not exists) so a stray regular file named "examples" does not
    # make os.listdir raise. os.listdir returns entries in arbitrary order,
    # so sort them to keep the example list stable between runs. Extensions
    # are matched case-insensitively so files such as "clip.WAV" are kept.
    examples = [
        [os.path.join(examples_dir, filename)]
        for filename in sorted(os.listdir(examples_dir))
        if filename.lower().endswith((".wav", ".mp3", ".ogg"))
    ]
    print(f"Found {len(examples)} example files")
else:
    print("Examples directory not found")
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
# Create the Gradio interface
|
57 |
demo = gr.Interface(
|
58 |
fn=predict_dialect,
|
|
|
63 |
Upload an audio file or record your voice speaking Arabic to see which dialect it matches.
|
64 |
The model identifies: Modern Standard Arabic (MSA), Egyptian, Gulf, Levantine, and Maghrebi dialects.""",
|
65 |
examples=examples if examples else None,
|
66 |
+
cache_examples=False, # Disable caching to avoid issues
|
67 |
+
flagging_mode=None
|
68 |
)
|
69 |
|
70 |
# Launch the app
|