Update app.py
app.py
CHANGED
@@ -8,24 +8,40 @@ import re
 
 from huggingface_hub import login
 
+# Login to Hugging Face Hub (if token is available)
 token = os.environ.get("HG_TOKEN")
+if token:
+    login(token)
 
+# Load reference dataset
 try:
     dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
     references = {row["id"]: row["text"] for row in dataset}
+    print(f"Loaded {len(references)} reference transcriptions")
 except Exception as e:
+    print(f"Error loading dataset: {str(e)}")
     references = {}
 
+# Initialize or load the leaderboard file
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
+    # Create a new leaderboard with sample data for testing
+    sample_data = [
+        ["MALIBA-AI/bambara-asr-v1", 0.2264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
+        ["whisper-large-v3-bambara", 0.3120, 0.1870, 0.2745, "2025-02-20 14:22:33"]
+    ]
+    pd.DataFrame(sample_data,
+                 columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False)
+    print(f"Created new leaderboard file with sample data")
 else:
     leaderboard_df = pd.read_csv(leaderboard_file)
 
+    # Ensure the Combined_Score column exists
     if "Combined_Score" not in leaderboard_df.columns:
         leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
         leaderboard_df.to_csv(leaderboard_file, index=False)
+        print(f"Added Combined_Score column to existing leaderboard")
+    print(f"Loaded leaderboard with {len(leaderboard_df)} entries")
 
 def normalize_text(text):
     """Normalize text for WER/CER calculation"""
@@ -62,6 +78,7 @@ def calculate_metrics(predictions_df):
             sample_wer = wer(reference, hypothesis)
             sample_cer = cer(reference, hypothesis)
 
+            # Cap extreme values to prevent outliers from skewing results
             sample_wer = min(sample_wer, 2.0)
             sample_cer = min(sample_cer, 2.0)
 
@@ -77,7 +94,8 @@ def calculate_metrics(predictions_df):
                 "wer": sample_wer,
                 "cer": sample_cer
             })
-        except Exception:
+        except Exception as e:
+            print(f"Error processing sample {id_val}: {str(e)}")
             pass
 
     if not results:
@@ -98,22 +116,25 @@ def format_as_percentage(value):
 
 def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
     """Format leaderboard for display with ranking and percentages"""
-    if len(df) == 0:
+    if df is None or len(df) == 0:
         return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
 
+    # Make a copy to avoid modifying the original
     display_df = df.copy()
 
+    # Sort by the selected metric (lower is better)
     display_df = display_df.sort_values(sort_by)
 
+    # Add ranking column
     display_df.insert(0, "Rank", range(1, len(display_df) + 1))
 
+    # Format numeric columns as percentages
     for col in ["WER", "CER", "Combined_Score"]:
         if col in display_df.columns:
             display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}")
-            display_df = display_df.drop(col, axis=1)
 
+    # Keep both the raw values and percentage displays
+    # This allows for proper sorting while showing formatted values
 
     return display_df
 
@@ -133,10 +154,18 @@ def update_ranking(method):
 
         return prepare_leaderboard_for_display(current_lb, sort_column)
 
-    except Exception:
+    except Exception as e:
+        print(f"Error updating ranking: {str(e)}")
         return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
 
 def process_submission(model_name, csv_file):
+    """Process a new model submission"""
+    if not model_name or not model_name.strip():
+        return "Error: Please provide a model name.", None
+
+    if not csv_file:
+        return "Error: Please upload a CSV file.", None
+
     try:
         df = pd.read_csv(csv_file)
 
@@ -162,28 +191,42 @@ def process_submission(model_name, csv_file):
     try:
         avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)
 
-        # suspiciously low values
+        # Check for suspiciously low values
         if avg_wer < 0.001:
             return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None
 
     except Exception as e:
         return f"Error calculating metrics: {str(e)}", None
 
+    # Load existing leaderboard
     leaderboard = pd.read_csv(leaderboard_file)
     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 
     # Calculate combined score (70% WER, 30% CER)
     combined_score = avg_wer * 0.7 + avg_cer * 0.3
 
+    # Check if model already exists
+    if model_name in leaderboard["Model_Name"].values:
+        # Update existing entry
+        idx = leaderboard[leaderboard["Model_Name"] == model_name].index
+        leaderboard.loc[idx, "WER"] = avg_wer
+        leaderboard.loc[idx, "CER"] = avg_cer
+        leaderboard.loc[idx, "Combined_Score"] = combined_score
+        leaderboard.loc[idx, "timestamp"] = timestamp
+        updated_leaderboard = leaderboard
+    else:
+        # Add new entry
+        new_entry = pd.DataFrame(
+            [[model_name, avg_wer, avg_cer, combined_score, timestamp]],
+            columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
+        )
+        updated_leaderboard = pd.concat([leaderboard, new_entry])
 
-    updated_leaderboard =
+    # Sort and save updated leaderboard
+    updated_leaderboard = updated_leaderboard.sort_values("Combined_Score")
     updated_leaderboard.to_csv(leaderboard_file, index=False)
 
+    # Prepare for display
     display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
 
     return f"Submission processed successfully! WER: {format_as_percentage(avg_wer)}, CER: {format_as_percentage(avg_cer)}, Combined Score: {format_as_percentage(combined_score)}", display_leaderboard
@@ -191,29 +234,56 @@ def process_submission(model_name, csv_file):
 
     except Exception as e:
         return f"Error processing submission: {str(e)}", None
 
+def get_current_leaderboard():
+    """Get the current leaderboard data for display"""
+    try:
+        if os.path.exists(leaderboard_file):
+            current_leaderboard = pd.read_csv(leaderboard_file)
+
+            if "Combined_Score" not in current_leaderboard.columns:
+                current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
+                current_leaderboard.to_csv(leaderboard_file, index=False)
+
+            return current_leaderboard
+        else:
+            return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
+    except Exception as e:
+        print(f"Error getting leaderboard: {str(e)}")
+        return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
+
+def create_leaderboard_table():
+    """Create and format the leaderboard table for display"""
+    leaderboard_data = get_current_leaderboard()
+    return prepare_leaderboard_for_display(leaderboard_data)
+
-with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
+with gr.Blocks(title="Bambara ASR Leaderboard", theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
         # 🇲🇱 Bambara ASR Leaderboard
 
-        This leaderboard
-        Models are ranked based on
+        This leaderboard tracks and evaluates speech recognition models for the Bambara language.
+        Models are ranked based on Word Error Rate (WER), Character Error Rate (CER), and a combined score.
+
+        ## Current Models Performance
         """
     )
 
+    current_data = get_current_leaderboard()
+
+    # Highlight top-performing model
+    if len(current_data) > 0:
+        best_model = current_data.sort_values("Combined_Score").iloc[0]
+        gr.Markdown(f"""
+        ### 🏆 Current Best Model: **{best_model['Model_Name']}**
+        * WER: **{best_model['WER']*100:.2f}%**
+        * CER: **{best_model['CER']*100:.2f}%**
+        * Combined Score: **{best_model['Combined_Score']*100:.2f}%**
+        """)
+
     with gr.Tabs() as tabs:
-                if "Combined_Score" not in current_leaderboard.columns:
-                    current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
-                display_leaderboard = prepare_leaderboard_for_display(current_leaderboard)
-            except Exception:
-                display_leaderboard = pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])
-            gr.Markdown("### Current ASR Model Rankings")
+        with gr.TabItem("🏆 Model Rankings"):
+            # Pre-load the leaderboard data
+            initial_leaderboard = create_leaderboard_table()
 
             ranking_method = gr.Radio(
                 ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
@@ -222,7 +292,7 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
             )
 
             leaderboard_view = gr.DataFrame(
-                value=
+                value=initial_leaderboard,
                 interactive=False,
                 label="Models are ranked by selected metric - lower is better"
             )
@@ -233,34 +303,60 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
                 outputs=[leaderboard_view]
             )
 
+            with gr.Accordion("Metrics Explanation", open=False):
+                gr.Markdown(
+                    """
+                    ## Understanding ASR Metrics
+
+                    ### Word Error Rate (WER)
+                    WER measures how accurately the ASR system recognizes whole words:
+                    * Lower values indicate better performance
+                    * Calculated as: (Substitutions + Insertions + Deletions) / Total Words
+                    * A WER of 0% means perfect transcription
+                    * A WER of 20% means approximately 1 in 5 words contains an error
+
+                    ### Character Error Rate (CER)
+                    CER measures accuracy at the character level:
+                    * More fine-grained than WER
+                    * Better at capturing partial word matches
+                    * Particularly useful for agglutinative languages like Bambara
+
+                    ### Combined Score
+                    * Weighted average: 70% WER + 30% CER
+                    * Provides a balanced evaluation of model performance
+                    * Used as the primary ranking metric
+                    """
+                )
 
         with gr.TabItem("📝 Submit New Results"):
             gr.Markdown(
                 """
                 ### Submit a new model for evaluation
 
-                Upload a CSV file with
+                Upload a CSV file with the following format:
+                * Must contain exactly two columns: 'id' and 'text'
+                * The 'id' column should match the reference dataset IDs
+                * The 'text' column should contain your model's transcriptions
                 """
             )
 
             with gr.Row():
-                model_name_input = gr.Textbox(
+                model_name_input = gr.Textbox(
+                    label="Model Name",
+                    placeholder="e.g., MALIBA-AI/bambara-asr",
+                    info="Use a descriptive name to identify your model"
+                )
+                csv_upload = gr.File(
+                    label="Upload CSV File",
+                    file_types=[".csv"],
+                    info="CSV with columns: id, text"
+                )
 
-            submit_btn = gr.Button("Submit")
+            submit_btn = gr.Button("Submit", variant="primary")
             output_msg = gr.Textbox(label="Status", interactive=False)
             leaderboard_display = gr.DataFrame(
                 label="Updated Leaderboard",
-                value=
+                value=initial_leaderboard,
                 interactive=False
             )
 
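The Metrics Explanation accordion added above documents the ranking formula (combined score = 70% WER + 30% CER). As a rough sketch of how those numbers come out of the jiwer functions the app already imports — the reference and hypothesis strings below are placeholders, not benchmark data:

```python
from jiwer import wer, cer

reference = "this is a sample reference transcription"   # placeholder ground truth
hypothesis = "this is a simple reference transcription"  # placeholder model output

sample_wer = wer(reference, hypothesis)         # word-level edit errors / reference words
sample_cer = cer(reference, hypothesis)         # character-level edit errors / reference characters
combined = 0.7 * sample_wer + 0.3 * sample_cer  # same weighting the leaderboard uses for ranking

print(f"WER: {sample_wer:.2%}  CER: {sample_cer:.2%}  Combined: {combined:.2%}")
```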
@@ -269,6 +365,49 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
                 inputs=[model_name_input, csv_upload],
                 outputs=[output_msg, leaderboard_display]
             )
+
+        with gr.TabItem("📊 Benchmark Dataset"):
+            gr.Markdown(
+                """
+                ## About the Benchmark Dataset
+
+                This leaderboard uses the **[sudoping01/bambara-speech-recognition-benchmark](https://huggingface.co/datasets/sudoping01/bambara-speech-recognition-benchmark)** dataset:
+
+                * Contains diverse Bambara speech samples
+                * Includes various speakers, accents, and dialects
+                * Covers different speech styles and recording conditions
+                * Professionally transcribed and validated
+
+                ### How to Generate Predictions
+
+                To submit results to this leaderboard:
+
+                1. Download the audio files from the benchmark dataset
+                2. Run your ASR model on the audio files
+                3. Generate a CSV file with 'id' and 'text' columns
+                4. Submit your results using the form in the "Submit New Results" tab
+
+                ### Evaluation Guidelines
+
+                * Text is normalized (lowercase, punctuation removed) before metrics calculation
+                * Extreme outliers are capped to prevent skewing results
+                * All submissions are validated for format and completeness
+                """
+            )
+
+    gr.Markdown(
+        """
+        ---
+        ### About MALIBA-AI
+
+        **MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation**
+
+        *"No Malian Language Left Behind"*
+
+        This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
+        For more information, visit [MALIBA-AI on Hugging Face](https://huggingface.co/MALIBA-AI).
+        """
+    )
 
 if __name__ == "__main__":
     demo.launch()
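For submitters, the "How to Generate Predictions" steps in the new Benchmark Dataset tab boil down to producing the two-column id/text CSV that the submit form validates. A minimal sketch, assuming the benchmark's eval split exposes an audio field and with `my_asr_transcribe` and `my_model_predictions.csv` as hypothetical stand-ins for your own model and output file:

```python
import pandas as pd
from datasets import load_dataset

def my_asr_transcribe(audio) -> str:
    """Placeholder: replace with your model's inference call."""
    raise NotImplementedError

# Same eval split the leaderboard scores against
eval_set = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]

rows = []
for sample in eval_set:
    rows.append({
        "id": sample["id"],                         # must match the reference dataset IDs
        "text": my_asr_transcribe(sample["audio"]),  # "audio" field name is an assumption
    })

# Exactly the two columns the submission form expects: id, text
pd.DataFrame(rows).to_csv("my_model_predictions.csv", index=False)
```

The resulting CSV is what gets uploaded in the "Submit New Results" tab.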