Spaces:
Sleeping
Sleeping
Update data/leaderboard_json/afrobench_lite.json
#15
by
seun-ajayi
- opened
data/community_results/New Results - June2025.csv
CHANGED
|
@@ -85,3 +85,11 @@ Gemini 3 Pro,afrimmlu,1.0,88.4,89.6,82.8,88.4,83.6,85.0,85.6,88.0,86.8,82.8,89.0
|
|
| 85 |
Gemini 3 Pro,injongointent,4.0,88.1,92.0,95.9,92.2,81.1,91.9,87.7,86.7,94.5,78.0,91.4,89.1,87.5,90.5,85.3,88.8
|
| 86 |
Gemini 3 Pro,sib,3.0,87.3,87.7,88.2,87.7,89.7,88.7,85.3,89.2,87.7,89.2,89.2,87.7,84.8,86.8,88.2,87.9
|
| 87 |
Gemini 3 Pro,belebele,5.0,76.3,76.8,54.3,63.1,75.4,71.3,74.7,68.9,73.8,61.9,74.9,78.2,72.1,73.3,77.6,71.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
Gemini 3 Pro,injongointent,4.0,88.1,92.0,95.9,92.2,81.1,91.9,87.7,86.7,94.5,78.0,91.4,89.1,87.5,90.5,85.3,88.8
|
| 86 |
Gemini 3 Pro,sib,3.0,87.3,87.7,88.2,87.7,89.7,88.7,85.3,89.2,87.7,89.2,89.2,87.7,84.8,86.8,88.2,87.9
|
| 87 |
Gemini 3 Pro,belebele,5.0,76.3,76.8,54.3,63.1,75.4,71.3,74.7,68.9,73.8,61.9,74.9,78.2,72.1,73.3,77.6,71.2
|
| 88 |
+
N-ATLAS-LLM,afrixnli,-,49.7,-,41.7,39.6,-,-,-,-,-,-,-,-,-,39.4,-,42.6
|
| 89 |
+
N-ATLAS-LLM,afrimgsm,-,60.9,-,34.2,24.6,-,-,-,-,-,-,-,-,-,31.2,-,37.7
|
| 90 |
+
N-ATLAS-LLM,flores - en_xx,-,-,-,49.6,40.6,-,-,-,-,-,-,-,-,-,27.2,-,39.1
|
| 91 |
+
N-ATLAS-LLM,flores - xx_en,-,-,-,51.0,45.4,-,-,-,-,-,-,-,-,-,38.9,-,45.1
|
| 92 |
+
N-ATLAS-LLM,afrimmlu,-,52.0,-,34.3,33.7,-,-,-,-,-,-,-,-,-,33.5,-,38.4
|
| 93 |
+
N-ATLAS-LLM,injongointent,-,62.6,-,39.2,28.7,-,-,-,-,-,-,-,-,-,25.8,-,39.1
|
| 94 |
+
N-ATLAS-LLM,sib,-,82.9,-,71.8,69.1,-,-,-,-,-,-,-,-,-,64.9,-,72.2
|
| 95 |
+
N-ATLAS-LLM,belebele,-,68.4,-,43.7,36.5,-,-,-,-,-,-,-,-,-,35.8,-,46.1
|
data/leaderboard_json/afrobench_lite.json
CHANGED
|
@@ -24,7 +24,8 @@
|
|
| 24 |
"Gemini-2.5 Flash": 69.3,
|
| 25 |
"Gemini-2.5 Pro": 72.5,
|
| 26 |
"GPT-5 (Aug)": 83.3,
|
| 27 |
-
"Gemini 3 Pro": 77.4
|
|
|
|
| 28 |
}
|
| 29 |
},
|
| 30 |
"Intent": {
|
|
@@ -52,7 +53,8 @@
|
|
| 52 |
"Gemini-2.5 Flash": 87.4,
|
| 53 |
"Gemini-2.5 Pro": 88.0,
|
| 54 |
"GPT-5 (Aug)": 87.3,
|
| 55 |
-
"Gemini 3 Pro": 88.8
|
|
|
|
| 56 |
}
|
| 57 |
},
|
| 58 |
"MT(en/fr-xx)": {
|
|
@@ -80,7 +82,8 @@
|
|
| 80 |
"Gemini-2.5 Flash": 45.3,
|
| 81 |
"Gemini-2.5 Pro": 46.3,
|
| 82 |
"GPT-5 (Aug)": 44.8,
|
| 83 |
-
"Gemini 3 Pro": 47.3
|
|
|
|
| 84 |
}
|
| 85 |
},
|
| 86 |
"MMLU": {
|
|
@@ -108,7 +111,8 @@
|
|
| 108 |
"Gemini-2.5 Flash": 67.3,
|
| 109 |
"Gemini-2.5 Pro": 77.4,
|
| 110 |
"GPT-5 (Aug)": 83.3,
|
| 111 |
-
"Gemini 3 Pro": 86.1
|
|
|
|
| 112 |
}
|
| 113 |
},
|
| 114 |
"Math": {
|
|
@@ -136,7 +140,8 @@
|
|
| 136 |
"Gemini-2.5 Flash": 69.3,
|
| 137 |
"Gemini-2.5 Pro": 73.2,
|
| 138 |
"GPT-5 (Aug)": 73.7,
|
| 139 |
-
"Gemini 3 Pro": 73.4
|
|
|
|
| 140 |
}
|
| 141 |
},
|
| 142 |
"Topic": {
|
|
@@ -164,7 +169,8 @@
|
|
| 164 |
"Gemini-2.5 Flash": 86.8,
|
| 165 |
"Gemini-2.5 Pro": 87.9,
|
| 166 |
"GPT-5 (Aug)": 88.5,
|
| 167 |
-
"Gemini 3 Pro": 87.9
|
|
|
|
| 168 |
}
|
| 169 |
},
|
| 170 |
"RC": {
|
|
@@ -192,7 +198,8 @@
|
|
| 192 |
"Gemini-2.5 Flash": 41.6,
|
| 193 |
"Gemini-2.5 Pro": 76.4,
|
| 194 |
"GPT-5 (Aug)": 83.3,
|
| 195 |
-
"Gemini 3 Pro": 71.2
|
|
|
|
| 196 |
}
|
| 197 |
}
|
| 198 |
}
|
|
|
|
| 24 |
"Gemini-2.5 Flash": 69.3,
|
| 25 |
"Gemini-2.5 Pro": 72.5,
|
| 26 |
"GPT-5 (Aug)": 83.3,
|
| 27 |
+
"Gemini 3 Pro": 77.4,
|
| 28 |
+
"N-ATLAS-LLM": 42.6
|
| 29 |
}
|
| 30 |
},
|
| 31 |
"Intent": {
|
|
|
|
| 53 |
"Gemini-2.5 Flash": 87.4,
|
| 54 |
"Gemini-2.5 Pro": 88.0,
|
| 55 |
"GPT-5 (Aug)": 87.3,
|
| 56 |
+
"Gemini 3 Pro": 88.8,
|
| 57 |
+
"N-ATLAS-LLM": 39.1
|
| 58 |
}
|
| 59 |
},
|
| 60 |
"MT(en/fr-xx)": {
|
|
|
|
| 82 |
"Gemini-2.5 Flash": 45.3,
|
| 83 |
"Gemini-2.5 Pro": 46.3,
|
| 84 |
"GPT-5 (Aug)": 44.8,
|
| 85 |
+
"Gemini 3 Pro": 47.3,
|
| 86 |
+
"N-ATLAS-LLM": 39.1
|
| 87 |
}
|
| 88 |
},
|
| 89 |
"MMLU": {
|
|
|
|
| 111 |
"Gemini-2.5 Flash": 67.3,
|
| 112 |
"Gemini-2.5 Pro": 77.4,
|
| 113 |
"GPT-5 (Aug)": 83.3,
|
| 114 |
+
"Gemini 3 Pro": 86.1,
|
| 115 |
+
"N-ATLAS-LLM": 38.4
|
| 116 |
}
|
| 117 |
},
|
| 118 |
"Math": {
|
|
|
|
| 140 |
"Gemini-2.5 Flash": 69.3,
|
| 141 |
"Gemini-2.5 Pro": 73.2,
|
| 142 |
"GPT-5 (Aug)": 73.7,
|
| 143 |
+
"Gemini 3 Pro": 73.4,
|
| 144 |
+
"N-ATLAS-LLM": 37.7
|
| 145 |
}
|
| 146 |
},
|
| 147 |
"Topic": {
|
|
|
|
| 169 |
"Gemini-2.5 Flash": 86.8,
|
| 170 |
"Gemini-2.5 Pro": 87.9,
|
| 171 |
"GPT-5 (Aug)": 88.5,
|
| 172 |
+
"Gemini 3 Pro": 87.9,
|
| 173 |
+
"N-ATLAS-LLM": 72.2
|
| 174 |
}
|
| 175 |
},
|
| 176 |
"RC": {
|
|
|
|
| 198 |
"Gemini-2.5 Flash": 41.6,
|
| 199 |
"Gemini-2.5 Pro": 76.4,
|
| 200 |
"GPT-5 (Aug)": 83.3,
|
| 201 |
+
"Gemini 3 Pro": 71.2,
|
| 202 |
+
"N-ATLAS-LLM": 46.1
|
| 203 |
}
|
| 204 |
}
|
| 205 |
}
|
data/leaderboard_json/lite_language_scores.json
CHANGED
|
@@ -382,5 +382,21 @@
|
|
| 382 |
"wol": 68.9,
|
| 383 |
"yor": 75.4,
|
| 384 |
"zul": 77.6
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
}
|
| 386 |
}
|
|
|
|
| 382 |
"wol": 68.9,
|
| 383 |
"yor": 75.4,
|
| 384 |
"zul": 77.6
|
| 385 |
+
},
|
| 386 |
+
"N-ATLAS-LLM": {
|
| 387 |
+
"amh": null,
|
| 388 |
+
"hau": 44.2,
|
| 389 |
+
"ibo": 40.3,
|
| 390 |
+
"kin": null,
|
| 391 |
+
"lin": null,
|
| 392 |
+
"lug": null,
|
| 393 |
+
"orm": null,
|
| 394 |
+
"sna": null,
|
| 395 |
+
"sot": null,
|
| 396 |
+
"swa": null,
|
| 397 |
+
"xho": null,
|
| 398 |
+
"wol": null,
|
| 399 |
+
"yor": 36.8,
|
| 400 |
+
"zul": null
|
| 401 |
}
|
| 402 |
}
|