Spaces:
Runtime error
Runtime error
Gül Sena Altıntaş committed on
Commit ·
9cada5f
1
Parent(s): b3de8c3
- Added a summary that can be copy-pasted directly into sheets
Browse files
app.py
CHANGED
|
@@ -558,7 +558,8 @@ def run_evaluation(
|
|
| 558 |
markdown_summary = generate_compact_summary_markdown(
|
| 559 |
questions, results, summary_stats
|
| 560 |
)
|
| 561 |
-
csv_summary = generate_csv_summary(questions, results, summary_stats)
|
|
|
|
| 562 |
slurm_id = os.environ.get("SLURM_JOB_ID", "")
|
| 563 |
if save_summary and slurm_id:
|
| 564 |
file_name = f"summaries/{slurm_id}_summary_{time()}.md"
|
|
@@ -1027,6 +1028,41 @@ def generate_csv_summary(questions, results, summary_stats):
|
|
| 1027 |
return "\n".join(lines)
|
| 1028 |
|
| 1029 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1030 |
# Sample datasets for quick testing
|
| 1031 |
SAMPLE_DATASETS = {
|
| 1032 |
"Custom (enter below)": "",
|
|
@@ -1168,7 +1204,7 @@ What is the capital of France?,Paris,London,Berlin,Paris""",
|
|
| 1168 |
)
|
| 1169 |
prefix = gr.Textbox(
|
| 1170 |
label="Filename Prefix",
|
| 1171 |
-
placeholder="
|
| 1172 |
value="",
|
| 1173 |
interactive=True,
|
| 1174 |
visible=True,
|
|
@@ -1250,9 +1286,11 @@ bigscience/bloom-560m""",
|
|
| 1250 |
)
|
| 1251 |
|
| 1252 |
with gr.Column():
|
| 1253 |
-
gr.Markdown("### 📊
|
|
|
|
| 1254 |
csv_summary_output = gr.Textbox(
|
| 1255 |
-
label="
|
|
|
|
| 1256 |
lines=15,
|
| 1257 |
max_lines=25,
|
| 1258 |
show_copy_button=True,
|
|
|
|
| 558 |
markdown_summary = generate_compact_summary_markdown(
|
| 559 |
questions, results, summary_stats
|
| 560 |
)
|
| 561 |
+
# csv_summary = generate_csv_summary(questions, results, summary_stats)
|
| 562 |
+
csv_summary = generate_excel_summary(questions, results, summary_stats)
|
| 563 |
slurm_id = os.environ.get("SLURM_JOB_ID", "")
|
| 564 |
if save_summary and slurm_id:
|
| 565 |
file_name = f"summaries/{slurm_id}_summary_{time()}.md"
|
|
|
|
| 1028 |
return "\n".join(lines)
|
| 1029 |
|
| 1030 |
|
| 1031 |
+
# Characters that would break a tab-separated cell when pasted into a sheet.
_TSV_UNSAFE = str.maketrans({"\t": " ", "\n": " ", "\r": " "})


def _format_excel_cell(result):
    """Render one model's result for one question as a single TSV-safe cell."""
    # Tabs/newlines inside the prediction would spill into neighbouring cells.
    predicted = str(result.get("predicted", "N/A")).translate(_TSV_UNSAFE)
    status_emoji = "✅" if result.get("correct", False) else "❌"
    confidence = result.get("confidence", 0)
    try:
        confidence_text = f"{float(confidence):.2f}"
    except (TypeError, ValueError):
        # A missing/non-numeric confidence must not abort the whole summary.
        confidence_text = "N/A"
    return f"{status_emoji} {predicted} ({confidence_text})"


def generate_excel_summary(questions, results, summary_stats):
    """Generate a tab-separated summary that can be pasted into a spreadsheet.

    The first line is a header with one column per model (the keys of
    ``results`` in iteration order). Each following line corresponds to one
    entry of ``questions`` and contains one cell per model, formatted as
    ``"<status emoji> <predicted> (<confidence>)"``. A cell is ``"ERROR"``
    when the model has no result at that question index or the result
    contains an ``"error"`` key.

    Args:
        questions: Sequence of questions; only its length and order are used.
        results: Mapping of model name to a list of per-question result
            dicts, read through the keys ``predicted``, ``correct`` and
            ``confidence``.
        summary_stats: Only checked for truthiness.

    Returns:
        The tab-separated text, or ``"No data available"`` when any of the
        three arguments is empty.
    """
    # TODO: add Excel file download if necessary
    if not summary_stats or not questions or not results:
        return "No data available"

    model_names = list(results.keys())

    # Header row: one column per model.
    lines = ["\t".join(model_names)]

    # One row per question; joining the cells (instead of appending "\t"
    # after each one) keeps every row at exactly len(model_names) columns,
    # matching the header.
    for q_idx in range(len(questions)):
        cells = []
        for model in model_names:
            model_results = results[model]
            if q_idx < len(model_results) and "error" not in model_results[q_idx]:
                cells.append(_format_excel_cell(model_results[q_idx]))
            else:
                cells.append("ERROR")
        lines.append("\t".join(cells))

    return "\n".join(lines)
|
| 1064 |
+
|
| 1065 |
+
|
| 1066 |
# Sample datasets for quick testing
|
| 1067 |
SAMPLE_DATASETS = {
|
| 1068 |
"Custom (enter below)": "",
|
|
|
|
| 1204 |
)
|
| 1205 |
prefix = gr.Textbox(
|
| 1206 |
label="Filename Prefix",
|
| 1207 |
+
placeholder=f"The file will be saved at summaries/{slurm_id}_{prefix}_TIME.md will be used by default",
|
| 1208 |
value="",
|
| 1209 |
interactive=True,
|
| 1210 |
visible=True,
|
|
|
|
| 1286 |
)
|
| 1287 |
|
| 1288 |
with gr.Column():
|
| 1289 |
+
gr.Markdown("### 📊 Excel Format (Tab separated)")
|
| 1290 |
+
# gr.Markdown("### 📊 CSV Format")
|
| 1291 |
csv_summary_output = gr.Textbox(
|
| 1292 |
+
label="Excel Summary (Copy & Paste Ready, No additional formatting)",
|
| 1293 |
+
# label="CSV Summary (Copy & Paste Ready)",
|
| 1294 |
lines=15,
|
| 1295 |
max_lines=25,
|
| 1296 |
show_copy_button=True,
|