Gül Sena Altıntaş committed on
Commit
9cada5f
·
1 Parent(s): b3de8c3

- Added a summary that can be copy-pasted directly into sheets

Browse files
Files changed (1) hide show
  1. app.py +42 -4
app.py CHANGED
@@ -558,7 +558,8 @@ def run_evaluation(
558
  markdown_summary = generate_compact_summary_markdown(
559
  questions, results, summary_stats
560
  )
561
- csv_summary = generate_csv_summary(questions, results, summary_stats)
 
562
  slurm_id = os.environ.get("SLURM_JOB_ID", "")
563
  if save_summary and slurm_id:
564
  file_name = f"summaries/{slurm_id}_summary_{time()}.md"
@@ -1027,6 +1028,41 @@ def generate_csv_summary(questions, results, summary_stats):
1027
  return "\n".join(lines)
1028
 
1029
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1030
  # Sample datasets for quick testing
1031
  SAMPLE_DATASETS = {
1032
  "Custom (enter below)": "",
@@ -1168,7 +1204,7 @@ What is the capital of France?,Paris,London,Berlin,Paris""",
1168
  )
1169
  prefix = gr.Textbox(
1170
  label="Filename Prefix",
1171
- placeholder="SLURM_JOB_ID will be used by default",
1172
  value="",
1173
  interactive=True,
1174
  visible=True,
@@ -1250,9 +1286,11 @@ bigscience/bloom-560m""",
1250
  )
1251
 
1252
  with gr.Column():
1253
- gr.Markdown("### 📊 CSV Format")
 
1254
  csv_summary_output = gr.Textbox(
1255
- label="CSV Summary (Copy & Paste Ready)",
 
1256
  lines=15,
1257
  max_lines=25,
1258
  show_copy_button=True,
 
558
  markdown_summary = generate_compact_summary_markdown(
559
  questions, results, summary_stats
560
  )
561
+ # csv_summary = generate_csv_summary(questions, results, summary_stats)
562
+ csv_summary = generate_excel_summary(questions, results, summary_stats)
563
  slurm_id = os.environ.get("SLURM_JOB_ID", "")
564
  if save_summary and slurm_id:
565
  file_name = f"summaries/{slurm_id}_summary_{time()}.md"
 
1028
  return "\n".join(lines)
1029
 
1030
 
1031
def generate_excel_summary(questions, results, summary_stats):
    """Generate a tab-separated summary that can be pasted into a spreadsheet.

    The first line holds the model names, one per column. Each following
    line corresponds to one question and holds one cell per model, formatted
    as ``"<status emoji> <predicted> (<confidence>)"``, or ``"ERROR"`` when
    the model has no result for that question or the result carries an
    ``"error"`` key.

    Args:
        questions: Sequence of questions; only its length is used here.
        results: Mapping of model name to a list of per-question result
            dicts (keys read: ``"predicted"``, ``"correct"``,
            ``"confidence"``, ``"error"``).
        summary_stats: Only checked for emptiness.

    Returns:
        The tab/newline separated table as a single string, or
        ``"No data available"`` when any of the inputs is empty.
    """
    # TODO: add Excel file download if necessary
    if not summary_stats or not questions or not results:
        return "No data available"

    # A literal tab or line break inside a cell would shift every following
    # column/row once pasted into a sheet, so flatten them to spaces.
    cell_breaks = str.maketrans({"\t": " ", "\n": " ", "\r": " "})

    def _clean(value):
        return str(value).translate(cell_breaks)

    def _format_cell(model_results, q_idx):
        if q_idx >= len(model_results) or "error" in model_results[q_idx]:
            return "ERROR"
        result = model_results[q_idx]
        predicted = _clean(result.get("predicted", "N/A"))
        status_emoji = "✅" if result.get("correct", False) else "❌"
        try:
            confidence = float(result.get("confidence", 0))
        except (TypeError, ValueError):
            # Key present but None / non-numeric: fall back to the same
            # default used for a missing key instead of crashing the summary.
            confidence = 0.0
        return f"{status_emoji} {predicted} ({confidence:.2f})"

    model_names = list(results.keys())

    # Header row: one column per model.
    lines = ["\t".join(_clean(name) for name in model_names)]

    # One row per question; joining (rather than appending "\t" after each
    # cell) keeps the column count identical to the header row.
    for q_idx in range(len(questions)):
        lines.append(
            "\t".join(_format_cell(results[model], q_idx) for model in model_names)
        )

    return "\n".join(lines)
1064
+
1065
+
1066
  # Sample datasets for quick testing
1067
  SAMPLE_DATASETS = {
1068
  "Custom (enter below)": "",
 
1204
  )
1205
  prefix = gr.Textbox(
1206
  label="Filename Prefix",
1207
+ placeholder=f"The file will be saved at summaries/{slurm_id}_{prefix}_TIME.md will be used by default",
1208
  value="",
1209
  interactive=True,
1210
  visible=True,
 
1286
  )
1287
 
1288
  with gr.Column():
1289
+ gr.Markdown("### 📊 Excel Format (Tab separated)")
1290
+ # gr.Markdown("### 📊 CSV Format")
1291
  csv_summary_output = gr.Textbox(
1292
+ label="Excel Summary (Copy & Paste Ready, No additional formatting)",
1293
+ # label="CSV Summary (Copy & Paste Ready)",
1294
  lines=15,
1295
  max_lines=25,
1296
  show_copy_button=True,