Gül Sena Altıntaş committed on
Commit
41156f1
·
1 Parent(s): 15729bc

UI updates

Browse files
Files changed (1) hide show
  1. app.py +197 -54
app.py CHANGED
@@ -4,7 +4,9 @@ import io
4
  import logging
5
  import os
6
  import re
 
7
  from collections import Counter
 
8
  from typing import Any, Dict, List
9
 
10
  import gradio as gr
@@ -15,6 +17,12 @@ import psutil
15
  import torch
16
  from transformers import AutoModelForCausalLM, AutoTokenizer
17
 
 
 
 
 
 
 
18
 
19
  def get_memory_usage():
20
  """Return (gpu_mem_used_MB, gpu_mem_total_MB, ram_used_MB, ram_total_MB)"""
@@ -47,21 +55,68 @@ logger = logging.getLogger(__name__)
47
  # Model configurations - maps display names to HF model paths
48
  PREDEFINED_MODELS = [
49
  "meta-llama/Llama-3.2-1B",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  "google/gemma-2-2b",
 
 
51
  "Qwen/Qwen3-0.6B",
52
  "Qwen/Qwen2.5-0.5B",
53
  "Qwen/Qwen2.5-1.5B",
 
54
  "bigscience/bloom-560m",
55
  "CohereForAI/aya-expanse-8b",
56
  "common-pile/comma-v0.1-2t",
57
  "google/byt5-small",
58
- "gsaltintas/supertoken_models-llama_gpt2",
59
- "gsaltintas/supertoken_models-llama_google-gemma-2-2b",
60
- "gsaltintas/supertoken_models-llama_google-gemma-2-2b-100b",
 
 
 
 
 
 
 
 
 
 
 
 
61
  ]
62
  # Global cache for loaded models
63
  model_cache = {}
64
 
 
 
65
 
66
  def normalize_delimiter(delim: str) -> str:
67
  delim = delim.strip()
@@ -124,18 +179,36 @@ def setup_tokenizer(model_path):
124
  from huggingface_hub import hf_hub_download, list_repo_files
125
 
126
  files = list_repo_files(model_path)
 
127
  if "tokenizer_config.json" in files:
128
  tokenizer_path = hf_hub_download(
129
  repo_id=model_path, filename="tokenizer_config.json"
130
  )
131
  with open(tokenizer_path) as f:
132
  tok_config = json.load(f)["data"]["tokenizer"]
133
- if tok_config["name"] == "huggingface":
134
  tokenizer_name = tok_config["path"]
135
- # todo: tiktoken
136
- tokenizer = AutoTokenizer.from_pretrained(
137
- tokenizer_name, trust_remote_code=True, legacy=True
138
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  return tokenizer
140
 
141
 
@@ -178,9 +251,9 @@ def load_model_and_tokenizer(model_path, progress_callback=None):
178
  # Load tokenizer
179
  tokenizer = setup_tokenizer(model_path)
180
 
181
- # Add pad token if missing
182
- if tokenizer.pad_token is None:
183
- tokenizer.pad_token = tokenizer.eos_token
184
 
185
  if progress_callback:
186
  progress_callback(
@@ -212,13 +285,16 @@ def load_model_and_tokenizer(model_path, progress_callback=None):
212
 
213
  error_msg = f"❌ Error loading model {model_path}: {str(e)}"
214
  logger.error(error_msg)
 
215
  # code.interact(local=dict(globals(), **locals()))
216
  if progress_callback:
217
  progress_callback(0.0, error_msg)
218
  return None
219
 
220
 
221
- def calculate_choice_likelihood(model, tokenizer, question, choice):
 
 
222
  """Calculate the log-likelihood of the choice given the question prompt"""
223
  try:
224
  prompt = f"Question: {question}\nAnswer: "
@@ -252,6 +328,19 @@ def calculate_choice_likelihood(model, tokenizer, question, choice):
252
  token_log_probs = log_probs.gather(2, target_ids.unsqueeze(-1)).squeeze(-1)
253
 
254
  total_log_prob = token_log_probs.sum().item()
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  return total_log_prob
256
 
257
  except Exception as e:
@@ -259,7 +348,12 @@ def calculate_choice_likelihood(model, tokenizer, question, choice):
259
  return float("-inf")
260
 
261
 
262
- def evaluate_model_on_questions(model_path, questions, progress_callback=None):
 
 
 
 
 
263
  """Evaluate a single model on all questions using likelihood-based scoring"""
264
 
265
  model_info = load_model_and_tokenizer(
@@ -281,7 +375,7 @@ def evaluate_model_on_questions(model_path, questions, progress_callback=None):
281
 
282
  for choice in question["choices"]:
283
  likelihood = calculate_choice_likelihood(
284
- model, tokenizer, question["question"], choice
285
  )
286
  choice_likelihoods[choice] = likelihood
287
 
@@ -348,9 +442,12 @@ def evaluate_model_on_questions(model_path, questions, progress_callback=None):
348
  def run_evaluation(
349
  dataset_text,
350
  selected_predefined,
 
351
  custom_models_text="",
352
  delimiter: str = "\t",
353
  progress=gr.Progress(),
 
 
354
  ):
355
  """Main evaluation function"""
356
  if not dataset_text.strip():
@@ -380,6 +477,9 @@ def run_evaluation(
380
 
381
  # Add predefined models
382
  all_models.extend(selected_predefined)
 
 
 
383
  all_models.extend(custom_models)
384
 
385
  if not all_models:
@@ -436,7 +536,7 @@ def run_evaluation(
436
  progress(overall_progress, msg)
437
 
438
  model_results = evaluate_model_on_questions(
439
- model_path, questions, model_progress
440
  )
441
  results[display_name] = model_results
442
 
@@ -461,6 +561,10 @@ def run_evaluation(
461
  questions, results, summary_stats
462
  )
463
  csv_summary = generate_csv_summary(questions, results, summary_stats)
 
 
 
 
464
 
465
  return (
466
  summary_md,
@@ -823,8 +927,10 @@ def generate_compact_summary_markdown(questions, results, summary_stats):
823
  header = "| Q# | Question | Correct Answer |" + "".join(
824
  [f" {model} |" for model in model_names]
825
  )
826
- separator = "|" + "|".join(
827
- ["-" * (len(col.strip()) + 2) for col in header.split("|")[1:]]
 
 
828
  )
829
 
830
  lines.append(header)
@@ -983,14 +1089,15 @@ css = """
983
  with gr.Blocks(
984
  title="🤖 Model Performance Comparison", theme=gr.themes.Soft(), css=css
985
  ) as demo:
986
- with gr.Row():
987
- with gr.Column(scale=2):
988
- gr.Markdown("""
989
  # 🤖 Model Performance Comparison Tool
990
 
991
  Compare LLM performance on multiple-choice questions using Hugging Face models.
992
-
993
- **Format**: Each line should have: `Question,Correct Answer,Choice1,Choice2,Choice3`
 
 
 
994
 
995
  💡 **Features**:
996
  - Model evaluation using HuggingFace transformers
@@ -998,18 +1105,16 @@ with gr.Blocks(
998
  - Detailed question-by-question results
999
  - Performance charts and statistics
1000
  """)
1001
- with gr.Column(scale=1):
1002
- # with gr.Accordion("Delimiter Options"):
1003
- gr.Markdown("""
1004
- Enter the delimiter used in your dataset:
1005
- """)
1006
- delimiter_selector = gr.Textbox(
1007
- label="Delimiter",
1008
- placeholder="Enter a delimiter, e.g., , or \\t",
1009
- value="\\t", # default
1010
- lines=1,
1011
  )
1012
- with gr.Row():
1013
  with gr.Column(scale=2):
1014
  # Sample dataset selector
1015
  sample_selector = gr.Dropdown(
@@ -1032,21 +1137,52 @@ What is the capital of France?,Paris,London,Berlin,Paris""",
1032
  max_lines=15,
1033
  elem_classes=["universal-text"],
1034
  )
1035
-
1036
- gr.Markdown("""
1037
- **Format Requirements**:
1038
- - Each data line: Question, Correct Answer, Choice1, Choice2, Choice3 (No header)
1039
- - Use commas or tabs as separators
1040
- """)
1041
-
 
1042
  with gr.Column(scale=1):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1043
  # Model selection
1044
  with gr.Tabs():
1045
  with gr.TabItem("🤖 Predefined Models"):
1046
- predefined_selector = gr.CheckboxGroup(
1047
- choices=PREDEFINED_MODELS,
1048
- value=[PREDEFINED_MODELS[0]],
1049
- label="Select from popular models",
 
 
1050
  interactive=True,
1051
  )
1052
 
@@ -1068,16 +1204,14 @@ bigscience/bloom-560m""",
1068
  - `facebook/opt-350m`
1069
  - Your own fine-tuned models!
1070
  """)
1071
-
 
 
 
1072
  # Evaluate button
1073
  evaluate_btn = gr.Button("⚡ Run Evaluation", variant="primary", scale=1)
1074
-
1075
- gr.Markdown("""
1076
- **⚠️ Note**:
1077
- - Larger models require more GPU memory, currently we only run on CPU
1078
- - First run will download models (may take time)
1079
- - Models are cached for subsequent runs
1080
- """)
1081
 
1082
  # Results section
1083
  with gr.Column(visible=True) as results_section:
@@ -1132,13 +1266,22 @@ bigscience/bloom-560m""",
1132
  fn=update_dataset_from_sample, inputs=sample_selector, outputs=dataset_input
1133
  )
1134
 
 
 
 
 
 
1135
  evaluate_btn.click(
1136
  fn=run_evaluation,
1137
  inputs=[
1138
  dataset_input,
1139
- predefined_selector,
 
 
1140
  custom_models_input,
1141
  delimiter_selector,
 
 
1142
  ],
1143
  outputs=[
1144
  summary_output,
@@ -1174,4 +1317,4 @@ bigscience/bloom-560m""",
1174
  """)
1175
 
1176
  if __name__ == "__main__":
1177
- demo.launch()
 
4
  import logging
5
  import os
6
  import re
7
+ import traceback
8
  from collections import Counter
9
+ from time import time
10
  from typing import Any, Dict, List
11
 
12
  import gradio as gr
 
17
  import torch
18
  from transformers import AutoModelForCausalLM, AutoTokenizer
19
 
20
+ from supertoken import (
21
+ build_tokenizer,
22
+ )
23
+
24
+ os.environ["HF_HUB_CACHE"] = f"/scratch/{os.environ.get('USER')}/.cache/huggingface/hub"
25
+
26
 
27
  def get_memory_usage():
28
  """Return (gpu_mem_used_MB, gpu_mem_total_MB, ram_used_MB, ram_total_MB)"""
 
55
  # Model configurations - maps display names to HF model paths
56
  PREDEFINED_MODELS = [
57
  "meta-llama/Llama-3.2-1B",
58
+ "meta-llama/Llama-3.2-1B-Instruct",
59
+ "meta-llama/Meta-Llama-3-8B-Instruct",
60
+ "meta-llama/Meta-Llama-3-8B",
61
+ "google/gemma-2-2b",
62
+ "google/gemma-2-2b-it",
63
+ "google/gemma-3-1b-pt",
64
+ "Qwen/Qwen3-0.6B",
65
+ "Qwen/Qwen2.5-0.5B",
66
+ "Qwen/Qwen2.5-1.5B",
67
+ # "Qwen/Qwen2.5-1.5B-Instruct",
68
+ "bigscience/bloom-560m",
69
+ "CohereForAI/aya-expanse-8b",
70
+ "common-pile/comma-v0.1-2t",
71
+ "google/byt5-small",
72
+ # "gsaltintas/supertoken_models-llama_gpt2",
73
+ # "gsaltintas/supertoken_models-llama_google-gemma-2-2b",
74
+ # "gsaltintas/supertoken_models-llama_google-gemma-2-2b-100b",
75
+ "r-three/supertoken_models-llama_google-gemma-2-2b",
76
+ "r-three/supertoken_models-llama_common-pile-comma-v0.1",
77
+ "r-three/supertoken_models-llama_meta-llama-Llama-3.2-1B",
78
+ "r-three/supertoken_models-llama_microsoft-Phi-3-mini-4k-instruct",
79
+ "r-three/supertoken_models-llama_gpt2",
80
+ "r-three/supertoken_models-llama_bigscience-bloom",
81
+ "r-three/supertoken_models-llama_facebook-xglm-564M",
82
+ ]
83
+ INDUSTRY_MODELS = [
84
+ "meta-llama/Llama-3.2-1B",
85
+ "meta-llama/Llama-3.2-1B-Instruct",
86
+ "meta-llama/Meta-Llama-3-8B-Instruct",
87
+ "meta-llama/Meta-Llama-3-8B",
88
  "google/gemma-2-2b",
89
+ "google/gemma-2-2b-it",
90
+ "google/gemma-3-1b-pt",
91
  "Qwen/Qwen3-0.6B",
92
  "Qwen/Qwen2.5-0.5B",
93
  "Qwen/Qwen2.5-1.5B",
94
+ # "Qwen/Qwen2.5-1.5B-Instruct",
95
  "bigscience/bloom-560m",
96
  "CohereForAI/aya-expanse-8b",
97
  "common-pile/comma-v0.1-2t",
98
  "google/byt5-small",
99
+ ]
100
+ TOKSUITE_MODELS = [
101
+ # "gsaltintas/supertoken_models-llama_gpt2",
102
+ # "gsaltintas/supertoken_models-llama_google-gemma-2-2b",
103
+ # "gsaltintas/supertoken_models-llama_google-gemma-2-2b-100b",
104
+ "google-gemma-2-2b",
105
+ "common-pile-comma-v0.1",
106
+ "meta-llama-Llama-3.2-1B",
107
+ "microsoft-Phi-3-mini-4k-instruct",
108
+ "gpt2",
109
+ "bigscience-bloom",
110
+ "facebook-xglm-564M",
111
+ "mistralai-tekken",
112
+ "tokenmonster-englishcode-32000-consistent-v1",
113
+ "google-byt5-small",
114
  ]
115
  # Global cache for loaded models
116
  model_cache = {}
117
 
118
+ print(os.environ.get("HF_HUB_CACHE"))
119
+
120
 
121
  def normalize_delimiter(delim: str) -> str:
122
  delim = delim.strip()
 
179
  from huggingface_hub import hf_hub_download, list_repo_files
180
 
181
  files = list_repo_files(model_path)
182
+ tokenizer = None
183
  if "tokenizer_config.json" in files:
184
  tokenizer_path = hf_hub_download(
185
  repo_id=model_path, filename="tokenizer_config.json"
186
  )
187
  with open(tokenizer_path) as f:
188
  tok_config = json.load(f)["data"]["tokenizer"]
 
189
  tokenizer_name = tok_config["path"]
190
+ typ = tok_config["name"]
191
+ # tokenizer = build_tokenizer(typ, tokenizer_name).tokenizer
192
+ # if tok_config["name"] == "tiktoken":
193
+ # tokenizer = TikTokenTokenizer.load(tokenizer_name)
194
+ # logger.info(f"Using TikToken tokenizer for {tokenizer_name}")
195
+ # elif tok_config["name"] == "tokenmonster":
196
+ # logger.info(f"Using TokenMonster tokenizer for {tokenizer_name}")
197
+ # tokenizer = TokenMonsterTokenizer.load(tokenizer_name)
198
+ # elif tok_config["name"] == "tekken":
199
+ # logger.info(f"Using Mistral tokenizer for {tokenizer_name}")
200
+ # tokenizer = MistralTokenizer.load(tokenizer_name)
201
+ # else:
202
+ # tokenizer = AutoTokenizer.from_pretrained(
203
+ # tokenizer_name, trust_remote_code=True, legacy=True
204
+ # )
205
+ tokenizer = AutoTokenizer.from_pretrained(
206
+ tokenizer_name, trust_remote_code=True, legacy=True
207
+ )
208
+ else:
209
+ tokenizer = AutoTokenizer.from_pretrained(
210
+ tokenizer_name, trust_remote_code=True, legacy=True
211
+ )
212
  return tokenizer
213
 
214
 
 
251
  # Load tokenizer
252
  tokenizer = setup_tokenizer(model_path)
253
 
254
+ # # Add pad token if missing
255
+ # if tokenizer.pad_token is None:
256
+ # tokenizer.pad_token = tokenizer.eos_token
257
 
258
  if progress_callback:
259
  progress_callback(
 
285
 
286
  error_msg = f"❌ Error loading model {model_path}: {str(e)}"
287
  logger.error(error_msg)
288
+ traceback.print_exc()
289
  # code.interact(local=dict(globals(), **locals()))
290
  if progress_callback:
291
  progress_callback(0.0, error_msg)
292
  return None
293
 
294
 
295
+ def calculate_choice_likelihood(
296
+ model, tokenizer, question, choice, normalization_method: str = "token-length"
297
+ ):
298
  """Calculate the log-likelihood of the choice given the question prompt"""
299
  try:
300
  prompt = f"Question: {question}\nAnswer: "
 
328
  token_log_probs = log_probs.gather(2, target_ids.unsqueeze(-1)).squeeze(-1)
329
 
330
  total_log_prob = token_log_probs.sum().item()
331
+ # char_len = answer_len
332
+ normalization_term = 1.0
333
+ if normalization_method == "token-length":
334
+ normalization_term = answer_len
335
+ elif normalization_method == "byte-length":
336
+ decoded_text = tokenizer.decode(target_ids[0]).strip()
337
+ byte_len = len(decoded_text.encode("utf-8"))
338
+ normalization_term = byte_len
339
+ # char_len = len(tokenizer.decode(target_ids[0]).strip())
340
+ # decoded_text = tokenizer.decode(target_ids[0]).strip()
341
+ # byte_len = len(decoded_text.encode("utf-8"))
342
+ # char_len = answer_len
343
+ total_log_prob /= normalization_term
344
  return total_log_prob
345
 
346
  except Exception as e:
 
348
  return float("-inf")
349
 
350
 
351
+ def evaluate_model_on_questions(
352
+ model_path,
353
+ questions,
354
+ progress_callback=None,
355
+ normalization_method: str = "token-length",
356
+ ):
357
  """Evaluate a single model on all questions using likelihood-based scoring"""
358
 
359
  model_info = load_model_and_tokenizer(
 
375
 
376
  for choice in question["choices"]:
377
  likelihood = calculate_choice_likelihood(
378
+ model, tokenizer, question["question"], choice, normalization_method
379
  )
380
  choice_likelihoods[choice] = likelihood
381
 
 
442
  def run_evaluation(
443
  dataset_text,
444
  selected_predefined,
445
+ toksuite_selector,
446
  custom_models_text="",
447
  delimiter: str = "\t",
448
  progress=gr.Progress(),
449
+ save_summary=False,
450
+ normalization_method: str = "token-length",
451
  ):
452
  """Main evaluation function"""
453
  if not dataset_text.strip():
 
477
 
478
  # Add predefined models
479
  all_models.extend(selected_predefined)
480
+ all_models.extend(
481
+ [f"r-three/supertoken_models-llama_{model}" for model in toksuite_selector]
482
+ )
483
  all_models.extend(custom_models)
484
 
485
  if not all_models:
 
536
  progress(overall_progress, msg)
537
 
538
  model_results = evaluate_model_on_questions(
539
+ model_path, questions, model_progress, normalization_method
540
  )
541
  results[display_name] = model_results
542
 
 
561
  questions, results, summary_stats
562
  )
563
  csv_summary = generate_csv_summary(questions, results, summary_stats)
564
+ slurm_id = os.environ.get("SLURM_JOB_ID", "")
565
+ if save_summary and slurm_id:
566
+ with open(f"{slurm_id}_summary_{time()}.md", "w") as f:
567
+ f.write(markdown_summary)
568
 
569
  return (
570
  summary_md,
 
927
  header = "| Q# | Question | Correct Answer |" + "".join(
928
  [f" {model} |" for model in model_names]
929
  )
930
+ separator = (
931
+ "|"
932
+ + "|".join(["-" * (len(col.strip()) + 1) for col in header.split("|")[1:-1]])
933
+ + "|"
934
  )
935
 
936
  lines.append(header)
 
1089
  with gr.Blocks(
1090
  title="🤖 Model Performance Comparison", theme=gr.themes.Soft(), css=css
1091
  ) as demo:
1092
+ gr.Markdown("""
 
 
1093
  # 🤖 Model Performance Comparison Tool
1094
 
1095
  Compare LLM performance on multiple-choice questions using Hugging Face models.
1096
+ """)
1097
+ with gr.Row():
1098
+ with gr.Column(scale=1):
1099
+ gr.Markdown("""
1100
+ **Format**: Each line should have: `Question,Correct Answer,Choice1,Choice2,Choice3` (No header). Use commas or tabs as separators.
1101
 
1102
  💡 **Features**:
1103
  - Model evaluation using HuggingFace transformers
 
1105
  - Detailed question-by-question results
1106
  - Performance charts and statistics
1107
  """)
1108
+ device_str = "a single GPU" if torch.cuda.is_available() else "CPU"
1109
+ info_str = (
1110
+ lambda: f"""
1111
+ **⚠️ Note**:
1112
+ - Larger models require more GPU memory, currently we only run on {device_str}
1113
+ - First run will download models (may take time)
1114
+ - Models are cached for subsequent runs. Currently loaded models: {list(model_cache.keys()) if model_cache else "None"}
1115
+ """
 
 
1116
  )
1117
+ info_md = gr.Markdown(info_str())
1118
  with gr.Column(scale=2):
1119
  # Sample dataset selector
1120
  sample_selector = gr.Dropdown(
 
1137
  max_lines=15,
1138
  elem_classes=["universal-text"],
1139
  )
1140
+ with gr.Row():
1141
+ with gr.Column(scale=1):
1142
+ normalization_method = gr.Radio(
1143
+ label="Normalization Method",
1144
+ choices=["token-length", "byte-length", "none"],
1145
+ value="token-length",
1146
+ info="Method to normalize log-likelihoods when scoring answers",
1147
+ )
1148
  with gr.Column(scale=1):
1149
+ # with gr.Accordion("Delimiter Options"):
1150
+ delimiter_selector = gr.Textbox(
1151
+ info="Delimiter used in the dataset (e.g., comma or tab)",
1152
+ label="Delimiter",
1153
+ placeholder="Enter a delimiter, e.g., , or \\t",
1154
+ value="\\t", # default
1155
+ lines=1,
1156
+ )
1157
+ with gr.Column(scale=1):
1158
+ save_summary_checkbox = False
1159
+ slurm_id = os.environ.get("SLURM_JOB_ID", "")
1160
+ if slurm_id:
1161
+ save_summary_checkbox = gr.Checkbox(
1162
+ info=f"To save the summary markdown file, check the box below. The filename will be prefixed with {slurm_id}:",
1163
+ label="Save summary markdown to file",
1164
+ value=False,
1165
+ # info="If checked, saves a markdown summary file with SLURM_JOB_ID prefix",
1166
+ )
1167
+ with gr.Row():
1168
+ with gr.Column(scale=2):
1169
+ toksuite_selector = gr.CheckboxGroup(
1170
+ label="Select toksuite models",
1171
+ choices=TOKSUITE_MODELS,
1172
+ value=TOKSUITE_MODELS,
1173
+ interactive=True,
1174
+ info="These models share the same initialization and training source but differ only in their tokenizers. See [r-three/toksuite](https://huggingface.co/collections/r-three/toksuite-68ae7490c151341d78423295) for details.",
1175
+ )
1176
+ with gr.Column(scale=3):
1177
  # Model selection
1178
  with gr.Tabs():
1179
  with gr.TabItem("🤖 Predefined Models"):
1180
+ industry_selector = gr.CheckboxGroup(
1181
+ # choices=PREDEFINED_MODELS,
1182
+ choices=INDUSTRY_MODELS,
1183
+ # value=[x for x in PREDEFINED_MODELS if "r-three" in x],
1184
+ value=[],
1185
+ label="Select from industry models",
1186
  interactive=True,
1187
  )
1188
 
 
1204
  - `facebook/opt-350m`
1205
  - Your own fine-tuned models!
1206
  """)
1207
+ with gr.Row():
1208
+ with gr.Column(scale=1):
1209
+ pass
1210
+ with gr.Column(scale=2):
1211
  # Evaluate button
1212
  evaluate_btn = gr.Button("⚡ Run Evaluation", variant="primary", scale=1)
1213
+ with gr.Column(scale=1):
1214
+ pass
 
 
 
 
 
1215
 
1216
  # Results section
1217
  with gr.Column(visible=True) as results_section:
 
1266
  fn=update_dataset_from_sample, inputs=sample_selector, outputs=dataset_input
1267
  )
1268
 
1269
+ def update_info_md():
1270
+ info_md.value = info_str()
1271
+ return gr.update(value=info_str())
1272
+
1273
+ evaluate_btn.click(fn=update_info_md, inputs=None, outputs=info_md)
1274
  evaluate_btn.click(
1275
  fn=run_evaluation,
1276
  inputs=[
1277
  dataset_input,
1278
+ # predefined_selector,
1279
+ industry_selector,
1280
+ toksuite_selector,
1281
  custom_models_input,
1282
  delimiter_selector,
1283
+ save_summary_checkbox,
1284
+ normalization_method,
1285
  ],
1286
  outputs=[
1287
  summary_output,
 
1317
  """)
1318
 
1319
  if __name__ == "__main__":
1320
+ demo.launch(share=True)