Chloe Anastasiades committed on
Commit
8f044f3
·
unverified ·
1 Parent(s): 3317293

Get openness and tool usage names from the same place (#90)

Browse files
Files changed (3) hide show
  1. aliases.py +12 -12
  2. submission.py +12 -11
  3. ui_components.py +16 -16
aliases.py CHANGED
@@ -1,23 +1,23 @@
1
- CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS = "Open Source + Open Weights"
2
- CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS = "Open Source"
3
- CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "API Available"
4
- CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed"
5
 
6
 
7
  CANONICAL_TOOL_USAGE_STANDARD = "Standard"
8
- CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE = "Custom with Standard Search"
9
- CANONICAL_TOOL_USAGE_FULLY_CUSTOM = "Fully Custom"
10
 
11
 
12
  OPENNESS_ALIASES = {
13
- CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {"Open source & open weights"},
14
- CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {"Open source & closed weights"},
15
- CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"Closed source & API available"},
16
- CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed source & UI only"}
17
  }
18
 
19
  TOOL_USAGE_ALIASES = {
20
  CANONICAL_TOOL_USAGE_STANDARD: {},
21
- CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: {"Custom interface"},
22
- CANONICAL_TOOL_USAGE_FULLY_CUSTOM: {"Fully custom"}
23
  }
 
1
+ CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS = "Open source & open weights"
2
+ CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS = "Open source & closed weights"
3
+ CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "Closed source & API available"
4
+ CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed source & UI only"
5
 
6
 
7
  CANONICAL_TOOL_USAGE_STANDARD = "Standard"
8
+ CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE = "Custom interface"
9
+ CANONICAL_TOOL_USAGE_FULLY_CUSTOM = "Fully custom"
10
 
11
 
12
  OPENNESS_ALIASES = {
13
+ CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {"Open Source + Open Weights"},
14
+ CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {"Open Source"},
15
+ CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"API Available"},
16
+ CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed"}
17
  }
18
 
19
  TOOL_USAGE_ALIASES = {
20
  CANONICAL_TOOL_USAGE_STANDARD: {},
21
+ CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: {"Custom with Standard Search"},
22
+ CANONICAL_TOOL_USAGE_FULLY_CUSTOM: {"Fully Custom"}
23
  }
submission.py CHANGED
@@ -29,6 +29,7 @@ from datasets import Dataset, DatasetDict, VerificationMode, load_dataset
29
  from datasets.data_files import EmptyDatasetError
30
  from huggingface_hub import HfApi
31
 
 
32
  from config import (
33
  CONFIG_NAME,
34
  CONTACT_DATASET,
@@ -316,25 +317,25 @@ def _is_hf_acct_too_new(submission_time: datetime, username: str):
316
  return submission_time - created_at < timedelta(days=60)
317
 
318
 
319
- openness_label_html = """
320
  <div class="form-label-with-tooltip">
321
  Agent Openness
322
- <span class="tooltip-icon" data-tooltip="• Closed: No API or code available
323
- API Available: API available, but no code
324
- Open Source: Code available, but no weights
325
- Open Source + Open Weights: Code and weights available"
326
  >
327
 
328
  </span>
329
  </div>
330
  """
331
 
332
- agent_tooling_label_html = """
333
  <div class="form-label-with-tooltip">
334
  Agent Tooling
335
- <span class="tooltip-icon" data-tooltip="• Standard: Only uses tools explicitly provided in state.tools
336
- Equivalent: Uses custom tools with identical or more restricted capabilities
337
- Fully Custom: Uses tools beyond constraints of Standard or Equivalent"
338
  >
339
 
340
  </span>
@@ -390,9 +391,9 @@ def build_page():
390
  gr.HTML(value="""<h3>URL</h3>""", elem_classes="form-label")
391
  agent_url_tb = gr.Textbox(label="Link to more information about your agent (e.g. GitHub repo, blog post, or website). This optional link may be shown on the leaderboard to let others explore your agent in more depth.")
392
  gr.HTML(value="""<h3>Agent openness</h3>""", elem_classes="form-label")
393
- openness_radio = gr.Radio(["Open Source","Open Source Open Weights", "API Available", "Closed"], elem_classes="form-label-fieldset", value=None, label="This affects how your submission is categorized on the leaderboard. Choose based on the availability of your code, model weights, or APIs.")
394
  gr.HTML(value="""<h3>Agent tooling</h3>""", elem_classes="form-label")
395
- degree_of_control_radio = gr.Radio(["Standard","Equivalent", "Fully Custom"], elem_classes="form-label-fieldset",value=None, label="Choose based on the tools and the execution environment your agent used during evaluation.")
396
  gr.HTML(value="""<h3>Submission file</h3>""", elem_classes="form-label")
397
  gr.HTML("<div id='submission-file-label'>Upload your run file, which is an archive prepared following the instructions in the <a href='https://github.com/allenai/asta-bench?tab=readme-ov-file#submitting-to-the-leaderboard' target='_blank'>README</a> (“Submitting to the Leaderboard”).</div>")
398
  file_upload_comp = gr.File(
 
29
  from datasets.data_files import EmptyDatasetError
30
  from huggingface_hub import HfApi
31
 
32
+ import aliases
33
  from config import (
34
  CONFIG_NAME,
35
  CONTACT_DATASET,
 
317
  return submission_time - created_at < timedelta(days=60)
318
 
319
 
320
+ openness_label_html = f"""
321
  <div class="form-label-with-tooltip">
322
  Agent Openness
323
+ <span class="tooltip-icon" data-tooltip="• {aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY}: No API or code available
324
+ {aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE}: API available, but no code
325
+ {aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS}: Code available, but no weights
326
+ {aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS}: Code and weights available"
327
  >
328
 
329
  </span>
330
  </div>
331
  """
332
 
333
+ agent_tooling_label_html = f"""
334
  <div class="form-label-with-tooltip">
335
  Agent Tooling
336
+ <span class="tooltip-icon" data-tooltip="• {aliases.CANONICAL_TOOL_USAGE_STANDARD}: Only uses tools explicitly provided in state.tools
337
+ {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}: Uses custom tools with identical or more restricted capabilities
338
+ {aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM}: Uses tools beyond constraints of {aliases.CANONICAL_TOOL_USAGE_STANDARD} or {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}"
339
  >
340
 
341
  </span>
 
391
  gr.HTML(value="""<h3>URL</h3>""", elem_classes="form-label")
392
  agent_url_tb = gr.Textbox(label="Link to more information about your agent (e.g. GitHub repo, blog post, or website). This optional link may be shown on the leaderboard to let others explore your agent in more depth.")
393
  gr.HTML(value="""<h3>Agent openness</h3>""", elem_classes="form-label")
394
+ openness_radio = gr.Radio([aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS, aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS, aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE, aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY], elem_classes="form-label-fieldset", value=None, label="This affects how your submission is categorized on the leaderboard. Choose based on the availability of your code, model weights, or APIs.")
395
  gr.HTML(value="""<h3>Agent tooling</h3>""", elem_classes="form-label")
396
+ degree_of_control_radio = gr.Radio([aliases.CANONICAL_TOOL_USAGE_STANDARD, aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE, aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM], elem_classes="form-label-fieldset",value=None, label="Choose based on the tools and the execution environment your agent used during evaluation.")
397
  gr.HTML(value="""<h3>Submission file</h3>""", elem_classes="form-label")
398
  gr.HTML("<div id='submission-file-label'>Upload your run file, which is an archive prepared following the instructions in the <a href='https://github.com/allenai/asta-bench?tab=readme-ov-file#submitting-to-the-leaderboard' target='_blank'>README</a> (“Submitting to the Leaderboard”).</div>")
399
  file_upload_comp = gr.File(
ui_components.py CHANGED
@@ -74,35 +74,35 @@ for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
74
 
75
 
76
  OPENNESS_SVG_MAP = {
77
- "Open source & open weights": {
78
  "path": "assets/ellipse-pink.svg",
79
  "description": "Code and models are open"
80
  },
81
- "Open source & closed weights": {
82
  "path": "assets/ellipse-coral.svg",
83
  "description": "Code is open but uses closed-weight models"
84
  },
85
- "Closed source & API available": {
86
  "path": "assets/ellipse-yellow.svg",
87
  "description": "No access to code; API access only"
88
  },
89
- "Closed source & UI only": {
90
  "path": "assets/ellipse-white.svg",
91
  "description": "No access to code or API; UI access only"
92
  },
93
  }
94
  TOOLING_SVG_MAP = {
95
- "Standard": {
96
  "path": "assets/five-point-star.svg",
97
  "description": "Uses only tools explicitly provided in state.tools"
98
  },
99
- "Custom interface": {
100
  "path": "assets/four-point-star.svg",
101
  "description": "Custom tools for accessing an equivalent underlying environment"
102
  },
103
- "Fully custom": {
104
  "path": "assets/three-point-star.svg",
105
- "description": "Uses tools beyond constraints of Standard or Custom interface"
106
  },
107
  }
108
 
@@ -165,10 +165,10 @@ def build_openness_tooltip_content() -> str:
165
  Generates the inner HTML for the Agent Openness tooltip card,
166
  """
167
  descriptions = {
168
- "Open source & open weights": "Both code and ML models are open",
169
- "Open source & closed weights": "Code is open but uses an ML model with closed-weights",
170
- "Closed source & API available": "No access to code; API access only",
171
- "Closed source & UI only": "No access to code or API; UI access only",
172
  }
173
  html_items = []
174
  for name, info in OPENNESS_SVG_MAP.items():
@@ -208,9 +208,9 @@ def build_pareto_tooltip_content() -> str:
208
  def build_tooling_tooltip_content() -> str:
209
  """Generates the inner HTML for the Agent Tooling tooltip card."""
210
  descriptions = {
211
- "Standard": "Uses only predefined tools from the evaluation environment (as defined in Inspect's state.tools).",
212
- "Custom interface": "Custom tools for accessing an equivalent underlying environment:",
213
- "Fully custom": "Uses tools beyond constraints of Standard or Custom interface",
214
  }
215
  custom_interface_sub_list = """
216
  <ul class="tooltip-sub-list">
@@ -224,7 +224,7 @@ def build_tooling_tooltip_content() -> str:
224
  desc = descriptions.get(name, "")
225
 
226
  # Check if this is the special case that needs a sub-list
227
- sub_list_html = custom_interface_sub_list if name == "Custom Interface" else ""
228
 
229
  html_items.append(f"""
230
  <div class="tooltip-legend-item">
 
74
 
75
 
76
  OPENNESS_SVG_MAP = {
77
+ aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {
78
  "path": "assets/ellipse-pink.svg",
79
  "description": "Code and models are open"
80
  },
81
+ aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {
82
  "path": "assets/ellipse-coral.svg",
83
  "description": "Code is open but uses closed-weight models"
84
  },
85
+ aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {
86
  "path": "assets/ellipse-yellow.svg",
87
  "description": "No access to code; API access only"
88
  },
89
+ aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: {
90
  "path": "assets/ellipse-white.svg",
91
  "description": "No access to code or API; UI access only"
92
  },
93
  }
94
  TOOLING_SVG_MAP = {
95
+ aliases.CANONICAL_TOOL_USAGE_STANDARD: {
96
  "path": "assets/five-point-star.svg",
97
  "description": "Uses only tools explicitly provided in state.tools"
98
  },
99
+ aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: {
100
  "path": "assets/four-point-star.svg",
101
  "description": "Custom tools for accessing an equivalent underlying environment"
102
  },
103
+ aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: {
104
  "path": "assets/three-point-star.svg",
105
+ "description": f"Uses tools beyond constraints of {aliases.CANONICAL_TOOL_USAGE_STANDARD} or {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}"
106
  },
107
  }
108
 
 
165
  Generates the inner HTML for the Agent Openness tooltip card,
166
  """
167
  descriptions = {
168
+ aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: "Both code and ML models are open",
169
+ aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: "Code is open but uses an ML model with closed-weights",
170
+ aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: "No access to code; API access only",
171
+ aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: "No access to code or API; UI access only",
172
  }
173
  html_items = []
174
  for name, info in OPENNESS_SVG_MAP.items():
 
208
  def build_tooling_tooltip_content() -> str:
209
  """Generates the inner HTML for the Agent Tooling tooltip card."""
210
  descriptions = {
211
+ aliases.CANONICAL_TOOL_USAGE_STANDARD: "Uses only predefined tools from the evaluation environment (as defined in Inspect's state.tools).",
212
+ aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "Custom tools for accessing an equivalent underlying environment:",
213
+ aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: f"Uses tools beyond constraints of {aliases.CANONICAL_TOOL_USAGE_STANDARD} or {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}",
214
  }
215
  custom_interface_sub_list = """
216
  <ul class="tooltip-sub-list">
 
224
  desc = descriptions.get(name, "")
225
 
226
  # Check if this is the special case that needs a sub-list
227
+ sub_list_html = custom_interface_sub_list if name == aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE else ""
228
 
229
  html_items.append(f"""
230
  <div class="tooltip-legend-item">