Spaces:
Running
Running
Chloe Anastasiades
committed on
Get openness and tool usage names from the same place (#90)
Browse files
- aliases.py +12 -12
- submission.py +12 -11
- ui_components.py +16 -16
aliases.py
CHANGED
|
@@ -1,23 +1,23 @@
|
|
| 1 |
-
CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS = "Open
|
| 2 |
-
CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS = "Open
|
| 3 |
-
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "API
|
| 4 |
-
CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed"
|
| 5 |
|
| 6 |
|
| 7 |
CANONICAL_TOOL_USAGE_STANDARD = "Standard"
|
| 8 |
-
CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE = "Custom
|
| 9 |
-
CANONICAL_TOOL_USAGE_FULLY_CUSTOM = "Fully
|
| 10 |
|
| 11 |
|
| 12 |
OPENNESS_ALIASES = {
|
| 13 |
-
CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {"Open
|
| 14 |
-
CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {"Open
|
| 15 |
-
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"
|
| 16 |
-
CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed
|
| 17 |
}
|
| 18 |
|
| 19 |
TOOL_USAGE_ALIASES = {
|
| 20 |
CANONICAL_TOOL_USAGE_STANDARD: {},
|
| 21 |
-
CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: {"Custom
|
| 22 |
-
CANONICAL_TOOL_USAGE_FULLY_CUSTOM: {"Fully
|
| 23 |
}
|
|
|
|
| 1 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS = "Open source & open weights"
|
| 2 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS = "Open source & closed weights"
|
| 3 |
+
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "Closed source & API available"
|
| 4 |
+
CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed source & UI only"
|
| 5 |
|
| 6 |
|
| 7 |
CANONICAL_TOOL_USAGE_STANDARD = "Standard"
|
| 8 |
+
CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE = "Custom interface"
|
| 9 |
+
CANONICAL_TOOL_USAGE_FULLY_CUSTOM = "Fully custom"
|
| 10 |
|
| 11 |
|
| 12 |
OPENNESS_ALIASES = {
|
| 13 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {"Open Source + Open Weights"},
|
| 14 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {"Open Source"},
|
| 15 |
+
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"API Available"},
|
| 16 |
+
CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed"}
|
| 17 |
}
|
| 18 |
|
| 19 |
TOOL_USAGE_ALIASES = {
|
| 20 |
CANONICAL_TOOL_USAGE_STANDARD: {},
|
| 21 |
+
CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: {"Custom with Standard Search"},
|
| 22 |
+
CANONICAL_TOOL_USAGE_FULLY_CUSTOM: {"Fully Custom"}
|
| 23 |
}
|
submission.py
CHANGED
|
@@ -29,6 +29,7 @@ from datasets import Dataset, DatasetDict, VerificationMode, load_dataset
|
|
| 29 |
from datasets.data_files import EmptyDatasetError
|
| 30 |
from huggingface_hub import HfApi
|
| 31 |
|
|
|
|
| 32 |
from config import (
|
| 33 |
CONFIG_NAME,
|
| 34 |
CONTACT_DATASET,
|
|
@@ -316,25 +317,25 @@ def _is_hf_acct_too_new(submission_time: datetime, username: str):
|
|
| 316 |
return submission_time - created_at < timedelta(days=60)
|
| 317 |
|
| 318 |
|
| 319 |
-
openness_label_html = """
|
| 320 |
<div class="form-label-with-tooltip">
|
| 321 |
Agent Openness
|
| 322 |
-
<span class="tooltip-icon" data-tooltip="•
|
| 323 |
-
•
|
| 324 |
-
•
|
| 325 |
-
•
|
| 326 |
>
|
| 327 |
ⓘ
|
| 328 |
</span>
|
| 329 |
</div>
|
| 330 |
"""
|
| 331 |
|
| 332 |
-
agent_tooling_label_html = """
|
| 333 |
<div class="form-label-with-tooltip">
|
| 334 |
Agent Tooling
|
| 335 |
-
<span class="tooltip-icon" data-tooltip="•
|
| 336 |
-
•
|
| 337 |
-
•
|
| 338 |
>
|
| 339 |
ⓘ
|
| 340 |
</span>
|
|
@@ -390,9 +391,9 @@ def build_page():
|
|
| 390 |
gr.HTML(value="""<h3>URL</h3>""", elem_classes="form-label")
|
| 391 |
agent_url_tb = gr.Textbox(label="Link to more information about your agent (e.g. GitHub repo, blog post, or website). This optional link may be shown on the leaderboard to let others explore your agent in more depth.")
|
| 392 |
gr.HTML(value="""<h3>Agent openness</h3>""", elem_classes="form-label")
|
| 393 |
-
openness_radio = gr.Radio([
|
| 394 |
gr.HTML(value="""<h3>Agent tooling</h3>""", elem_classes="form-label")
|
| 395 |
-
degree_of_control_radio = gr.Radio([
|
| 396 |
gr.HTML(value="""<h3>Submission file</h3>""", elem_classes="form-label")
|
| 397 |
gr.HTML("<div id='submission-file-label'>Upload your run file, which is an archive prepared following the instructions in the <a href='https://github.com/allenai/asta-bench?tab=readme-ov-file#submitting-to-the-leaderboard' target='_blank'>README</a> (“Submitting to the Leaderboard”).</div>")
|
| 398 |
file_upload_comp = gr.File(
|
|
|
|
| 29 |
from datasets.data_files import EmptyDatasetError
|
| 30 |
from huggingface_hub import HfApi
|
| 31 |
|
| 32 |
+
import aliases
|
| 33 |
from config import (
|
| 34 |
CONFIG_NAME,
|
| 35 |
CONTACT_DATASET,
|
|
|
|
| 317 |
return submission_time - created_at < timedelta(days=60)
|
| 318 |
|
| 319 |
|
| 320 |
+
openness_label_html = f"""
|
| 321 |
<div class="form-label-with-tooltip">
|
| 322 |
Agent Openness
|
| 323 |
+
<span class="tooltip-icon" data-tooltip="• {aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY}: No API or code available
|
| 324 |
+
• {aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE}: API available, but no code
|
| 325 |
+
• {aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS}: Code available, but no weights
|
| 326 |
+
• {aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS}: Code and weights available"
|
| 327 |
>
|
| 328 |
ⓘ
|
| 329 |
</span>
|
| 330 |
</div>
|
| 331 |
"""
|
| 332 |
|
| 333 |
+
agent_tooling_label_html = f"""
|
| 334 |
<div class="form-label-with-tooltip">
|
| 335 |
Agent Tooling
|
| 336 |
+
<span class="tooltip-icon" data-tooltip="• {aliases.CANONICAL_TOOL_USAGE_STANDARD}: Only uses tools explicitly provided in state.tools
|
| 337 |
+
• {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}: Uses custom tools with identical or more restricted capabilities
|
| 338 |
+
• {aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM}: Uses tools beyond constraints of {aliases.CANONICAL_TOOL_USAGE_STANDARD} or {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}"
|
| 339 |
>
|
| 340 |
ⓘ
|
| 341 |
</span>
|
|
|
|
| 391 |
gr.HTML(value="""<h3>URL</h3>""", elem_classes="form-label")
|
| 392 |
agent_url_tb = gr.Textbox(label="Link to more information about your agent (e.g. GitHub repo, blog post, or website). This optional link may be shown on the leaderboard to let others explore your agent in more depth.")
|
| 393 |
gr.HTML(value="""<h3>Agent openness</h3>""", elem_classes="form-label")
|
| 394 |
+
openness_radio = gr.Radio([aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS, aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS, aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE, aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY], elem_classes="form-label-fieldset", value=None, label="This affects how your submission is categorized on the leaderboard. Choose based on the availability of your code, model weights, or APIs.")
|
| 395 |
gr.HTML(value="""<h3>Agent tooling</h3>""", elem_classes="form-label")
|
| 396 |
+
degree_of_control_radio = gr.Radio([aliases.CANONICAL_TOOL_USAGE_STANDARD, aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE, aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM], elem_classes="form-label-fieldset",value=None, label="Choose based on the tools and the execution environment your agent used during evaluation.")
|
| 397 |
gr.HTML(value="""<h3>Submission file</h3>""", elem_classes="form-label")
|
| 398 |
gr.HTML("<div id='submission-file-label'>Upload your run file, which is an archive prepared following the instructions in the <a href='https://github.com/allenai/asta-bench?tab=readme-ov-file#submitting-to-the-leaderboard' target='_blank'>README</a> (“Submitting to the Leaderboard”).</div>")
|
| 399 |
file_upload_comp = gr.File(
|
ui_components.py
CHANGED
|
@@ -74,35 +74,35 @@ for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
|
|
| 74 |
|
| 75 |
|
| 76 |
OPENNESS_SVG_MAP = {
|
| 77 |
-
|
| 78 |
"path": "assets/ellipse-pink.svg",
|
| 79 |
"description": "Code and models are open"
|
| 80 |
},
|
| 81 |
-
|
| 82 |
"path": "assets/ellipse-coral.svg",
|
| 83 |
"description": "Code is open but uses closed-weight models"
|
| 84 |
},
|
| 85 |
-
|
| 86 |
"path": "assets/ellipse-yellow.svg",
|
| 87 |
"description": "No access to code; API access only"
|
| 88 |
},
|
| 89 |
-
|
| 90 |
"path": "assets/ellipse-white.svg",
|
| 91 |
"description": "No access to code or API; UI access only"
|
| 92 |
},
|
| 93 |
}
|
| 94 |
TOOLING_SVG_MAP = {
|
| 95 |
-
|
| 96 |
"path": "assets/five-point-star.svg",
|
| 97 |
"description": "Uses only tools explicitly provided in state.tools"
|
| 98 |
},
|
| 99 |
-
|
| 100 |
"path": "assets/four-point-star.svg",
|
| 101 |
"description": "Custom tools for accessing an equivalent underlying environment"
|
| 102 |
},
|
| 103 |
-
|
| 104 |
"path": "assets/three-point-star.svg",
|
| 105 |
-
"description": "Uses tools beyond constraints of
|
| 106 |
},
|
| 107 |
}
|
| 108 |
|
|
@@ -165,10 +165,10 @@ def build_openness_tooltip_content() -> str:
|
|
| 165 |
Generates the inner HTML for the Agent Openness tooltip card,
|
| 166 |
"""
|
| 167 |
descriptions = {
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
}
|
| 173 |
html_items = []
|
| 174 |
for name, info in OPENNESS_SVG_MAP.items():
|
|
@@ -208,9 +208,9 @@ def build_pareto_tooltip_content() -> str:
|
|
| 208 |
def build_tooling_tooltip_content() -> str:
|
| 209 |
"""Generates the inner HTML for the Agent Tooling tooltip card."""
|
| 210 |
descriptions = {
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
}
|
| 215 |
custom_interface_sub_list = """
|
| 216 |
<ul class="tooltip-sub-list">
|
|
@@ -224,7 +224,7 @@ def build_tooling_tooltip_content() -> str:
|
|
| 224 |
desc = descriptions.get(name, "")
|
| 225 |
|
| 226 |
# Check if this is the special case that needs a sub-list
|
| 227 |
-
sub_list_html = custom_interface_sub_list if name ==
|
| 228 |
|
| 229 |
html_items.append(f"""
|
| 230 |
<div class="tooltip-legend-item">
|
|
|
|
| 74 |
|
| 75 |
|
| 76 |
OPENNESS_SVG_MAP = {
|
| 77 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {
|
| 78 |
"path": "assets/ellipse-pink.svg",
|
| 79 |
"description": "Code and models are open"
|
| 80 |
},
|
| 81 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {
|
| 82 |
"path": "assets/ellipse-coral.svg",
|
| 83 |
"description": "Code is open but uses closed-weight models"
|
| 84 |
},
|
| 85 |
+
aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {
|
| 86 |
"path": "assets/ellipse-yellow.svg",
|
| 87 |
"description": "No access to code; API access only"
|
| 88 |
},
|
| 89 |
+
aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: {
|
| 90 |
"path": "assets/ellipse-white.svg",
|
| 91 |
"description": "No access to code or API; UI access only"
|
| 92 |
},
|
| 93 |
}
|
| 94 |
TOOLING_SVG_MAP = {
|
| 95 |
+
aliases.CANONICAL_TOOL_USAGE_STANDARD: {
|
| 96 |
"path": "assets/five-point-star.svg",
|
| 97 |
"description": "Uses only tools explicitly provided in state.tools"
|
| 98 |
},
|
| 99 |
+
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: {
|
| 100 |
"path": "assets/four-point-star.svg",
|
| 101 |
"description": "Custom tools for accessing an equivalent underlying environment"
|
| 102 |
},
|
| 103 |
+
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: {
|
| 104 |
"path": "assets/three-point-star.svg",
|
| 105 |
+
"description": f"Uses tools beyond constraints of {aliases.CANONICAL_TOOL_USAGE_STANDARD} or {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}"
|
| 106 |
},
|
| 107 |
}
|
| 108 |
|
|
|
|
| 165 |
Generates the inner HTML for the Agent Openness tooltip card,
|
| 166 |
"""
|
| 167 |
descriptions = {
|
| 168 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: "Both code and ML models are open",
|
| 169 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: "Code is open but uses an ML model with closed-weights",
|
| 170 |
+
aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: "No access to code; API access only",
|
| 171 |
+
aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: "No access to code or API; UI access only",
|
| 172 |
}
|
| 173 |
html_items = []
|
| 174 |
for name, info in OPENNESS_SVG_MAP.items():
|
|
|
|
| 208 |
def build_tooling_tooltip_content() -> str:
|
| 209 |
"""Generates the inner HTML for the Agent Tooling tooltip card."""
|
| 210 |
descriptions = {
|
| 211 |
+
aliases.CANONICAL_TOOL_USAGE_STANDARD: "Uses only predefined tools from the evaluation environment (as defined in Inspect's state.tools).",
|
| 212 |
+
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "Custom tools for accessing an equivalent underlying environment:",
|
| 213 |
+
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: f"Uses tools beyond constraints of {aliases.CANONICAL_TOOL_USAGE_STANDARD} or {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}",
|
| 214 |
}
|
| 215 |
custom_interface_sub_list = """
|
| 216 |
<ul class="tooltip-sub-list">
|
|
|
|
| 224 |
desc = descriptions.get(name, "")
|
| 225 |
|
| 226 |
# Check if this is the special case that needs a sub-list
|
| 227 |
+
sub_list_html = custom_interface_sub_list if name == aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE else ""
|
| 228 |
|
| 229 |
html_items.append(f"""
|
| 230 |
<div class="tooltip-legend-item">
|