Spaces:

allenai
/

asta-bench-leaderboard

Running

App Files Files Community

Chloe Anastasiades committed on Aug 25

Commit

8f044f3

unverified ·

1 Parent(s): 3317293

Get openness and tool usage names from the same place (#90)

Browse files

Files changed (3) hide show

aliases.py +12 -12
submission.py +12 -11
ui_components.py +16 -16

aliases.py CHANGED Viewed

@@ -1,23 +1,23 @@
-CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS = "Open Source + Open Weights"
-CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS = "Open Source"
-CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "API Available"
-CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed"
 CANONICAL_TOOL_USAGE_STANDARD = "Standard"
-CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE = "Custom with Standard Search"
-CANONICAL_TOOL_USAGE_FULLY_CUSTOM = "Fully Custom"
 OPENNESS_ALIASES = {
-    CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {"Open source & open weights"},
-    CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {"Open source & closed weights"},
-    CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"Closed source & API available"},
-    CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed source & UI only"}
 }
 TOOL_USAGE_ALIASES = {
     CANONICAL_TOOL_USAGE_STANDARD: {},
-    CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: {"Custom interface"},
-    CANONICAL_TOOL_USAGE_FULLY_CUSTOM: {"Fully custom"}
 }

+CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS = "Open source & open weights"
+CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS = "Open source & closed weights"
+CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "Closed source & API available"
+CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed source & UI only"
 CANONICAL_TOOL_USAGE_STANDARD = "Standard"
+CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE = "Custom interface"
+CANONICAL_TOOL_USAGE_FULLY_CUSTOM = "Fully custom"
 OPENNESS_ALIASES = {
+    CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {"Open Source + Open Weights"},
+    CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {"Open Source"},
+    CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"API Available"},
+    CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed"}
 }
 TOOL_USAGE_ALIASES = {
     CANONICAL_TOOL_USAGE_STANDARD: {},
+    CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: {"Custom with Standard Search"},
+    CANONICAL_TOOL_USAGE_FULLY_CUSTOM: {"Fully Custom"}
 }

submission.py CHANGED Viewed

@@ -29,6 +29,7 @@ from datasets import Dataset, DatasetDict, VerificationMode, load_dataset
 from datasets.data_files import EmptyDatasetError
 from huggingface_hub import HfApi
 from config import (
     CONFIG_NAME,
     CONTACT_DATASET,
@@ -316,25 +317,25 @@ def _is_hf_acct_too_new(submission_time: datetime, username: str):
     return submission_time - created_at < timedelta(days=60)
-openness_label_html = """
 <div class="form-label-with-tooltip">
     Agent Openness
-    <span class="tooltip-icon" data-tooltip="• Closed: No API or code available
-        • API Available: API available, but no code
-        • Open Source: Code available, but no weights
-        • Open Source + Open Weights: Code and weights available"
     >
         ⓘ
     </span>
 </div>
 """
-agent_tooling_label_html = """
 <div class="form-label-with-tooltip">
     Agent Tooling
-    <span class="tooltip-icon" data-tooltip="• Standard: Only uses tools explicitly provided in state.tools
-        • Equivalent: Uses custom tools with identical or more restricted capabilities
-        • Fully Custom: Uses tools beyond constraints of Standard or Equivalent"
     >
         ⓘ
     </span>
@@ -390,9 +391,9 @@ def build_page():
             gr.HTML(value="""<h3>URL</h3>""", elem_classes="form-label")
             agent_url_tb = gr.Textbox(label="Link to more information about your agent (e.g. GitHub repo, blog post, or website). This optional link may be shown on the leaderboard to let others explore your agent in more depth.")
             gr.HTML(value="""<h3>Agent openness</h3>""", elem_classes="form-label")
-            openness_radio = gr.Radio(["Open Source","Open Source Open Weights", "API Available", "Closed"], elem_classes="form-label-fieldset", value=None, label="This affects how your submission is categorized on the leaderboard. Choose based on the availability of your code, model weights, or APIs.")
             gr.HTML(value="""<h3>Agent tooling</h3>""", elem_classes="form-label")
-            degree_of_control_radio = gr.Radio(["Standard","Equivalent", "Fully Custom"], elem_classes="form-label-fieldset",value=None, label="Choose based on the tools and the execution environment your agent used during evaluation.")
             gr.HTML(value="""<h3>Submission file</h3>""", elem_classes="form-label")
             gr.HTML("<div id='submission-file-label'>Upload your run file, which is an archive prepared following the instructions in the <a href='https://github.com/allenai/asta-bench?tab=readme-ov-file#submitting-to-the-leaderboard' target='_blank'>README</a> (“Submitting to the Leaderboard”).</div>")
             file_upload_comp = gr.File(

 from datasets.data_files import EmptyDatasetError
 from huggingface_hub import HfApi
+import aliases
 from config import (
     CONFIG_NAME,
     CONTACT_DATASET,
     return submission_time - created_at < timedelta(days=60)
+openness_label_html = f"""
 <div class="form-label-with-tooltip">
     Agent Openness
+    <span class="tooltip-icon" data-tooltip="• {aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY}: No API or code available
+        • {aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE}: API available, but no code
+        • {aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS}: Code available, but no weights
+        • {aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS}: Code and weights available"
     >
         ⓘ
     </span>
 </div>
 """
+agent_tooling_label_html = f"""
 <div class="form-label-with-tooltip">
     Agent Tooling
+    <span class="tooltip-icon" data-tooltip="• {aliases.CANONICAL_TOOL_USAGE_STANDARD}: Only uses tools explicitly provided in state.tools
+        • {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}: Uses custom tools with identical or more restricted capabilities
+        • {aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM}: Uses tools beyond constraints of {aliases.CANONICAL_TOOL_USAGE_STANDARD} or {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}"
     >
         ⓘ
     </span>
             gr.HTML(value="""<h3>URL</h3>""", elem_classes="form-label")
             agent_url_tb = gr.Textbox(label="Link to more information about your agent (e.g. GitHub repo, blog post, or website). This optional link may be shown on the leaderboard to let others explore your agent in more depth.")
             gr.HTML(value="""<h3>Agent openness</h3>""", elem_classes="form-label")
+            openness_radio = gr.Radio([aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS, aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS, aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE, aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY], elem_classes="form-label-fieldset", value=None, label="This affects how your submission is categorized on the leaderboard. Choose based on the availability of your code, model weights, or APIs.")
             gr.HTML(value="""<h3>Agent tooling</h3>""", elem_classes="form-label")
+            degree_of_control_radio = gr.Radio([aliases.CANONICAL_TOOL_USAGE_STANDARD, aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE, aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM], elem_classes="form-label-fieldset",value=None, label="Choose based on the tools and the execution environment your agent used during evaluation.")
             gr.HTML(value="""<h3>Submission file</h3>""", elem_classes="form-label")
             gr.HTML("<div id='submission-file-label'>Upload your run file, which is an archive prepared following the instructions in the <a href='https://github.com/allenai/asta-bench?tab=readme-ov-file#submitting-to-the-leaderboard' target='_blank'>README</a> (“Submitting to the Leaderboard”).</div>")
             file_upload_comp = gr.File(

ui_components.py CHANGED Viewed

@@ -74,35 +74,35 @@ for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
 OPENNESS_SVG_MAP = {
-    "Open source & open weights": {
         "path": "assets/ellipse-pink.svg",
         "description": "Code and models are open"
     },
-    "Open source & closed weights": {
         "path": "assets/ellipse-coral.svg",
         "description": "Code is open but uses closed-weight models"
     },
-    "Closed source & API available": {
         "path": "assets/ellipse-yellow.svg",
         "description": "No access to code; API access only"
     },
-    "Closed source & UI only": {
         "path": "assets/ellipse-white.svg",
         "description": "No access to code or API; UI access only"
     },
 }
 TOOLING_SVG_MAP = {
-    "Standard": {
         "path": "assets/five-point-star.svg",
         "description": "Uses only tools explicitly provided in state.tools"
     },
-    "Custom interface": {
         "path": "assets/four-point-star.svg",
         "description": "Custom tools for accessing an equivalent underlying environment"
     },
-    "Fully custom": {
         "path": "assets/three-point-star.svg",
-        "description": "Uses tools beyond constraints of Standard or Custom interface"
     },
 }
@@ -165,10 +165,10 @@ def build_openness_tooltip_content() -> str:
     Generates the inner HTML for the Agent Openness tooltip card,
     """
     descriptions = {
-        "Open source & open weights": "Both code and ML models are open",
-        "Open source & closed weights": "Code is open but uses an ML model with closed-weights",
-        "Closed source & API available": "No access to code; API access only",
-        "Closed source & UI only": "No access to code or API; UI  access only",
     }
     html_items = []
     for name, info in OPENNESS_SVG_MAP.items():
@@ -208,9 +208,9 @@ def build_pareto_tooltip_content() -> str:
 def build_tooling_tooltip_content() -> str:
     """Generates the inner HTML for the Agent Tooling tooltip card."""
     descriptions = {
-        "Standard": "Uses only predefined tools from the evaluation environment (as defined in Inspect's state.tools).",
-        "Custom interface": "Custom tools for accessing an equivalent underlying environment:",
-        "Fully custom": "Uses tools beyond constraints of Standard or Custom interface",
     }
     custom_interface_sub_list = """
         <ul class="tooltip-sub-list">
@@ -224,7 +224,7 @@ def build_tooling_tooltip_content() -> str:
         desc = descriptions.get(name, "")
         # Check if this is the special case that needs a sub-list
-        sub_list_html = custom_interface_sub_list if name == "Custom Interface" else ""
         html_items.append(f"""
             <div class="tooltip-legend-item">

 OPENNESS_SVG_MAP = {
+    aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {
         "path": "assets/ellipse-pink.svg",
         "description": "Code and models are open"
     },
+    aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {
         "path": "assets/ellipse-coral.svg",
         "description": "Code is open but uses closed-weight models"
     },
+    aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {
         "path": "assets/ellipse-yellow.svg",
         "description": "No access to code; API access only"
     },
+    aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: {
         "path": "assets/ellipse-white.svg",
         "description": "No access to code or API; UI access only"
     },
 }
 TOOLING_SVG_MAP = {
+    aliases.CANONICAL_TOOL_USAGE_STANDARD: {
         "path": "assets/five-point-star.svg",
         "description": "Uses only tools explicitly provided in state.tools"
     },
+    aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: {
         "path": "assets/four-point-star.svg",
         "description": "Custom tools for accessing an equivalent underlying environment"
     },
+    aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: {
         "path": "assets/three-point-star.svg",
+        "description": f"Uses tools beyond constraints of {aliases.CANONICAL_TOOL_USAGE_STANDARD} or {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}"
     },
 }
     Generates the inner HTML for the Agent Openness tooltip card,
     """
     descriptions = {
+        aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: "Both code and ML models are open",
+        aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: "Code is open but uses an ML model with closed-weights",
+        aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: "No access to code; API access only",
+        aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: "No access to code or API; UI  access only",
     }
     html_items = []
     for name, info in OPENNESS_SVG_MAP.items():
 def build_tooling_tooltip_content() -> str:
     """Generates the inner HTML for the Agent Tooling tooltip card."""
     descriptions = {
+        aliases.CANONICAL_TOOL_USAGE_STANDARD: "Uses only predefined tools from the evaluation environment (as defined in Inspect's state.tools).",
+        aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "Custom tools for accessing an equivalent underlying environment:",
+        aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: f"Uses tools beyond constraints of {aliases.CANONICAL_TOOL_USAGE_STANDARD} or {aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE}",
     }
     custom_interface_sub_list = """
         <ul class="tooltip-sub-list">
         desc = descriptions.get(name, "")
         # Check if this is the special case that needs a sub-list
+        sub_list_html = custom_interface_sub_list if name == aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE else ""
         html_items.append(f"""
             <div class="tooltip-legend-item">