cgeorgiaw HF Staff committed on
Commit
f83fa62
·
1 Parent(s): fbafbd7

populating leaderboard

Browse files
Files changed (3) hide show
  1. __pycache__/about.cpython-310.pyc +0 -0
  2. about.py +129 -1
  3. app.py +103 -15
__pycache__/about.cpython-310.pyc ADDED
Binary file (2.51 kB). View file
 
about.py CHANGED
@@ -7,4 +7,132 @@ CACHE_PATH=os.getenv("HF_HOME", ".")
7
  API = HfApi(token=TOKEN)
8
  organization="LeMaterial"
9
  submissions_repo = f'{organization}/lemat-gen-bench-submissions'
10
- results_repo = f'{organization}/lemat-gen-bench-results'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  API = HfApi(token=TOKEN)
8
  organization="LeMaterial"
9
  submissions_repo = f'{organization}/lemat-gen-bench-submissions'
10
+ results_repo = f'{organization}/lemat-genbench-results'
11
+
12
# Column display names mapping: raw results-dataset column -> leaderboard header.
COLUMN_DISPLAY_NAMES = {
    'run_name': 'Model',
    'n_structures': 'Total Structures',
    # Validity metrics
    'overall_valid_count': 'Valid',
    'charge_neutral_count': 'Charge Neutral',
    'distance_valid_count': 'Distance Valid',
    'plausibility_valid_count': 'Plausibility Valid',
    # Uniqueness and Novelty
    'unique_count': 'Unique',
    'novel_count': 'Novel',
    # Energy-based metrics
    'mean_formation_energy': 'Formation Energy (eV)',
    'formation_energy_std': 'Formation Energy Std',
    'stability_mean_above_hull': 'E Above Hull (eV)',
    'stability_std_e_above_hull': 'E Above Hull Std',
    'mean_relaxation_RMSD': 'Relaxation RMSD (Å)',
    'relaxation_RMSE_std': 'Relaxation RMSD Std',
    # Stability metrics
    'stable_count': 'Stable',
    'unique_in_stable_count': 'Unique in Stable',
    'sun_count': 'SUN',
    # Metastability metrics
    'metastable_count': 'Metastable',
    'unique_in_metastable_count': 'Unique in Metastable',
    'msun_count': 'MSUN',
    # Distribution metrics
    'JSDistance': 'JS Distance',
    'MMD': 'MMD',
    'FrechetDistance': 'FID',
    # Diversity metrics
    'element_diversity': 'Element Diversity',
    'space_group_diversity': 'Space Group Diversity',
    'site_diversity': 'Atomic Site Diversity',
    'physical_size_diversity': 'Crystal Size Diversity',
    # HHI metrics
    'hhi_production_mean': 'HHI Production',
    'hhi_reserve_mean': 'HHI Reserve',
    'hhi_combined_mean': 'HHI Combined',
}

# Metrics that can be shown as percentages (count-based metrics, each a
# count out of the run's total n_structures).
COUNT_BASED_METRICS = [
    'overall_valid_count',
    'charge_neutral_count',
    'distance_valid_count',
    'plausibility_valid_count',
    'unique_count',
    'novel_count',
    'stable_count',
    'unique_in_stable_count',
    'sun_count',
    'metastable_count',
    'unique_in_metastable_count',
    'msun_count',
]

# Metric groups for organized display; the arrow in each group name marks
# the preferred direction (↑ higher is better, ↓ lower is better).
# NOTE(review): 'hhi_combined_mean' has a display name above but is not in
# any group, so it never appears in the full view — confirm intentional.
METRIC_GROUPS = {
    'Validity ↑': [
        'overall_valid_count',
        'charge_neutral_count',
        'distance_valid_count',
        'plausibility_valid_count',
    ],
    'Uniqueness & Novelty ↑': [
        'unique_count',
        'novel_count',
    ],
    'Energy Metrics ↓': [
        'stability_mean_above_hull',
        'stability_std_e_above_hull',
        'mean_formation_energy',
        'formation_energy_std',
        'mean_relaxation_RMSD',
        'relaxation_RMSE_std',
    ],
    'Stability ↑': [
        'stable_count',
        'unique_in_stable_count',
        'sun_count',
    ],
    'Metastability ↑': [
        'metastable_count',
        'unique_in_metastable_count',
        'msun_count',
    ],
    'Distribution ↓': [
        'JSDistance',
        'MMD',
        'FrechetDistance',
    ],
    'Diversity ↑': [
        'element_diversity',
        'space_group_diversity',
        'site_diversity',
        'physical_size_diversity',
    ],
    'HHI ↓': [
        'hhi_production_mean',
        'hhi_reserve_mean',
    ],
}

# Compact view columns (most important metrics visible without scrolling)
COMPACT_VIEW_COLUMNS = [
    'run_name',
    'overall_valid_count',
    'unique_count',
    'novel_count',
    'stable_count',
    'metastable_count',
    'sun_count',
    'msun_count',
    'stability_mean_above_hull',
    'mean_formation_energy',
    'mean_relaxation_RMSD',
]

# Full view columns: identifier columns followed by every grouped metric in
# group order. A flat comprehension avoids the previous module-level loop,
# which iterated .items() for values only and leaked its loop variables.
FULL_VIEW_COLUMNS = ['run_name', 'n_structures'] + [
    col for group_cols in METRIC_GROUPS.values() for col in group_cols
]

# Default columns for backward compatibility
DEFAULT_COLUMNS = COMPACT_VIEW_COLUMNS
app.py CHANGED
@@ -1,6 +1,7 @@
1
  from pathlib import Path
2
  import json
3
  import pandas as pd
 
4
 
5
  import gradio as gr
6
  from datasets import load_dataset
@@ -8,16 +9,58 @@ from gradio_leaderboard import Leaderboard
8
  from datetime import datetime
9
  import os
10
 
11
- from about import PROBLEM_TYPES, TOKEN, CACHE_PATH, API, submissions_repo, results_repo
 
 
 
 
12
 
13
  def get_leaderboard():
14
  ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
15
  full_df = pd.DataFrame(ds)
 
16
  if len(full_df) == 0:
17
  return pd.DataFrame({'date':[], 'model':[], 'score':[], 'verified':[]})
18
 
19
  return full_df
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def show_output_box(message):
22
  return gr.update(value=message, visible=True)
23
 
@@ -28,21 +71,66 @@ def gradio_interface() -> gr.Blocks:
28
  with gr.Blocks() as demo:
29
  gr.Markdown("## Welcome to the LeMaterial Generative Benchmark Leaderboard!")
30
  with gr.Tabs(elem_classes="tab-buttons"):
31
- with gr.TabItem("πŸš€ Leaderboard", elem_id="boundary-benchmark-tab-table"):
32
- gr.Markdown("# LeMat-Gen-Bench")
33
-
34
- try:
35
- Leaderboard(
36
- value=get_leaderboard(),
37
- datatype=['date', 'model', 'score', 'verified'],
38
- select_columns=["model"],
39
- search_columns=["model"],
40
- filter_columns=["verified"],
41
- every=60,
42
- render=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  )
44
- except:
45
- gr.Markdown("Leaderboard is empty.")
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  gr.Markdown("Verified submissions mean the results came from a model submission rather than a CIF submission.")
48
 
 
1
  from pathlib import Path
2
  import json
3
  import pandas as pd
4
+ import numpy as np
5
 
6
  import gradio as gr
7
  from datasets import load_dataset
 
9
  from datetime import datetime
10
  import os
11
 
12
+ from about import (
13
+ PROBLEM_TYPES, TOKEN, CACHE_PATH, API, submissions_repo, results_repo,
14
+ COLUMN_DISPLAY_NAMES, COUNT_BASED_METRICS,
15
+ COMPACT_VIEW_COLUMNS, FULL_VIEW_COLUMNS
16
+ )
17
 
18
def get_leaderboard():
    """Fetch the published benchmark results as a pandas DataFrame.

    Forces a re-download of the results dataset on every call so the
    leaderboard always reflects the latest published submissions.

    Returns:
        The full results DataFrame, or an empty placeholder frame when
        no results have been published yet.
    """
    ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
    full_df = pd.DataFrame(ds)
    if len(full_df) == 0:
        # NOTE(review): this placeholder schema ('date'/'model'/'score'/
        # 'verified') does not match the real results columns used by the
        # formatting code (run_name, *_count, ...) — confirm downstream
        # handling tolerates it.
        return pd.DataFrame({'date':[], 'model':[], 'score':[], 'verified':[]})

    return full_df
26
 
27
def format_dataframe(df, show_percentage=False, view_mode="Compact"):
    """Format the results dataframe for leaderboard display.

    Args:
        df: Raw results dataframe (one row per benchmark run).
        show_percentage: If True, count-based metrics are rendered as
            percentage strings of each run's total structure count.
        view_mode: "Compact" for the key-metric subset; any other value
            selects the full grouped column set.

    Returns:
        A new dataframe restricted to the selected columns, with values
        rounded/percent-formatted and columns renamed for display. The
        input dataframe is not mutated.
    """
    if len(df) == 0:
        return df

    # Select columns based on view mode, silently skipping columns the
    # results dataset does not (yet) provide.
    source_cols = COMPACT_VIEW_COLUMNS if view_mode == "Compact" else FULL_VIEW_COLUMNS
    selected_cols = [col for col in source_cols if col in df.columns]

    # Work on a copy so the caller's dataframe is left untouched.
    display_df = df[selected_cols].copy()

    # Convert count-based metrics to percentages if requested.
    if show_percentage and 'n_structures' in df.columns:
        # Guard against zero totals: dividing by 0 would otherwise render
        # 'inf%'/'nan%' in the table. NaN denominators yield 'nan%' which
        # flags the bad row without crashing.
        n_structures = df['n_structures'].replace(0, np.nan)
        for col in COUNT_BASED_METRICS:
            if col in display_df.columns:
                # Percentage formatted as a string with a trailing '%'.
                display_df[col] = (df[col] / n_structures * 100).round(1).astype(str) + '%'

    # Round remaining numeric columns for cleaner display (percentage
    # columns are already strings and are skipped by the dtype check).
    for col in display_df.columns:
        if display_df[col].dtype in ['float64', 'float32']:
            display_df[col] = display_df[col].round(4)

    # Rename columns to their human-readable display names.
    return display_df.rename(columns=COLUMN_DISPLAY_NAMES)
58
+
59
def update_leaderboard(show_percentage, view_mode):
    """Refresh the leaderboard table for the current display options."""
    return format_dataframe(get_leaderboard(), show_percentage, view_mode)
63
+
64
def show_output_box(message):
    """Reveal the output component and fill it with *message*."""
    box_update = gr.update(value=message, visible=True)
    return box_update
66
 
 
71
  with gr.Blocks() as demo:
72
  gr.Markdown("## Welcome to the LeMaterial Generative Benchmark Leaderboard!")
73
  with gr.Tabs(elem_classes="tab-buttons"):
74
+ with gr.TabItem("πŸš€ Leaderboard", elem_id="boundary-benchmark-tab-table"):
75
+ gr.Markdown("# LeMat-GenBench")
76
+
77
+ # Display options
78
+ with gr.Row():
79
+ with gr.Column(scale=1):
80
+ view_mode = gr.Radio(
81
+ choices=["Compact", "Full"],
82
+ value="Compact",
83
+ label="View Mode",
84
+ info="Compact: Key metrics | Full: All metrics grouped by category"
85
+ )
86
+ with gr.Column(scale=1):
87
+ show_percentage = gr.Checkbox(
88
+ value=False,
89
+ label="Show as Percentages",
90
+ info="Display count-based metrics as percentages of total structures"
91
+ )
92
+
93
+ # Metric legend
94
+ with gr.Accordion("Metric Groups Legend", open=False):
95
+ legend_md = """
96
+ | Group | Metrics | Direction |
97
+ |-------|---------|-----------|
98
+ | **Validity** | Valid, Charge Neutral, Distance Valid, Plausibility Valid | ↑ Higher is better |
99
+ | **Uniqueness & Novelty** | Unique, Novel | ↑ Higher is better |
100
+ | **Energy Metrics** | E Above Hull, Formation Energy, Relaxation RMSD (with std) | ↓ Lower is better |
101
+ | **Stability** | Stable, Unique in Stable, SUN | ↑ Higher is better |
102
+ | **Metastability** | Metastable, Unique in Metastable, MSUN | ↑ Higher is better |
103
+ | **Distribution** | JS Distance, MMD, FID | ↓ Lower is better |
104
+ | **Diversity** | Element, Space Group, Atomic Site, Crystal Size | ↑ Higher is better |
105
+ | **HHI** | HHI Production, HHI Reserve | ↓ Lower is better |
106
+ """
107
+ gr.Markdown(legend_md)
108
+
109
+ try:
110
+ # Initial dataframe
111
+ initial_df = get_leaderboard()
112
+ formatted_df = format_dataframe(initial_df, show_percentage=False, view_mode="Compact")
113
+
114
+ leaderboard_table = gr.Dataframe(
115
+ label="GenBench Leaderboard",
116
+ value=formatted_df,
117
+ interactive=False,
118
  )
119
+
120
+ # Update dataframe when options change
121
+ show_percentage.change(
122
+ fn=update_leaderboard,
123
+ inputs=[show_percentage, view_mode],
124
+ outputs=leaderboard_table
125
+ )
126
+ view_mode.change(
127
+ fn=update_leaderboard,
128
+ inputs=[show_percentage, view_mode],
129
+ outputs=leaderboard_table
130
+ )
131
+
132
+ except Exception as e:
133
+ gr.Markdown(f"Leaderboard is empty or error loading: {str(e)}")
134
 
135
  gr.Markdown("Verified submissions mean the results came from a model submission rather than a CIF submission.")
136