Spaces:
Running
Running
populating leaderboard
Browse files- __pycache__/about.cpython-310.pyc +0 -0
- about.py +129 -1
- app.py +103 -15
__pycache__/about.cpython-310.pyc
ADDED
|
Binary file (2.51 kB). View file
|
|
|
about.py
CHANGED
|
@@ -7,4 +7,132 @@ CACHE_PATH=os.getenv("HF_HOME", ".")
|
|
| 7 |
API = HfApi(token=TOKEN)
|
| 8 |
organization="LeMaterial"
|
| 9 |
submissions_repo = f'{organization}/lemat-gen-bench-submissions'
|
| 10 |
-
results_repo = f'{organization}/lemat-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
API = HfApi(token=TOKEN)
|
| 8 |
organization="LeMaterial"
|
| 9 |
submissions_repo = f'{organization}/lemat-gen-bench-submissions'
|
| 10 |
+
results_repo = f'{organization}/lemat-genbench-results'
|
| 11 |
+
|
| 12 |
+
# Column display names mapping
|
| 13 |
+
COLUMN_DISPLAY_NAMES = {
|
| 14 |
+
'run_name': 'Model',
|
| 15 |
+
'n_structures': 'Total Structures',
|
| 16 |
+
# Validity metrics
|
| 17 |
+
'overall_valid_count': 'Valid',
|
| 18 |
+
'charge_neutral_count': 'Charge Neutral',
|
| 19 |
+
'distance_valid_count': 'Distance Valid',
|
| 20 |
+
'plausibility_valid_count': 'Plausibility Valid',
|
| 21 |
+
# Uniqueness and Novelty
|
| 22 |
+
'unique_count': 'Unique',
|
| 23 |
+
'novel_count': 'Novel',
|
| 24 |
+
# Energy-based metrics
|
| 25 |
+
'mean_formation_energy': 'Formation Energy (eV)',
|
| 26 |
+
'formation_energy_std': 'Formation Energy Std',
|
| 27 |
+
'stability_mean_above_hull': 'E Above Hull (eV)',
|
| 28 |
+
'stability_std_e_above_hull': 'E Above Hull Std',
|
| 29 |
+
'mean_relaxation_RMSD': 'Relaxation RMSD (Å)',
|
| 30 |
+
'relaxation_RMSE_std': 'Relaxation RMSD Std',
|
| 31 |
+
# Stability metrics
|
| 32 |
+
'stable_count': 'Stable',
|
| 33 |
+
'unique_in_stable_count': 'Unique in Stable',
|
| 34 |
+
'sun_count': 'SUN',
|
| 35 |
+
# Metastability metrics
|
| 36 |
+
'metastable_count': 'Metastable',
|
| 37 |
+
'unique_in_metastable_count': 'Unique in Metastable',
|
| 38 |
+
'msun_count': 'MSUN',
|
| 39 |
+
# Distribution metrics
|
| 40 |
+
'JSDistance': 'JS Distance',
|
| 41 |
+
'MMD': 'MMD',
|
| 42 |
+
'FrechetDistance': 'FID',
|
| 43 |
+
# Diversity metrics
|
| 44 |
+
'element_diversity': 'Element Diversity',
|
| 45 |
+
'space_group_diversity': 'Space Group Diversity',
|
| 46 |
+
'site_diversity': 'Atomic Site Diversity',
|
| 47 |
+
'physical_size_diversity': 'Crystal Size Diversity',
|
| 48 |
+
# HHI metrics
|
| 49 |
+
'hhi_production_mean': 'HHI Production',
|
| 50 |
+
'hhi_reserve_mean': 'HHI Reserve',
|
| 51 |
+
'hhi_combined_mean': 'HHI Combined',
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
# Metrics that can be shown as percentages (count-based metrics)
|
| 55 |
+
COUNT_BASED_METRICS = [
|
| 56 |
+
'overall_valid_count',
|
| 57 |
+
'charge_neutral_count',
|
| 58 |
+
'distance_valid_count',
|
| 59 |
+
'plausibility_valid_count',
|
| 60 |
+
'unique_count',
|
| 61 |
+
'novel_count',
|
| 62 |
+
'stable_count',
|
| 63 |
+
'unique_in_stable_count',
|
| 64 |
+
'sun_count',
|
| 65 |
+
'metastable_count',
|
| 66 |
+
'unique_in_metastable_count',
|
| 67 |
+
'msun_count',
|
| 68 |
+
]
|
| 69 |
+
|
| 70 |
+
# Metric groups for organized display
|
| 71 |
+
METRIC_GROUPS = {
|
| 72 |
+
'Validity ↑': [
|
| 73 |
+
'overall_valid_count',
|
| 74 |
+
'charge_neutral_count',
|
| 75 |
+
'distance_valid_count',
|
| 76 |
+
'plausibility_valid_count',
|
| 77 |
+
],
|
| 78 |
+
'Uniqueness & Novelty ↑': [
|
| 79 |
+
'unique_count',
|
| 80 |
+
'novel_count',
|
| 81 |
+
],
|
| 82 |
+
'Energy Metrics ↓': [
|
| 83 |
+
'stability_mean_above_hull',
|
| 84 |
+
'stability_std_e_above_hull',
|
| 85 |
+
'mean_formation_energy',
|
| 86 |
+
'formation_energy_std',
|
| 87 |
+
'mean_relaxation_RMSD',
|
| 88 |
+
'relaxation_RMSE_std',
|
| 89 |
+
],
|
| 90 |
+
'Stability ↑': [
|
| 91 |
+
'stable_count',
|
| 92 |
+
'unique_in_stable_count',
|
| 93 |
+
'sun_count',
|
| 94 |
+
],
|
| 95 |
+
'Metastability ↑': [
|
| 96 |
+
'metastable_count',
|
| 97 |
+
'unique_in_metastable_count',
|
| 98 |
+
'msun_count',
|
| 99 |
+
],
|
| 100 |
+
'Distribution ↓': [
|
| 101 |
+
'JSDistance',
|
| 102 |
+
'MMD',
|
| 103 |
+
'FrechetDistance',
|
| 104 |
+
],
|
| 105 |
+
'Diversity ↑': [
|
| 106 |
+
'element_diversity',
|
| 107 |
+
'space_group_diversity',
|
| 108 |
+
'site_diversity',
|
| 109 |
+
'physical_size_diversity',
|
| 110 |
+
],
|
| 111 |
+
'HHI ↓': [
|
| 112 |
+
'hhi_production_mean',
|
| 113 |
+
'hhi_reserve_mean',
|
| 114 |
+
],
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
# Compact view columns (most important metrics visible without scrolling)
|
| 118 |
+
COMPACT_VIEW_COLUMNS = [
|
| 119 |
+
'run_name',
|
| 120 |
+
'overall_valid_count',
|
| 121 |
+
'unique_count',
|
| 122 |
+
'novel_count',
|
| 123 |
+
'stable_count',
|
| 124 |
+
'metastable_count',
|
| 125 |
+
'sun_count',
|
| 126 |
+
'msun_count',
|
| 127 |
+
'stability_mean_above_hull',
|
| 128 |
+
'mean_formation_energy',
|
| 129 |
+
'mean_relaxation_RMSD',
|
| 130 |
+
]
|
| 131 |
+
|
| 132 |
+
# Full view columns (all metrics organized by groups)
|
| 133 |
+
FULL_VIEW_COLUMNS = ['run_name', 'n_structures']
|
| 134 |
+
for group_name, cols in METRIC_GROUPS.items():
|
| 135 |
+
FULL_VIEW_COLUMNS.extend(cols)
|
| 136 |
+
|
| 137 |
+
# Default columns for backward compatibility
|
| 138 |
+
DEFAULT_COLUMNS = COMPACT_VIEW_COLUMNS
|
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from pathlib import Path
|
| 2 |
import json
|
| 3 |
import pandas as pd
|
|
|
|
| 4 |
|
| 5 |
import gradio as gr
|
| 6 |
from datasets import load_dataset
|
|
@@ -8,16 +9,58 @@ from gradio_leaderboard import Leaderboard
|
|
| 8 |
from datetime import datetime
|
| 9 |
import os
|
| 10 |
|
| 11 |
-
from about import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
def get_leaderboard():
|
| 14 |
ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
|
| 15 |
full_df = pd.DataFrame(ds)
|
|
|
|
| 16 |
if len(full_df) == 0:
|
| 17 |
return pd.DataFrame({'date':[], 'model':[], 'score':[], 'verified':[]})
|
| 18 |
|
| 19 |
return full_df
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
def show_output_box(message):
|
| 22 |
return gr.update(value=message, visible=True)
|
| 23 |
|
|
@@ -28,21 +71,66 @@ def gradio_interface() -> gr.Blocks:
|
|
| 28 |
with gr.Blocks() as demo:
|
| 29 |
gr.Markdown("## Welcome to the LeMaterial Generative Benchmark Leaderboard!")
|
| 30 |
with gr.Tabs(elem_classes="tab-buttons"):
|
| 31 |
-
with gr.TabItem("π Leaderboard", elem_id="boundary-benchmark-tab-table"):
|
| 32 |
-
gr.Markdown("# LeMat-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
)
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
gr.Markdown("Verified submissions mean the results came from a model submission rather than a CIF submission.")
|
| 48 |
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
import json
|
| 3 |
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
|
| 6 |
import gradio as gr
|
| 7 |
from datasets import load_dataset
|
|
|
|
| 9 |
from datetime import datetime
|
| 10 |
import os
|
| 11 |
|
| 12 |
+
from about import (
|
| 13 |
+
PROBLEM_TYPES, TOKEN, CACHE_PATH, API, submissions_repo, results_repo,
|
| 14 |
+
COLUMN_DISPLAY_NAMES, COUNT_BASED_METRICS,
|
| 15 |
+
COMPACT_VIEW_COLUMNS, FULL_VIEW_COLUMNS
|
| 16 |
+
)
|
| 17 |
|
| 18 |
def get_leaderboard():
|
| 19 |
ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
|
| 20 |
full_df = pd.DataFrame(ds)
|
| 21 |
+
print(full_df.columns)
|
| 22 |
if len(full_df) == 0:
|
| 23 |
return pd.DataFrame({'date':[], 'model':[], 'score':[], 'verified':[]})
|
| 24 |
|
| 25 |
return full_df
|
| 26 |
|
| 27 |
+
def format_dataframe(df, show_percentage=False, view_mode="Compact"):
|
| 28 |
+
"""Format the dataframe with proper column names and optional percentages."""
|
| 29 |
+
if len(df) == 0:
|
| 30 |
+
return df
|
| 31 |
+
|
| 32 |
+
# Select columns based on view mode
|
| 33 |
+
if view_mode == "Compact":
|
| 34 |
+
selected_cols = [col for col in COMPACT_VIEW_COLUMNS if col in df.columns]
|
| 35 |
+
else: # Full view
|
| 36 |
+
selected_cols = [col for col in FULL_VIEW_COLUMNS if col in df.columns]
|
| 37 |
+
|
| 38 |
+
# Create a copy with selected columns
|
| 39 |
+
display_df = df[selected_cols].copy()
|
| 40 |
+
|
| 41 |
+
# Convert count-based metrics to percentages if requested
|
| 42 |
+
if show_percentage and 'n_structures' in df.columns:
|
| 43 |
+
n_structures = df['n_structures']
|
| 44 |
+
for col in COUNT_BASED_METRICS:
|
| 45 |
+
if col in display_df.columns:
|
| 46 |
+
# Calculate percentage and format as string with %
|
| 47 |
+
display_df[col] = (df[col] / n_structures * 100).round(1).astype(str) + '%'
|
| 48 |
+
|
| 49 |
+
# Round numeric columns for cleaner display
|
| 50 |
+
for col in display_df.columns:
|
| 51 |
+
if display_df[col].dtype in ['float64', 'float32']:
|
| 52 |
+
display_df[col] = display_df[col].round(4)
|
| 53 |
+
|
| 54 |
+
# Rename columns for display
|
| 55 |
+
display_df = display_df.rename(columns=COLUMN_DISPLAY_NAMES)
|
| 56 |
+
|
| 57 |
+
return display_df
|
| 58 |
+
|
| 59 |
+
def update_leaderboard(show_percentage, view_mode):
|
| 60 |
+
"""Update the leaderboard based on user selections."""
|
| 61 |
+
df = get_leaderboard()
|
| 62 |
+
return format_dataframe(df, show_percentage, view_mode)
|
| 63 |
+
|
| 64 |
def show_output_box(message):
|
| 65 |
return gr.update(value=message, visible=True)
|
| 66 |
|
|
|
|
| 71 |
with gr.Blocks() as demo:
|
| 72 |
gr.Markdown("## Welcome to the LeMaterial Generative Benchmark Leaderboard!")
|
| 73 |
with gr.Tabs(elem_classes="tab-buttons"):
|
| 74 |
+
with gr.TabItem("π Leaderboard", elem_id="boundary-benchmark-tab-table"):
|
| 75 |
+
gr.Markdown("# LeMat-GenBench")
|
| 76 |
+
|
| 77 |
+
# Display options
|
| 78 |
+
with gr.Row():
|
| 79 |
+
with gr.Column(scale=1):
|
| 80 |
+
view_mode = gr.Radio(
|
| 81 |
+
choices=["Compact", "Full"],
|
| 82 |
+
value="Compact",
|
| 83 |
+
label="View Mode",
|
| 84 |
+
info="Compact: Key metrics | Full: All metrics grouped by category"
|
| 85 |
+
)
|
| 86 |
+
with gr.Column(scale=1):
|
| 87 |
+
show_percentage = gr.Checkbox(
|
| 88 |
+
value=False,
|
| 89 |
+
label="Show as Percentages",
|
| 90 |
+
info="Display count-based metrics as percentages of total structures"
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
# Metric legend
|
| 94 |
+
with gr.Accordion("Metric Groups Legend", open=False):
|
| 95 |
+
legend_md = """
|
| 96 |
+
| Group | Metrics | Direction |
|
| 97 |
+
|-------|---------|-----------|
|
| 98 |
+
| **Validity** | Valid, Charge Neutral, Distance Valid, Plausibility Valid | ↑ Higher is better |
|
| 99 |
+
| **Uniqueness & Novelty** | Unique, Novel | ↑ Higher is better |
|
| 100 |
+
| **Energy Metrics** | E Above Hull, Formation Energy, Relaxation RMSD (with std) | ↓ Lower is better |
|
| 101 |
+
| **Stability** | Stable, Unique in Stable, SUN | ↑ Higher is better |
|
| 102 |
+
| **Metastability** | Metastable, Unique in Metastable, MSUN | ↑ Higher is better |
|
| 103 |
+
| **Distribution** | JS Distance, MMD, FID | ↓ Lower is better |
|
| 104 |
+
| **Diversity** | Element, Space Group, Atomic Site, Crystal Size | ↑ Higher is better |
|
| 105 |
+
| **HHI** | HHI Production, HHI Reserve | ↓ Lower is better |
|
| 106 |
+
"""
|
| 107 |
+
gr.Markdown(legend_md)
|
| 108 |
+
|
| 109 |
+
try:
|
| 110 |
+
# Initial dataframe
|
| 111 |
+
initial_df = get_leaderboard()
|
| 112 |
+
formatted_df = format_dataframe(initial_df, show_percentage=False, view_mode="Compact")
|
| 113 |
+
|
| 114 |
+
leaderboard_table = gr.Dataframe(
|
| 115 |
+
label="GenBench Leaderboard",
|
| 116 |
+
value=formatted_df,
|
| 117 |
+
interactive=False,
|
| 118 |
)
|
| 119 |
+
|
| 120 |
+
# Update dataframe when options change
|
| 121 |
+
show_percentage.change(
|
| 122 |
+
fn=update_leaderboard,
|
| 123 |
+
inputs=[show_percentage, view_mode],
|
| 124 |
+
outputs=leaderboard_table
|
| 125 |
+
)
|
| 126 |
+
view_mode.change(
|
| 127 |
+
fn=update_leaderboard,
|
| 128 |
+
inputs=[show_percentage, view_mode],
|
| 129 |
+
outputs=leaderboard_table
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
except Exception as e:
|
| 133 |
+
gr.Markdown(f"Leaderboard is empty or error loading: {str(e)}")
|
| 134 |
|
| 135 |
gr.Markdown("Verified submissions mean the results came from a model submission rather than a CIF submission.")
|
| 136 |
|