cgeorgiaw HF Staff committed on
Commit
f83fa62
·
1 Parent(s): fbafbd7

populating leaderboard

Browse files
Files changed (3) hide show
  1. __pycache__/about.cpython-310.pyc +0 -0
  2. about.py +129 -1
  3. app.py +103 -15
__pycache__/about.cpython-310.pyc ADDED
Binary file (2.51 kB). View file
 
about.py CHANGED
@@ -7,4 +7,132 @@ CACHE_PATH=os.getenv("HF_HOME", ".")
7
  API = HfApi(token=TOKEN)
8
  organization="LeMaterial"
9
  submissions_repo = f'{organization}/lemat-gen-bench-submissions'
10
- results_repo = f'{organization}/lemat-gen-bench-results'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  API = HfApi(token=TOKEN)
8
  organization="LeMaterial"
9
  submissions_repo = f'{organization}/lemat-gen-bench-submissions'
10
+ results_repo = f'{organization}/lemat-genbench-results'
11
+
12
# Column display names mapping: raw results-dataset column -> leaderboard header.
COLUMN_DISPLAY_NAMES = {
    'run_name': 'Model',
    'n_structures': 'Total Structures',
    # Validity metrics
    'overall_valid_count': 'Valid',
    'charge_neutral_count': 'Charge Neutral',
    'distance_valid_count': 'Distance Valid',
    'plausibility_valid_count': 'Plausibility Valid',
    # Uniqueness and Novelty
    'unique_count': 'Unique',
    'novel_count': 'Novel',
    # Energy-based metrics
    'mean_formation_energy': 'Formation Energy (eV)',
    'formation_energy_std': 'Formation Energy Std',
    'stability_mean_above_hull': 'E Above Hull (eV)',
    'stability_std_e_above_hull': 'E Above Hull Std',
    'mean_relaxation_RMSD': 'Relaxation RMSD (Å)',
    'relaxation_RMSE_std': 'Relaxation RMSD Std',
    # Stability metrics
    'stable_count': 'Stable',
    'unique_in_stable_count': 'Unique in Stable',
    'sun_count': 'SUN',
    # Metastability metrics
    'metastable_count': 'Metastable',
    'unique_in_metastable_count': 'Unique in Metastable',
    'msun_count': 'MSUN',
    # Distribution metrics
    'JSDistance': 'JS Distance',
    'MMD': 'MMD',
    'FrechetDistance': 'FID',
    # Diversity metrics
    'element_diversity': 'Element Diversity',
    'space_group_diversity': 'Space Group Diversity',
    'site_diversity': 'Atomic Site Diversity',
    'physical_size_diversity': 'Crystal Size Diversity',
    # HHI metrics
    'hhi_production_mean': 'HHI Production',
    'hhi_reserve_mean': 'HHI Reserve',
    'hhi_combined_mean': 'HHI Combined',
}

# Metrics that can be shown as percentages (count-based metrics, each a
# count out of the run's total n_structures).
COUNT_BASED_METRICS = [
    'overall_valid_count',
    'charge_neutral_count',
    'distance_valid_count',
    'plausibility_valid_count',
    'unique_count',
    'novel_count',
    'stable_count',
    'unique_in_stable_count',
    'sun_count',
    'metastable_count',
    'unique_in_metastable_count',
    'msun_count',
]

# Metric groups for organized display; the arrow in each group name marks
# the preferred direction (↑ higher is better, ↓ lower is better).
# NOTE(review): 'hhi_combined_mean' has a display name above but is not in
# any group, so it never appears in the full view — confirm intentional.
METRIC_GROUPS = {
    'Validity ↑': [
        'overall_valid_count',
        'charge_neutral_count',
        'distance_valid_count',
        'plausibility_valid_count',
    ],
    'Uniqueness & Novelty ↑': [
        'unique_count',
        'novel_count',
    ],
    'Energy Metrics ↓': [
        'stability_mean_above_hull',
        'stability_std_e_above_hull',
        'mean_formation_energy',
        'formation_energy_std',
        'mean_relaxation_RMSD',
        'relaxation_RMSE_std',
    ],
    'Stability ↑': [
        'stable_count',
        'unique_in_stable_count',
        'sun_count',
    ],
    'Metastability ↑': [
        'metastable_count',
        'unique_in_metastable_count',
        'msun_count',
    ],
    'Distribution ↓': [
        'JSDistance',
        'MMD',
        'FrechetDistance',
    ],
    'Diversity ↑': [
        'element_diversity',
        'space_group_diversity',
        'site_diversity',
        'physical_size_diversity',
    ],
    'HHI ↓': [
        'hhi_production_mean',
        'hhi_reserve_mean',
    ],
}

# Compact view columns (most important metrics visible without scrolling)
COMPACT_VIEW_COLUMNS = [
    'run_name',
    'overall_valid_count',
    'unique_count',
    'novel_count',
    'stable_count',
    'metastable_count',
    'sun_count',
    'msun_count',
    'stability_mean_above_hull',
    'mean_formation_energy',
    'mean_relaxation_RMSD',
]

# Full view columns: identifier columns followed by every grouped metric in
# group order. A flat comprehension avoids the previous module-level loop,
# which iterated .items() for values only and leaked its loop variables.
FULL_VIEW_COLUMNS = ['run_name', 'n_structures'] + [
    col for group_cols in METRIC_GROUPS.values() for col in group_cols
]

# Default columns for backward compatibility
DEFAULT_COLUMNS = COMPACT_VIEW_COLUMNS
app.py CHANGED
@@ -1,6 +1,7 @@
1
  from pathlib import Path
2
  import json
3
  import pandas as pd
 
4
 
5
  import gradio as gr
6
  from datasets import load_dataset
@@ -8,16 +9,58 @@ from gradio_leaderboard import Leaderboard
8
  from datetime import datetime
9
  import os
10
 
11
- from about import PROBLEM_TYPES, TOKEN, CACHE_PATH, API, submissions_repo, results_repo
 
 
 
 
12
 
13
  def get_leaderboard():
14
  ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
15
  full_df = pd.DataFrame(ds)
 
16
  if len(full_df) == 0:
17
  return pd.DataFrame({'date':[], 'model':[], 'score':[], 'verified':[]})
18
 
19
  return full_df
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def show_output_box(message):
22
  return gr.update(value=message, visible=True)
23
 
@@ -28,21 +71,66 @@ def gradio_interface() -> gr.Blocks:
28
  with gr.Blocks() as demo:
29
  gr.Markdown("## Welcome to the LeMaterial Generative Benchmark Leaderboard!")
30
  with gr.Tabs(elem_classes="tab-buttons"):
31
- with gr.TabItem("πŸš€ Leaderboard", elem_id="boundary-benchmark-tab-table"):
32
- gr.Markdown("# LeMat-Gen-Bench")
33
-
34
- try:
35
- Leaderboard(
36
- value=get_leaderboard(),
37
- datatype=['date', 'model', 'score', 'verified'],
38
- select_columns=["model"],
39
- search_columns=["model"],
40
- filter_columns=["verified"],
41
- every=60,
42
- render=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  )
44
- except:
45
- gr.Markdown("Leaderboard is empty.")
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  gr.Markdown("Verified submissions mean the results came from a model submission rather than a CIF submission.")
48
 
 
1
  from pathlib import Path
2
  import json
3
  import pandas as pd
4
+ import numpy as np
5
 
6
  import gradio as gr
7
  from datasets import load_dataset
 
9
  from datetime import datetime
10
  import os
11
 
12
+ from about import (
13
+ PROBLEM_TYPES, TOKEN, CACHE_PATH, API, submissions_repo, results_repo,
14
+ COLUMN_DISPLAY_NAMES, COUNT_BASED_METRICS,
15
+ COMPACT_VIEW_COLUMNS, FULL_VIEW_COLUMNS
16
+ )
17
 
18
def get_leaderboard():
    """Fetch the published benchmark results as a pandas DataFrame.

    Forces a re-download of the results dataset on every call so the
    leaderboard always reflects the latest published submissions.

    Returns:
        The full results DataFrame, or an empty placeholder frame when
        no results have been published yet.
    """
    ds = load_dataset(results_repo, split='train', download_mode="force_redownload")
    full_df = pd.DataFrame(ds)
    if len(full_df) == 0:
        # NOTE(review): this placeholder schema ('date'/'model'/'score'/
        # 'verified') does not match the real results columns used by the
        # formatting code (run_name, *_count, ...) — confirm downstream
        # handling tolerates it.
        return pd.DataFrame({'date':[], 'model':[], 'score':[], 'verified':[]})

    return full_df
26
 
27
def format_dataframe(df, show_percentage=False, view_mode="Compact"):
    """Format the results dataframe for leaderboard display.

    Args:
        df: Raw results dataframe (one row per benchmark run).
        show_percentage: If True, count-based metrics are rendered as
            percentage strings of each run's total structure count.
        view_mode: "Compact" for the key-metric subset; any other value
            selects the full grouped column set.

    Returns:
        A new dataframe restricted to the selected columns, with values
        rounded/percent-formatted and columns renamed for display. The
        input dataframe is not mutated.
    """
    if len(df) == 0:
        return df

    # Select columns based on view mode, silently skipping columns the
    # results dataset does not (yet) provide.
    source_cols = COMPACT_VIEW_COLUMNS if view_mode == "Compact" else FULL_VIEW_COLUMNS
    selected_cols = [col for col in source_cols if col in df.columns]

    # Work on a copy so the caller's dataframe is left untouched.
    display_df = df[selected_cols].copy()

    # Convert count-based metrics to percentages if requested.
    if show_percentage and 'n_structures' in df.columns:
        # Guard against zero totals: dividing by 0 would otherwise render
        # 'inf%'/'nan%' in the table. NaN denominators yield 'nan%' which
        # flags the bad row without crashing.
        n_structures = df['n_structures'].replace(0, np.nan)
        for col in COUNT_BASED_METRICS:
            if col in display_df.columns:
                # Percentage formatted as a string with a trailing '%'.
                display_df[col] = (df[col] / n_structures * 100).round(1).astype(str) + '%'

    # Round remaining numeric columns for cleaner display (percentage
    # columns are already strings and are skipped by the dtype check).
    for col in display_df.columns:
        if display_df[col].dtype in ['float64', 'float32']:
            display_df[col] = display_df[col].round(4)

    # Rename columns to their human-readable display names.
    return display_df.rename(columns=COLUMN_DISPLAY_NAMES)
58
+
59
def update_leaderboard(show_percentage, view_mode):
    """Refresh the leaderboard table for the current display options."""
    return format_dataframe(get_leaderboard(), show_percentage, view_mode)
63
+
64
def show_output_box(message):
    """Reveal the output component and fill it with *message*."""
    box_update = gr.update(value=message, visible=True)
    return box_update
66
 
 
71
  with gr.Blocks() as demo:
72
  gr.Markdown("## Welcome to the LeMaterial Generative Benchmark Leaderboard!")
73
  with gr.Tabs(elem_classes="tab-buttons"):
74
+ with gr.TabItem("πŸš€ Leaderboard", elem_id="boundary-benchmark-tab-table"):
75
+ gr.Markdown("# LeMat-GenBench")
76
+
77
+ # Display options
78
+ with gr.Row():
79
+ with gr.Column(scale=1):
80
+ view_mode = gr.Radio(
81
+ choices=["Compact", "Full"],
82
+ value="Compact",
83
+ label="View Mode",
84
+ info="Compact: Key metrics | Full: All metrics grouped by category"
85
+ )
86
+ with gr.Column(scale=1):
87
+ show_percentage = gr.Checkbox(
88
+ value=False,
89
+ label="Show as Percentages",
90
+ info="Display count-based metrics as percentages of total structures"
91
+ )
92
+
93
+ # Metric legend
94
+ with gr.Accordion("Metric Groups Legend", open=False):
95
+ legend_md = """
96
+ | Group | Metrics | Direction |
97
+ |-------|---------|-----------|
98
+ | **Validity** | Valid, Charge Neutral, Distance Valid, Plausibility Valid | ↑ Higher is better |
99
+ | **Uniqueness & Novelty** | Unique, Novel | ↑ Higher is better |
100
+ | **Energy Metrics** | E Above Hull, Formation Energy, Relaxation RMSD (with std) | ↓ Lower is better |
101
+ | **Stability** | Stable, Unique in Stable, SUN | ↑ Higher is better |
102
+ | **Metastability** | Metastable, Unique in Metastable, MSUN | ↑ Higher is better |
103
+ | **Distribution** | JS Distance, MMD, FID | ↓ Lower is better |
104
+ | **Diversity** | Element, Space Group, Atomic Site, Crystal Size | ↑ Higher is better |
105
+ | **HHI** | HHI Production, HHI Reserve | ↓ Lower is better |
106
+ """
107
+ gr.Markdown(legend_md)
108
+
109
+ try:
110
+ # Initial dataframe
111
+ initial_df = get_leaderboard()
112
+ formatted_df = format_dataframe(initial_df, show_percentage=False, view_mode="Compact")
113
+
114
+ leaderboard_table = gr.Dataframe(
115
+ label="GenBench Leaderboard",
116
+ value=formatted_df,
117
+ interactive=False,
118
  )
119
+
120
+ # Update dataframe when options change
121
+ show_percentage.change(
122
+ fn=update_leaderboard,
123
+ inputs=[show_percentage, view_mode],
124
+ outputs=leaderboard_table
125
+ )
126
+ view_mode.change(
127
+ fn=update_leaderboard,
128
+ inputs=[show_percentage, view_mode],
129
+ outputs=leaderboard_table
130
+ )
131
+
132
+ except Exception as e:
133
+ gr.Markdown(f"Leaderboard is empty or error loading: {str(e)}")
134
 
135
  gr.Markdown("Verified submissions mean the results came from a model submission rather than a CIF submission.")
136