<!doctype html>
<!-- The doctype must be the first thing in the file: any text before it puts browsers in quirks mode. -->
<html lang="en">
<head>
  <!-- Only metadata elements belong here; stray text would end <head> early. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>LLM Benchmarks — Overall Rank</title>
<style>
  /* ---------- Design tokens (dark theme palette) ---------- */
  :root {
    --bg: #0b1220;
    --panel: #0e1626;
    --panel-2: #0b1220;
    --border: #1f2937;
    --text: #e5e7eb;
    --muted: #a3aab8;
    --head-bg: #111827;
    --head-fg: #ffffff;
    --accent: #60a5fa;
    --shadow: 0 8px 24px rgba(0,0,0,.35);
  }

  /* ---------- Page base ---------- */
  html, body { height: 100%; }

  body {
    margin: 0;
    background: var(--bg);
    color: var(--text);
    font: 14px/1.45 Inter, Roboto, "Helvetica Neue", Arial, system-ui, -apple-system, Segoe UI, Noto Sans, sans-serif;
    -webkit-font-smoothing: antialiased;
    /* Tabular, lining figures so numeric columns line up digit by digit. */
    font-feature-settings: "tnum" 1, "lnum" 1;
  }

  /* ---------- Layout and headings ---------- */
  .wrap {
    max-width: 1200px;
    margin: 32px auto 48px;
    padding: 0 16px;
  }

  .title {
    margin: 0 0 12px;
    font-size: 22px;
    font-weight: 800;
    letter-spacing: .2px;
  }

  .subtitle {
    margin: 0 0 20px;
    color: var(--muted);
    font-size: 13px;
  }

  /* ---------- Card that frames (and scrolls) the table ---------- */
  .table-card {
    background: var(--panel);
    border: 1px solid var(--border);
    border-radius: 14px;
    box-shadow: var(--shadow);
    /* The table has a min-width, so narrow viewports scroll inside the card. */
    overflow: auto;
  }

  /* ---------- Table ---------- */
  table {
    width: 100%;
    /* "separate" with zero spacing keeps per-cell border-radius working. */
    border-collapse: separate;
    border-spacing: 0;
    min-width: 720px;
  }

  thead th {
    position: sticky;
    top: 0;
    z-index: 2;
    background: var(--head-bg);
    color: var(--head-fg);
    text-align: left;
    font-weight: 700;
    padding: 12px 14px;
    border-bottom: 1px solid var(--border);
  }

  tbody td {
    padding: 10px 14px;
    border-bottom: 1px solid var(--border);
    vertical-align: middle;
  }

  /* Zebra striping plus a hover ring drawn inside the row. */
  tbody tr:nth-child(odd) { background: var(--panel-2); }
  tbody tr:nth-child(even) { background: var(--panel); }
  tbody tr:hover { outline: 2px solid rgba(96,165,250,.35); outline-offset: -2px; }

  /* First column reads as text (left); every other column as figures (right). */
  tbody td:first-child, thead th:first-child { text-align: left; }
  tbody td:not(:first-child), thead th:not(:first-child) { text-align: right; }

  /* Round the four outer corners so cell backgrounds follow the card radius. */
  thead th:first-child { border-top-left-radius: 14px; }
  thead th:last-child { border-top-right-radius: 14px; }
  tbody tr:last-child td:first-child { border-bottom-left-radius: 14px; }
  tbody tr:last-child td:last-child { border-bottom-right-radius: 14px; }

  /* ---------- Top-three emphasis (classes and .medal are added by script) ---------- */
  .top1 td, .top2 td, .top3 td { font-weight: 800; }
  .medal { margin-left: 6px; font-size: 13px; opacity: .95; }

  /* ---------- Table caption ---------- */
  caption {
    caption-side: top;
    padding: 14px 14px 6px;
    text-align: left;
    color: var(--muted);
    font-weight: 600;
    letter-spacing: .2px;
  }

  /* ---------- Links ---------- */
  a { color: var(--accent); text-decoration: none; }
  a:hover { text-decoration: underline; }
</style>
</head>
<body>
<!-- Page container: heading followed by the card that frames the table. -->
<div class="wrap">
<h1 class="title">Overall Rank (Average Rank)</h1>
<div class="table-card">
<!-- #table-slot is the hook the inline script uses to locate the table. -->
<div id="table-slot">
<table class="dataframe">
  <!--
    Benchmark results, one row per model.
    The inline script finds columns by these header labels
    ("Model Name", "Total Time", "GPU Util Time", "Overall Rank"),
    so keep the label text in sync with it when renaming a column.
  -->
  <thead>
    <tr>
      <th scope="col">Model Name</th>
      <th scope="col">Total Time</th>
      <th scope="col">GPU Util Time</th>
      <th scope="col">Mean Score</th>
      <th scope="col">Overall Rank</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>google_gemma-3-12b-it</td>
      <td>15h 45m</td>
      <td>14h 8m</td>
      <td>0.6038</td>
      <td>1</td>
    </tr>
    <tr>
      <td>Qwen_Qwen3-14B (8bit)</td>
      <td>29h 45m</td>
      <td>17h 29m</td>
      <td>0.5961</td>
      <td>2</td>
    </tr>
    <tr>
      <td>openchat_openchat-3.6-8b-20240522</td>
      <td>7h 51m</td>
      <td>6h 59m</td>
      <td>0.5871</td>
      <td>3</td>
    </tr>
    <tr>
      <td>Qwen_Qwen3-8B</td>
      <td>15h 31m</td>
      <td>13h 44m</td>
      <td>0.5859</td>
      <td>4</td>
    </tr>
    <tr>
      <td>Qwen_Qwen2.5-7B-Instruct</td>
      <td>9h 36m</td>
      <td>8h 33m</td>
      <td>0.5788</td>
      <td>5</td>
    </tr>
    <tr>
      <td>Qwen_Qwen2.5-14B-Instruct (8bit)</td>
      <td>52h 44m</td>
      <td>29h 32m</td>
      <td>0.5775</td>
      <td>6</td>
    </tr>
    <tr>
      <td>01-ai_Yi-1.5-9B</td>
      <td>11h 43m</td>
      <td>10h 26m</td>
      <td>0.5676</td>
      <td>7</td>
    </tr>
    <tr>
      <td>Qwen_Qwen2.5-7B-Instruct-1M</td>
      <td>11h 17m</td>
      <td>10h 10m</td>
      <td>0.5672</td>
      <td>8</td>
    </tr>
    <tr>
      <td>meta-llama_Llama-3.1-8B-Instruct</td>
      <td>12h 19m</td>
      <td>10h 52m</td>
      <td>0.5653</td>
      <td>9</td>
    </tr>
    <tr>
      <td>01-ai_Yi-1.5-9B-Chat</td>
      <td>13h 54m</td>
      <td>12h 15m</td>
      <td>0.5621</td>
      <td>10</td>
    </tr>
    <tr>
      <td>mistralai_Ministral-8B-Instruct-2410</td>
      <td>10h 46m</td>
      <td>9h 27m</td>
      <td>0.5576</td>
      <td>11</td>
    </tr>
    <tr>
      <td>meta-llama_Meta-Llama-3-8B-Instruct</td>
      <td>6h 30m</td>
      <td>5h 46m</td>
      <td>0.5528</td>
      <td>12</td>
    </tr>
    <tr>
      <td>Qwen_Qwen3-4B</td>
      <td>5h 51m</td>
      <td>5h 3m</td>
      <td>0.5510</td>
      <td>13</td>
    </tr>
    <tr>
      <td>NousResearch_Hermes-2-Pro-Mistral-7B</td>
      <td>8h 27m</td>
      <td>7h 28m</td>
      <td>0.5480</td>
      <td>14</td>
    </tr>
    <tr>
      <td>mistralai_Mistral-7B-Instruct-v0.3</td>
      <td>8h 38m</td>
      <td>7h 41m</td>
      <td>0.5451</td>
      <td>15</td>
    </tr>
    <tr>
      <td>google_gemma-3-4b-it</td>
      <td>4h 51m</td>
      <td>3h 50m</td>
      <td>0.5368</td>
      <td>16</td>
    </tr>
    <tr>
      <td>01-ai_Yi-1.5-6B-Chat</td>
      <td>8h 4m</td>
      <td>7h 1m</td>
      <td>0.5335</td>
      <td>17</td>
    </tr>
    <tr>
      <td>01-ai_Yi-1.5-6B</td>
      <td>4h 28m</td>
      <td>3h 54m</td>
      <td>0.5312</td>
      <td>18</td>
    </tr>
    <tr>
      <td>Qwen_Qwen2-7B-Instruct</td>
      <td>11h 30m</td>
      <td>10h 11m</td>
      <td>0.5271</td>
      <td>19</td>
    </tr>
    <tr>
      <td>deepseek-ai_DeepSeek-R1-0528-Qwen3-8B</td>
      <td>17h 57m</td>
      <td>15h 30m</td>
      <td>0.5219</td>
      <td>20</td>
    </tr>
    <tr>
      <td>meta-llama_Llama-3.2-3B-Instruct</td>
      <td>7h 12m</td>
      <td>5h 57m</td>
      <td>0.5048</td>
      <td>21</td>
    </tr>
    <tr>
      <td>Qwen_Qwen2.5-3B-Instruct</td>
      <td>7h 48m</td>
      <td>6h 30m</td>
      <td>0.4939</td>
      <td>22</td>
    </tr>
    <tr>
      <td>Qwen_Qwen2.5-Math-7B</td>
      <td>27h 21m</td>
      <td>24h 38m</td>
      <td>0.4907</td>
      <td>23</td>
    </tr>
    <tr>
      <td>deepseek-ai_deepseek-llm-7b-chat</td>
      <td>10h 6m</td>
      <td>9h 8m</td>
      <td>0.4869</td>
      <td>24</td>
    </tr>
    <tr>
      <td>deepseek-ai_DeepSeek-R1-Distill-Llama-8B</td>
      <td>11h 46m</td>
      <td>10h 36m</td>
      <td>0.4830</td>
      <td>25</td>
    </tr>
    <tr>
      <td>meta-llama_Llama-2-13b-hf</td>
      <td>19h 21m</td>
      <td>17h 38m</td>
      <td>0.4819</td>
      <td>26</td>
    </tr>
    <tr>
      <td>meta-llama_Llama-2-13b-chat-hf</td>
      <td>17h 8m</td>
      <td>15h 37m</td>
      <td>0.4813</td>
      <td>27</td>
    </tr>
    <tr>
      <td>deepseek-ai_DeepSeek-R1-Distill-Qwen-7B</td>
      <td>6h 28m</td>
      <td>5h 43m</td>
      <td>0.4644</td>
      <td>28</td>
    </tr>
    <tr>
      <td>Qwen_Qwen2.5-1.5B-Instruct</td>
      <td>3h 20m</td>
      <td>2h 36m</td>
      <td>0.4608</td>
      <td>29</td>
    </tr>
    <tr>
      <td>Qwen_Qwen3-1.7B</td>
      <td>4h 25m</td>
      <td>3h 36m</td>
      <td>0.4597</td>
      <td>30</td>
    </tr>
    <tr>
      <td>Qwen_Qwen2.5-Math-7B-Instruct</td>
      <td>5h 37m</td>
      <td>4h 57m</td>
      <td>0.4596</td>
      <td>31</td>
    </tr>
    <tr>
      <td>meta-llama_Llama-2-7b-chat-hf</td>
      <td>6h 57m</td>
      <td>6h 7m</td>
      <td>0.4525</td>
      <td>32</td>
    </tr>
    <tr>
      <td>meta-llama_Llama-2-7b-hf</td>
      <td>5h 42m</td>
      <td>4h 59m</td>
      <td>0.4516</td>
      <td>33</td>
    </tr>
    <tr>
      <td>deepseek-ai_deepseek-llm-7b-base</td>
      <td>7h 11m</td>
      <td>6h 26m</td>
      <td>0.4451</td>
      <td>34</td>
    </tr>
    <tr>
      <td>deepseek-ai_deepseek-math-7b-rl</td>
      <td>8h 2m</td>
      <td>7h 12m</td>
      <td>0.4419</td>
      <td>35</td>
    </tr>
    <tr>
      <td>meta-llama_Llama-3.2-1B-Instruct</td>
      <td>3h 30m</td>
      <td>2h 35m</td>
      <td>0.4219</td>
      <td>36</td>
    </tr>
    <tr>
      <td>google_gemma-3-1b-it</td>
      <td>6h 50m</td>
      <td>4h 52m</td>
      <td>0.4013</td>
      <td>37</td>
    </tr>
    <tr>
      <td>deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B</td>
      <td>3h 40m</td>
      <td>2h 52m</td>
      <td>0.3986</td>
      <td>38</td>
    </tr>
    <tr>
      <td>Qwen_Qwen2.5-Math-1.5B-Instruct</td>
      <td>3h 25m</td>
      <td>2h 39m</td>
      <td>0.3838</td>
      <td>39</td>
    </tr>
    <tr>
      <td>Qwen_Qwen3-0.6B</td>
      <td>3h 45m</td>
      <td>2h 53m</td>
      <td>0.3816</td>
      <td>40</td>
    </tr>
    <tr>
      <td>Qwen_Qwen2.5-0.5B-Instruct</td>
      <td>2h 34m</td>
      <td>1h 48m</td>
      <td>0.3799</td>
      <td>41</td>
    </tr>
  </tbody>
</table>
</div><!-- /#table-slot -->
</div><!-- /.table-card -->
</div><!-- /.wrap -->
<script> |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(function () { |
|
|
const slot = document.getElementById('table-slot'); |
|
|
const table = slot.querySelector('table'); |
|
|
if (!table) return; |
|
|
|
|
|
|
|
|
let thead = table.querySelector('thead'); |
|
|
if (!thead) { |
|
|
const firstRow = table.querySelector('tr'); |
|
|
if (firstRow) { |
|
|
thead = document.createElement('thead'); |
|
|
const headRow = firstRow.cloneNode(true); |
|
|
thead.appendChild(headRow); |
|
|
table.insertBefore(thead, firstRow); |
|
|
firstRow.remove(); |
|
|
const tbody = table.querySelector('tbody') || table.createTBody(); |
|
|
|
|
|
Array.from(table.querySelectorAll('tr')).forEach(tr => tbody.appendChild(tr)); |
|
|
} |
|
|
} |
|
|
|
|
|
const headCells = Array.from(table.querySelectorAll('thead th, thead td')).map(th => th.textContent.trim()); |
|
|
const bodyRows = Array.from(table.querySelectorAll('tbody tr')); |
|
|
|
|
|
|
|
|
const nameIdx = headCells.findIndex(h => /model\s*name/i.test(h)); |
|
|
const rankIdx = headCells.findIndex(h => /overall\s*rank/i.test(h)); |
|
|
const totalIdx = headCells.findIndex(h => /total\s*time/i.test(h)); |
|
|
const utilIdx = headCells.findIndex(h => /gpu\s*util\s*time/i.test(h)); |
|
|
|
|
|
|
|
|
bodyRows.forEach(row => { |
|
|
const cells = row.children; |
|
|
if (nameIdx >= 0 && cells[nameIdx]) cells[nameIdx].style.textAlign = 'left'; |
|
|
[rankIdx, totalIdx, utilIdx].forEach(i => { |
|
|
if (i >= 0 && cells[i]) cells[i].style.textAlign = 'right'; |
|
|
}); |
|
|
}); |
|
|
|
|
|
|
|
|
if (rankIdx >= 0) { |
|
|
bodyRows.forEach(row => { |
|
|
const cell = row.children[rankIdx]; |
|
|
if (!cell) return; |
|
|
const n = parseInt((cell.textContent || '').replace(/[^\d]/g, ''), 10); |
|
|
if (n === 1) { row.classList.add('top1'); cell.insertAdjacentHTML('beforeend', '<span class="medal">🥇</span>'); } |
|
|
if (n === 2) { row.classList.add('top2'); cell.insertAdjacentHTML('beforeend', '<span class="medal">🥈</span>'); } |
|
|
if (n === 3) { row.classList.add('top3'); cell.insertAdjacentHTML('beforeend', '<span class="medal">🥉</span>'); } |
|
|
}); |
|
|
} |
|
|
|
|
|
|
|
|
if (nameIdx >= 0) { |
|
|
bodyRows.forEach(row => { |
|
|
const c = row.children[nameIdx]; |
|
|
if (c) { c.style.whiteSpace = 'nowrap'; c.style.textOverflow = 'ellipsis'; c.style.overflow = 'hidden'; maxNameWidth(c, 520); } |
|
|
}); |
|
|
} |
|
|
function maxNameWidth(td, px) { td.style.maxWidth = px + 'px'; } |
|
|
})(); |
|
|
</script> |
|
|
</body>
</html>