| | <!DOCTYPE html>
|
| | <html lang="en">
|
| |
|
| | <head>
|
| | <meta charset="UTF-8" />
|
| | <meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| | <script>
|
// Once the DOM is parsed, re-apply the dark theme if the stored `darkMode`
// preference is 'enabled' (the value toggleDarkMode writes for the dark state).
document.addEventListener('DOMContentLoaded', () => {
  if (localStorage.getItem('darkMode') === 'enabled') {
    document.body.classList.add('dark-mode');
  }
});
|
| |
|
/**
 * Flip the `dark-mode` class on the document body and persist the new state.
 *
 * The state is written to localStorage under the `darkMode` key as either
 * 'enabled' or 'disabled'.
 */
function toggleDarkMode() {
  // classList.toggle returns true when the class is present after the call.
  const isDark = document.body.classList.toggle('dark-mode');
  localStorage.setItem('darkMode', isDark ? 'enabled' : 'disabled');
}
|
| |
|
/**
 * Parse an HTML string with DOMParser.
 *
 * @param {string} str - HTML source text.
 * @returns {Document} Result of `parseFromString(str, "text/html")`.
 */
function strToHtml(str) {
  return new DOMParser().parseFromString(str, "text/html");
}
|
| |
|
| |
|
| |
|
/**
 * Convert a table into an array of plain objects.
 *
 * The first row supplies the property names; every following row becomes one
 * object whose values are the trimmed text of the cells in the same columns.
 *
 * @param {{rows: ArrayLike<{cells: ArrayLike<object>}>}} table - Table-like
 *   object exposing `rows`, each with `cells` (e.g. an HTMLTableElement).
 * @returns {Object<string, string>[]} One object per data row; an empty array
 *   when the table has no rows at all.
 */
function tableToObj(table) {
  // Missing cells (short rows) and cells without text both read as "".
  const cellText = (cell) =>
    ((cell && (cell.textContent || cell.innerText)) || "").trim();

  const rows = Array.from(table.rows);
  if (rows.length === 0) {
    return [];
  }

  const propNames = Array.from(rows[0].cells, (cell) => cellText(cell));

  return rows.slice(1).map((row) => {
    const obj = {};
    propNames.forEach((name, column) => {
      obj[name] = cellText(row.cells[column]);
    });
    return obj;
  });
}
|
| |
|
/**
 * Build display labels of the form "<Product Name> - <first Memory field>".
 *
 * @param {Array<Object<string, string>>} gpus - Records with "Product Name"
 *   and "Memory" keys; "Memory" is split on commas and only the first part
 *   is kept.
 * @returns {string[]} One label per input record.
 */
function formatGpu(gpus) {
  return gpus.map((gpu) => {
    const [memorySize] = gpu["Memory"].split(",");
    return `${gpu["Product Name"]} - ${memorySize}`;
  });
}
|
| |
|
// Lookup table from GGUF quantization name to the number used as bits per
// weight by calculateSizes/modelSize. Frozen because it is shared, read-only data.
const gguf_quants = Object.freeze({
  "IQ1_S": 1.56,
  "IQ1_M": 1.75,
  "IQ2_XXS": 2.06,
  "IQ2_XS": 2.31,
  "IQ2_S": 2.5,
  "IQ3_XXS": 3.06,
  "IQ3_XS": 3.3,
  "IQ3_S": 3.44,
  "IQ3_M": 3.66,
  "Q2_K": 3.35,
  "Q3_K_S": 3.5,
  "Q3_K_M": 3.91,
  "Q3_K_L": 4.27,
  "IQ4_XS": 4.25,
  "Q4_0": 4.55,
  "Q4_K_S": 4.58,
  "Q4_K_M": 4.85,
  "Q5_0": 5.54,
  "Q5_K_S": 5.54,
  "Q5_K_M": 5.69,
  "Q6_K": 6.59,
  "Q8_0": 8.5,
});
|
| |
|
/**
 * Neutralise markup-significant characters in a string before it is shown.
 *
 * `& < > " '` are replaced with HTML entities, `/` becomes `_`, and all
 * round, curly and square brackets are removed.
 *
 * NOTE(review): the entity values below are reconstructed; the text this was
 * recovered from had them decoded back to the raw characters. Confirm they
 * match the intended escapes.
 *
 * @param {string} string - Text to sanitise; null/undefined is treated as "".
 * @returns {string} The sanitised text.
 */
function sanitize(string) {
  const replacements = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#x27;',
    '/': '_',
    '(': '',
    ')': '',
    '{': '',
    '}': '',
    '[': '',
    ']': '',
  };
  const text = string === undefined || string === null ? '' : String(string);
  return text.replace(/[&<>"'\/\[\]\(\)\{\}]/g, (match) => replacements[match]);
}
|
| |
|
/**
 * Read `metadata.total_size` from a weights index JSON and halve it.
 *
 * NOTE(review): the division by two presumably converts a byte total for
 * 16-bit weights into a parameter count — confirm.
 *
 * @param {string} url - Index file URL.
 * @param {string} label - Name used in error messages.
 * @returns {Promise<number>} `total_size / 2`.
 * @throws {Error} When the request fails or the size is not a number.
 */
async function paramsFromWeightIndex(url, label) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`${label} index request failed with HTTP ${response.status}`);
  }
  const index = await response.json();
  const params = index["metadata"]["total_size"] / 2;
  if (Number.isNaN(params)) {
    throw new Error(`no size in ${label} metadata`);
  }
  return params;
}

/**
 * Last-resort lookup: fetch the model page through corsproxy.io and read the
 * safetensors total from the `data-props` JSON of a known element.
 *
 * @param {string} hf_model - Repository id, e.g. "org/name".
 * @returns {Promise<number>} The `safetensors.total` value found on the page.
 * @throws {Error} When neither expected element is present.
 */
async function paramsFromModelPage(hf_model) {
  const response = await fetch(
    "https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
  );
  const page = await response.text();
  // Parse into an inert document instead of assigning innerHTML on a live
  // element, so markup from the proxied page cannot trigger loads or handlers.
  const doc = new DOMParser().parseFromString(page, "text/html");

  const params_el = doc.querySelector('div[data-target="ModelSafetensorsParams"]');
  if (params_el !== null) {
    return JSON.parse(params_el.getAttribute("data-props"))["safetensors"]["total"];
  }

  const header_el = doc.querySelector('div[data-target="ModelHeader"]');
  if (header_el === null) {
    throw new Error(`could not determine the parameter count of ${hf_model}`);
  }
  return JSON.parse(header_el.getAttribute("data-props"))["model"]["safetensors"]["total"];
}

/**
 * Download a model's `config.json` from huggingface.co and attach a
 * `parameters` field to it.
 *
 * The parameter count is taken from, in order: the safetensors index, the
 * pytorch index, and finally the model page itself.
 *
 * @param {string} hf_model - Repository id, e.g. "org/name".
 * @returns {Promise<object>} The parsed config with `parameters` added. If the
 *   config cannot be fetched or parsed, the user is alerted and `{}` is
 *   returned.
 * @throws {Error} When every parameter-count source fails.
 */
async function modelConfig(hf_model) {
  let config = {};
  let responseText;

  try {
    const response = await fetch(
      `https://huggingface.co/${hf_model}/raw/main/config.json`
    );
    responseText = await response.text();
    config = JSON.parse(responseText);
  } catch (err) {
    // When the request itself failed there is no body to show; fall back to
    // the error text so the alert never receives undefined.
    alert(sanitize(responseText === undefined ? String(err) : responseText));
    return config;
  }

  let model_size;
  try {
    model_size = await paramsFromWeightIndex(
      `https://huggingface.co/${hf_model}/resolve/main/model.safetensors.index.json`,
      "safetensors"
    );
  } catch (safetensorsErr) {
    // Deliberate best-effort chain: each failure just moves to the next source.
    try {
      model_size = await paramsFromWeightIndex(
        `https://huggingface.co/${hf_model}/resolve/main/pytorch_model.bin.index.json`,
        "pytorch"
      );
    } catch (pytorchErr) {
      model_size = await paramsFromModelPage(hf_model);
    }
  }

  config.parameters = model_size;
  return config;
}
|
| |
|
/**
 * Size of the input buffers for one batch.
 *
 * Sums six terms: three of `bsz`, one `hidden_size * bsz`, one
 * `context * bsz`, and one `context`.
 * NOTE(review): the terms appear to mirror llama.cpp's input tensors; the
 * original explanatory comment was lost — confirm the source and the unit.
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {object} model_config - Model config; `hidden_size` is read.
 * @param {number} [bsz=512] - Batch size.
 * @returns {number} The summed size.
 */
function inputBuffer(context = 8192, model_config, bsz = 512) {
  const inp_tokens = bsz;
  const inp_embd = model_config["hidden_size"] * bsz;
  const inp_pos = bsz;
  const inp_KQ_mask = context * bsz;
  const inp_K_shift = context;
  const inp_sum = bsz;

  return inp_tokens + inp_embd + inp_pos + inp_KQ_mask + inp_K_shift + inp_sum;
}
|
| |
|
/**
 * Estimate the compute buffer size.
 *
 * The estimate is `(context / 1024 * 2 + 0.75) * num_attention_heads * 1024 * 1024`
 * and does not depend on the batch size: for any `bsz` other than 512 the
 * user is warned and the same formula is used.
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {object} model_config - Model config; `num_attention_heads` is read.
 * @param {number} [bsz=512] - Batch size (only checked, never used in the math).
 * @returns {number} The estimated size.
 */
function computeBuffer(context = 8192, model_config, bsz = 512) {
  // Number() keeps the old loose-equality behaviour for numeric strings.
  if (Number(bsz) !== 512) {
    alert("batch size other than 512 is currently not supported for the compute buffer, using batch size 512 for compute buffer calculation, the end result will be an overestimation");
  }
  return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024;
}
|
| |
|
/**
 * Size of the key/value cache.
 *
 * Computes `2 * (hidden_size / n_gqa) * num_hidden_layers * context`
 * elements, where `n_gqa = num_attention_heads / num_key_value_heads`, and
 * scales by `cache_bit / 8`.
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {object} model_config - Model config; `num_attention_heads`,
 *   `num_key_value_heads`, `hidden_size` and `num_hidden_layers` are read.
 * @param {number} [cache_bit=16] - Bits per cached element.
 * @returns {number} The cache size.
 */
function kvCache(context = 8192, model_config, cache_bit = 16) {
  const n_heads = model_config["num_attention_heads"];
  const kv_heads = model_config["num_key_value_heads"];
  // Configs that omit num_key_value_heads would otherwise produce NaN; treat
  // them as having one KV head per attention head (n_gqa === 1).
  const n_kv_heads = kv_heads === undefined || kv_heads === null ? n_heads : kv_heads;

  const n_gqa = n_heads / n_kv_heads;
  const n_embd_gqa = model_config["hidden_size"] / n_gqa;
  const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context);
  // Factor 2: one set of elements for keys and one for values.
  const size = 2 * n_elements;
  return size * (cache_bit / 8);
}
|
| |
|
/**
 * Total context-related size: input buffers + KV cache + compute buffer,
 * rounded to two decimal places.
 *
 * @param {number} [context=8192] - Context length in tokens.
 * @param {object} model_config - Model config forwarded to the three helpers.
 * @param {number} [bsz=512] - Batch size (input and compute buffers).
 * @param {number} [cache_bit=16] - Bits per KV-cache element.
 * @returns {number} Sum of the three components.
 */
function contextSize(context = 8192, model_config, bsz = 512, cache_bit = 16) {
  const total =
    inputBuffer(context, model_config, bsz) +
    kvCache(context, model_config, cache_bit) +
    computeBuffer(context, model_config, bsz);
  return Number.parseFloat(total.toFixed(2));
}
|
| |
|
/**
 * Size of the quantized weights: `parameters * bpw / 8`, rounded to two
 * decimal places.
 *
 * @param {object} model_config - Model config; `parameters` is read.
 * @param {number} [bpw=4.5] - Bits per weight.
 * @returns {number} The weight size.
 */
function modelSize(model_config, bpw = 4.5) {
  const size = model_config["parameters"] * bpw / 8;
  return Number.parseFloat(size.toFixed(2));
}
|
| |
|
/**
 * Read the form inputs, estimate model/context/total sizes and how many
 * layers fit in the allocated VRAM, and write the results into the page.
 *
 * Any failure is reported to the user through `alert`.
 *
 * @param {string} format - Ignored: the calculation always runs as "gguf"
 *   (the page has no inputs for other formats). Kept so existing callers
 *   keep working.
 * @param {string} context_loc - "vram" to subtract the context size from the
 *   VRAM available for layers; any other value leaves it out.
 * @returns {Promise<void>}
 */
async function calculateSizes(format, context_loc) {
  const GIB = 2 ** 30;

  // Parse a form field as a number and fail loudly instead of propagating NaN.
  const readNumber = (id, parse) => {
    const parsed = parse(document.getElementById(id).value);
    if (Number.isNaN(parsed)) {
      throw new Error(`"${id}" must be a number`);
    }
    return parsed;
  };

  try {
    const model_config = await modelConfig(document.getElementById("modelsearch").value);
    const total_layers = model_config["num_hidden_layers"];

    const context = readNumber("contextsize", (raw) => Number.parseInt(raw, 10));
    const bsz = readNumber("batchsize", (raw) => Number.parseInt(raw, 10));
    const cache_bit = 16;

    const quant = document.getElementById("quantsize").innerText.trim();
    const bpw = gguf_quants[quant];
    if (typeof bpw !== "number") {
      throw new Error(`Unknown quantization "${quant}"`);
    }

    const model_size = modelSize(model_config, bpw);
    const context_size = contextSize(context, model_config, bsz, cache_bit);
    const total_size = (model_size + context_size) / GIB;

    document.getElementById("resultmodel").innerText = (model_size / GIB).toFixed(2);
    document.getElementById("resultcontext").innerText = (context_size / GIB).toFixed(2);
    const result_total_el = document.getElementById("resulttotal");
    result_total_el.innerText = total_size.toFixed(2);

    // parseFloat, not parseInt: a budget such as 7.5 must not be truncated to 7.
    const allocated_vram = readNumber("maxvram", (raw) => Number.parseFloat(raw));
    const headroom = allocated_vram - total_size;
    if (headroom > 0.5) {
      result_total_el.style.backgroundColor = "#bef264";
    } else if (headroom > 0) {
      result_total_el.style.backgroundColor = "#facc15";
    } else {
      result_total_el.style.backgroundColor = "#ef4444";
    }
    result_total_el.style.color = "#000000";

    const layer_size = (model_size / GIB) / total_layers;
    document.getElementById("layersize").innerText = layer_size.toFixed(2);

    // Context kept in VRAM reduces the space left for layers.
    const context_dealloc = context_loc === "vram" ? context_size / GIB : 0;
    const layers_offload = Math.floor((allocated_vram - context_dealloc) / layer_size);
    const layers_shown = layers_offload > total_layers ? total_layers : Math.max(0, layers_offload);

    document.getElementById("layersoffload").innerText = `${layers_shown}/${total_layers}`;
  } catch (e) {
    alert(e);
  }
}
|
| | </script>
|
| | <link href="./styles.css" rel="stylesheet">
|
| | <title>Can I split it? - GGUF VRAM Calculator</title>
|
| | </head>
|
| |
|
| | <body class="p-8">
|
| | <div>
|
| | <button onclick="toggleDarkMode()">Toggle Dark Mode</button>
|
| | </div>
|
| | <div x-data="{ format: 'gguf', context_loc: 'vram' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
|
| | <div style="text-align: center;">
|
| | <h1 class="text-xl font-semibold leading-6">
|
| | GGUF Model, Can I split it?
|
| | </h1>
|
| | <h3 class="font-semibold leading-6">
|
| | Based on <a href="https://huggingface.co/NyxKrage" style="color: blue;">NyxKrage</a>'s <a
|
| | href="https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator" style="color: blue;">LLM VRAM
|
| | calculator</a>
|
| | </h3>
|
| | </div>
|
| | <div class="flex flex-col gap-10">
|
| | <div class="w-auto flex flex-col gap-4">
|
| | <div class="relative">
|
| | <label for="maxvram"
|
| | class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium">
|
| | Max Allocated VRAM
|
| | </label>
|
| | <input value="24" type="number" name="maxvram" id="maxvram" step="1"
|
| | class="block w-full rounded-md border-0 p-3 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
|
| | </div>
|
| |
|
| |
|
| |
|
| |
|
| | <div class="flex flex-row gap-4 relative">
|
| | <label for="modelsearch"
|
| | class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium">
|
| | Model (unquantized)
|
| | </label>
|
| | <div
|
| | class="block w-full rounded-md border-0 p-3 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
|
| | x-data="{
|
| | open: false,
|
| | value: 'Nexusflow/Starling-LM-7B-beta',
|
| | results: null,
|
| | toggle() {
|
| | if (this.open) {
|
| | return this.close()
|
| | }
|
| |
|
| | this.$refs.input.focus()
|
| |
|
| | this.open = true
|
| | },
|
| | close(focusAfter) {
|
| | if (! this.open) return
|
| |
|
| | this.open = false
|
| |
|
| | focusAfter && focusAfter.focus()
|
| | }
|
| | }" x-on:keydown.escape.prevent.stop="close($refs.input)" x-id="['model-typeahead']"
|
| | class="relative">
|
| |
|
| | <input id="modelsearch" x-ref="input" x-on:click="toggle()"
|
| | @keypress.debounce.150ms="results = (await
|
| | fetch('https://huggingface.co/api/quicksearch?type=model&q=' +
|
| | encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));"
|
| | :aria-expanded="open" :aria-controls="$id('model-typeahead')" x-model="value"
|
| | class="flex justify-between items-center gap-2 w-full" />
|
| |
|
| |
|
| | <div x-ref="panel" x-show="open" x-transition.origin.top.left x-on:click.outside="close($refs.input)"
|
| | :id="$id('model-typeahead')" style="display: none"
|
| | class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10">
|
| | <template x-for="result in results">
|
| | <a @click="value = result.id; close($refs.input)" x-text="result.id"
|
| | class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"></a>
|
| | </template>
|
| | </div>
|
| | </div>
|
| | </div>
|
| |
|
| |
|
| |
|
| | <div class="relative">
|
| | <label for="contextsize"
|
| | class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium">
|
| | Context Size
|
| | </label>
|
| | <input value="8192" type="number" name="contextsize" id="contextsize" step="1024"
|
| | class="block w-full rounded-md border-0 p-3 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
|
| | </div>
|
| |
|
| | <div class="relative">
|
| | <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium">Context
|
| | offloaded to</label>
|
| | <fieldset x-model="context_loc"
|
| | class="block w-full rounded-md border-0 p-3 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
| | <legend class="sr-only">Context location</legend>
|
| | <div class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0">
|
| | <div class="flex items-center">
|
| | <input id="context-vram" name="context-allocation" type="radio" value="vram" checked
|
| | class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" />
|
| | <label for="context-vram" class="ml-3 block text-sm font-medium leading-6">VRAM</label>
|
| | </div>
|
| | <div class="flex items-center">
|
| | <input id="context-ram" name="context-allocation" type="radio" value="ram"
|
| | class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" />
|
| | <label for="context-ram" class="ml-3 block text-sm font-medium leading-6">RAM</label>
|
| | </div>
|
| | </div>
|
| | </fieldset>
|
| | </div>
|
| |
|
| |
|
| | <div x-show="format === 'gguf'" class="relative">
|
| | <div class="flex flex-row gap-4">
|
| | <label for="quantsize"
|
| | class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium">
|
| | Quantization Size
|
| | </label>
|
| | <div
|
| | class="block w-full rounded-md border-0 p-3 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
|
| | x-data="{
|
| | open: false,
|
| | value: '',
|
| | toggle() {
|
| | if (this.open) {
|
| | return this.close()
|
| | }
|
| |
|
| | this.$refs.button.focus()
|
| |
|
| | this.open = true
|
| | },
|
| | close(focusAfter) {
|
| | if (! this.open) return
|
| |
|
| | this.open = false
|
| |
|
| | focusAfter && focusAfter.focus()
|
| | }
|
| | }" x-on:keydown.escape.prevent.stop="close($refs.button)" x-id="['dropdown-button']" class="relative">
|
| |
|
| | <button x-ref="button" x-on:click="toggle()" :aria-expanded="open" :aria-controls="$id('dropdown-button')"
|
| | type="button" id="quantsize" x-text="value.length === 0 ? 'Q4_K_S' : value"
|
| | class="flex justify-between items-center gap-2 w-full">
|
| | Q4_K_S
|
| |
|
| |
|
| | <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-gray-400" viewBox="0 0 20 20"
|
| | fill="currentColor">
|
| | <path fill-rule="evenodd"
|
| | d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
|
| | clip-rule="evenodd" />
|
| | </svg>
|
| | </button>
|
| |
|
| |
|
| | <div x-data="{ quants: [
|
| | 'IQ1_S',
|
| | 'IQ1_M',
|
| | 'IQ2_XXS',
|
| | 'IQ2_XS',
|
| | 'IQ2_S',
|
| | 'IQ3_XXS',
|
| | 'IQ3_XS',
|
| | 'IQ3_S',
|
| | 'IQ3_M',
|
| | 'Q2_K',
|
| | 'Q3_K_S',
|
| | 'Q3_K_M',
|
| | 'Q3_K_L',
|
| | 'IQ4_XS',
|
| | 'Q4_0',
|
| | 'Q4_K_S',
|
| | 'Q4_K_M',
|
| | 'Q5_0',
|
| | 'Q5_K_S',
|
| | 'Q5_K_M',
|
| | 'Q6_K',
|
| | 'Q8_0'
|
| | ]}" x-ref="panel" x-show="open" x-transition.origin.top.left x-on:click.outside="close($refs.button)"
|
| | :id="$id('dropdown-button')" style="display: none"
|
| | class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10">
|
| | <template x-for="quant in quants">
|
| | <a @click="value = quant; close($refs.button)" x-text="quant"
|
| | class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"></a>
|
| | </template>
|
| | </div>
|
| | </div>
|
| | <div class="relative">
|
| | <label for="batchsize"
|
| | class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium">
|
| | Batch Size
|
| | </label>
|
| | <input value="512" type="number" step="128" id="batchsize"
|
| | class="block w-full rounded-md border-0 p-3 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
|
| | </div>
|
| | </div>
|
| | </div>
|
| | <button type="button"
|
| | class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600"
|
| | @click="calculateSizes(format, context_loc)">
|
| | Submit
|
| | </button>
|
| | </div>
|
| | <div class="w-auto flex flex-col gap-4">
|
| | <div class="relative">
|
| | <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium">
|
| | Model Size (GB)
|
| | </label>
|
| | <div id="resultmodel"
|
| | class="block w-full rounded-md border-0 p-3 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
| | 4.20</div>
|
| | </div>
|
| | <div class="relative">
|
| | <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium">
|
| | Context Size (GB)
|
| | </label>
|
| | <div id="resultcontext"
|
| | class="block w-full rounded-md border-0 p-3 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
| | 6.90</div>
|
| | </div>
|
| | <div class="relative">
|
| | <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium">
|
| | Total Size (GB)
|
| | </label>
|
| | <div id="resulttotal"
|
| | class="block w-full rounded-md border-0 p-3 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
| | 420.69</div>
|
| | </div>
|
| | <div class="relative">
|
| | <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium">
|
| | Layer size (GB)
|
| | </label>
|
| | <div id="layersize"
|
| | class="block w-full rounded-md border-0 p-3 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
| | 42.69</div>
|
| | </div>
|
| | <div class="relative">
|
| | <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium">
|
| | Layers offloaded to GPU (out of total)
|
| | </label>
|
| | <div id="layersoffload"
|
| | class="block w-full rounded-md border-0 p-3 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
|
| | 42</div>
|
| | </div>
|
| | </div>
|
| | </div>
|
| | </div>
|
| | <script src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
|
| | <script defer>
|
// Run one calculation on page load using the form's default values.
// calculateSizes reports its own failures via alert.
calculateSizes("gguf", "vram");
|
| | </script>
|
| | </body>
|
| |
|
| | </html> |