Spaces:
Running
Running
Commit
·
08debcd
1
Parent(s):
a726ecc
Vulkan: Set device max size for host memory to avoid OOM warning and fallback to CPU buffer (llama/14249)
Browse files
ggml/src/ggml-vulkan/ggml-vulkan.cpp
CHANGED
|
@@ -9495,6 +9495,12 @@ static size_t ggml_backend_vk_host_buffer_type_get_alignment(ggml_backend_buffer
|
|
| 9495 |
UNUSED(buft);
|
| 9496 |
}
|
| 9497 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9498 |
// Should be changed to return device-specific host buffer type
|
| 9499 |
// but that probably requires changes in llama.cpp
|
| 9500 |
ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
|
|
@@ -9503,7 +9509,7 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
|
|
| 9503 |
/* .get_name = */ ggml_backend_vk_host_buffer_type_name,
|
| 9504 |
/* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
|
| 9505 |
/* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
|
| 9506 |
-
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
|
| 9507 |
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
| 9508 |
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
| 9509 |
},
|
|
|
|
| 9495 |
UNUSED(buft);
|
| 9496 |
}
|
| 9497 |
|
| 9498 |
+
static size_t ggml_backend_vk_host_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
|
| 9499 |
+
return vk_instance.devices[0]->suballocation_block_size;
|
| 9500 |
+
|
| 9501 |
+
UNUSED(buft);
|
| 9502 |
+
}
|
| 9503 |
+
|
| 9504 |
// Should be changed to return device-specific host buffer type
|
| 9505 |
// but that probably requires changes in llama.cpp
|
| 9506 |
ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() {
|
|
|
|
| 9509 |
/* .get_name = */ ggml_backend_vk_host_buffer_type_name,
|
| 9510 |
/* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer,
|
| 9511 |
/* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment,
|
| 9512 |
+
/* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size,
|
| 9513 |
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
|
| 9514 |
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
|
| 9515 |
},
|