ggml : remove OpenCL (llama/7735)
This commit removes the OpenCL (CLBlast) backend from ggml: the `ggml-opencl.h` includes, the CLBLAST-guarded GPU offload branches in the CPU ops and the graph planner, and the public `ggml_cpu_has_clblast()` accessor.

Files changed:
- ggml-metal.h +1 -1
- ggml.c +3 -59
- ggml.h +0 -1
ggml-metal.h
CHANGED
```diff
@@ -1,7 +1,7 @@
 // An interface allowing to compute ggml_cgraph with Metal
 //
 // This is a fully functional interface that extends ggml with GPU support for Apple devices.
-// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, OpenCL, etc.)
+// A similar interface can be created for other GPU backends (e.g. Vulkan, CUDA, etc.)
 //
 // How it works?
 //
```
ggml.c
CHANGED
```diff
@@ -297,17 +297,12 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 
 #if defined(GGML_USE_ACCELERATE)
 #include <Accelerate/Accelerate.h>
-#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
-#include "ggml-opencl.h"
-#endif
 #elif defined(GGML_USE_OPENBLAS)
 #if defined(GGML_BLAS_USE_MKL)
 #include <mkl.h>
 #else
 #include <cblas.h>
 #endif
-#elif defined(GGML_USE_CLBLAST)
-#include "ggml-opencl.h"
 #endif
 
 // floating point type used to accumulate sums
```
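For reference, the deleted `ggml-opencl.h` never appears on this page. The sketch below is a hypothetical reconstruction of the slice of its interface that the removed call sites actually use; the signatures are inferred from the deleted calls in this commit (here and in the hunks further down), not copied from the real header, so treat the exact types as assumptions.

```c
// Hypothetical reconstruction of the removed OpenCL interface, inferred
// only from the call sites deleted in this commit: ggml_cl_init in
// ggml_init, ggml_cl_add/ggml_cl_mul in the element-wise ops, and the
// mul_mat trio used by the compute and planning passes.
#include <stdbool.h>
#include <stddef.h>

struct ggml_tensor;

void   ggml_cl_init(void);
void   ggml_cl_add(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
void   ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
bool   ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * dst);
void   ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
```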
```diff
@@ -3380,10 +3375,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
     }
 
-#if defined(GGML_USE_CLBLAST)
-    ggml_cl_init();
-#endif
-
     ggml_setup_op_has_task_pass();
 
     is_first_call = false;
```
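With `ggml_cl_init()` gone, `ggml_init()` no longer performs any backend-specific setup. The toy below sketches the pattern the deleted lines followed: a compile-time-guarded, once-per-process init hook inside the library's first-call path. `USE_TOY_GPU` and `toy_gpu_init` are invented names, not ggml symbols.

```c
#include <stdbool.h>

#if defined(USE_TOY_GPU)
// stand-in for the one-time GPU runtime setup a backend would do
static void toy_gpu_init(void) { /* create context, compile kernels, ... */ }
#endif

// Toy version of the removed pattern: the hook runs at most once per
// process, and only when the backend was selected at compile time.
void library_init(void) {
    static bool is_first_call = true;
    if (is_first_call) {
#if defined(USE_TOY_GPU)
        toy_gpu_init();
#endif
        is_first_call = false;
    }
}
```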
```diff
@@ -9053,17 +9044,6 @@ static void ggml_compute_forward_add_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-#ifdef GGML_USE_CLBLAST
-    if (src1->backend == GGML_BACKEND_TYPE_GPU) {
-        // TODO: OpenCL kernel support full broadcast
-        GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
-        if (ith == 0) {
-            ggml_cl_add(src0, src1, dst);
-        }
-        return;
-    }
-#endif
-
     const int nr = ggml_nrows(src0);
 
     GGML_TENSOR_BINARY_OP_LOCALS
```
```diff
@@ -10171,17 +10151,6 @@ static void ggml_compute_forward_mul_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-#if defined(GGML_USE_CLBLAST)
-    if (src1->backend == GGML_BACKEND_TYPE_GPU) {
-        // TODO: OpenCL kernel support full broadcast
-        GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
-        if (ith == 0) {
-            ggml_cl_mul(src0, src1, dst);
-        }
-        return;
-    }
-#endif
-
     const int64_t nr = ggml_nrows(src0);
 
     GGML_TENSOR_BINARY_OP_LOCALS
```
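This hunk and the `ggml_compute_forward_add_f32` hunk before it delete the same offload idiom: when `src1` lives on the GPU, thread 0 alone dispatches the OpenCL kernel and all `nth` workers return before the CPU loop runs. Below is a self-contained toy of that single-dispatch, early-return shape; every name in it is invented, none of it is ggml code.

```c
#include <stdbool.h>
#include <stdio.h>

// Toy tensor: a backend flag plus a fixed four-element payload.
typedef struct { bool on_gpu; float data[4]; } toy_tensor;

static void toy_gpu_add(const toy_tensor * a, const toy_tensor * b, toy_tensor * d) {
    for (int i = 0; i < 4; i++) d->data[i] = a->data[i] + b->data[i]; // stand-in for a GPU kernel
}

static void toy_forward_add(int ith, int nth, const toy_tensor * src0,
                            const toy_tensor * src1, toy_tensor * dst) {
    if (src1->on_gpu) {
        if (ith == 0) {
            toy_gpu_add(src0, src1, dst); // one dispatch on behalf of all threads
        }
        return; // every one of the nth threads skips the CPU path
    }
    for (int i = ith; i < 4; i += nth) { // multi-threaded CPU path: strided split
        dst->data[i] = src0->data[i] + src1->data[i];
    }
}

int main(void) {
    toy_tensor a = { true, {1, 2, 3, 4} }, b = { true, {5, 6, 7, 8} }, d = { true, {0} };
    toy_forward_add(0, 1, &a, &b, &d); // a single "thread" takes the GPU branch
    printf("%g %g %g %g\n", d.data[0], d.data[1], d.data[2], d.data[3]);
    return 0;
}
```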
```diff
@@ -12417,15 +12386,6 @@ static void ggml_compute_forward_mul_mat(
     // nb01 >= nb00 - src0 is not transposed
     //   compute by src0 rows
 
-#if defined(GGML_USE_CLBLAST)
-    if (ggml_cl_can_mul_mat(src0, src1, dst)) {
-        if (params->ith == 0 && params->type == GGML_TASK_TYPE_COMPUTE) {
-            ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
-        }
-        return;
-    }
-#endif
-
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
     if (ggml_compute_forward_mul_mat_use_blas(dst)) {
         const int64_t ne_plane = ne01*ne00;
```
```diff
@@ -12873,8 +12833,6 @@ static void ggml_compute_forward_out_prod_f32(
     // nb01 >= nb00 - src0 is not transposed
     //   compute by src0 rows
 
-    // TODO: #if defined(GGML_USE_CLBLAST)
-
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
     bool use_blas = ggml_is_matrix(src0) &&
         ggml_is_matrix(src1) &&
```
```diff
@@ -13072,7 +13030,7 @@ static void ggml_compute_forward_out_prod_q_f32(
     // nb01 >= nb00 - src0 is not transposed
     //   compute by src0 rows
 
-    // TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
+    // TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
 
     if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith != 0) {
```
```diff
@@ -19546,11 +19504,6 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threads)
                 {
                     const enum ggml_type vec_dot_type = type_traits[node->src[0]->type].vec_dot_type;
 
-#if defined(GGML_USE_CLBLAST)
-                    if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
-                        cur = ggml_cl_mul_mat_get_wsize(node->src[0], node->src[1], node);
-                    } else
-#endif
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
                     if (ggml_compute_forward_mul_mat_use_blas(node)) {
                         if (node->src[0]->type != GGML_TYPE_F32) {
```
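This hunk pairs with the `ggml_compute_forward_mul_mat` hunk above. `ggml_graph_plan` sized one shared work buffer per graph (the deleted branch contributed `ggml_cl_mul_mat_get_wsize` for offloadable mul_mat nodes), and the compute pass later received that buffer as `params->wdata` and `params->wsize`. The toy below sketches that plan-then-execute shape; it assumes nothing about ggml's real planner and uses only invented names.

```c
#include <stddef.h>
#include <stdlib.h>

// Toy node: a planning callback reporting worst-case scratch size, and a
// compute callback that borrows the shared buffer through wdata/wsize.
typedef struct {
    size_t (*get_wsize)(void);
    void   (*compute)(void * wdata, size_t wsize);
} toy_node;

void toy_run_graph(toy_node * nodes, int n) {
    size_t wsize = 0;
    for (int i = 0; i < n; i++) {          // planning pass: take the max over nodes
        const size_t cur = nodes[i].get_wsize();
        if (cur > wsize) wsize = cur;
    }
    void * wdata = wsize > 0 ? malloc(wsize) : NULL;
    for (int i = 0; i < n; i++) {          // compute pass: all nodes share one buffer
        nodes[i].compute(wdata, wsize);
    }
    free(wdata);
}
```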
```diff
@@ -22859,7 +22812,7 @@ int ggml_cpu_has_wasm_simd(void) {
 }
 
 int ggml_cpu_has_blas(void) {
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_SYCL)
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
     return 1;
 #else
     return 0;
```
```diff
@@ -22874,14 +22827,6 @@ int ggml_cpu_has_cuda(void) {
 #endif
 }
 
-int ggml_cpu_has_clblast(void) {
-#if defined(GGML_USE_CLBLAST)
-    return 1;
-#else
-    return 0;
-#endif
-}
-
 int ggml_cpu_has_vulkan(void) {
 #if defined(GGML_USE_VULKAN)
     return 1;
```
```diff
@@ -22915,8 +22860,7 @@ int ggml_cpu_has_rpc(void) {
 }
 
 int ggml_cpu_has_gpublas(void) {
-    return ggml_cpu_has_cuda() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() ||
-           ggml_cpu_has_sycl();
+    return ggml_cpu_has_cuda() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() || ggml_cpu_has_sycl();
 }
 
 int ggml_cpu_has_sse3(void) {
```
ggml.h
CHANGED
```diff
@@ -2425,7 +2425,6 @@ extern "C" {
     GGML_API int ggml_cpu_has_wasm_simd  (void);
     GGML_API int ggml_cpu_has_blas       (void);
     GGML_API int ggml_cpu_has_cuda       (void);
-    GGML_API int ggml_cpu_has_clblast    (void);
     GGML_API int ggml_cpu_has_vulkan     (void);
     GGML_API int ggml_cpu_has_kompute    (void);
     GGML_API int ggml_cpu_has_gpublas    (void);
```
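Any downstream code still calling `ggml_cpu_has_clblast()` now fails to build against this header. The accessors that remain can be exercised as below, a minimal sketch assuming `ggml.h` is on the include path and the program links against a ggml build.

```c
#include <stdio.h>
#include "ggml.h"

// Print which optional backends this ggml build was compiled with.
// After this commit there is no ggml_cpu_has_clblast() to query.
int main(void) {
    printf("BLAS:    %d\n", ggml_cpu_has_blas());
    printf("CUDA:    %d\n", ggml_cpu_has_cuda());
    printf("Vulkan:  %d\n", ggml_cpu_has_vulkan());
    printf("Kompute: %d\n", ggml_cpu_has_kompute());
    printf("gpublas: %d\n", ggml_cpu_has_gpublas());
    return 0;
}
```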