Diego Devesa committed
Commit 7f269bb · 1 Parent(s): 27e2fca

ggml : add backend registry / device interfaces to BLAS backend (llama/9752)


* ggml : add backend registry / device interfaces to BLAS backend

* fix mmap usage when using host buffers

ggml/include/ggml-backend.h CHANGED
@@ -170,6 +170,7 @@ extern "C" {
 
     // Functions that may be obtained using ggml_backend_reg_get_proc_address
    typedef ggml_backend_buffer_type_t (*ggml_backend_split_buffer_type_t)(const float *);
+    typedef void (*ggml_backend_set_n_threads_t)(ggml_backend_t, int);
 
    //
    // Backend registry
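The new ggml_backend_set_n_threads_t typedef is meant to be used together with ggml_backend_reg_get_proc_address, so a caller can adjust a backend's thread count without depending on backend-specific headers. A minimal sketch (the helper name is illustrative; the backend handle is assumed to come from the same registry):

    #include "ggml-backend.h"

    // look up the optional "ggml_backend_set_n_threads" entry point of a registry
    // and call it if the backend provides one (the CPU and BLAS backends now do)
    static void set_backend_n_threads(ggml_backend_reg_t reg, ggml_backend_t backend, int n_threads) {
        ggml_backend_set_n_threads_t set_n_threads_fn =
            (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
        if (set_n_threads_fn != NULL) {
            set_n_threads_fn(backend, n_threads);
        }
    }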
ggml/include/ggml-blas.h CHANGED
@@ -17,6 +17,8 @@ GGML_API bool ggml_backend_is_blas(ggml_backend_t backend);
 // for openblas and blis, this will also set the number of threads used for blas operations
 GGML_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
 
+GGML_API ggml_backend_reg_t ggml_backend_blas_reg(void);
+
 
 #ifdef __cplusplus
 }
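ggml_backend_blas_reg() lets the BLAS backend be created through the common registry path instead of calling ggml_backend_blas_init() directly. A sketch, assuming the generic device helpers declared in ggml-backend.h (ggml_backend_reg_dev_get, ggml_backend_dev_init):

    #include "ggml-backend.h"
    #include "ggml-blas.h"

    // obtain the single BLAS device from its registration and initialize a backend from it
    static ggml_backend_t blas_backend_from_registry(void) {
        ggml_backend_reg_t reg = ggml_backend_blas_reg();
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, 0);
        return ggml_backend_dev_init(dev, /*params =*/ NULL);
    }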
ggml/src/CMakeLists.txt CHANGED
@@ -190,22 +190,24 @@ if (GGML_BLAS)
         # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
         find_package(PkgConfig REQUIRED)
         if (${GGML_BLAS_VENDOR} MATCHES "Generic")
-            pkg_check_modules(DepBLAS REQUIRED blas)
+            pkg_check_modules(DepBLAS blas)
         elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
             # As of openblas v0.3.22, the 64-bit is named openblas64.pc
             pkg_check_modules(DepBLAS openblas64)
             if (NOT DepBLAS_FOUND)
-                pkg_check_modules(DepBLAS REQUIRED openblas)
+                pkg_check_modules(DepBLAS openblas)
             endif()
         elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
-            pkg_check_modules(DepBLAS REQUIRED blis)
+            add_compile_definitions(GGML_BLAS_USE_BLIS)
+            pkg_check_modules(DepBLAS blis)
         elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
-            pkg_check_modules(DepBLAS REQUIRED blas-atlas)
+            pkg_check_modules(DepBLAS blas-atlas)
         elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
-            pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
+            pkg_check_modules(DepBLAS flexiblas_api)
         elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
+            add_compile_definitions(GGML_BLAS_USE_MKL)
             # all Intel* libraries share the same include path
-            pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
+            pkg_check_modules(DepBLAS mkl-sdl)
         elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
             # this doesn't provide pkg-config
             # suggest to assign BLAS_INCLUDE_DIRS on your own
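The vendor selection now also controls a compile definition: GGML_BLAS_VENDOR=FLAME adds GGML_BLAS_USE_BLIS and GGML_BLAS_VENDOR=Intel adds GGML_BLAS_USE_MKL, which the new device interface in ggml-blas.cpp below uses to report the backend description ("BLIS", "MKL", and so on).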
ggml/src/ggml-backend-impl.h CHANGED
@@ -88,6 +88,7 @@ extern "C" {
 
         void (*free)(ggml_backend_t backend);
 
+        // Will be moved to the device interface
         // buffer allocation
         ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);
 
@@ -112,17 +113,9 @@ extern "C" {
 
         // IMPORTANT: these functions have been moved to the device interface and will be removed from the backend interface
         // new backends should implement the device interface instead
-
         // These functions are being moved to the device interface
-        // check if the backend can compute an operation
         bool (*supports_op) (ggml_backend_t backend, const struct ggml_tensor * op);
-
-        // check if the backend can use tensors allocated in a buffer type
         bool (*supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
-
-        // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
-        // these should be expensive operations with large batch sizes that may benefit from running on this backend
-        // even if the weight has to be copied from the CPU temporarily
         bool (*offload_op) (ggml_backend_t backend, const struct ggml_tensor * op);
 
         // (optional) event synchronization
@@ -184,9 +177,8 @@ extern "C" {
         // check if the backend can use tensors allocated in a buffer type
         bool (*supports_buft)(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft);
 
-        // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
-        // these should be expensive operations with large batch sizes that may benefit from running on this backend
-        // even if the weight has to be copied from the CPU temporarily
+        // (optional) check if the backend wants to run an operation, even if the weights are allocated in an incompatible buffer
+        // these should be expensive operations that may benefit from running on this backend instead of the CPU backend
         bool (*offload_op)(ggml_backend_dev_t dev, const struct ggml_tensor * op);
 
         // (optional) event synchronization
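In the device interface, offload_op is now optional and documented as being intended for expensive operations only. A hedged sketch of what an implementation could look like for a hypothetical accelerator-style device (the function name and the batch threshold are illustrative, not part of this commit):

    // claim only large-batch matrix multiplications, where copying the activations
    // to this device is likely to pay off compared to running on the CPU backend
    static bool example_device_offload_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
        const int64_t min_batch_size = 32; // illustrative threshold

        if (op->op == GGML_OP_MUL_MAT) {
            return op->ne[1] >= min_batch_size; // ne[1]: second dimension of the result (typically the batch/token count)
        }

        return false;

        GGML_UNUSED(dev);
    }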
ggml/src/ggml-backend.cpp CHANGED
@@ -500,7 +500,11 @@ bool ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buff
 }
 
 bool ggml_backend_dev_offload_op(ggml_backend_dev_t device, const struct ggml_tensor * op) {
-    return device->iface.offload_op(device, op);
+    if (device->iface.offload_op != NULL) {
+        return device->iface.offload_op(device, op);
+    }
+
+    return false;
 }
 
 // Backend (reg)
@@ -534,6 +538,10 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
 #include "ggml-metal.h"
 #endif
 
+#ifdef GGML_USE_BLAS
+#include "ggml-blas.h"
+#endif
+
 struct ggml_backend_registry {
     std::vector<ggml_backend_reg_t> backends;
     std::vector<ggml_backend_dev_t> devices;
@@ -545,10 +553,13 @@ struct ggml_backend_registry {
 #ifdef GGML_USE_METAL
         register_backend(ggml_backend_metal_reg());
 #endif
-
-        register_backend(ggml_backend_cpu_reg());
+#ifdef GGML_USE_BLAS
+        register_backend(ggml_backend_blas_reg());
+#endif
 
         // TODO: sycl, vulkan, kompute, cann
+
+        register_backend(ggml_backend_cpu_reg());
     }
 
     void register_backend(ggml_backend_reg_t reg) {
@@ -1229,16 +1240,22 @@ static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg
     };
 
     return &ggml_backend_cpu_device;
+}
+
+static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
+    if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
+        return (void *)ggml_backend_cpu_set_n_threads;
+    }
+    return NULL;
 
     GGML_UNUSED(reg);
-    GGML_UNUSED(index);
 }
 
 static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
     /* .get_name         = */ ggml_backend_cpu_reg_get_name,
     /* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
     /* .get_device       = */ ggml_backend_cpu_reg_get_device,
-    /* .get_proc_address = */ NULL,
+    /* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
 };
 
 ggml_backend_reg_t ggml_backend_cpu_reg(void) {
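With ggml_backend_blas_reg() added to ggml_backend_registry, a build with GGML_USE_BLAS exposes a "BLAS" device alongside the CPU (and Metal, if enabled) devices. A small sketch, assuming the enumeration helpers declared in ggml-backend.h (ggml_backend_dev_count, ggml_backend_dev_get, ggml_backend_dev_name, ggml_backend_dev_description):

    #include <cstdio>
    #include "ggml-backend.h"

    int main() {
        // list every device collected by the global registry, e.g. "BLAS" and "CPU"
        for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
            printf("device %zu: %s (%s)\n", i, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev));
        }
        return 0;
    }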
ggml/src/ggml-blas.cpp CHANGED
@@ -4,6 +4,7 @@
 
 #include <future>
 #include <vector>
+#include <cstring>
 
 #if defined(GGML_USE_ACCELERATE)
 #  include <Accelerate/Accelerate.h>
@@ -26,30 +27,6 @@ struct ggml_backend_blas_context {
 #endif
 };
 
-// helper function to determine if it is better to use BLAS or not
-// for large matrices, BLAS is faster
-static bool ggml_backend_blas_use_blas(const struct ggml_tensor * dst) {
-    const struct ggml_tensor * src0 = dst->src[0];
-    const struct ggml_tensor * src1 = dst->src[1];
-
-    const int64_t ne10 = src1->ne[0];
-
-    const int64_t ne0 = dst->ne[0];
-    const int64_t ne1 = dst->ne[1];
-
-    // TODO: find the optimal values for these
-    if (ggml_is_contiguous(src0) &&
-        ggml_is_contiguous(src1) &&
-        src1->type == GGML_TYPE_F32 &&
-        (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) {
-
-        /*printf("BLAS: %d %d %d %d %d\n", ne0, ne1, ne10, ne00, ne01);*/
-        return true;
-    }
-
-    return false;
-}
-
 static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct ggml_tensor * dst) {
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
@@ -235,7 +212,7 @@ static void ggml_backend_blas_out_prod(ggml_backend_blas_context * ctx, struct g
 
 // backend interface
 
-static const char * ggml_backend_blas_name(ggml_backend_t backend) {
+static const char * ggml_backend_blas_get_name(ggml_backend_t backend) {
     return "BLAS";
 
     GGML_UNUSED(backend);
@@ -285,29 +262,8 @@ static enum ggml_status ggml_backend_blas_graph_compute(ggml_backend_t backend,
     GGML_UNUSED(backend);
 }
 
-static bool ggml_backend_blas_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
-    const struct ggml_tensor * src0 = op->src[0];
-    const struct ggml_tensor * src1 = op->src[1];
-
-    return (op->op == GGML_OP_MUL_MAT  && ggml_backend_blas_use_blas(op)) ||
-           (op->op == GGML_OP_OUT_PROD && op->src[0]->type == GGML_TYPE_F32 &&
-            op->src[1]->type == GGML_TYPE_F32 &&
-            ggml_is_matrix(src0) &&
-            ggml_is_matrix(src1) &&
-            ggml_is_contiguous(src0) &&
-            (ggml_is_contiguous(src1) || ggml_is_transposed(src1)));
-
-    GGML_UNUSED(backend);
-}
-
-static bool ggml_backend_blas_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
-    return ggml_backend_buft_is_host(buft);
-
-    GGML_UNUSED(backend);
-}
-
 static struct ggml_backend_i blas_backend_i = {
-    /* .get_name                = */ ggml_backend_blas_name,
+    /* .get_name                = */ ggml_backend_blas_get_name,
     /* .free                    = */ ggml_backend_blas_free,
     /* .get_default_buffer_type = */ ggml_backend_blas_get_default_buffer_type,
     /* .set_tensor_async        = */ NULL,
@@ -319,8 +275,8 @@ static struct ggml_backend_i blas_backend_i = {
     /* .graph_plan_update       = */ NULL,
     /* .graph_plan_compute      = */ NULL,
     /* .graph_compute           = */ ggml_backend_blas_graph_compute,
-    /* .supports_op             = */ ggml_backend_blas_supports_op,
-    /* .supports_buft           = */ ggml_backend_blas_supports_buft,
+    /* .supports_op             = */ NULL,
+    /* .supports_buft           = */ NULL,
     /* .offload_op              = */ NULL,
     /* .event_record            = */ NULL,
     /* .event_wait              = */ NULL,
@@ -337,7 +293,7 @@ ggml_backend_t ggml_backend_blas_init(void) {
     ggml_backend_t backend = new ggml_backend {
         /* .guid      = */ ggml_backend_blas_guid(),
        /* .interface = */ blas_backend_i,
-        /* .device    = */ nullptr,
+        /* .device    = */ ggml_backend_reg_dev_get(ggml_backend_blas_reg(), 0),
        /* .context   = */ ctx,
     };
 
@@ -364,3 +320,203 @@ void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads)
     ggml_backend_blas_context * ctx = (ggml_backend_blas_context *)backend_blas->context;
     ctx->n_threads = n_threads;
 }
+
+// device interface
+
+static const char * ggml_backend_blas_device_get_name(ggml_backend_dev_t dev) {
+    return "BLAS";
+
+    GGML_UNUSED(dev);
+}
+
+static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t dev) {
+#if defined(GGML_USE_ACCELERATE)
+    return "Accelerate";
+#elif defined(GGML_BLAS_USE_MKL)
+    return "MKL";
+#elif defined(GGML_BLAS_USE_BLIS)
+    return "BLIS";
+#elif defined(GGML_BLAS_USE_NVPL)
+    return "NVPL";
+#elif defined(OPENBLAS_VERSION)
+    return "OpenBLAS";
+#else
+    return "BLAS";
+#endif
+
+    GGML_UNUSED(dev);
+}
+
+static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+    // TODO
+    *free = 0;
+    *total = 0;
+
+    GGML_UNUSED(dev);
+}
+
+static enum ggml_backend_dev_type ggml_backend_blas_device_get_type(ggml_backend_dev_t dev) {
+    return GGML_BACKEND_DEVICE_TYPE_CPU;
+
+    GGML_UNUSED(dev);
+}
+
+static void ggml_backend_blas_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
+    props->name        = ggml_backend_blas_device_get_name(dev);
+    props->description = ggml_backend_blas_device_get_description(dev);
+    props->type        = ggml_backend_blas_device_get_type(dev);
+    ggml_backend_blas_device_get_memory(dev, &props->memory_free, &props->memory_total);
+    props->caps = {
+        /* .async                = */ false,
+        /* .host_buffer          = */ false,
+        /* .buffer_from_host_ptr = */ true,
+        /* .events               = */ false,
+    };
+}
+
+static ggml_backend_t ggml_backend_blas_device_init(ggml_backend_dev_t dev, const char * params) {
+    return ggml_backend_blas_init();
+
+    GGML_UNUSED(dev);
+    GGML_UNUSED(params);
+}
+
+static ggml_backend_buffer_type_t ggml_backend_blas_device_get_buffer_type(ggml_backend_dev_t dev) {
+    return ggml_backend_cpu_buffer_type();
+
+    GGML_UNUSED(dev);
+}
+
+static ggml_backend_buffer_t ggml_backend_blas_device_buffer_from_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
+    return ggml_backend_cpu_buffer_from_ptr(ptr, size);
+
+    GGML_UNUSED(dev);
+    GGML_UNUSED(max_tensor_size);
+}
+
+static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
+    const struct ggml_tensor * src0 = op->src[0];
+    const struct ggml_tensor * src1 = op->src[1];
+
+    switch (op->op) {
+        case GGML_OP_NONE:
+        case GGML_OP_RESHAPE:
+        case GGML_OP_VIEW:
+        case GGML_OP_PERMUTE:
+        case GGML_OP_TRANSPOSE:
+            return true;
+
+        case GGML_OP_MUL_MAT:
+        {
+            // BLAS usually is only faster for large matrices
+            const struct ggml_tensor * src0 = op->src[0];
+            const struct ggml_tensor * src1 = op->src[1];
+
+            const int64_t ne10 = src1->ne[0];
+
+            const int64_t ne0 = op->ne[0];
+            const int64_t ne1 = op->ne[1];
+
+            // TODO: find the optimal value
+            const int64_t min_batch = 32;
+
+            return (ggml_is_contiguous(src0) &&
+                    ggml_is_contiguous(src1) &&
+                    src1->type == GGML_TYPE_F32 &&
+                    (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch));
+        }
+
+        case GGML_OP_OUT_PROD:
+            return (op->src[0]->type == GGML_TYPE_F32 &&
+                    op->src[1]->type == GGML_TYPE_F32 &&
+                    ggml_is_matrix(src0) &&
+                    ggml_is_matrix(src1) &&
+                    ggml_is_contiguous(src0) &&
+                    (ggml_is_contiguous(src1) || ggml_is_transposed(src1)));
+
+        default:
+            return false;
+
+    }
+
+    GGML_UNUSED(dev);
+}
+
+static bool ggml_backend_blas_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
+    return ggml_backend_buft_is_host(buft);
+
+    GGML_UNUSED(dev);
+}
+
+static const struct ggml_backend_device_i ggml_backend_blas_device_i = {
+    /* .get_name             = */ ggml_backend_blas_device_get_name,
+    /* .get_description      = */ ggml_backend_blas_device_get_description,
+    /* .get_memory           = */ ggml_backend_blas_device_get_memory,
+    /* .get_type             = */ ggml_backend_blas_device_get_type,
+    /* .get_props            = */ ggml_backend_blas_device_get_props,
+    /* .init_backend         = */ ggml_backend_blas_device_init,
+    /* .get_buffer_type      = */ ggml_backend_blas_device_get_buffer_type,
+    /* .get_host_buffer_type = */ NULL,
+    /* .buffer_from_host_ptr = */ ggml_backend_blas_device_buffer_from_ptr,
+    /* .supports_op          = */ ggml_backend_blas_device_supports_op,
+    /* .supports_buft        = */ ggml_backend_blas_device_supports_buft,
+    /* .offload_op           = */ NULL,
+    /* .event_new            = */ NULL,
+    /* .event_free           = */ NULL,
+    /* .event_synchronize    = */ NULL,
+};
+
+// backend reg interface
+
+static const char * ggml_backend_blas_reg_get_name(ggml_backend_reg_t reg) {
+    return "BLAS";
+
+    GGML_UNUSED(reg);
+}
+
+static size_t ggml_backend_blas_reg_get_device_count(ggml_backend_reg_t reg) {
+    return 1;
+
+    GGML_UNUSED(reg);
+}
+
+static ggml_backend_dev_t ggml_backend_blas_reg_get_device(ggml_backend_reg_t reg, size_t index) {
+    GGML_ASSERT(index == 0);
+
+    static ggml_backend_device ggml_backend_blas_device = {
+        /* .iface   = */ ggml_backend_blas_device_i,
+        /* .reg     = */ reg,
+        /* .context = */ nullptr,
+    };
+
+    return &ggml_backend_blas_device;
+
+    GGML_UNUSED(reg);
+    GGML_UNUSED(index);
+}
+
+static void * ggml_backend_blas_get_proc_address(ggml_backend_reg_t reg, const char * name) {
+    if (std::strcmp(name, "ggml_backend_set_n_threads") == 0) {
+        return (void *)ggml_backend_blas_set_n_threads;
+    }
+    return NULL;
+
+    GGML_UNUSED(reg);
+    GGML_UNUSED(name);
+}
+
+static const struct ggml_backend_reg_i ggml_backend_blas_reg_i = {
+    /* .get_name         = */ ggml_backend_blas_reg_get_name,
+    /* .get_device_count = */ ggml_backend_blas_reg_get_device_count,
+    /* .get_device       = */ ggml_backend_blas_reg_get_device,
+    /* .get_proc_address = */ ggml_backend_blas_get_proc_address,
+};
+
+ggml_backend_reg_t ggml_backend_blas_reg(void) {
+    static struct ggml_backend_reg ggml_backend_blas_reg = {
+        /* .iface   = */ ggml_backend_blas_reg_i,
+        /* .context = */ NULL,
+    };
+
+    return &ggml_backend_blas_reg;
+}
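The BLAS device advertises buffer_from_host_ptr and forwards it to ggml_backend_cpu_buffer_from_ptr, so an existing host allocation (such as an mmap-ed model file) can be exposed to it without copying. A hedged sketch, assuming the public wrapper ggml_backend_dev_buffer_from_host_ptr declared in ggml-backend.h; the size handling is illustrative only:

    #include "ggml-backend.h"
    #include "ggml-blas.h"

    // wrap a host memory region into a backend buffer usable by the BLAS device
    static ggml_backend_buffer_t wrap_host_region(void * ptr, size_t size) {
        ggml_backend_dev_t dev = ggml_backend_reg_dev_get(ggml_backend_blas_reg(), 0);
        return ggml_backend_dev_buffer_from_host_ptr(dev, ptr, size, /*max_tensor_size =*/ size);
    }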