finnvoorhees committed
Commit 92f24ee · unverified · 1 Parent(s): cda4a91

ggml : add error handling to graph_compute (#1714)

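In short, every graph_compute entry point changes from void to bool so that a failed graph computation can be reported to the caller instead of being silently ignored or asserting. A minimal caller-side sketch of the new contract (the run_graph helper below is hypothetical; it assumes only the public ggml-backend API shown in the diffs):

    #include <stdio.h>
    #include <stdbool.h>
    #include "ggml.h"
    #include "ggml-backend.h"

    // Hypothetical helper: run a graph on a backend and report failure
    // instead of aborting the process.
    static bool run_graph(ggml_backend_t backend, struct ggml_cgraph * gf) {
        if (!ggml_backend_graph_compute(backend, gf)) {  // now returns bool
            fprintf(stderr, "run_graph: graph computation failed\n");
            return false;
        }
        return true;
    }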
bindings/ruby/ext/ggml-backend-impl.h CHANGED
@@ -70,7 +70,7 @@ extern "C" {
     void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);

     // compute graph without a plan
-    void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);

     // check if the backend supports an operation
     bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
bindings/ruby/ext/ggml-backend.c CHANGED
@@ -156,8 +156,8 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_
     backend->iface.graph_plan_compute(backend, plan);
 }

-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    return backend->iface.graph_compute(backend, cgraph);
 }

 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
bindings/ruby/ext/ggml-backend.h CHANGED
@@ -52,7 +52,7 @@ extern "C" {

     GGML_API void ggml_backend_graph_plan_free   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
     GGML_API bool ggml_backend_supports_op       (ggml_backend_t backend, const struct ggml_tensor * op);

     // tensor copy between different backends
ggml-backend-impl.h CHANGED
@@ -90,7 +90,7 @@ extern "C" {
     void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);

     // compute graph without a plan
-    void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    bool (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);

     // check if the backend supports an operation
     bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
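For backend implementers, the graph_compute callback in the backend interface now returns bool as well. A rough sketch of what an out-of-tree backend might look like under the new interface (all my_backend_* names are hypothetical, not part of this commit):

    #include "ggml.h"
    #include "ggml-backend-impl.h"

    // Hypothetical per-node compute routine provided by the backend.
    static bool my_backend_compute_node(ggml_backend_t backend, struct ggml_tensor * node);

    // Hypothetical graph_compute callback: compute each node in order and
    // propagate the first failure to the caller instead of asserting.
    static bool my_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
        for (int i = 0; i < cgraph->n_nodes; i++) {
            if (!my_backend_compute_node(backend, cgraph->nodes[i])) {
                return false;
            }
        }
        return true;
    }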
ggml-backend.c CHANGED
@@ -195,11 +195,14 @@ void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_
     ggml_backend_synchronize(backend);
 }

-void ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
-    backend->iface.graph_compute(backend, cgraph);
+bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+    if (!backend->iface.graph_compute(backend, cgraph)) {
+        return false;
+    }

     // TODO: optional sync
     ggml_backend_synchronize(backend);
+    return true;
 }

 bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
@@ -597,7 +600,7 @@ static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_bac
     GGML_UNUSED(backend);
 }

-static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;

     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
@@ -611,6 +614,7 @@ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c
     cplan.work_data = cpu_ctx->work_data;

     ggml_graph_compute(cgraph, &cplan);
+    return true;
 }

 static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
ggml-backend.h CHANGED
@@ -58,7 +58,7 @@ extern "C" {

     GGML_API void ggml_backend_graph_plan_free   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
     GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    GGML_API void ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    GGML_API bool ggml_backend_graph_compute     (ggml_backend_t backend, struct ggml_cgraph * cgraph);
     GGML_API bool ggml_backend_supports_op       (ggml_backend_t backend, const struct ggml_tensor * op);

     // tensor copy between different backends
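Callers that wrap the public API in a thin helper can now simply forward the result, which is the pattern whisper.cpp adopts further down. A sketch under the same assumptions (compute_graph is a hypothetical name):

    #include "ggml.h"
    #include "ggml-backend.h"

    // Hypothetical wrapper: configure the thread count when the backend is the
    // CPU backend, then forward the bool result of the computation to the caller.
    static bool compute_graph(ggml_backend_t backend, struct ggml_cgraph * gf, int n_threads) {
        if (ggml_backend_is_cpu(backend)) {
            ggml_backend_cpu_set_n_threads(backend, n_threads);
        }
        return ggml_backend_graph_compute(backend, gf);
    }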
ggml-cuda.cu CHANGED
@@ -9910,7 +9910,7 @@ static void ggml_backend_cuda_graph_plan_compute(ggml_backend_t backend, ggml_ba
     UNUSED(plan);
 }

-static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
+static bool ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
     ggml_backend_context_cuda * cuda_ctx = (ggml_backend_context_cuda *)backend->context;

     ggml_cuda_set_main_device(cuda_ctx->device);
@@ -9967,6 +9967,8 @@ static void ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph
     }

     UNUSED(backend);
+
+    return true;
 }

 static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, const ggml_tensor * op) {
ggml-metal.h CHANGED
@@ -87,7 +87,7 @@ int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);

 // same as ggml_graph_compute but uses Metal
 // creates gf->n_threads command buffers in parallel
-void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
+bool ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);

 //
 // backend API
ggml-metal.m CHANGED
@@ -977,7 +977,7 @@ static bool ggml_metal_supports_op(const struct ggml_tensor * op) {
             return false;
     }
 }
-void ggml_metal_graph_compute(
+bool ggml_metal_graph_compute(
         struct ggml_metal_context * ctx,
                struct ggml_cgraph * gf) {
     @autoreleasepool {
@@ -2405,10 +2405,11 @@ void ggml_metal_graph_compute(
         MTLCommandBufferStatus status = (MTLCommandBufferStatus) [ctx->command_buffers[i] status];
         if (status != MTLCommandBufferStatusCompleted) {
             GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
-            GGML_ASSERT(false);
+            return false;
         }
     }

+    return true;
     }
 }

@@ -2688,10 +2689,10 @@ static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffer_type(ggm
     UNUSED(backend);
 }

-static void ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+static bool ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;

-    ggml_metal_graph_compute(metal_ctx, cgraph);
+    return ggml_metal_graph_compute(metal_ctx, cgraph);
 }

 static bool ggml_backend_metal_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
whisper.cpp CHANGED
@@ -152,7 +152,7 @@ static void whisper_log_callback_default(ggml_log_level level, const char * text
 // ggml helpers
 //

-static void ggml_graph_compute_helper(
+static bool ggml_graph_compute_helper(
         struct ggml_cgraph * graph,
         std::vector<uint8_t> & buf,
         int n_threads,
@@ -168,10 +168,10 @@ static void ggml_graph_compute_helper(
         plan.work_data = buf.data();
     }

-    ggml_graph_compute(graph, &plan);
+    return ggml_graph_compute(graph, &plan);
 }

-static void ggml_graph_compute_helper(
+static bool ggml_graph_compute_helper(
         struct ggml_backend * backend,
         struct ggml_cgraph * graph,
         int n_threads) {
@@ -183,7 +183,7 @@ static void ggml_graph_compute_helper(
         ggml_backend_metal_set_n_cb(backend, n_threads);
     }
 #endif
-    ggml_backend_graph_compute(backend, graph);
+    return ggml_backend_graph_compute(backend, graph);
 }

 // faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
@@ -2103,7 +2103,9 @@ static bool whisper_encode_internal(
         ggml_allocr_alloc_graph(alloc, gf);

         if (!whisper_encode_external(wstate)) {
-            ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+            if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+                return false;
+            }
         }
     }

@@ -2117,7 +2119,9 @@ static bool whisper_encode_internal(

         ggml_allocr_alloc_graph(alloc, gf);

-        ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+            return false;
+        }
     }

     // cross
@@ -2130,7 +2134,9 @@ static bool whisper_encode_internal(

         ggml_allocr_alloc_graph(alloc, gf);

-        ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+            return false;
+        }
     }

     wstate.t_encode_us += ggml_time_us() - t_start_us;
@@ -2552,7 +2558,9 @@ static bool whisper_decode_internal(

         logits = gf->nodes[gf->n_nodes - 1];

-        ggml_graph_compute_helper(wstate.backend, gf, n_threads);
+        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+            return false;
+        }
     }

     logits_out.resize(n_tokens*n_vocab);
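At the application level, a graph-compute failure now makes whisper_encode_internal / whisper_decode_internal return false, which surfaces as a non-zero return code from the public API. A minimal caller sketch (error handling only; the transcribe helper is hypothetical and assumes the usual whisper.h API with 16 kHz mono PCM in samples):

    #include <stdio.h>
    #include "whisper.h"

    // Hypothetical caller: a failed graph computation inside the encoder or
    // decoder now propagates up as a non-zero result from whisper_full().
    static int transcribe(struct whisper_context * ctx, const float * samples, int n_samples) {
        struct whisper_full_params params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
        if (whisper_full(ctx, params, samples, n_samples) != 0) {
            fprintf(stderr, "transcribe: whisper_full() failed\n");
            return 1;
        }
        return 0;
    }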