Diego Devesa committed on
Commit
8f2e8d6
·
1 Parent(s): f1535d7

cuda : fix device sync on buffer clear (llama/14033)

Browse files
Files changed (1) hide show
  1. ggml/src/ggml-cuda/ggml-cuda.cu +2 -3
ggml/src/ggml-cuda/ggml-cuda.cu CHANGED
@@ -615,9 +615,8 @@ static void ggml_backend_cuda_buffer_clear(ggml_backend_buffer_t buffer, uint8_t
615
  ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
616
 
617
  ggml_cuda_set_device(ctx->device);
618
- CUDA_CHECK(cudaDeviceSynchronize());
619
- CUDA_CHECK(cudaMemset(ctx->dev_ptr, value, buffer->size));
620
- CUDA_CHECK(cudaDeviceSynchronize());
621
  }
622
 
623
  static const ggml_backend_buffer_i ggml_backend_cuda_buffer_interface = {
 
615
  ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
616
 
617
  ggml_cuda_set_device(ctx->device);
618
+ CUDA_CHECK(cudaMemsetAsync(ctx->dev_ptr, value, buffer->size, cudaStreamPerThread));
619
+ CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
 
620
  }
621
 
622
  static const ggml_backend_buffer_i ggml_backend_cuda_buffer_interface = {