ggerganov committed · Commit 03a3210 · unverified · Parent(s): d6276a7

whisper : make large version explicit + fix data size units (#1493)

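Two separate changes share this commit: every bare "large" model reference becomes the explicit "large-v3", and the size figures printed in logs and tables are put on consistent units. Previously, byte counts were divided by 1024.0/1024.0 (binary mebibytes) but labeled "MB"; the updated code divides by 1e6 wherever it prints "MB" and uses "MiB"/"GiB" for the binary on-disk sizes. A minimal sketch of the two conventions, with a hypothetical 1.5 GB byte count (illustrative only, not a value from the commit):

```sh
#!/bin/bash
# Illustrative only: the two size conventions this commit separates.
bytes=1500000000   # hypothetical byte count

awk -v b="$bytes" 'BEGIN { printf "%.2f MiB (binary:  b / 1024 / 1024)\n", b / 1024 / 1024 }'
awk -v b="$bytes" 'BEGIN { printf "%.2f MB  (decimal: b / 1e6)\n",         b / 1e6 }'
```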
Makefile CHANGED
@@ -418,9 +418,9 @@ samples:
 .PHONY: medium
 .PHONY: large-v1
 .PHONY: large-v2
-.PHONY: large
+.PHONY: large-v3
 
-tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large: main
+tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
     bash ./models/download-ggml-model.sh $@
     @echo ""
     @echo "==============================================="
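With the Makefile change above, the model version must be named explicitly when downloading; the old `large` target no longer exists. A usage sketch (run from the repository root; `large-v3` is the target added above):

```sh
# Fetch the new explicitly-versioned model via the Makefile target.
make large-v3

# Equivalent direct call to the script that the Makefile target invokes.
bash ./models/download-ggml-model.sh large-v3
```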
README.md CHANGED
@@ -231,18 +231,18 @@ make medium.en
 make medium
 make large-v1
 make large-v2
-make large
+make large-v3
 ```
 
 ## Memory usage
 
-| Model  | Disk   | Mem     | SHA                                        |
-| ---    | ---    | ---     | ---                                        |
-| tiny   | 75 MB  | ~125 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
-| base   | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
-| small  | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
-| medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
-| large  | 2.9 GB | ~3.3 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
+| Model  | Disk    | Mem     |
+| ---    | ---     | ---     |
+| tiny   |  75 MiB | ~273 MB |
+| base   | 142 MiB | ~388 MB |
+| small  | 466 MiB | ~852 MB |
+| medium | 1.5 GiB | ~2.1 GB |
+| large  | 2.9 GiB | ~3.9 GB |
 
 ## Quantization
 
bindings/go/examples/go-model-download/main.go CHANGED
@@ -24,7 +24,7 @@ const (
 
 var (
     // The models which will be downloaded, if no model is specified as an argument
-    modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large"}
+    modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
 )
 
 var (
examples/livestream.sh CHANGED
@@ -48,7 +48,7 @@ if [ -n "$3" ]; then
 fi
 
 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
 
 # list available models
 function list_models {
examples/twitch.sh CHANGED
@@ -21,7 +21,7 @@ help()
     echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
     echo "options:"
     echo "-s       Step in seconds (default is $step)."
-    echo "-m       Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large' (default is '$model')."
+    echo "-m       Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' (default is '$model')."
     echo "-t       Number of threads to use."
     echo "-h       Print this help page."
     echo
extra/convert-all.sh CHANGED
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
 
 for model in "${models[@]}"; do
     python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
ggml-metal.m CHANGED
@@ -346,9 +346,9 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
     }
 
     GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
-    GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+    GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1e6);
     if (ctx->device.maxTransferRate != 0) {
-        GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+        GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1e6);
     } else {
         GGML_METAL_LOG_INFO("%s: maxTransferRate = built-in GPU\n", __func__);
     }
@@ -541,11 +541,11 @@ bool ggml_metal_add_buffer(
         ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:data length:size_aligned options:MTLResourceStorageModeShared deallocator:nil];
 
         if (ctx->buffers[ctx->n_buffers].metal == nil) {
-            GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
+            GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1e6);
             return false;
         }
 
-        GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
+        GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1e6);
 
         ++ctx->n_buffers;
     } else {
@@ -565,11 +565,11 @@ bool ggml_metal_add_buffer(
             ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:(void *) ((uint8_t *) data + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil];
 
             if (ctx->buffers[ctx->n_buffers].metal == nil) {
-                GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
+                GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1e6);
                 return false;
             }
 
-            GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
+            GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1e6, i);
             if (i + size_step < size) {
                 GGML_METAL_LOG_INFO("\n");
             }
@@ -580,8 +580,8 @@ bool ggml_metal_add_buffer(
 
 #if TARGET_OS_OSX
         GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
-            ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
-            ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+            ctx->device.currentAllocatedSize / 1e6,
+            ctx->device.recommendedMaxWorkingSetSize / 1e6);
 
         if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
             GGML_METAL_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__);
@@ -589,7 +589,7 @@ bool ggml_metal_add_buffer(
             GGML_METAL_LOG_INFO("\n");
         }
 #else
-        GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1024.0 / 1024.0);
+        GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1e6);
 #endif
     }
 
models/README.md CHANGED
@@ -39,19 +39,19 @@ https://huggingface.co/ggerganov/whisper.cpp/tree/main
 
 ## Available models
 
-| Model     | Disk   | Mem     | SHA                                        |
-| ---       | ---    | ---     | ---                                        |
-| tiny      | 75 MB  | ~390 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
-| tiny.en   | 75 MB  | ~390 MB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
-| base      | 142 MB | ~500 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
-| base.en   | 142 MB | ~500 MB | `137c40403d78fd54d454da0f9bd998f78703390c` |
-| small     | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
-| small.en  | 466 MB | ~1.0 GB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
-| medium    | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
-| medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
-| large-v1  | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
-| large-v2  | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
-| large     | 2.9 GB | ~4.7 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
+| Model     | Disk    | SHA                                        |
+| ---       | ---     | ---                                        |
+| tiny      |  75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
+| tiny.en   |  75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
+| base      | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
+| base.en   | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` |
+| small     | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
+| small.en  | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
+| medium    | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
+| medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
+| large-v1  | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
+| large-v2  | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
+| large-v3  | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
 
 ## Model files for testing purposes
 
models/convert-h5-to-coreml.py CHANGED
@@ -78,14 +78,14 @@ def convert_hf_whisper(hf_model_name_or_path: str, whisper_state_path: str):
 # Ported from models/convert-whisper-to-coreml.py
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
+    parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
     parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True)
     parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
     parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
     parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
     args = parser.parse_args()
 
-    if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
+    if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
         raise ValueError("Invalid model name")
 
     pt_target_path = f"models/hf-{args.model_name}.pt"
models/convert-whisper-to-coreml.py CHANGED
@@ -296,13 +296,13 @@ def convert_decoder(hparams, model, quantize=False):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
+    parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
     parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
     parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
     parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
     args = parser.parse_args()
 
-    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large", "large-v1", "large-v2"]:
+    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
         raise ValueError("Invalid model name")
 
     whisper = load_model(args.model).cpu()
models/convert-whisper-to-openvino.py CHANGED
@@ -38,10 +38,10 @@ def convert_encoder(hparams, encoder, mname):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
+    parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
     args = parser.parse_args()
 
-    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
+    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
         raise ValueError("Invalid model name")
 
     whisper = load_model(args.model).cpu()
models/download-coreml-model.sh CHANGED
@@ -19,7 +19,7 @@ function get_script_path() {
 models_path="$(get_script_path)"
 
 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
 
 # list available models
 function list_models {
models/download-ggml-model.cmd CHANGED
@@ -8,7 +8,7 @@ popd
 set argc=0
 for %%x in (%*) do set /A argc+=1
 
-set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large
+set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3
 
 if %argc% neq 1 (
   echo.
models/download-ggml-model.sh CHANGED
@@ -22,7 +22,7 @@ function get_script_path() {
 models_path="$(get_script_path)"
 
 # Whisper models
-models=(
+models=(
 "tiny.en"
 "tiny"
 "tiny-q5_1"
@@ -42,7 +42,7 @@ models=(
 "medium.en-q5_0"
 "large-v1"
 "large-v2"
-"large"
+"large-v3"
 "large-q5_0"
 )
 
tests/run-tests.sh CHANGED
@@ -19,7 +19,7 @@
 cd `dirname $0`
 
 # Whisper models
-models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
+models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
 
 # list available models
 function list_models {
whisper.cpp CHANGED
@@ -1522,7 +1522,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
 
         model.buffer = ggml_backend_alloc_buffer(wctx.backend, size_main);
 
-        WHISPER_LOG_INFO("%s: %8s buffer size = %8.2f MB\n", __func__, ggml_backend_name(wctx.backend), size_main / 1024.0 / 1024.0);
+        WHISPER_LOG_INFO("%s: %8s buffer size = %8.2f MB\n", __func__, ggml_backend_name(wctx.backend), size_main / 1e6);
     }
 
     ggml_allocr * alloc = ggml_allocr_new_from_buffer(model.buffer);
@@ -1637,12 +1637,12 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
                 ggml_backend_tensor_set(tensor, read_buf.data(), 0, ggml_nbytes(tensor));
             }
 
-            //printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ggml_type_name((ggml_type) ttype), ggml_nbytes(tensor)/1024.0/1024.0);
+            //printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ggml_type_name((ggml_type) ttype), ggml_nbytes(tensor)/1e6);
             total_size += ggml_nbytes(tensor);
             model.n_loaded++;
         }
 
-        WHISPER_LOG_INFO("%s: model size = %7.2f MB\n", __func__, total_size/1024.0/1024.0);
+        WHISPER_LOG_INFO("%s: model size = %7.2f MB\n", __func__, total_size/1e6);
 
         if (model.n_loaded == 0) {
             WHISPER_LOG_WARN("%s: WARN no tensors loaded from model file - assuming empty model for testing\n", __func__);
@@ -2027,11 +2027,11 @@ static struct ggml_cgraph * whisper_build_graph_encoder(
     ////////////////////////////////////////////////////////////////////////////
 
     //printf("%s: used_mem = %f MB, %f MB, %f MB %f MB %f MB\n", __func__,
-    //        ggml_used_mem(ctx0)/1024.0/1024.0,
-    //        wstate.get_buf_max_mem(0)/1024.0/1024.0,
-    //        wstate.get_buf_max_mem(1)/1024.0/1024.0,
-    //        wstate.get_buf_max_mem(2)/1024.0/1024.0,
-    //        wstate.get_buf_max_mem(3)/1024.0/1024.0);
+    //        ggml_used_mem(ctx0)/1e6,
+    //        wstate.get_buf_max_mem(0)/1e6,
+    //        wstate.get_buf_max_mem(1)/1e6,
+    //        wstate.get_buf_max_mem(2)/1e6,
+    //        wstate.get_buf_max_mem(3)/1e6);
 
     ggml_free(ctx0);
 
@@ -2613,11 +2613,11 @@ static bool whisper_decode_internal(
 
     if (batch.n_tokens > 1) {
         //printf("%s: used_mem = %f MB, %f MB, %f MB %f MB %f MB\n", __func__,
-        //        ggml_used_mem(ctx0)/1024.0/1024.0,
-        //        wstate.get_buf_max_mem(0)/1024.0/1024.0,
-        //        wstate.get_buf_max_mem(1)/1024.0/1024.0,
-        //        wstate.get_buf_max_mem(2)/1024.0/1024.0,
-        //        wstate.get_buf_max_mem(3)/1024.0/1024.0);
+        //        ggml_used_mem(ctx0)/1e6,
+        //        wstate.get_buf_max_mem(0)/1e6,
+        //        wstate.get_buf_max_mem(1)/1e6,
+        //        wstate.get_buf_max_mem(2)/1e6,
+        //        wstate.get_buf_max_mem(3)/1e6);
     }
 
     if (batch.n_tokens == 1) {
@@ -3057,7 +3057,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
 
     {
         const size_t memory_size = ggml_nbytes(state->kv_self.k) + ggml_nbytes(state->kv_self.v);
-        WHISPER_LOG_INFO("%s: kv self size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0);
+        WHISPER_LOG_INFO("%s: kv self size = %7.2f MB\n", __func__, memory_size / 1e6);
     }
 
     if (!kv_cache_init(ctx->model.hparams, state->kv_cross, ctx->backend, ctx->itype, ctx->model.hparams.n_audio_ctx)) {
@@ -3068,7 +3068,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
 
     {
        const size_t memory_size = ggml_nbytes(state->kv_cross.k) + ggml_nbytes(state->kv_cross.v);
-        WHISPER_LOG_INFO("%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1024.0 / 1024.0);
+        WHISPER_LOG_INFO("%s: kv cross size = %7.2f MB\n", __func__, memory_size / 1e6);
     }
 
 #ifdef WHISPER_USE_COREML
@@ -3110,7 +3110,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
                 return whisper_build_graph_conv(*ctx, *state, 0);
             });
 
-        WHISPER_LOG_INFO("%s: compute buffer (conv) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_conv) / 1024.0 / 1024.0);
+        WHISPER_LOG_INFO("%s: compute buffer (conv) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_conv) / 1e6);
     }
 
     // encoder allocator
@@ -3120,7 +3120,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
                 return whisper_build_graph_encoder(*ctx, *state);
             });
 
-        WHISPER_LOG_INFO("%s: compute buffer (encode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_encode) / 1024.0 / 1024.0);
+        WHISPER_LOG_INFO("%s: compute buffer (encode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_encode) / 1e6);
     }
 
     // cross allocator
@@ -3130,7 +3130,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
                 return whisper_build_graph_cross(*ctx, *state);
             });
 
-        WHISPER_LOG_INFO("%s: compute buffer (cross) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_cross) / 1024.0 / 1024.0);
+        WHISPER_LOG_INFO("%s: compute buffer (cross) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_cross) / 1e6);
     }
 
     // decoder allocator
@@ -3148,7 +3148,7 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
                 return whisper_build_graph_decoder(*ctx, *state, state->batch);
             });
 
-        WHISPER_LOG_INFO("%s: compute buffer (decode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_decode) / 1024.0 / 1024.0);
+        WHISPER_LOG_INFO("%s: compute buffer (decode) = %7.2f MB\n", __func__, whisper_allocr_size(state->alloc_decode) / 1e6);
     }
 
     whisper_allocr_graph_realloc(state->alloc_conv, ctx->backend);
@@ -6072,8 +6072,8 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
     size_t n = 20;
     size_t arr = n_threads > 0 ? 1024llu : n_threads; // trick to avoid compiler optimizations
 
-    // 1GB MB array
-    const size_t size = arr*1024llu*1024llu;
+    // 1GB array
+    const size_t size = arr*1e9;
 
     // single-thread
     {
@@ -6099,7 +6099,7 @@ WHISPER_API const char * whisper_bench_memcpy_str(int n_threads) {
         src[rand() % size] = rand() % 256;
     }
 
-    snprintf(strbuf, sizeof(strbuf), "memcpy: %.2f GB/s (1 thread)\n", (double) (n*size)/(tsum*1024llu*1024llu*1024llu));
+    snprintf(strbuf, sizeof(strbuf), "memcpy: %.2f GB/s (1 thread)\n", (double) (n*size)/(tsum*1e9));
     s += strbuf;
 
     // needed to prevent the compiler from optimizing the memcpy away