ggerganov committed on
Commit
793fa90
·
unverified ·
1 Parent(s): 88c40d2

models : add the new "large" model release by OpenAI

Browse files

The old "large" model is now renamed "large-v1".
If you have been using it, make sure to rename it and download the new
"large" model for best results.

Makefile CHANGED
@@ -189,9 +189,10 @@ samples:
189
  .PHONY: small
190
  .PHONY: medium.en
191
  .PHONY: medium
 
192
  .PHONY: large
193
 
194
- tiny.en tiny base.en base small.en small medium.en medium large: main
195
  bash ./models/download-ggml-model.sh $@
196
  @echo ""
197
  @echo "==============================================="
 
189
  .PHONY: small
190
  .PHONY: medium.en
191
  .PHONY: medium
192
+ .PHONY: large-v1
193
  .PHONY: large
194
 
195
+ tiny.en tiny base.en base small.en small medium.en medium large-v1 large: main
196
  bash ./models/download-ggml-model.sh $@
197
  @echo ""
198
  @echo "==============================================="
README.md CHANGED
@@ -206,6 +206,7 @@ make small.en
206
  make small
207
  make medium.en
208
  make medium
 
209
  make large
210
  ```
211
 
@@ -217,7 +218,7 @@ make large
217
  | base | 142 MB | ~500 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
218
  | small | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
219
  | medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
220
- | large | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
221
 
222
  ## Another example
223
 
 
206
  make small
207
  make medium.en
208
  make medium
209
+ make large-v1
210
  make large
211
  ```
212
 
 
218
  | base | 142 MB | ~500 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
219
  | small | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
220
  | medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
221
+ | large | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
222
 
223
  ## Another example
224
 
examples/livestream.sh CHANGED
@@ -34,7 +34,7 @@ if [ -n "$3" ]; then
34
  fi
35
 
36
  # Whisper models
37
- models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
38
 
39
  # list available models
40
  function list_models {
 
34
  fi
35
 
36
  # Whisper models
37
+ models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
38
 
39
  # list available models
40
  function list_models {
extra/convert-all.sh CHANGED
@@ -1,6 +1,6 @@
1
  #!/bin/bash
2
 
3
- models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
4
 
5
  for model in "${models[@]}"; do
6
  python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
 
1
  #!/bin/bash
2
 
3
+ models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
4
 
5
  for model in "${models[@]}"; do
6
  python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/
models/README.md CHANGED
@@ -37,7 +37,8 @@ https://huggingface.co/datasets/ggerganov/whisper.cpp/tree/main
37
  | small.en | 466 MB | ~1.0 GB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
38
  | medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
39
  | medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
40
- | large | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
 
41
 
42
  ## Model files for testing purposes
43
 
 
37
  | small.en | 466 MB | ~1.0 GB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
38
  | medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
39
  | medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
40
+ | large-v1 | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
41
+ | large | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
42
 
43
  ## Model files for testing purposes
44
 
models/download-ggml-model.cmd CHANGED
@@ -7,7 +7,7 @@ popd
7
  set argc=0
8
  for %%x in (%*) do set /A argc+=1
9
 
10
- set models=tiny.en tiny base.en base small.en small medium.en medium large
11
 
12
  if %argc% neq 1 (
13
  echo.
 
7
  set argc=0
8
  for %%x in (%*) do set /A argc+=1
9
 
10
+ set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large
11
 
12
  if %argc% neq 1 (
13
  echo.
models/download-ggml-model.sh CHANGED
@@ -22,7 +22,7 @@ function get_script_path() {
22
  models_path=$(get_script_path)
23
 
24
  # Whisper models
25
- models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
26
 
27
  # list available models
28
  function list_models {
 
22
  models_path=$(get_script_path)
23
 
24
  # Whisper models
25
+ models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
26
 
27
  # list available models
28
  function list_models {
tests/run-tests.sh CHANGED
@@ -19,7 +19,7 @@
19
  cd `dirname $0`
20
 
21
  # Whisper models
22
- models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large" )
23
 
24
  # list available models
25
  function list_models {
@@ -107,7 +107,7 @@ function run_lang() {
107
 
108
  $main -m ../models/ggml-$model.bin -f $fname_dst -l $lang -otxt 2> /dev/null
109
 
110
- git diff --no-index --word-diff=color --word-diff-regex=. $fname_dst.txt $lang-$i-ref.txt
111
 
112
  i=$(($i+1))
113
  done
 
19
  cd `dirname $0`
20
 
21
  # Whisper models
22
+ models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large" )
23
 
24
  # list available models
25
  function list_models {
 
107
 
108
  $main -m ../models/ggml-$model.bin -f $fname_dst -l $lang -otxt 2> /dev/null
109
 
110
+ git diff --no-index --word-diff=color --word-diff-regex=. $lang-$i-ref.txt $fname_dst.txt
111
 
112
  i=$(($i+1))
113
  done