bebound committed on
Commit
a5ad309
·
unverified ·
1 Parent(s): 8eaa345

readme : update help (#1560)

Browse files
Files changed (2) hide show
  1. README.md +5 -3
  2. examples/main/README.md +12 -3
README.md CHANGED
@@ -110,8 +110,8 @@ options:
110
  -mc N, --max-context N [-1 ] maximum number of text context tokens to store
111
  -ml N, --max-len N [0 ] maximum segment length in characters
112
  -sow, --split-on-word [false ] split on word rather than on token
113
- -bo N, --best-of N [2 ] number of best candidates to keep
114
- -bs N, --beam-size N [-1 ] beam size for beam search
115
  -wt N, --word-thold N [0.01 ] word timestamp probability threshold
116
  -et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
117
  -lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
@@ -128,6 +128,7 @@ options:
128
  -fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
129
  -ocsv, --output-csv [false ] output result in a CSV file
130
  -oj, --output-json [false ] output result in a JSON file
 
131
  -of FNAME, --output-file FNAME [ ] output file path (without file extension)
132
  -ps, --print-special [false ] print special tokens
133
  -pc, --print-colors [false ] print colors
@@ -139,7 +140,8 @@ options:
139
  -m FNAME, --model FNAME [models/ggml-base.en.bin] model path
140
  -f FNAME, --file FNAME [ ] input WAV file path
141
  -oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
142
- -ls, --log-score [false ] log best decoder scores of token
 
143
 
144
 
145
  bash ./models/download-ggml-model.sh base.en
 
110
  -mc N, --max-context N [-1 ] maximum number of text context tokens to store
111
  -ml N, --max-len N [0 ] maximum segment length in characters
112
  -sow, --split-on-word [false ] split on word rather than on token
113
+ -bo N, --best-of N [5 ] number of best candidates to keep
114
+ -bs N, --beam-size N [5 ] beam size for beam search
115
  -wt N, --word-thold N [0.01 ] word timestamp probability threshold
116
  -et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
117
  -lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
 
128
  -fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
129
  -ocsv, --output-csv [false ] output result in a CSV file
130
  -oj, --output-json [false ] output result in a JSON file
131
+ -ojf, --output-json-full [false ] include more information in the JSON file
132
  -of FNAME, --output-file FNAME [ ] output file path (without file extension)
133
  -ps, --print-special [false ] print special tokens
134
  -pc, --print-colors [false ] print colors
 
140
  -m FNAME, --model FNAME [models/ggml-base.en.bin] model path
141
  -f FNAME, --file FNAME [ ] input WAV file path
142
  -oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
143
+ -ls, --log-score [false ] log best decoder scores of tokens
144
+ -ng, --no-gpu [false ] disable GPU
145
 
146
 
147
  bash ./models/download-ggml-model.sh base.en
examples/main/README.md CHANGED
@@ -17,28 +17,37 @@ options:
17
  -d N, --duration N [0 ] duration of audio to process in milliseconds
18
  -mc N, --max-context N [-1 ] maximum number of text context tokens to store
19
  -ml N, --max-len N [0 ] maximum segment length in characters
 
20
  -bo N, --best-of N [5 ] number of best candidates to keep
21
- -bs N, --beam-size N [-1 ] beam size for beam search
22
  -wt N, --word-thold N [0.01 ] word timestamp probability threshold
23
  -et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
24
  -lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
25
- -su, --speed-up [false ] speed up audio by x2 (reduced accuracy)
26
  -tr, --translate [false ] translate from source language to english
27
  -di, --diarize [false ] stereo audio diarization
 
28
  -nf, --no-fallback [false ] do not use temperature fallback while decoding
29
  -otxt, --output-txt [false ] output result in a text file
30
  -ovtt, --output-vtt [false ] output result in a vtt file
31
  -osrt, --output-srt [false ] output result in a srt file
 
32
  -owts, --output-words [false ] output script for generating karaoke video
 
33
  -ocsv, --output-csv [false ] output result in a CSV file
34
  -oj, --output-json [false ] output result in a JSON file
 
35
  -of FNAME, --output-file FNAME [ ] output file path (without file extension)
36
  -ps, --print-special [false ] print special tokens
37
  -pc, --print-colors [false ] print colors
38
  -pp, --print-progress [false ] print progress
39
- -nt, --no-timestamps [true ] do not print timestamps
40
  -l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
 
41
  --prompt PROMPT [ ] initial prompt
42
  -m FNAME, --model FNAME [models/ggml-base.en.bin] model path
43
  -f FNAME, --file FNAME [ ] input WAV file path
 
 
 
44
  ```
 
17
  -d N, --duration N [0 ] duration of audio to process in milliseconds
18
  -mc N, --max-context N [-1 ] maximum number of text context tokens to store
19
  -ml N, --max-len N [0 ] maximum segment length in characters
20
+ -sow, --split-on-word [false ] split on word rather than on token
21
  -bo N, --best-of N [5 ] number of best candidates to keep
22
+ -bs N, --beam-size N [5 ] beam size for beam search
23
  -wt N, --word-thold N [0.01 ] word timestamp probability threshold
24
  -et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
25
  -lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
26
+ -debug, --debug-mode [false ] enable debug mode (eg. dump log_mel)
27
  -tr, --translate [false ] translate from source language to english
28
  -di, --diarize [false ] stereo audio diarization
29
+ -tdrz, --tinydiarize [false ] enable tinydiarize (requires a tdrz model)
30
  -nf, --no-fallback [false ] do not use temperature fallback while decoding
31
  -otxt, --output-txt [false ] output result in a text file
32
  -ovtt, --output-vtt [false ] output result in a vtt file
33
  -osrt, --output-srt [false ] output result in a srt file
34
+ -olrc, --output-lrc [false ] output result in a lrc file
35
  -owts, --output-words [false ] output script for generating karaoke video
36
+ -fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
37
  -ocsv, --output-csv [false ] output result in a CSV file
38
  -oj, --output-json [false ] output result in a JSON file
39
+ -ojf, --output-json-full [false ] include more information in the JSON file
40
  -of FNAME, --output-file FNAME [ ] output file path (without file extension)
41
  -ps, --print-special [false ] print special tokens
42
  -pc, --print-colors [false ] print colors
43
  -pp, --print-progress [false ] print progress
44
+ -nt, --no-timestamps [false ] do not print timestamps
45
  -l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
46
+ -dl, --detect-language [false ] exit after automatically detecting language
47
  --prompt PROMPT [ ] initial prompt
48
  -m FNAME, --model FNAME [models/ggml-base.en.bin] model path
49
  -f FNAME, --file FNAME [ ] input WAV file path
50
+ -oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
51
+ -ls, --log-score [false ] log best decoder scores of tokens
52
+ -ng, --no-gpu [false ] disable GPU
53
  ```