intheblueyonder committed on
Commit
38f9f3b
·
unverified ·
1 Parent(s): 984a856

go : exposed various parts to the Go Interface (#697)

Browse files
bindings/go/params.go CHANGED
@@ -105,6 +105,10 @@ func (p *Params) SetMaxSegmentLength(n int) {
105
  p.max_len = C.int(n)
106
  }
107
 
 
 
 
 
108
  // Set max tokens per segment (0 = no limit)
109
  func (p *Params) SetMaxTokensPerSegment(n int) {
110
  p.max_tokens = C.int(n)
 
105
  p.max_len = C.int(n)
106
  }
107
 
108
+ func (p *Params) SetTokenTimestamps(b bool) {
109
+ p.token_timestamps = toBool(b)
110
+ }
111
+
112
  // Set max tokens per segment (0 = no limit)
113
  func (p *Params) SetMaxTokensPerSegment(n int) {
114
  p.max_tokens = C.int(n)
bindings/go/pkg/whisper/context.go CHANGED
@@ -111,6 +111,11 @@ func (context *context) SetMaxSegmentLength(n uint) {
111
  context.params.SetMaxSegmentLength(int(n))
112
  }
113
 
 
 
 
 
 
114
  // Set max tokens per segment (0 = no limit)
115
  func (context *context) SetMaxTokensPerSegment(n uint) {
116
  context.params.SetMaxTokensPerSegment(int(n))
@@ -280,10 +285,14 @@ func toSegment(ctx *whisper.Context, n int) Segment {
280
  func toTokens(ctx *whisper.Context, n int) []Token {
281
  result := make([]Token, ctx.Whisper_full_n_tokens(n))
282
  for i := 0; i < len(result); i++ {
 
 
283
  result[i] = Token{
284
- Id: int(ctx.Whisper_full_get_token_id(n, i)),
285
- Text: strings.TrimSpace(ctx.Whisper_full_get_token_text(n, i)),
286
- P: ctx.Whisper_full_get_token_p(n, i),
 
 
287
  }
288
  }
289
  return result
 
111
  context.params.SetMaxSegmentLength(int(n))
112
  }
113
 
114
+ // Set token timestamps flag
115
+ func (context *context) SetTokenTimestamps(b bool) {
116
+ context.params.SetTokenTimestamps(b)
117
+ }
118
+
119
  // Set max tokens per segment (0 = no limit)
120
  func (context *context) SetMaxTokensPerSegment(n uint) {
121
  context.params.SetMaxTokensPerSegment(int(n))
 
285
  func toTokens(ctx *whisper.Context, n int) []Token {
286
  result := make([]Token, ctx.Whisper_full_n_tokens(n))
287
  for i := 0; i < len(result); i++ {
288
+ data := ctx.Whisper_full_get_token_data(n, i)
289
+
290
  result[i] = Token{
291
+ Id: int(ctx.Whisper_full_get_token_id(n, i)),
292
+ Text: ctx.Whisper_full_get_token_text(n, i),
293
+ P: ctx.Whisper_full_get_token_p(n, i),
294
+ Start: time.Duration(data.T0()) * time.Millisecond * 10,
295
+ End: time.Duration(data.T1()) * time.Millisecond * 10,
296
  }
297
  }
298
  return result
bindings/go/pkg/whisper/interface.go CHANGED
@@ -41,6 +41,7 @@ type Context interface {
41
  SetTokenThreshold(float32) // Set timestamp token probability threshold
42
  SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
43
  SetMaxSegmentLength(uint) // Set max segment length in characters
 
44
  SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
45
 
46
  // Process mono audio data and return any errors.
@@ -85,7 +86,8 @@ type Segment struct {
85
 
86
  // Token is a text or special token
87
  type Token struct {
88
- Id int
89
- Text string
90
- P float32
 
91
  }
 
41
  SetTokenThreshold(float32) // Set timestamp token probability threshold
42
  SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
43
  SetMaxSegmentLength(uint) // Set max segment length in characters
44
+ SetTokenTimestamps(bool) // Set token timestamps flag
45
  SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
46
 
47
  // Process mono audio data and return any errors.
 
86
 
87
  // Token is a text or special token
88
  type Token struct {
89
+ Id int
90
+ Text string
91
+ P float32
92
+ Start, End time.Duration
93
  }
bindings/go/whisper.go CHANGED
@@ -356,7 +356,7 @@ func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
356
 
357
  // Get token data for the specified token in the specified segment.
358
  // This contains probabilities, timestamps, etc.
359
- func (ctx *Context) whisper_full_get_token_data(segment int, token int) TokenData {
360
  return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
361
  }
362
 
@@ -407,3 +407,11 @@ func callEncoderBegin(user_data unsafe.Pointer) C.bool {
407
  }
408
  return true
409
  }
 
 
 
 
 
 
 
 
 
356
 
357
  // Get token data for the specified token in the specified segment.
358
  // This contains probabilities, timestamps, etc.
359
+ func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
360
  return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
361
  }
362
 
 
407
  }
408
  return true
409
  }
410
+
411
+ func (t TokenData) T0() int64 {
412
+ return int64(t.t0)
413
+ }
414
+
415
+ func (t TokenData) T1() int64 {
416
+ return int64(t.t1)
417
+ }