Spaces:
Sleeping
Sleeping
go : exposed various parts to the Go Interface (#697)
Browse files
bindings/go/params.go
CHANGED
|
@@ -105,6 +105,10 @@ func (p *Params) SetMaxSegmentLength(n int) {
|
|
| 105 |
p.max_len = C.int(n)
|
| 106 |
}
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
// Set max tokens per segment (0 = no limit)
|
| 109 |
func (p *Params) SetMaxTokensPerSegment(n int) {
|
| 110 |
p.max_tokens = C.int(n)
|
|
|
|
| 105 |
p.max_len = C.int(n)
|
| 106 |
}
|
| 107 |
|
| 108 |
+
func (p *Params) SetTokenTimestamps(b bool) {
|
| 109 |
+
p.token_timestamps = toBool(b)
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
// Set max tokens per segment (0 = no limit)
|
| 113 |
func (p *Params) SetMaxTokensPerSegment(n int) {
|
| 114 |
p.max_tokens = C.int(n)
|
bindings/go/pkg/whisper/context.go
CHANGED
|
@@ -111,6 +111,11 @@ func (context *context) SetMaxSegmentLength(n uint) {
|
|
| 111 |
context.params.SetMaxSegmentLength(int(n))
|
| 112 |
}
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
// Set max tokens per segment (0 = no limit)
|
| 115 |
func (context *context) SetMaxTokensPerSegment(n uint) {
|
| 116 |
context.params.SetMaxTokensPerSegment(int(n))
|
|
@@ -280,10 +285,14 @@ func toSegment(ctx *whisper.Context, n int) Segment {
|
|
| 280 |
func toTokens(ctx *whisper.Context, n int) []Token {
|
| 281 |
result := make([]Token, ctx.Whisper_full_n_tokens(n))
|
| 282 |
for i := 0; i < len(result); i++ {
|
|
|
|
|
|
|
| 283 |
result[i] = Token{
|
| 284 |
-
Id:   int(ctx.Whisper_full_get_token_id(n, i)),
|
| 285 |
-
Text:
|
| 286 |
-
P:    ctx.Whisper_full_get_token_p(n, i),
|
|
|
|
|
|
|
| 287 |
}
|
| 288 |
}
|
| 289 |
return result
|
|
|
|
| 111 |
context.params.SetMaxSegmentLength(int(n))
|
| 112 |
}
|
| 113 |
|
| 114 |
+
// Set token timestamps flag
|
| 115 |
+
func (context *context) SetTokenTimestamps(b bool) {
|
| 116 |
+
context.params.SetTokenTimestamps(b)
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
// Set max tokens per segment (0 = no limit)
|
| 120 |
func (context *context) SetMaxTokensPerSegment(n uint) {
|
| 121 |
context.params.SetMaxTokensPerSegment(int(n))
|
|
|
|
| 285 |
func toTokens(ctx *whisper.Context, n int) []Token {
|
| 286 |
result := make([]Token, ctx.Whisper_full_n_tokens(n))
|
| 287 |
for i := 0; i < len(result); i++ {
|
| 288 |
+
data := ctx.Whisper_full_get_token_data(n, i)
|
| 289 |
+
|
| 290 |
result[i] = Token{
|
| 291 |
+
Id: int(ctx.Whisper_full_get_token_id(n, i)),
|
| 292 |
+
Text: ctx.Whisper_full_get_token_text(n, i),
|
| 293 |
+
P: ctx.Whisper_full_get_token_p(n, i),
|
| 294 |
+
Start: time.Duration(data.T0()) * time.Millisecond * 10,
|
| 295 |
+
End: time.Duration(data.T1()) * time.Millisecond * 10,
|
| 296 |
}
|
| 297 |
}
|
| 298 |
return result
|
bindings/go/pkg/whisper/interface.go
CHANGED
|
@@ -41,6 +41,7 @@ type Context interface {
|
|
| 41 |
SetTokenThreshold(float32) // Set timestamp token probability threshold
|
| 42 |
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
|
| 43 |
SetMaxSegmentLength(uint) // Set max segment length in characters
|
|
|
|
| 44 |
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
| 45 |
|
| 46 |
// Process mono audio data and return any errors.
|
|
@@ -85,7 +86,8 @@ type Segment struct {
|
|
| 85 |
|
| 86 |
// Token is a text or special token
|
| 87 |
type Token struct {
|
| 88 |
-
Id   int
|
| 89 |
-
Text string
|
| 90 |
-
P    float32
|
|
|
|
| 91 |
}
|
|
|
|
| 41 |
SetTokenThreshold(float32) // Set timestamp token probability threshold
|
| 42 |
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
|
| 43 |
SetMaxSegmentLength(uint) // Set max segment length in characters
|
| 44 |
+
SetTokenTimestamps(bool) // Set token timestamps flag
|
| 45 |
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
|
| 46 |
|
| 47 |
// Process mono audio data and return any errors.
|
|
|
|
| 86 |
|
| 87 |
// Token is a text or special token
|
| 88 |
type Token struct {
|
| 89 |
+
Id int
|
| 90 |
+
Text string
|
| 91 |
+
P float32
|
| 92 |
+
Start, End time.Duration
|
| 93 |
}
|
bindings/go/whisper.go
CHANGED
|
@@ -356,7 +356,7 @@ func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
|
|
| 356 |
|
| 357 |
// Get token data for the specified token in the specified segment.
|
| 358 |
// This contains probabilities, timestamps, etc.
|
| 359 |
-
func (ctx *Context) whisper_full_get_token_data(segment int, token int) TokenData {
|
| 360 |
return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
|
| 361 |
}
|
| 362 |
|
|
@@ -407,3 +407,11 @@ func callEncoderBegin(user_data unsafe.Pointer) C.bool {
|
|
| 407 |
}
|
| 408 |
return true
|
| 409 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
|
| 357 |
// Get token data for the specified token in the specified segment.
|
| 358 |
// This contains probabilities, timestamps, etc.
|
| 359 |
+
func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
|
| 360 |
return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
|
| 361 |
}
|
| 362 |
|
|
|
|
| 407 |
}
|
| 408 |
return true
|
| 409 |
}
|
| 410 |
+
|
| 411 |
+
func (t TokenData) T0() int64 {
|
| 412 |
+
return int64(t.t0)
|
| 413 |
+
}
|
| 414 |
+
|
| 415 |
+
func (t TokenData) T1() int64 {
|
| 416 |
+
return int64(t.t1)
|
| 417 |
+
}
|