Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements vad_events for SpeechStarted #162

Merged
merged 1 commit into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions examples/streaming/microphone/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func (c MyCallback) Message(mr *api.MessageResponse) error {
if len(mr.Channel.Alternatives) == 0 || len(sentence) == 0 {
return nil
}
fmt.Printf("\n%s\n", sentence)
fmt.Printf("\nspeaker: %s\n", sentence)
return nil
}

Expand All @@ -41,6 +41,11 @@ func (c MyCallback) Metadata(md *api.MetadataResponse) error {
return nil
}

// SpeechStarted is invoked for each SpeechStarted message delivered to this
// callback. The example only announces on stdout that the event arrived; the
// response payload itself is not inspected.
func (c MyCallback) SpeechStarted(ssr *api.SpeechStartedResponse) error {
	const notice = "\n[SpeechStarted] Received\n"
	fmt.Print(notice)
	return nil
}

func (c MyCallback) UtteranceEnd(ur *api.UtteranceEndResponse) error {
fmt.Printf("\n[UtteranceEnd] Received\n")
return nil
Expand Down Expand Up @@ -85,8 +90,9 @@ func main() {
SampleRate: 16000,
SmartFormat: true,
// To get UtteranceEnd, the following must be set:
// InterimResults: true,
// UtteranceEndMs: "1000",
InterimResults: true,
UtteranceEndMs: "1000",
VadEvents: true,
}

// example on how to send a custom parameter
Expand Down
35 changes: 35 additions & 0 deletions pkg/api/live/v1/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,41 @@ func (dch DefaultCallbackHandler) Metadata(md *interfaces.MetadataResponse) erro
return nil
}

// SpeechStarted is the default handler for SpeechStarted messages.
//
// When the DEEPGRAM_DEBUG environment variable equals "true" (compared
// case-insensitively) the response is marshalled to JSON, pretty-printed and
// written to the klog output at verbosity 2; any marshal/format error is
// logged and returned. Otherwise the timestamp and every channel value of the
// response are printed to stdout.
func (dch DefaultCallbackHandler) SpeechStarted(ssr *interfaces.SpeechStartedResponse) error {
	debugEnv := os.Getenv("DEEPGRAM_DEBUG")
	if debugEnv != "" {
		klog.V(4).Infof("DEEPGRAM_DEBUG found")
	}

	// Normal (non-debug) path: print a human-readable summary and finish.
	if strings.ToLower(debugEnv) != "true" {
		// handle the message
		fmt.Printf("\nSpeechStarted.Timestamp: %f\n", ssr.Timestamp)
		fmt.Printf("SpeechStarted.Channels:\n")
		for idx := range ssr.Channel {
			fmt.Printf("\tChannel: %d\n", ssr.Channel[idx])
		}
		fmt.Printf("\n")

		return nil
	}

	// Debug path: dump the whole object as formatted JSON through klog.
	raw, marshalErr := json.Marshal(ssr)
	if marshalErr != nil {
		klog.V(1).Infof("SpeechStarted json.Marshal failed. Err: %v\n", marshalErr)
		return marshalErr
	}

	formatted, formatErr := prettyjson.Format(raw)
	if formatErr != nil {
		klog.V(1).Infof("prettyjson.Marshal failed. Err: %v\n", formatErr)
		return formatErr
	}
	klog.V(2).Infof("\n\nSpeechStarted Object:\n%s\n\n", formatted)

	return nil
}

func (dch DefaultCallbackHandler) UtteranceEnd(ur *interfaces.UtteranceEndResponse) error {
fmt.Printf("\nUtteranceEnd\n")
return nil
Expand Down
9 changes: 5 additions & 4 deletions pkg/api/live/v1/interfaces/constants.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2023 Deepgram SDK contributors. All Rights Reserved.
// Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT

Expand All @@ -7,9 +7,10 @@ package interfaces
// These are the message types that can be received from the live API
const (
// message types
TypeMessageResponse string = "Results"
TypeMetadataResponse string = "Metadata"
TypeUtteranceEndResponse string = "UtteranceEnd"
TypeMessageResponse string = "Results"
TypeMetadataResponse string = "Metadata"
TypeUtteranceEndResponse string = "UtteranceEnd"
TypeSpeechStartedResponse string = "SpeechStarted"

// Error type
TypeErrorResponse string = "Error"
Expand Down
1 change: 1 addition & 0 deletions pkg/api/live/v1/interfaces/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package interfaces
type LiveMessageCallback interface {
Message(mr *MessageResponse) error
Metadata(md *MetadataResponse) error
SpeechStarted(ssr *SpeechStartedResponse) error
UtteranceEnd(ur *UtteranceEndResponse) error
Error(er *ErrorResponse) error
// TODO: implement other conversation insights
Expand Down
6 changes: 6 additions & 0 deletions pkg/api/live/v1/interfaces/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ type UtteranceEndResponse struct {
LastWordEnd float64 `json:"last_word_end,omitempty"`
}

// SpeechStartedResponse is the structure a live API message of type
// "SpeechStarted" is unmarshalled into before being handed to the
// LiveMessageCallback.SpeechStarted callback.
type SpeechStartedResponse struct {
// Type is the message type discriminator (JSON key "type").
Type string `json:"type,omitempty"`
// Channel holds the integer channel values reported with the event
// (JSON key "channel").
Channel []int `json:"channel,omitempty"`
// Timestamp is the event time (JSON key "timestamp").
// NOTE(review): presumably seconds from the start of the audio stream — confirm against the API docs.
Timestamp float64 `json:"timestamp,omitempty"`
}

// ErrorResponse is the response from a live transcription
type ErrorResponse struct {
Description string `json:"description"`
Expand Down
33 changes: 33 additions & 0 deletions pkg/api/live/v1/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ func (r *MessageRouter) Message(byMsg []byte) error {
err = r.MessageResponse(byMsg)
case interfaces.TypeMetadataResponse:
err = r.MetadataResponse(byMsg)
case interfaces.TypeSpeechStartedResponse:
err = r.SpeechStartedResponse(byMsg)
case interfaces.TypeUtteranceEndResponse:
err = r.UtteranceEndResponse(byMsg)
case interfaces.TypeErrorResponse:
Expand Down Expand Up @@ -150,6 +152,37 @@ func (r *MessageRouter) MetadataResponse(byMsg []byte) error {
return nil
}

// SpeechStartedResponse decodes a raw SpeechStarted message and forwards it to
// the registered callback.
//
// byMsg is the raw JSON payload of the message. The method returns the
// json.Unmarshal error if the payload cannot be decoded, the error returned by
// the callback's SpeechStarted method if one is registered, or nil when no
// callback is defined (the message is then dropped after logging).
func (r *MessageRouter) SpeechStartedResponse(byMsg []byte) error {
	klog.V(6).Infof("router.SpeechStartedResponse ENTER\n")

	// trace debugging
	r.printDebugMessages(5, "SpeechStartedResponse", byMsg)

	var ssr interfaces.SpeechStartedResponse
	if err := json.Unmarshal(byMsg, &ssr); err != nil {
		klog.V(1).Infof("SpeechStartedResponse json.Unmarshal failed. Err: %v\n", err)
		klog.V(6).Infof("router.SpeechStartedResponse LEAVE\n")
		return err
	}

	if r.callback != nil {
		err := r.callback.SpeechStarted(&ssr)
		if err != nil {
			klog.V(1).Infof("callback.SpeechStartedResponse failed. Err: %v\n", err)
		} else {
			klog.V(5).Infof("callback.SpeechStartedResponse succeeded\n")
		}
		klog.V(6).Infof("router.SpeechStartedResponse LEAVE\n")
		return err
	}

	klog.V(1).Infof("User callback is undefined\n")
	// Every exit path must log LEAVE so ENTER/LEAVE traces stay paired.
	klog.V(6).Infof("router.SpeechStartedResponse LEAVE\n")

	return nil
}

func (r *MessageRouter) UtteranceEndResponse(byMsg []byte) error {
klog.V(6).Infof("router.UtteranceEndResponse ENTER\n")

Expand Down
1 change: 1 addition & 0 deletions pkg/client/interfaces/types-stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ type LiveTranscriptionOptions struct {
SmartFormat bool `json:"smart_format,omitempty" url:"smart_format,omitempty"`
Tag []string `json:"tag,omitempty" url:"tag,omitempty"`
UtteranceEndMs string `json:"utterance_end_ms,omitempty" url:"utterance_end_ms,omitempty"`
VadEvents bool `json:"vad_events,omitempty" url:"vad_events,omitempty"`
Version string `json:"version,omitempty" url:"version,omitempty"`
}
Loading