Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
## v0.4.1 2026-03-23
Changes:
* Reduce streamed audio packetization from 1 second to smaller realtime intervals for faster partial transcripts and SafeToStopAudio handling.
* Add GetLPCMStreamInfo to centralize LPCM chunk-size and streaming-interval calculation.
* Validate LPCM inputs more strictly (numChans, bitDepth, sampleRate, and targetStreamIntervalMs).
* Refactor the example streamer to use the shared LPCM stream info helper.
* Add isolated table-driven tests covering expected chunk sizes and streaming intervals for multiple sample rates and intervals, plus invalid-input cases.
* Replace time.Sleep() with a time.Ticker in the audio streaming example to avoid drift over time. This change also helps prevent writing any chunks after SafeToStopAudio has been received.
* Other upgrades:
* Update go version to 1.26
* Remove usage of deprecated io/ioutil
* Remove usage of deprecated github.com/pkg/errors
* Replace gotest.tools/assert with github.com/stretchr/testify/assert

## v0.3.4 2019-07-17
Features:
* Pass the SafeToStopAudio flag received from the server with the PartialTranscript (See
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ The SDK allows you to make voice and text queries to the Houndify API. The SDK c

## Requirements

- Go v1.8+
- Go v1.26+
- Houndify account available from [Houndify.com](https://www.houndify.com)

## Installing
Expand Down
77 changes: 47 additions & 30 deletions example/example.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@ import (
"crypto/tls"
"flag"
"fmt"
"github.com/go-audio/wav"
houndify "github.com/soundhound/houndify-sdk-go"
"io"
"io/ioutil"
"log"
"net/http/httptrace"
"net/textproto"
"os"
"strings"
"time"

"github.com/go-audio/wav"
houndify "github.com/soundhound/houndify-sdk-go"
)

const (
Expand Down Expand Up @@ -78,7 +78,7 @@ func main() {
case *voiceFlag != "" && !*streamFlag:
// voice query
audioFilePath := *voiceFlag
fileContents, err := ioutil.ReadFile(audioFilePath)
fileContents, err := os.ReadFile(audioFilePath)
if err != nil {
log.Fatalf("failed to read contents of file %q, err: %v", audioFilePath, err)
}
Expand Down Expand Up @@ -168,28 +168,35 @@ func main() {
}
}

// Stream an audio file to the server. This example demonstrates streaming a wav file,
// however this could easily be changed to stream audio from a microphone or something.
// Basically it just writes data from a buffer to the Request body every 1 second. The
// advantage of how golang has the http.Request's Body field is it's a Reader, so using
// io.Pipe() you can actually write any data into it. That means any stream of WAV data
// can just be piped in, and the requests will be made.
// Streams audio to the server using a WAV file as the source. While this example
// uses a file, the same pattern can be used for other sources like a microphone.
//
// This function also demonstrates how you can use the SafeToStopAudio flag to know when
// the server has all the data it needs.
// Audio is sent in frame-aligned chunks at a realtime interval to better reflect
// live streaming behavior.
//
// The request body is backed by an io.Pipe, which allows arbitrary data to be
// written as a stream. This makes it easy to feed any WAV audio source directly
// into the request as it becomes available.
//
// The function also shows how to use the SafeToStopAudio signal to determine when
// the server has received enough audio and no more data needs to be sent.
func StreamAudio(client houndify.Client, fname, uid string) {
f, err := os.Open(fname)
defer f.Close()
if err != nil {
log.Fatalf("failed to read contents of file %q, err: %v\n", fname, err)
}
defer f.Close()

// Read WAV file data, determine bytes per second
d := wav.NewDecoder(f)
d.ReadInfo()

// Use 1 second chunks
bps := int(d.AvgBytesPerSec) * 1
targetStreamingIntervalMs := 20
streamInfo, err := houndify.GetLPCMStreamInfo(int(d.NumChans),
int(d.BitDepth), int(d.SampleRate), targetStreamingIntervalMs)
if err != nil {
log.Fatalf("failed to get LPCM chunk info: %v", err)
}

// Build pipe that lets us write into the io.Reader that is in the request
rp, wp := io.Pipe()
Expand All @@ -200,36 +207,46 @@ func StreamAudio(client houndify.Client, fname, uid string) {
RequestID: createRequestID(),
}

// Start the function to write 1 second of data per 1 real second, by using a buffer
// that is the size of 1 second of data. Note that using the .Read() function results
// Start the function to stream audio in realtime
// Note that using the .Read() function results
// in the header portion of the file not being read. We have to use the ReadAt()
// function to specify starting at the very first position of the actual file, or the
// header isn't read.
var loc int64 = 0
buf := make([]byte, bps)
done := make(chan bool)
go func(wp *io.PipeWriter) {
defer wp.Close()

var (
loc int64 = 0
buf = make([]byte, streamInfo.ChunkSize())
ticker = time.NewTicker(streamInfo.StreamingInterval())
)
defer ticker.Stop()

for {
select {
case <-done:
//fmt.Println("Exiting write loop")
fmt.Println("Context received done, exiting write loop")
return
default:

case <-ticker.C:

n, err := f.ReadAt(buf, loc)
loc += int64(n)

// At the EOF, the buffer will still have bytes read into it, have to write
// those out before breaking the loop
if err == io.EOF {
if n > 0 {
loc += int64(n)
// Write the amount of bytes that were read in
wp.Write(buf[:n])
return
}

// Write the amount of bytes that were read in
wp.Write(buf[:n])
time.Sleep(time.Duration(1) * time.Second)
if err != nil {
if err != io.EOF {
// handle error
} else {
fmt.Println("Reached end of file")
}
return
}
}
}
}(wp)
Expand Down Expand Up @@ -284,7 +301,7 @@ func derefOrFetchFromEnv(strPtr *string, envKey string) string {
}

func getDefaultClientTrace() *httptrace.ClientTrace {
traceLogger := log.New(os.Stdout, "[httptrace] ", log.Ltime | log.Lmicroseconds)
traceLogger := log.New(os.Stdout, "[httptrace] ", log.Ltime|log.Lmicroseconds)
trace := &httptrace.ClientTrace{
GotConn: func(info httptrace.GotConnInfo) {
traceLogger.Println("GotConn: ", info)
Expand Down
16 changes: 11 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
module github.com/soundhound/houndify-sdk-go

go 1.12
go 1.26

require (
github.com/go-audio/wav v1.0.0
github.com/google/go-cmp v0.3.0 // indirect
github.com/pkg/errors v0.8.1
gotest.tools v2.2.0+incompatible
github.com/go-audio/wav v1.1.0
github.com/stretchr/testify v1.11.1
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-audio/audio v1.0.0 // indirect
github.com/go-audio/riff v1.0.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
20 changes: 12 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
github.com/go-audio/wav v1.0.0 h1:WdSGLhtyud6bof6XHL28xKeCQRzCV06pOFo3LZsFdyE=
github.com/go-audio/wav v1.0.0/go.mod h1:3yoReyQOsiARkvPl3ERCi8JFjihzG6WhjYpZCf5zAWE=
github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
11 changes: 5 additions & 6 deletions houndify_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@ package houndify
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"github.com/pkg/errors"
"io"
"io/ioutil"
"net/http"
"strconv"
"strings"
Expand Down Expand Up @@ -109,7 +108,7 @@ func (c *Client) TextSearch(textReq TextRequest) (string, error) {
return "", errors.New("failed to successfully run request: " + err.Error())
}

body, err := ioutil.ReadAll(resp.Body)
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", errors.New("failed to read body: " + err.Error())
}
Expand All @@ -131,7 +130,7 @@ func (c *Client) TextSearch(textReq TextRequest) (string, error) {
if c.enableConversationState {
newConvState, err := parseConversationState(bodyStr)
if err != nil {
return bodyStr, errors.Wrap(err, "unable to parse new conversation state from response")
return bodyStr, fmt.Errorf("unable to parse new conversation state from response: %w", err)
}
c.conversationState = newConvState
}
Expand Down Expand Up @@ -178,7 +177,7 @@ func (c *Client) VoiceSearch(voiceReq VoiceRequest, partialTranscriptChan chan P
if err != nil {
return "", err
}
req.Body = ioutil.NopCloser(voiceReq.AudioStream)
req.Body = io.NopCloser(voiceReq.AudioStream)

if c.HttpClient == nil {
c.HttpClient = &http.Client{}
Expand Down Expand Up @@ -262,7 +261,7 @@ func (c *Client) VoiceSearch(voiceReq VoiceRequest, partialTranscriptChan chan P
if c.enableConversationState {
newConvState, err := parseConversationState(bodyStr)
if err != nil {
return bodyStr, errors.Wrap(err, "unable to parse new conversation state from response")
return bodyStr, fmt.Errorf("unable to parse new conversation state from response: %w", err)
}
c.conversationState = newConvState
}
Expand Down
125 changes: 125 additions & 0 deletions lpcm_stream_info.go
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we move this file into the example folder to indicate that it is not part of the core SDK, but rather a set of helper functions used by the client code?

We could keep it here if we want to add a simpler API in the future that lets SDK users stream at a given interval.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m intentionally including this in the main SDK so clients can directly configure streaming chunking and duration for LPCM audio formats. This makes it a first-class part of the SDK, available to anyone who needs it. If it lived under github.com/soundhound/houndify-sdk-go/example, clients would have to either copy the code each time or import an example package, which isn’t ideal for production use. As it stands, they can simply import github.com/soundhound/houndify-sdk-go and use it out of the box.

Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package houndify

import (
"fmt"
"time"
)

// LPCMStreamInfo holds the format of a linear PCM audio stream together with
// the chunk size and streaming interval calculated for it. All fields are
// unexported; read them through the accessor methods.
type LPCMStreamInfo struct {
	// Source audio format and the requested streaming cadence.
	numChans, bitDepth, sampleRate, targetStreamingIntervalMs int

	// idealChunkSize is the raw, unaligned byte count for the target
	// streaming interval.
	idealChunkSize int

	// The client should stream one chunk of chunkSize bytes every
	// streamingInterval: chunkSize is the frame-aligned byte count to send on
	// each tick, and streamingInterval is the duration those bytes represent.
	chunkSize         int
	streamingInterval time.Duration
}

// NumChans returns the number of audio channels.
func (s *LPCMStreamInfo) NumChans() int { return s.numChans }

// BitDepth returns the bit depth of each audio sample.
func (s *LPCMStreamInfo) BitDepth() int { return s.bitDepth }

// SampleRate returns the sample rate in Hz.
func (s *LPCMStreamInfo) SampleRate() int { return s.sampleRate }

// TargetStreamingIntervalMs returns the requested streaming interval in
// milliseconds.
func (s *LPCMStreamInfo) TargetStreamingIntervalMs() int { return s.targetStreamingIntervalMs }

// IdealChunkSize returns the unaligned byte count for the requested interval.
func (s *LPCMStreamInfo) IdealChunkSize() int { return s.idealChunkSize }

// ChunkSize returns the frame-aligned byte count to stream on each tick.
func (s *LPCMStreamInfo) ChunkSize() int { return s.chunkSize }

// StreamingInterval returns the duration represented by ChunkSize bytes.
func (s *LPCMStreamInfo) StreamingInterval() time.Duration { return s.streamingInterval }

// GetLPCMStreamInfo works out how many bytes of linear PCM audio to send per
// chunk, and how often to send them, for a requested streaming cadence.
//
// The calculation has three stages:
//  1. Compute the ideal byte count covering targetStreamingIntervalMs.
//  2. Round that count down to a whole number of audio frames, where one frame
//     is numChans * (bitDepth / 8) bytes.
//  3. Convert the aligned byte count back into a duration.
//
// Because both the ideal size and the alignment round down, the derived
// interval is at most the requested target, and equals it whenever the target
// covers a whole number of frames.
//
// Parameters:
//   - numChans: number of audio channels; must be >= 1.
//   - bitDepth: bits per sample (e.g., 8, 16, 24, 32); must be >= 8 and a
//     multiple of 8.
//   - sampleRate: sample rate in Hz (e.g., 16000, 44100); must be >= 8000.
//   - targetStreamingIntervalMs: requested interval in milliseconds; must be
//     >= 1.
//
// On success the returned LPCMStreamInfo carries the four inputs unchanged plus
// the calculated values:
//   - idealChunkSize: byte count for the requested interval before alignment.
//   - chunkSize: frame-aligned byte count to write for each chunk.
//   - streamingInterval: the duration represented by chunkSize bytes.
//
// If any input fails validation, a nil LPCMStreamInfo and a descriptive error
// are returned. Inputs are checked in parameter order and the first failure is
// the one reported.
func GetLPCMStreamInfo(
	numChans int,
	bitDepth int,
	sampleRate int,
	targetStreamingIntervalMs int,
) (*LPCMStreamInfo, error) {
	// Validate in parameter order; the first failing case decides the error.
	switch {
	case numChans < 1:
		return nil,
			fmt.Errorf("invalid input: numChans must be >= 1, got %d", numChans)
	case bitDepth < 8 || bitDepth%8 != 0:
		return nil,
			fmt.Errorf("invalid input: bitDepth must be >= 8 and multiple of 8, got %d", bitDepth)
	case sampleRate < 8000:
		return nil,
			fmt.Errorf("invalid input: sampleRate must be >= 8000, got %d", sampleRate)
	case targetStreamingIntervalMs < 1:
		return nil,
			fmt.Errorf("invalid input: targetStreamingIntervalMs must be >= 1, got %d",
				targetStreamingIntervalMs)
	}

	// One frame holds a single sample for every channel.
	frameBytes := numChans * (bitDepth / 8)
	byteRate := sampleRate * frameBytes

	// Stage 1: bytes needed for the target interval, ignoring frame boundaries.
	ideal := byteRate * targetStreamingIntervalMs / 1000

	// Stage 2: drop the trailing partial frame, if any.
	aligned := ideal - ideal%frameBytes

	// Stage 3: the playback duration of the aligned chunk.
	interval := time.Second * time.Duration(aligned) / time.Duration(byteRate)

	return &LPCMStreamInfo{
		numChans:                  numChans,
		bitDepth:                  bitDepth,
		sampleRate:                sampleRate,
		targetStreamingIntervalMs: targetStreamingIntervalMs,

		idealChunkSize:    ideal,
		chunkSize:         aligned,
		streamingInterval: interval,
	}, nil
}
Loading