-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsound.go
141 lines (120 loc) · 3.51 KB
/
sound.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// Copyright (c) 2019, The Emergent Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sound
import (
"log"
"os"
"github.com/emer/etable/etensor"
"github.com/go-audio/audio"
"github.com/go-audio/wav"
)
// Endian identifies the byte order of sound samples.
type Endian int32
// Byte-order values.
// NOTE(review): these are declared without an explicit type, so they are
// untyped integer constants rather than values of type Endian.
const (
BigEndian = iota // Samples are big endian byte order
LittleEndian // Samples are little endian byte order
)
// SoundSampleType identifies how individual sound samples are encoded.
type SoundSampleType int32
// Sample encoding values.
// NOTE(review): these are declared without an explicit type, so they are
// untyped integer constants rather than values of type SoundSampleType.
const (
Unknown = iota // Not set
SignedInt // Samples are signed integers
UnSignedInt // presumably: samples are unsigned integers -- confirm
Float // presumably: samples are floating point -- confirm
)
// Wave holds decoded sound data in an integer PCM buffer.
type Wave struct {
// Buf is the sample buffer. It is assigned by Load and read by the other
// methods of Wave (sample rate, channel count, bit depth and sample data).
Buf *audio.IntBuffer `inactive:"+"`
}
// Load opens the sound file fn, decodes it with the wav decoder and stores the
// resulting PCM buffer in snd.Buf.
//
// A failure to open or to decode the file is logged and returned to the
// caller; snd.Buf is only assigned when decoding succeeds. Decode errors are
// returned rather than passed to log.Fatal, so a bad file cannot terminate the
// calling program.
func (snd *Wave) Load(fn string) error {
	f, err := os.Open(fn)
	if err != nil {
		log.Printf("sound.Load: couldn't open %s %v", fn, err)
		return err
	}
	defer f.Close()

	d := wav.NewDecoder(f)
	buf, err := d.FullPCMBuffer()
	if err != nil {
		log.Printf("sound.Load: couldn't decode %s %v", fn, err)
		return err
	}
	snd.Buf = buf
	return nil
}
// WriteWave encodes the signal data in snd.Buf as a wav file named fn, using
// the sample rate, source bit depth and channel count of the buffer.
//
// It returns os.ErrInvalid, without creating the file, when there is no buffer
// (or buffer format) to write. Otherwise it returns the first error hit while
// creating the file, encoding the data, closing the encoder or closing the
// file. The file handle is closed on every path.
func (snd *Wave) WriteWave(fn string) error {
	// Check before os.Create so that a missing buffer neither truncates an
	// existing file nor panics on a nil dereference further down.
	if snd == nil || snd.Buf == nil || snd.Buf.Format == nil {
		log.Printf("sound.WriteWave: no sound data to write to %s", fn)
		return os.ErrInvalid
	}

	out, err := os.Create(fn)
	if err != nil {
		log.Printf("unable to create %s: %v", fn, err)
		return err
	}

	// Audio format code 1 is passed to the encoder (named PCM in the original code).
	const pcmFormat = 1
	e := wav.NewEncoder(out, snd.SampleRate(), snd.Buf.SourceBitDepth, snd.Channels(), pcmFormat)
	if err = e.Write(snd.Buf); err != nil {
		log.Printf("Encoding failed on write: %v", err)
		out.Close() // best effort; the encoding error is the one reported
		return err
	}
	if err = e.Close(); err != nil {
		log.Printf("could not close wav file encoder")
		out.Close() // best effort; the encoder error is the one reported
		return err
	}
	// A failed close can mean the data never reached disk, so report it.
	if err = out.Close(); err != nil {
		log.Printf("unable to close %s: %v", fn, err)
		return err
	}
	return nil
}
// SampleRate returns the sample rate of the sound, or 0 if snd is nil or if no
// buffer with format information has been set on it.
func (snd *Wave) SampleRate() int {
	if snd == nil {
		log.Printf("sound.SampleRate: Sound is nil")
		return 0
	}
	// A Wave that has not been loaded has a nil Buf; avoid dereferencing it.
	if snd.Buf == nil || snd.Buf.Format == nil {
		log.Printf("sound.SampleRate: Sound has no buffer or format")
		return 0
	}
	return int(snd.Buf.Format.SampleRate)
}
// SampleSize returns the sample size of the sound, or 0 if snd is nil.
// The value is currently fixed at 16 and is not read from the buffer
// (presumably it means bits per sample -- confirm).
func (snd *Wave) SampleSize() int {
	if snd != nil {
		return 16
	}
	log.Printf("sound.SampleSize: Sound is nil")
	return 0
}
// Channels returns the number of channels in the wav data, or 0 if snd is nil
// or if no buffer with format information has been set on it.
func (snd *Wave) Channels() int {
	if snd == nil {
		log.Printf("sound.Channels: Sound is nil")
		return 0
	}
	// A Wave that has not been loaded has a nil Buf; avoid dereferencing it.
	if snd.Buf == nil || snd.Buf.Format == nil {
		log.Printf("sound.Channels: Sound has no buffer or format")
		return 0
	}
	return int(snd.Buf.Format.NumChannels)
}
// SampleType returns the sample encoding of the sound. It currently returns
// SignedInt unconditionally, without inspecting the buffer.
// TODO: return to this.
func (snd *Wave) SampleType() SoundSampleType {
return SignedInt
}
// SoundToTensor converts the sound data to a one-dimensional floating point
// tensor with one value per frame, for use in signal processing routines.
// Each value is the integer sample divided by the full-scale value for the
// buffer's source bit depth (see GetFloatAtIdx).
//
// samples is reshaped to a single dimension of NumFrames() elements. When the
// buffer holds more than one channel, only the first channel of each frame is
// copied: the buffer data is interleaved, so frame i of the first channel
// lives at index i*NumChannels. Selecting another channel, or all channels, is
// not supported by this signature.
//
// It returns false, leaving samples untouched, if snd, its buffer, the buffer
// format or samples is nil; otherwise it returns true.
func (snd *Wave) SoundToTensor(samples *etensor.Float64) bool {
	if snd == nil || snd.Buf == nil || snd.Buf.Format == nil || samples == nil {
		return false
	}

	nFrames := snd.Buf.NumFrames()
	// Step between consecutive frames of the same channel in the interleaved
	// data. Treat a missing channel count as mono, as NumFrames does.
	stride := snd.Buf.Format.NumChannels
	if stride < 1 {
		stride = 1
	}

	samples.SetShape([]int{nFrames}, nil, nil)
	for i := 0; i < nFrames; i++ {
		samples.SetFloat1D(i, snd.GetFloatAtIdx(snd.Buf, i*stride))
	}
	return true
}
// GetFloatAtIdx returns the sample at position idx of buf.Data as a float64,
// divided by the largest positive signed value for the buffer's source bit
// depth (8, 16, 24 or 32 bits). For any other bit depth it returns 0 without
// reading the data.
func (snd *Wave) GetFloatAtIdx(buf *audio.IntBuffer, idx int) float64 {
	var fullScale float64
	switch buf.SourceBitDepth {
	case 32:
		fullScale = 0x7FFFFFFF
	case 24:
		fullScale = 0x7FFFFF
	case 16:
		fullScale = 0x7FFF
	case 8:
		fullScale = 0x7F
	default:
		// Unsupported depth: no scaling is defined, so report silence.
		return 0
	}
	return float64(buf.Data[idx]) / fullScale
}