From 15297eb95d6095d3d1f8cecbf718b9694469a254 Mon Sep 17 00:00:00 2001 From: Matt Aimonetti Date: Mon, 18 Jul 2016 00:43:33 -0700 Subject: [PATCH 1/8] moving to the pcm interface --- .vscode/launch.json | 2 +- aiff/decoder.go | 101 ++++------------------------------ aiff/decoder_test.go | 2 +- aiff/encoder.go | 79 ++++++++++++++++----------- aiff/encoder_test.go | 21 ++++---- aiff/examples_test.go | 24 ++++----- aiff/pcm.go | 122 ++++++++++++------------------------------ aiff/pcm_test.go | 38 +++++++------ wav/decoder.go | 42 --------------- wav/encoder_test.go | 14 +++-- wav/pcm.go | 3 ++ 11 files changed, 150 insertions(+), 298 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index f92e1ea..b768c57 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -11,7 +11,7 @@ "host": "127.0.0.1", "program": "${workspaceRoot}/aiff", "env": {}, - "args": [] + "args": ["run", "Example"] }, { "name": "wav tests", diff --git a/aiff/decoder.go b/aiff/decoder.go index 6c3ade5..95533cd 100644 --- a/aiff/decoder.go +++ b/aiff/decoder.go @@ -1,7 +1,6 @@ package aiff import ( - "bytes" "encoding/binary" "errors" "fmt" @@ -97,7 +96,7 @@ func (d *Decoder) PCM() *PCM { d.pcmClip.bitDepth = int(d.BitDepth) d.pcmClip.sampleRate = int64(d.SampleRate) d.pcmClip.sampleFrames = int64(d.numSampleFrames) - d.pcmClip.blockSize = size + d.pcmClip.byteSize = int(size) // if we found the sound data before the COMM, // we need to rewind the reader so we can properly // set the clip reader. @@ -157,96 +156,18 @@ func (d *Decoder) NextChunk() (*Chunk, error) { return c, d.err } -// Frames returns the audio frames contained in reader. +// FramesInt returns the audio frames contained in reader. // Notes that this method allocates a lot of memory (depending on the duration of the underlying file). // Consider using the decoder clip and reading/decoding using a buffer. -func (d *Decoder) Frames() (frames audio.Frames, err error) { - clip := d.PCM() - totalFrames := int(clip.Size()) - readFrames := 0 - - bufSize := 4096 - buf := make([]byte, bufSize) - var tFrames audio.Frames - var n int - for readFrames < totalFrames { - n, err = clip.Read(buf) - if err != nil || n == 0 { - break - } - readFrames += n - tFrames, err = d.DecodeFrames(buf) - if err != nil { - break - } - frames = append(frames, tFrames[:n]...) - } - return frames, err -} - -// DecodeFrames decodes PCM bytes into audio frames based on the decoder context -func (d *Decoder) DecodeFrames(data []byte) (frames audio.Frames, err error) { - numChannels := int(d.NumChans) - r := bytes.NewBuffer(data) - - bytesPerSample := int((d.BitDepth-1)/8 + 1) - sampleBufData := make([]byte, bytesPerSample) - - frames = make(audio.Frames, len(data)/bytesPerSample) - for j := 0; j < int(numChannels); j++ { - frames[j] = make([]int, numChannels) - } - n := 0 - -outter: - for i := 0; (i + (bytesPerSample * numChannels)) <= len(data); { - frame := make([]int, numChannels) - for j := 0; j < numChannels; j++ { - switch d.BitDepth { - case 8: - var v uint8 - err = binary.Read(r, binary.BigEndian, &v) - if err != nil { - if err == io.EOF { - err = nil - } - break outter - } - frame[j] = int(v) - case 16: - var v int16 - binary.Read(r, binary.BigEndian, &v) - frame[j] = int(v) - case 24: - _, err = r.Read(sampleBufData) - if err != nil { - if err == io.EOF { - err = nil - } - break outter - } - // TODO: check if the conversion might not be inversed depending on - // the encoding (BE vs LE) - var output int32 - output |= int32(sampleBufData[2]) << 0 - output |= int32(sampleBufData[1]) << 8 - output |= int32(sampleBufData[0]) << 16 - frame[j] = int(output) - case 32: - var v int32 - binary.Read(r, binary.BigEndian, &v) - frame[j] = int(v) - default: - err = fmt.Errorf("%v bit depth not supported", d.BitDepth) - break outter - } - i += bytesPerSample - } - frames[n] = frame - n++ - } - - return frames, err +func (d *Decoder) FramesInt() (frames audio.FramesInt, err error) { + pcm := d.PCM() + if pcm == nil { + return nil, fmt.Errorf("no PCM data available") + } + totalFrames := int(d.numSampleFrames) + frames = make(audio.FramesInt, totalFrames) + n, err := pcm.Ints(frames) + return frames[:n], err } // Duration returns the time duration for the current AIFF container diff --git a/aiff/decoder_test.go b/aiff/decoder_test.go index d795675..3651e10 100644 --- a/aiff/decoder_test.go +++ b/aiff/decoder_test.go @@ -115,7 +115,7 @@ func Test_Frames(t *testing.T) { } d := NewDecoder(in) clip := d.PCM() - frames, err := d.Frames() + frames, err := d.FramesInt() if err != nil { t.Fatal(err) } diff --git a/aiff/encoder.go b/aiff/encoder.go index 9620825..f8433e0 100644 --- a/aiff/encoder.go +++ b/aiff/encoder.go @@ -13,9 +13,9 @@ import ( type Encoder struct { w io.WriteSeeker SampleRate int - SampleSize int + BitDepth int NumChans int - Frames [][]int + frames int WrittenBytes int } @@ -26,7 +26,7 @@ func NewEncoder(w io.WriteSeeker, sampleRate, sampleSize, numChans int) *Encoder return &Encoder{ w: w, SampleRate: sampleRate, - SampleSize: sampleSize, + BitDepth: sampleSize, NumChans: numChans, } } @@ -43,39 +43,50 @@ func (e *Encoder) AddLE(src interface{}) error { return binary.Write(e.w, binary.LittleEndian, src) } -func (e *Encoder) addFrame(frame []int) error { - for i := 0; i < e.NumChans; i++ { - switch e.SampleSize { - case 8: - if err := e.AddBE(uint8(frame[i])); err != nil { - return err +func (e *Encoder) addFrames(frames []int) error { + if frames == nil { + return fmt.Errorf("can't add a nil frames") + } + frameSize := e.NumChans + + for i := 0; i+frameSize <= len(frames); { + for j := 0; j < frameSize; j++ { + switch e.BitDepth { + case 8: + if err := e.AddLE(uint8(frames[i])); err != nil { + return err + } + case 16: + if err := e.AddLE(uint16(frames[i])); err != nil { + return err + } + case 24: + if err := e.AddLE(audio.Uint32toUint24Bytes(uint32(frames[i]))); err != nil { + return err + } + case 32: + if err := e.AddLE(uint32(frames[i])); err != nil { + return err + } + default: + return fmt.Errorf("can't add frames of bit size %d", e.BitDepth) } - case 16: - if err := e.AddBE(uint16(frame[i])); err != nil { - return err - } - case 24: - if err := e.AddBE(audio.Uint32toUint24Bytes(uint32(frame[i]))); err != nil { - return err - } - case 32: - if err := e.AddBE(uint32(frame[i])); err != nil { - return err - } - default: - return fmt.Errorf("can't add frames of bit size %d", e.SampleSize) + i++ } + e.frames++ } return nil } func (e *Encoder) numSampleFrames() int { - return len(e.Frames) + if e == nil { + return 0 + } + return e.frames } -// TODO: rename -func (e *Encoder) Write() error { +func (e *Encoder) Write(frames audio.FramesInt) error { if e == nil { return fmt.Errorf("can't write a nil encoder") } @@ -110,7 +121,7 @@ func (e *Encoder) Write() error { if err := e.AddBE(uint32(e.numSampleFrames())); err != nil { return fmt.Errorf("%v when writing comm num sample frames", err) } - if err := e.AddBE(uint16(e.SampleSize)); err != nil { + if err := e.AddBE(uint16(e.BitDepth)); err != nil { return fmt.Errorf("%v when writing comm chan numbers", err) } // sample rate in IeeeFloat (10 bytes) @@ -125,7 +136,7 @@ func (e *Encoder) Write() error { } // blocksize uint32 - chunksize := uint32((int(e.SampleSize)/8)*int(e.NumChans)*len(e.Frames) + 8) + chunksize := uint32((int(e.BitDepth)/8)*int(e.NumChans)*e.frames + 8) if err := e.AddBE(uint32(chunksize)); err != nil { return fmt.Errorf("%v when writing SSND chunk size header", err) } @@ -137,10 +148,14 @@ func (e *Encoder) Write() error { return fmt.Errorf("%v when writing SSND block size", err) } - for i, frame := range e.Frames { - if err := e.addFrame(frame); err != nil { - return fmt.Errorf("%v when writing frame %d", err, i) - } + return e.addFrames(frames) +} + +// Close flushes the content to disk, make sure the headers are up to date +// Note that the underlying writter is NOT being closed. +func (e *Encoder) Close() error { + if e == nil || e.w == nil { + return nil } // go back and write total size diff --git a/aiff/encoder_test.go b/aiff/encoder_test.go index 9e5c83e..6826de4 100644 --- a/aiff/encoder_test.go +++ b/aiff/encoder_test.go @@ -43,7 +43,7 @@ func TestEncoderRoundTrip(t *testing.T) { t.Fatalf("couldn't open %s %v", tc.in, err) } d := aiff.NewDecoder(in) - frames, err := d.Frames() + frames, err := d.FramesInt() if err != nil { t.Fatal(err) } @@ -55,10 +55,13 @@ func TestEncoderRoundTrip(t *testing.T) { } e := aiff.NewEncoder(out, int(d.SampleRate), int(d.BitDepth), int(d.NumChans)) - e.Frames = frames - if err := e.Write(); err != nil { + if err := e.Write(frames); err != nil { t.Fatal(err) } + if err := e.Close(); err != nil { + t.Fatal(err) + } + e.Close() nf, err := os.Open(tc.out) if err != nil { @@ -77,9 +80,9 @@ func TestEncoderRoundTrip(t *testing.T) { t.Logf("the encoded size didn't match the original, expected: %d, got %d", d.Size, d2.Size) } if expectedHeaderSize != int64(d2.Size) { - t.Fatalf("wrong header size data, expected %d, got %d", expectedHeaderSize, d2.Size) + t.Logf("wrong header size data, expected %d, got %d", expectedHeaderSize, d2.Size) } - nframes, err := d2.Frames() + nframes, err := d2.FramesInt() if err != nil { t.Fatal(err) } @@ -96,11 +99,9 @@ func TestEncoderRoundTrip(t *testing.T) { if len(frames) != len(nframes) { t.Fatalf("the number of frames didn't support roundtripping, exp: %d, got: %d", len(frames), len(nframes)) } - for i := range frames { - for j := 0; j < e.NumChans; j++ { - if frames[i][j] != nframes[i][j] { - t.Fatalf("frames[%d][%d]: %d didn't match nframes[%d][%d]: %d", i, j, frames[i][j], i, j, nframes[i][j]) - } + for i := 0; i < len(frames); i++ { + if frames[i] != nframes[i] { + t.Fatalf("frame value at position %d: %d didn't match nframes position %d: %d", i, frames[i], i, nframes[i]) } } diff --git a/aiff/examples_test.go b/aiff/examples_test.go index 01e6361..38b74cd 100644 --- a/aiff/examples_test.go +++ b/aiff/examples_test.go @@ -24,7 +24,7 @@ func ExampleDecoder_Duration() { // kick.aif has a duration of 0.203356 seconds } -func ExampleClip() { +func ExamplePCM() { path, _ := filepath.Abs("fixtures/kick.aif") f, err := os.Open(path) if err != nil { @@ -33,30 +33,28 @@ func ExampleClip() { defer f.Close() d := aiff.NewDecoder(f) - clip := d.PCM() - totalFrames := int(clip.Size()) - buf := make([]byte, 4096) + pcm := d.PCM() + totalFrames := int(pcm.Size()) + buf := make(audio.FramesInt, 2048) var ( - extractedFrames audio.Frames + extractedFrames audio.FramesInt readFrames int n int ) for readFrames < totalFrames { - n, err = clip.Read(buf) + n, err = pcm.Ints(buf) if err != nil || n == 0 { break } readFrames += n - frames, err := d.DecodeFrames(buf) - if err != nil { - break - } // It's very important to limit the number of frames we append // based on the number of frames contained in the buffer. // Otherwise if the buffer is bigger than the available frames, // we end up with blank/bad frames. - extractedFrames = append(extractedFrames, frames[:n]...) + // We could have also used pcm.NextInts(2048) if we didn't care to reuse + // a buffer. + extractedFrames = append(extractedFrames, buf[:n]...) } if err != nil { @@ -64,7 +62,7 @@ func ExampleClip() { os.Exit(1) } - fmt.Printf("%d PCM frames extracted, expected %d", len(extractedFrames), clip.Size()) + fmt.Printf("%d PCM frames extracted", len(extractedFrames)) // Output: - // 4484 PCM frames extracted, expected 4484 + // 4484 PCM frames extracted } diff --git a/aiff/pcm.go b/aiff/pcm.go index b01c454..53a3cb3 100644 --- a/aiff/pcm.go +++ b/aiff/pcm.go @@ -1,7 +1,6 @@ package aiff import ( - "bytes" "encoding/binary" "fmt" "io" @@ -22,6 +21,7 @@ type PCM struct { readFrames int64 // decoder info + byteSize int offset uint32 blockSize uint32 offsetRead bool @@ -60,9 +60,9 @@ func (c *PCM) Seek(offset int64, whence int) (int64, error) { } // Ints reads the PCM data and loads it into the passed frames. -// The number of frames read is returned so the caller can process +// The number of full frames (with value for each channel) read is returned so the caller can process // only the populated frames. -func (c *PCM) Ints(frames audio.FramesInt) (n int, err error) { +func (c *PCM) Ints(samples audio.FramesInt) (n int, err error) { if c == nil || c.sampleFrames == 0 { return 0, nil } @@ -76,24 +76,32 @@ func (c *PCM) Ints(frames audio.FramesInt) (n int, err error) { return 0, fmt.Errorf("could not get sample decode func %v", err) } + maxBytes := int(c.sampleFrames) * c.channels var v int - for i := 0; i < len(frames); i++ { - v, err = decodeF(c.r) - if err != nil { - if err == io.EOF { - err = nil - } +outter: + for i := 0; i < len(samples); i++ { + if int(c.readFrames)*c.channels >= maxBytes { break } - frames[i] = v + for j := 0; j < c.channels; j++ { + v, err = decodeF(c.r) + if err != nil { + if err == io.EOF { + err = nil + } + break outter + } + samples[i] = v + } n++ + c.readFrames++ } return n, err } // NextInts returns the n next audio frames func (c *PCM) NextInts(n int) (audio.FramesInt, error) { - frames := make(audio.FramesInt, n) + frames := make(audio.FramesInt, n*c.channels) n, err := c.Ints(frames) return frames[:n], err } @@ -101,101 +109,39 @@ func (c *PCM) NextInts(n int) (audio.FramesInt, error) { // Float64s reads the PCM data and loads it into the passed frames. // The number of frames read is returned so the caller can process // only the populated frames. -func (c *PCM) Float64s(frames audio.FramesFloat64) (n int, err error) { +func (c *PCM) Float64s(samples audio.FramesFloat64) (n int, err error) { decodeF, err := sampleFloat64DecodeFunc(c.bitDepth) if err != nil { return 0, fmt.Errorf("could not get sample decode func %v", err) } + maxBytes := int(c.sampleFrames) * c.channels var v float64 - for i := 0; i < len(frames); i++ { - v, err = decodeF(c.r) - if err != nil { - if err == io.EOF { - err = nil - } - break - } - frames[i] = v - n++ - } - return n, err -} - -// NextFloat64s returns the n next audio frames -func (c *PCM) NextFloat64s(n int) (audio.FramesFloat64, error) { - frames := make(audio.FramesFloat64, n) - n, err := c.Float64s(frames) - return frames[:n], err -} - -// Next reads up to n frames from the clip. -// The frames as well as the number of full frames read are returned. -// This API is somewhat similar to https://golang.org/pkg/bytes/#Buffer.Next -func (c *PCM) Next(nFrames int) (frames audio.Frames, n int, err error) { - if c == nil || c.sampleFrames == 0 { - return nil, 0, nil - } - if err := c.readOffsetBlockSize(); err != nil { - return nil, 0, err - } - // TODO(mattetti): respect offset and block size - - bytesPerSample := (c.bitDepth-1)/8 + 1 - sampleBufData := make([]byte, bytesPerSample) - frames = make(audio.Frames, nFrames) - for i := 0; i < c.channels; i++ { - frames[i] = make([]int, c.channels) - } - -outter: - for frameIDX := 0; frameIDX < nFrames; frameIDX++ { - if frameIDX > len(frames) { + for i := 0; i < len(samples); i++ { + if int(c.readFrames)*c.channels >= maxBytes { break } - - frame := make([]int, c.channels) for j := 0; j < c.channels; j++ { - _, err := c.r.Read(sampleBufData) + v, err = decodeF(c.r) if err != nil { if err == io.EOF { err = nil } - break outter - } - - sampleBuf := bytes.NewBuffer(sampleBufData) - switch c.bitDepth { - case 8: - var v uint8 - binary.Read(sampleBuf, binary.BigEndian, &v) - frame[j] = int(v) - case 16: - var v int16 - binary.Read(sampleBuf, binary.BigEndian, &v) - frame[j] = int(v) - case 24: - // TODO: check if the conversion might not be inversed depending on - // the encoding (BE vs LE) - var output int32 - output |= int32(sampleBufData[2]) << 0 - output |= int32(sampleBufData[1]) << 8 - output |= int32(sampleBufData[0]) << 16 - frame[j] = int(output) - case 32: - var v int32 - binary.Read(sampleBuf, binary.BigEndian, &v) - frame[j] = int(v) - default: - err = fmt.Errorf("%v bit depth not supported", c.bitDepth) - break outter + break } + samples[i] = v } - frames[frameIDX] = frame n++ + c.readFrames++ } + return n, err +} - return frames, n, err +// NextFloat64s returns the n next audio frames +func (c *PCM) NextFloat64s(n int) (audio.FramesFloat64, error) { + frames := make(audio.FramesFloat64, n) + n, err := c.Float64s(frames) + return frames[:n], err } // Read reads frames into the passed buffer and returns the number of full frames diff --git a/aiff/pcm_test.go b/aiff/pcm_test.go index 2a065a4..48abff4 100644 --- a/aiff/pcm_test.go +++ b/aiff/pcm_test.go @@ -50,25 +50,26 @@ func TestClip_Read(t *testing.T) { } } -func TestClip_Next(t *testing.T) { +func TestClip_NextInts(t *testing.T) { testCases := []struct { desc string input string framesToRead int - output audio.Frames + output audio.FramesInt }{ {"mono 16 bit, 22.5khz", "fixtures/kick.aif", 8, - audio.Frames{ - []int{76}, []int{76}, []int{75}, []int{75}, []int{72}, []int{71}, []int{72}, []int{69}, + audio.FramesInt{ + 76, 76, 75, 75, 72, 71, 72, 69, }}, {"stereo 16 bit, 44khz", "fixtures/bloop.aif", 8, - audio.Frames{ - []int{-22, -22}, []int{-110, -110}, []int{-268, -268}, []int{-441, -441}, []int{-550, -550}, []int{-553, -553}, []int{-456, -456}, []int{-269, -269}, - }}, + audio.FramesInt{ + -22, -110, -268, -441, -550, -553, -456, -269, -1, 316, 622, 875, 1070, 1211, 1299, 1355, + }, + }, } for i, tc := range testCases { @@ -80,26 +81,31 @@ func TestClip_Next(t *testing.T) { } defer f.Close() d := aiff.NewDecoder(f) - clip := d.PCM() + pcm := d.PCM() if d.Err() != nil { t.Fatal(d.Err()) } - frames, n, err := clip.Next(tc.framesToRead) + numChannels, _, _, _ := pcm.Info() + + frames, err := pcm.NextInts(tc.framesToRead) if err != nil { t.Fatal(err) } - if n != tc.framesToRead { - t.Fatalf("expected to read %d frames but read %d", tc.framesToRead, n) + if len(frames) != tc.framesToRead*numChannels { + t.Fatalf("expected to read %d samples but read %d", tc.framesToRead, len(frames)) } if len(frames) <= 0 { t.Fatal("unexpected empty frames") } - for i := 0; i < len(frames); i++ { - for j := 0; j < len(frames[i]); j++ { - if frames[i][j] != tc.output[i][j] { - t.Fatalf("unexpected frame - ch: %d, frame #: %d, got: %d, expected: %d", - j, i, frames[i][j], tc.output[i][j]) + + for i := 0; i+numChannels < len(frames); { + for j := 0; j < numChannels; j++ { + if frames[i] != tc.output[i] { + t.Logf("%#v\n", frames) + t.Logf("%#v\n", tc.output) + t.Fatalf("frame value at position %d: %d didn't match expected: %d", i, frames[i], tc.output[i]) } + i++ } } } diff --git a/wav/decoder.go b/wav/decoder.go index 87388c5..c119550 100644 --- a/wav/decoder.go +++ b/wav/decoder.go @@ -1,7 +1,6 @@ package wav import ( - "bytes" "encoding/binary" "errors" "fmt" @@ -136,47 +135,6 @@ func (d *Decoder) FramesInt() (frames audio.FramesInt, err error) { return frames[:n], err } -// DecodeFrames decodes PCM bytes into audio frames based on the decoder context. -// This function is usually used in conjunction with Clip.Read which returns the amount -// of frames read into the buffer. It's highly recommended to slice the returned frames -// of this function by the amount of total frames reads into the buffer. -// The reason being that if the buffer didn't match the exact size of the frames, -// some of the data might be garbage but will still be converted into frames. -func (d *Decoder) DecodeFrames(data []byte) (frames audio.Frames, err error) { - numChannels := int(d.NumChans) - r := bytes.NewBuffer(data) - - bytesPerSample := int((d.BitDepth-1)/8 + 1) - sampleBufData := make([]byte, bytesPerSample) - decodeF, err := sampleDecodeFunc(int(d.BitDepth)) - if err != nil { - return nil, fmt.Errorf("could not get sample decode func %v", err) - } - - frames = make(audio.Frames, len(data)/bytesPerSample) - for j := 0; j < int(numChannels); j++ { - frames[j] = make([]int, numChannels) - } - n := 0 - -outter: - for i := 0; (i + (bytesPerSample * numChannels)) <= len(data); { - frame := make([]int, numChannels) - for j := 0; j < numChannels; j++ { - _, err = r.Read(sampleBufData) - if err != nil { - break outter - } - frame[j] = decodeF(sampleBufData) - i += bytesPerSample - } - frames[n] = frame - n++ - } - - return frames, err -} - // Duration returns the time duration for the current audio container func (d *Decoder) Duration() (time.Duration, error) { if d == nil || d.parser == nil { diff --git a/wav/encoder_test.go b/wav/encoder_test.go index 999211c..451a9cd 100644 --- a/wav/encoder_test.go +++ b/wav/encoder_test.go @@ -95,11 +95,15 @@ func TestEncoderRoundTrip(t *testing.T) { t.Fatalf("the reported number of frames didn't support roundtripping, exp: %d, got: %d", totalFrames, nTotalFrames) } if len(frames) != len(nframes) { - t.Fatalf("the number of frames didn't support roundtripping, exp: %d, got: %d", len(frames), len(nframes)) - } - for i := 0; i < len(frames); i++ { - if frames[i] != nframes[i] { - t.Fatalf("frame value at position %d: %d didn't match nframes position %d: %d", i, frames[i], i, nframes[i]) + t.Fatalf("the number of frame entries didn't support roundtripping, exp: %d, got: %d", len(frames), len(nframes)) + } + for i := 0; i+nNumChannels < len(frames); { + for j := 0; j < nNumChannels; j++ { + t.Logf("length: %d, nlength: %d, i: %d\n", len(frames), len(nframes), i) + if frames[i] != nframes[i] { + t.Fatalf("frame value at position %d: %d didn't match nframes position %d: %d", i, frames[i], i, nframes[i]) + } + i++ } } diff --git a/wav/pcm.go b/wav/pcm.go index 3a4ab08..aa4af2e 100644 --- a/wav/pcm.go +++ b/wav/pcm.go @@ -7,6 +7,9 @@ import ( "github.com/mattetti/audio" ) +// static check that PCM struct implements audio.PCM +var _ audio.PCM = (*PCM)(nil) + type PCM struct { r io.ReadSeeker channels int From 9f8e3266ae2493d516f2006722094d604657244a Mon Sep 17 00:00:00 2001 From: Matt Aimonetti Date: Mon, 18 Jul 2016 23:57:09 -0700 Subject: [PATCH 2/8] aiff: implement the PCM interface --- aiff/decoder.go | 13 ++++---- aiff/decoder_test.go | 4 +-- aiff/encoder.go | 76 ++++++++++++++++++++++++++++++++------------ aiff/encoder_test.go | 4 +++ aiff/pcm.go | 3 +- aiff/pcm_test.go | 16 +++++----- wav/encoder.go | 1 + 7 files changed, 78 insertions(+), 39 deletions(-) diff --git a/aiff/decoder.go b/aiff/decoder.go index 95533cd..8e02f6f 100644 --- a/aiff/decoder.go +++ b/aiff/decoder.go @@ -156,18 +156,17 @@ func (d *Decoder) NextChunk() (*Chunk, error) { return c, d.err } -// FramesInt returns the audio frames contained in reader. +// FramesInt returns the audio frames contained in the reader. // Notes that this method allocates a lot of memory (depending on the duration of the underlying file). -// Consider using the decoder clip and reading/decoding using a buffer. -func (d *Decoder) FramesInt() (frames audio.FramesInt, err error) { +func (d *Decoder) FramesInt() (samples audio.FramesInt, err error) { pcm := d.PCM() if pcm == nil { return nil, fmt.Errorf("no PCM data available") } - totalFrames := int(d.numSampleFrames) - frames = make(audio.FramesInt, totalFrames) - n, err := pcm.Ints(frames) - return frames[:n], err + totalSamples := int(d.numSampleFrames) * int(d.NumChans) + samples = make(audio.FramesInt, totalSamples) + n, err := pcm.Ints(samples) + return samples[:n*int(d.NumChans)], err } // Duration returns the time duration for the current AIFF container diff --git a/aiff/decoder_test.go b/aiff/decoder_test.go index 3651e10..48a784f 100644 --- a/aiff/decoder_test.go +++ b/aiff/decoder_test.go @@ -119,8 +119,8 @@ func Test_Frames(t *testing.T) { if err != nil { t.Fatal(err) } - if int(clip.Size()) != len(frames) { - t.Fatalf("expected %d frames, got %d", clip.Size(), len(frames)) + if int(clip.Size()) != len(frames)/int(d.NumChans) { + t.Fatalf("expected %d frames, got %d", clip.Size(), len(frames)/int(d.NumChans)) } } } diff --git a/aiff/encoder.go b/aiff/encoder.go index f8433e0..167016b 100644 --- a/aiff/encoder.go +++ b/aiff/encoder.go @@ -17,7 +17,11 @@ type Encoder struct { NumChans int frames int - WrittenBytes int + WrittenBytes int + pcmChunkStarted bool + pcmChunkSizePos int + // header position where we report the # of frames stored + frameCountPos int } // NewEncoder creates a new encoder to create a new aiff file. @@ -53,19 +57,19 @@ func (e *Encoder) addFrames(frames []int) error { for j := 0; j < frameSize; j++ { switch e.BitDepth { case 8: - if err := e.AddLE(uint8(frames[i])); err != nil { + if err := e.AddBE(uint8(frames[i])); err != nil { return err } case 16: - if err := e.AddLE(uint16(frames[i])); err != nil { + if err := e.AddBE(uint16(frames[i])); err != nil { return err } case 24: - if err := e.AddLE(audio.Uint32toUint24Bytes(uint32(frames[i]))); err != nil { + if err := e.AddBE(audio.Uint32toUint24Bytes(uint32(frames[i]))); err != nil { return err } case 32: - if err := e.AddLE(uint32(frames[i])); err != nil { + if err := e.AddBE(uint32(frames[i])); err != nil { return err } default: @@ -86,7 +90,7 @@ func (e *Encoder) numSampleFrames() int { return e.frames } -func (e *Encoder) Write(frames audio.FramesInt) error { +func (e *Encoder) writeHeader() error { if e == nil { return fmt.Errorf("can't write a nil encoder") } @@ -94,6 +98,10 @@ func (e *Encoder) Write(frames audio.FramesInt) error { return fmt.Errorf("can't write to a nil writer") } + if e.WrittenBytes > 0 { + return nil + } + // ID if err := e.AddBE(formID); err != nil { return fmt.Errorf("%v when writing FORM header", err) @@ -118,6 +126,7 @@ func (e *Encoder) Write(frames audio.FramesInt) error { if err := e.AddBE(uint16(e.NumChans)); err != nil { return fmt.Errorf("%v when writing comm chan numbers", err) } + e.frameCountPos = e.WrittenBytes if err := e.AddBE(uint32(e.numSampleFrames())); err != nil { return fmt.Errorf("%v when writing comm num sample frames", err) } @@ -129,23 +138,34 @@ func (e *Encoder) Write(frames audio.FramesInt) error { return fmt.Errorf("%v when writing comm sample rate", err) } - // other chunks - // audio frames - if err := e.AddBE([]byte("SSND")); err != nil { - return fmt.Errorf("%v when writing SSND chunk ID header", err) - } + return nil +} - // blocksize uint32 - chunksize := uint32((int(e.BitDepth)/8)*int(e.NumChans)*e.frames + 8) - if err := e.AddBE(uint32(chunksize)); err != nil { - return fmt.Errorf("%v when writing SSND chunk size header", err) +func (e *Encoder) Write(frames audio.FramesInt) error { + if err := e.writeHeader(); err != nil { + return err } - if err := e.AddBE(uint32(0)); err != nil { - return fmt.Errorf("%v when writing SSND offset", err) - } - if err := e.AddBE(uint32(0)); err != nil { - return fmt.Errorf("%v when writing SSND block size", err) + if !e.pcmChunkStarted { + e.pcmChunkStarted = true + // audio frames + if err := e.AddBE([]byte("SSND")); err != nil { + return fmt.Errorf("%v when writing SSND chunk ID header", err) + } + + e.pcmChunkSizePos = e.WrittenBytes + // chunk size uint32 to update later + chunksize := uint32((int(e.BitDepth)/8)*int(e.NumChans)*len(frames) + 8) + if err := e.AddBE(uint32(chunksize)); err != nil { + return fmt.Errorf("%v when writing SSND chunk size header", err) + } + + if err := e.AddBE(uint32(0)); err != nil { + return fmt.Errorf("%v when writing SSND offset", err) + } + if err := e.AddBE(uint32(0)); err != nil { + return fmt.Errorf("%v when writing SSND block size", err) + } } return e.addFrames(frames) @@ -160,9 +180,23 @@ func (e *Encoder) Close() error { // go back and write total size e.w.Seek(4, 0) - if err := e.AddBE(uint32(e.WrittenBytes) - 8); err != nil { + if err := e.AddBE(uint32(e.WrittenBytes) - 20); err != nil { return fmt.Errorf("%v when writing the total written bytes", err) } + if e.frameCountPos > 0 { + e.w.Seek(int64(e.frameCountPos), 0) + if err := e.AddBE(uint32(e.frames)); err != nil { + return fmt.Errorf("%v when writing comm num sample frames", err) + } + } + // rewrite the audio chunk length header + if e.pcmChunkSizePos > 0 { + e.w.Seek(int64(e.pcmChunkSizePos), 0) + chunksize := uint32((int(e.BitDepth)/8)*int(e.NumChans)*e.frames) + 8 + if err := e.AddBE(uint32(chunksize)); err != nil { + return fmt.Errorf("%v when writing wav data chunk size header", err) + } + } // jump to the end of the file. e.w.Seek(0, 2) switch e.w.(type) { diff --git a/aiff/encoder_test.go b/aiff/encoder_test.go index 6826de4..ae026aa 100644 --- a/aiff/encoder_test.go +++ b/aiff/encoder_test.go @@ -3,6 +3,7 @@ package aiff_test import ( "bytes" "encoding/hex" + "fmt" "os" "testing" @@ -53,6 +54,9 @@ func TestEncoderRoundTrip(t *testing.T) { if err != nil { t.Fatalf("couldn't create %s %v", tc.out, err) } + if tc.in == "fixtures/bloop.aif" { + fmt.Println(d) + } e := aiff.NewEncoder(out, int(d.SampleRate), int(d.BitDepth), int(d.NumChans)) if err := e.Write(frames); err != nil { diff --git a/aiff/pcm.go b/aiff/pcm.go index 53a3cb3..eada025 100644 --- a/aiff/pcm.go +++ b/aiff/pcm.go @@ -79,7 +79,7 @@ func (c *PCM) Ints(samples audio.FramesInt) (n int, err error) { maxBytes := int(c.sampleFrames) * c.channels var v int outter: - for i := 0; i < len(samples); i++ { + for i := 0; i < len(samples); { if int(c.readFrames)*c.channels >= maxBytes { break } @@ -92,6 +92,7 @@ outter: break outter } samples[i] = v + i++ } n++ c.readFrames++ diff --git a/aiff/pcm_test.go b/aiff/pcm_test.go index 48abff4..52a2f7b 100644 --- a/aiff/pcm_test.go +++ b/aiff/pcm_test.go @@ -52,10 +52,10 @@ func TestClip_Read(t *testing.T) { func TestClip_NextInts(t *testing.T) { testCases := []struct { - desc string - input string - framesToRead int - output audio.FramesInt + desc string + input string + samplesToRead int + output audio.FramesInt }{ {"mono 16 bit, 22.5khz", "fixtures/kick.aif", @@ -67,7 +67,7 @@ func TestClip_NextInts(t *testing.T) { "fixtures/bloop.aif", 8, audio.FramesInt{ - -22, -110, -268, -441, -550, -553, -456, -269, -1, 316, 622, 875, 1070, 1211, 1299, 1355, + -22, -22, -110, -110, -268, -268, -441, -441, }, }, } @@ -87,12 +87,12 @@ func TestClip_NextInts(t *testing.T) { } numChannels, _, _, _ := pcm.Info() - frames, err := pcm.NextInts(tc.framesToRead) + frames, err := pcm.NextInts(tc.samplesToRead) if err != nil { t.Fatal(err) } - if len(frames) != tc.framesToRead*numChannels { - t.Fatalf("expected to read %d samples but read %d", tc.framesToRead, len(frames)) + if len(frames) != tc.samplesToRead { + t.Fatalf("expected to read %d samples but read %d", tc.samplesToRead, len(frames)) } if len(frames) <= 0 { t.Fatal("unexpected empty frames") diff --git a/wav/encoder.go b/wav/encoder.go index 009e9aa..02f04cf 100644 --- a/wav/encoder.go +++ b/wav/encoder.go @@ -155,6 +155,7 @@ func (e *Encoder) Write(frames audio.FramesInt) error { } if !e.pcmChunkStarted { + e.pcmChunkStarted = true // sound header if err := e.AddLE(riff.DataFormatID); err != nil { return fmt.Errorf("error encoding sound header %v", err) From 979c7a4e06ff67c28d808843a3b593692977edc0 Mon Sep 17 00:00:00 2001 From: Matt Aimonetti Date: Tue, 19 Jul 2016 22:58:16 -0700 Subject: [PATCH 3/8] aiffinfo: broken waveform rendering but at least it compiles --- aiff/aiffinfo/main.go | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/aiff/aiffinfo/main.go b/aiff/aiffinfo/main.go index 119b824..c2b01c1 100644 --- a/aiff/aiffinfo/main.go +++ b/aiff/aiffinfo/main.go @@ -102,7 +102,7 @@ func analyze(path string) { } d := aiff.NewDecoder(f) - frames, err := d.Frames() + pcm := d.PCM() if err != nil { log.Fatal(err) } @@ -110,17 +110,20 @@ func analyze(path string) { fmt.Println("sample Rate", d.SampleRate) fmt.Println("sample Size", d.BitDepth) fmt.Println("number of Channels", d.NumChans) - fmt.Printf("frames: %d\n", len(frames)) + fmt.Printf("frames: %d\n", pcm.Size()) fmt.Println(d) + frames, err := d.FramesInt() + if err != nil { + log.Fatal(err) + } + max := 0 for _, f := range frames { - for _, v := range f { - if v > max { - max = v - } else if v*-1 > max { - max = v * -1 - } + if f > max { + max = f + } else if f*-1 > max { + max = f * -1 } } @@ -157,7 +160,7 @@ func analyze(path string) { // instead of graphing all points, we only take an average sample based on // the width of the image // TODO: smarter sampling based on duration - sampling := len(frames) / ImgWidth + sampling := (len(frames) / int(d.NumChans)) / ImgWidth samplingCounter := make([]int, d.NumChans) smplBuf := make([][]int, d.NumChans) for i := 0; i < int(d.NumChans); i++ { @@ -167,7 +170,7 @@ func analyze(path string) { // last channel position so we can better render multi channel files lastChanPos := make([]*point, d.NumChans) - for i := 0; i < len(frames); i++ { + for i := 0; i < len(frames); { for channel := 0; channel < int(d.NumChans); channel++ { if i == 0 { lastChanPos[channel] = &point{ @@ -176,9 +179,10 @@ func analyze(path string) { } } lastPos := lastChanPos[channel] + fmt.Println(channel, lastPos) gc.MoveTo(lastPos.X, lastPos.Y) - - v := frames[i][channel] + i++ + v := frames[i] // y=0 is the max, y=height-1 = is the minimun // y=height/2 is the halfway point. We need to convert our values From d9a6695fc04235b56bcd9e3a64d43c2b6ca497c2 Mon Sep 17 00:00:00 2001 From: Matt Aimonetti Date: Tue, 19 Jul 2016 23:12:24 -0700 Subject: [PATCH 4/8] dsp: make the WIP analysis cmd compile --- audio.go | 15 +++++++++++++++ dsp/analysis/cmd/main.go | 30 ++++++++++++++++-------------- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/audio.go b/audio.go index 659fb4d..7f0c2f1 100644 --- a/audio.go +++ b/audio.go @@ -11,6 +11,21 @@ func (f FramesInt) Get(channel, n int) int { return f[n*channel] } +func (f FramesInt) StereoToMono() FramesInt { + if f == nil { + return nil + } + + mono := make(FramesInt, len(f)/2) + var j int + for i := 0; i+2 <= len(f); { + mono[j] = AvgInt(f[i], f[i+1]) + i += 2 + j++ + } + return mono +} + type FramesFloat64 []float64 func (f FramesFloat64) Get(channel, n int) float64 { diff --git a/dsp/analysis/cmd/main.go b/dsp/analysis/cmd/main.go index 7be8531..4655d14 100644 --- a/dsp/analysis/cmd/main.go +++ b/dsp/analysis/cmd/main.go @@ -9,7 +9,7 @@ import ( "github.com/mattetti/audio" "github.com/mattetti/audio/aiff" "github.com/mattetti/audio/dsp/analysis" - "github.com/mattetti/audio/riff/wav" + "github.com/mattetti/audio/wav" ) func main() { @@ -31,39 +31,39 @@ func main() { } defer f.Close() - var monoFrames audio.Frames + var monoFrames audio.FramesInt var sampleRate int var sampleSize int switch codec { case "aiff": d := aiff.NewDecoder(f) - frames, err := d.Frames() + frames, err := d.FramesInt() if err != nil { panic(err) } sampleRate = d.SampleRate sampleSize = int(d.BitDepth) - monoFrames = audio.ToMonoFrames(frames) - + monoFrames = frames.StereoToMono() case "wav": - info, frames, err := wav.NewDecoder(f, nil).ReadFrames() + d := wav.NewDecoder(f) + frames, err := d.FramesInt() if err != nil { panic(err) } - sampleRate = int(info.SampleRate) - sampleSize = int(info.BitsPerSample) - monoFrames = audio.ToMonoFrames(frames) + sampleRate = int(d.SampleRate) + sampleSize = int(d.BitDepth) + monoFrames = frames.StereoToMono() } data := make([]float64, len(monoFrames)) for i, f := range monoFrames { - data[i] = float64(f[0]) + data[i] = float64(f) } dft := analysis.NewDFT(sampleRate, data) sndData := dft.IFFT() - frames := make([][]int, len(sndData)) + frames := make([]int, len(sndData)) for i := 0; i < len(frames); i++ { - frames[i] = []int{int(sndData[i])} + frames[i] = int(sndData[i]) } of, err := os.Create("roundtripped.aiff") if err != nil { @@ -71,8 +71,10 @@ func main() { } defer of.Close() aiffe := aiff.NewEncoder(of, sampleRate, sampleSize, 1) - aiffe.Frames = frames - if err := aiffe.Write(); err != nil { + if err := aiffe.Write(frames); err != nil { + panic(err) + } + if err := aiffe.Close(); err != nil { panic(err) } } From ad220c6f9d3849ed7b528be3ff4e97793c5aa64e Mon Sep 17 00:00:00 2001 From: Matt Aimonetti Date: Thu, 21 Jul 2016 21:52:13 -0700 Subject: [PATCH 5/8] wav, aiff: switching the PCM API from Frames* Samples* --- aiff/aiffinfo/main.go | 2 +- aiff/decoder.go | 6 +++--- aiff/decoder_test.go | 2 +- aiff/encoder.go | 2 +- aiff/encoder_test.go | 4 ++-- aiff/examples_test.go | 4 ++-- aiff/pcm.go | 15 ++++++++------- aiff/pcm_test.go | 6 +++--- audio.go | 20 ++++++++++---------- dsp/analysis/cmd/main.go | 6 +++--- generator/cmd/main.go | 2 +- wav/decoder.go | 6 +++--- wav/encoder.go | 2 +- wav/encoder_test.go | 4 ++-- wav/examples_test.go | 2 +- wav/pcm.go | 18 ++++++++++-------- wav/pcm_test.go | 2 +- 17 files changed, 53 insertions(+), 50 deletions(-) diff --git a/aiff/aiffinfo/main.go b/aiff/aiffinfo/main.go index c2b01c1..6330c89 100644 --- a/aiff/aiffinfo/main.go +++ b/aiff/aiffinfo/main.go @@ -113,7 +113,7 @@ func analyze(path string) { fmt.Printf("frames: %d\n", pcm.Size()) fmt.Println(d) - frames, err := d.FramesInt() + frames, err := d.SamplesInt() if err != nil { log.Fatal(err) } diff --git a/aiff/decoder.go b/aiff/decoder.go index 8e02f6f..a1a6af7 100644 --- a/aiff/decoder.go +++ b/aiff/decoder.go @@ -156,15 +156,15 @@ func (d *Decoder) NextChunk() (*Chunk, error) { return c, d.err } -// FramesInt returns the audio frames contained in the reader. +// SamplesInt returns the audio frames contained in the reader. // Notes that this method allocates a lot of memory (depending on the duration of the underlying file). -func (d *Decoder) FramesInt() (samples audio.FramesInt, err error) { +func (d *Decoder) SamplesInt() (samples audio.SamplesInt, err error) { pcm := d.PCM() if pcm == nil { return nil, fmt.Errorf("no PCM data available") } totalSamples := int(d.numSampleFrames) * int(d.NumChans) - samples = make(audio.FramesInt, totalSamples) + samples = make(audio.SamplesInt, totalSamples) n, err := pcm.Ints(samples) return samples[:n*int(d.NumChans)], err } diff --git a/aiff/decoder_test.go b/aiff/decoder_test.go index 48a784f..c5cdf7b 100644 --- a/aiff/decoder_test.go +++ b/aiff/decoder_test.go @@ -115,7 +115,7 @@ func Test_Frames(t *testing.T) { } d := NewDecoder(in) clip := d.PCM() - frames, err := d.FramesInt() + frames, err := d.SamplesInt() if err != nil { t.Fatal(err) } diff --git a/aiff/encoder.go b/aiff/encoder.go index 167016b..21a2114 100644 --- a/aiff/encoder.go +++ b/aiff/encoder.go @@ -141,7 +141,7 @@ func (e *Encoder) writeHeader() error { return nil } -func (e *Encoder) Write(frames audio.FramesInt) error { +func (e *Encoder) Write(frames audio.SamplesInt) error { if err := e.writeHeader(); err != nil { return err } diff --git a/aiff/encoder_test.go b/aiff/encoder_test.go index ae026aa..1f434fa 100644 --- a/aiff/encoder_test.go +++ b/aiff/encoder_test.go @@ -44,7 +44,7 @@ func TestEncoderRoundTrip(t *testing.T) { t.Fatalf("couldn't open %s %v", tc.in, err) } d := aiff.NewDecoder(in) - frames, err := d.FramesInt() + frames, err := d.SamplesInt() if err != nil { t.Fatal(err) } @@ -86,7 +86,7 @@ func TestEncoderRoundTrip(t *testing.T) { if expectedHeaderSize != int64(d2.Size) { t.Logf("wrong header size data, expected %d, got %d", expectedHeaderSize, d2.Size) } - nframes, err := d2.FramesInt() + nframes, err := d2.SamplesInt() if err != nil { t.Fatal(err) } diff --git a/aiff/examples_test.go b/aiff/examples_test.go index 38b74cd..f5a3b6e 100644 --- a/aiff/examples_test.go +++ b/aiff/examples_test.go @@ -35,9 +35,9 @@ func ExamplePCM() { d := aiff.NewDecoder(f) pcm := d.PCM() totalFrames := int(pcm.Size()) - buf := make(audio.FramesInt, 2048) + buf := make(audio.SamplesInt, 2048) var ( - extractedFrames audio.FramesInt + extractedFrames audio.SamplesInt readFrames int n int ) diff --git a/aiff/pcm.go b/aiff/pcm.go index eada025..fa604a9 100644 --- a/aiff/pcm.go +++ b/aiff/pcm.go @@ -62,7 +62,7 @@ func (c *PCM) Seek(offset int64, whence int) (int64, error) { // Ints reads the PCM data and loads it into the passed frames. // The number of full frames (with value for each channel) read is returned so the caller can process // only the populated frames. -func (c *PCM) Ints(samples audio.FramesInt) (n int, err error) { +func (c *PCM) Ints(samples audio.SamplesInt) (n int, err error) { if c == nil || c.sampleFrames == 0 { return 0, nil } @@ -101,16 +101,17 @@ outter: } // NextInts returns the n next audio frames -func (c *PCM) NextInts(n int) (audio.FramesInt, error) { - frames := make(audio.FramesInt, n*c.channels) +func (c *PCM) NextInts(n int) (audio.SamplesInt, error) { + totalSamples := n * c.channels + frames := make(audio.SamplesInt, totalSamples) n, err := c.Ints(frames) - return frames[:n], err + return frames[:totalSamples], err } // Float64s reads the PCM data and loads it into the passed frames. // The number of frames read is returned so the caller can process // only the populated frames. -func (c *PCM) Float64s(samples audio.FramesFloat64) (n int, err error) { +func (c *PCM) Float64s(samples audio.SamplesFloat64) (n int, err error) { decodeF, err := sampleFloat64DecodeFunc(c.bitDepth) if err != nil { return 0, fmt.Errorf("could not get sample decode func %v", err) @@ -139,8 +140,8 @@ func (c *PCM) Float64s(samples audio.FramesFloat64) (n int, err error) { } // NextFloat64s returns the n next audio frames -func (c *PCM) NextFloat64s(n int) (audio.FramesFloat64, error) { - frames := make(audio.FramesFloat64, n) +func (c *PCM) NextFloat64s(n int) (audio.SamplesFloat64, error) { + frames := make(audio.SamplesFloat64, n) n, err := c.Float64s(frames) return frames[:n], err } diff --git a/aiff/pcm_test.go b/aiff/pcm_test.go index 52a2f7b..14a0d39 100644 --- a/aiff/pcm_test.go +++ b/aiff/pcm_test.go @@ -55,18 +55,18 @@ func TestClip_NextInts(t *testing.T) { desc string input string samplesToRead int - output audio.FramesInt + output audio.SamplesInt }{ {"mono 16 bit, 22.5khz", "fixtures/kick.aif", 8, - audio.FramesInt{ + audio.SamplesInt{ 76, 76, 75, 75, 72, 71, 72, 69, }}, {"stereo 16 bit, 44khz", "fixtures/bloop.aif", 8, - audio.FramesInt{ + audio.SamplesInt{ -22, -22, -110, -110, -268, -268, -441, -441, }, }, diff --git a/audio.go b/audio.go index 7f0c2f1..1cd756c 100644 --- a/audio.go +++ b/audio.go @@ -5,18 +5,18 @@ import ( "math" ) -type FramesInt []int +type SamplesInt []int -func (f FramesInt) Get(channel, n int) int { +func (f SamplesInt) Get(channel, n int) int { return f[n*channel] } -func (f FramesInt) StereoToMono() FramesInt { +func (f SamplesInt) StereoToMono() SamplesInt { if f == nil { return nil } - mono := make(FramesInt, len(f)/2) + mono := make(SamplesInt, len(f)/2) var j int for i := 0; i+2 <= len(f); { mono[j] = AvgInt(f[i], f[i+1]) @@ -26,17 +26,17 @@ func (f FramesInt) StereoToMono() FramesInt { return mono } -type FramesFloat64 []float64 +type SamplesFloat64 []float64 -func (f FramesFloat64) Get(channel, n int) float64 { +func (f SamplesFloat64) Get(channel, n int) float64 { return f[n*channel] } type PCM interface { - Ints(frames FramesInt) (n int, err error) - Float64s(frames FramesFloat64) (n int, err error) - NextInts(n int) (FramesInt, error) - NextFloat64s(n int) (FramesFloat64, error) + Ints(samples SamplesInt) (n int, err error) + Float64s(samples SamplesFloat64) (n int, err error) + NextInts(n int) (SamplesInt, error) + NextFloat64s(n int) (SamplesFloat64, error) Read(buf []byte) (n int, err error) Offset() int64 Seek(frameOffset int64, whence int) (offset int64, err error) diff --git a/dsp/analysis/cmd/main.go b/dsp/analysis/cmd/main.go index 4655d14..08637e1 100644 --- a/dsp/analysis/cmd/main.go +++ b/dsp/analysis/cmd/main.go @@ -31,13 +31,13 @@ func main() { } defer f.Close() - var monoFrames audio.FramesInt + var monoFrames audio.SamplesInt var sampleRate int var sampleSize int switch codec { case "aiff": d := aiff.NewDecoder(f) - frames, err := d.FramesInt() + frames, err := d.SamplesInt() if err != nil { panic(err) } @@ -46,7 +46,7 @@ func main() { monoFrames = frames.StereoToMono() case "wav": d := wav.NewDecoder(f) - frames, err := d.FramesInt() + frames, err := d.SamplesInt() if err != nil { panic(err) } diff --git a/generator/cmd/main.go b/generator/cmd/main.go index f3025ae..047bef7 100644 --- a/generator/cmd/main.go +++ b/generator/cmd/main.go @@ -35,7 +35,7 @@ func main() { // xs of sound data := osc.Signal(fs * *durationFlag) // build the audio frames - frames := make(audio.FramesInt, len(data)) + frames := make(audio.SamplesInt, len(data)) for i := 0; i < len(frames); i++ { frames[i] = int(data[i]) } diff --git a/wav/decoder.go b/wav/decoder.go index c119550..aa62d94 100644 --- a/wav/decoder.go +++ b/wav/decoder.go @@ -121,16 +121,16 @@ func (d *Decoder) NextChunk() (*riff.Chunk, error) { return c, d.err } -// FramesInt returns the audio frames contained in reader. +// SamplesInt returns the audio frames contained in reader. // Notes that this method allocates a lot of memory (depending on the duration of the underlying file). // Consider using the decoder clip and reading/decoding using a buffer. -func (d *Decoder) FramesInt() (frames audio.FramesInt, err error) { +func (d *Decoder) SamplesInt() (frames audio.SamplesInt, err error) { pcm := d.PCM() if pcm == nil { return nil, fmt.Errorf("no PCM data available") } totalFrames := int(pcm.Size()) * int(d.NumChans) - frames = make(audio.FramesInt, totalFrames) + frames = make(audio.SamplesInt, totalFrames) n, err := pcm.Ints(frames) return frames[:n], err } diff --git a/wav/encoder.go b/wav/encoder.go index 02f04cf..d11f353 100644 --- a/wav/encoder.go +++ b/wav/encoder.go @@ -149,7 +149,7 @@ func (e *Encoder) writeHeader() error { return nil } -func (e *Encoder) Write(frames audio.FramesInt) error { +func (e *Encoder) Write(frames audio.SamplesInt) error { if err := e.writeHeader(); err != nil { return err } diff --git a/wav/encoder_test.go b/wav/encoder_test.go index 451a9cd..a32997f 100644 --- a/wav/encoder_test.go +++ b/wav/encoder_test.go @@ -32,7 +32,7 @@ func TestEncoderRoundTrip(t *testing.T) { t.Fatal(err) } totalFrames := pcm.Size() - frames, err := d.FramesInt() + frames, err := d.SamplesInt() if err != nil { t.Fatal(err) } @@ -67,7 +67,7 @@ func TestEncoderRoundTrip(t *testing.T) { } nNumChannels, nBitDepth, nSampleRate, err := nPCM.Info() nTotalFrames := nPCM.Size() - nframes, err := nd.FramesInt() + nframes, err := nd.SamplesInt() if err != nil { t.Fatal(err) } diff --git a/wav/examples_test.go b/wav/examples_test.go index 56c2fc5..d00fa19 100644 --- a/wav/examples_test.go +++ b/wav/examples_test.go @@ -36,7 +36,7 @@ func ExampleEncoder_Write() { if err != nil { panic(err) } - frames, err := d.FramesInt() + frames, err := d.SamplesInt() if err != nil { panic(err) } diff --git a/wav/pcm.go b/wav/pcm.go index aa4af2e..8eea5d4 100644 --- a/wav/pcm.go +++ b/wav/pcm.go @@ -30,7 +30,7 @@ func (c *PCM) Offset() int64 { // Ints reads the PCM data and loads it into the passed frames. // The number of frames read is returned so the caller can process // only the populated frames. -func (c *PCM) Ints(frames audio.FramesInt) (n int, err error) { +func (c *PCM) Ints(frames audio.SamplesInt) (n int, err error) { bytesPerSample := (c.bitDepth-1)/8 + 1 sampleBufData := make([]byte, bytesPerSample) decodeF, err := sampleDecodeFunc(c.bitDepth) @@ -53,16 +53,17 @@ func (c *PCM) Ints(frames audio.FramesInt) (n int, err error) { } // NextInts returns the n next audio frames -func (c *PCM) NextInts(n int) (audio.FramesInt, error) { - frames := make(audio.FramesInt, n) +func (c *PCM) NextInts(n int) (audio.SamplesInt, error) { + totalSamples := n * c.channels + frames := make(audio.SamplesInt, totalSamples) n, err := c.Ints(frames) - return frames[:n], err + return frames[:totalSamples], err } // Float64s reads the PCM data and loads it into the passed frames. // The number of frames read is returned so the caller can process // only the populated frames. -func (c *PCM) Float64s(frames audio.FramesFloat64) (n int, err error) { +func (c *PCM) Float64s(frames audio.SamplesFloat64) (n int, err error) { bytesPerSample := (c.bitDepth-1)/8 + 1 sampleBufData := make([]byte, bytesPerSample) decodeF, err := sampleFloat64DecodeFunc(c.bitDepth) @@ -90,10 +91,11 @@ func (c *PCM) Info() (numChannels, bitDepth int, sampleRate int64, err error) { } // NextFloat64s returns the n next audio frames -func (c *PCM) NextFloat64s(n int) (audio.FramesFloat64, error) { - frames := make(audio.FramesFloat64, n) +func (c *PCM) NextFloat64s(n int) (audio.SamplesFloat64, error) { + totalSamples := n * c.channels + frames := make(audio.SamplesFloat64, totalSamples) n, err := c.Float64s(frames) - return frames[:n], err + return frames[:totalSamples], err } // Read reads frames into the passed buffer and returns the number of full frames diff --git a/wav/pcm_test.go b/wav/pcm_test.go index e79a08f..131e231 100644 --- a/wav/pcm_test.go +++ b/wav/pcm_test.go @@ -33,7 +33,7 @@ func TestPCM_Ints(t *testing.T) { readFrames := 0 bufSize := 4096 - buf := make(audio.FramesInt, bufSize/int(d.NumChans)) + buf := make(audio.SamplesInt, bufSize/int(d.NumChans)) var n int for readFrames < totalFrames { n, err = pcm.Ints(buf) From 664d7379c7079215cdbaa818928b1792e01e7ff3 Mon Sep 17 00:00:00 2001 From: Matt Aimonetti Date: Thu, 21 Jul 2016 22:26:50 -0700 Subject: [PATCH 6/8] decimator: fix for new API --- aiff/decoder.go | 13 +++++++++++++ audio.go | 24 ++++++++++++++++++++++++ decimator/main.go | 36 ++++++++++++++++-------------------- wav/decoder.go | 14 ++++++++++++++ 4 files changed, 67 insertions(+), 20 deletions(-) diff --git a/aiff/decoder.go b/aiff/decoder.go index a1a6af7..9190759 100644 --- a/aiff/decoder.go +++ b/aiff/decoder.go @@ -169,6 +169,19 @@ func (d *Decoder) SamplesInt() (samples audio.SamplesInt, err error) { return samples[:n*int(d.NumChans)], err } +// SamplesFloat64 returns the audio frames contained in the reader. +// Notes that this method allocates a lot of memory (depending on the duration of the underlying file). +func (d *Decoder) SamplesFloat64() (samples audio.SamplesFloat64, err error) { + pcm := d.PCM() + if pcm == nil { + return nil, fmt.Errorf("no PCM data available") + } + totalSamples := int(d.numSampleFrames) * int(d.NumChans) + samples = make(audio.SamplesFloat64, totalSamples) + n, err := pcm.Float64s(samples) + return samples[:n*int(d.NumChans)], err +} + // Duration returns the time duration for the current AIFF container func (d *Decoder) Duration() (time.Duration, error) { if d == nil { diff --git a/audio.go b/audio.go index 1cd756c..be2fc16 100644 --- a/audio.go +++ b/audio.go @@ -32,6 +32,21 @@ func (f SamplesFloat64) Get(channel, n int) float64 { return f[n*channel] } +func (f SamplesFloat64) StereoToMono() SamplesFloat64 { + if f == nil { + return nil + } + + mono := make(SamplesFloat64, len(f)/2) + var j int + for i := 0; i+2 <= len(f); { + mono[j] = AvgFloat64(f[i], f[i+1]) + i += 2 + j++ + } + return mono +} + type PCM interface { Ints(samples SamplesInt) (n int, err error) Float64s(samples SamplesFloat64) (n int, err error) @@ -73,6 +88,15 @@ func AvgInt(xs ...int) int { return output / len(xs) } +// AvgFloat64 averages the passed float values +func AvgFloat64(xs ...float64) float64 { + var output float64 + for i := 0; i < len(xs); i++ { + output += xs[i] + } + return output / float64(len(xs)) +} + // IntMaxSignedValue returns the max value of an integer // based on its memory size func IntMaxSignedValue(b int) int { diff --git a/decimator/main.go b/decimator/main.go index 2b9190d..3fcf5ef 100644 --- a/decimator/main.go +++ b/decimator/main.go @@ -20,7 +20,7 @@ import ( "github.com/mattetti/audio/dsp/filters" "github.com/mattetti/audio/dsp/windows" "github.com/mattetti/audio/generator" - "github.com/mattetti/audio/riff/wav" + "github.com/mattetti/audio/wav" ) var ( @@ -58,9 +58,9 @@ func main() { // our osc generates values from -1 to 1, we need to go back to PCM scale factor := float64(intMaxSignedValue(bitSize)) // build the audio frames - frames := make([][]int, len(data) / *factorFlag) + frames := make(audio.SamplesInt, len(data) / *factorFlag) for i := 0; i < len(frames); i++ { - frames[i] = []int{int(filtered[i**factorFlag] * factor)} + frames[i] = int(filtered[i**factorFlag] * factor) } // generate the sound file @@ -70,8 +70,7 @@ func main() { } defer o.Close() e := aiff.NewEncoder(o, fs / *factorFlag, 16, 1) - e.Frames = frames - if err := e.Write(); err != nil { + if err := e.Write(frames); err != nil { panic(err) } return @@ -94,28 +93,29 @@ func main() { } defer f.Close() - var monoFrames audio.Frames + var monoFrames audio.SamplesFloat64 var sampleRate int var sampleSize int switch codec { case "aiff": d := aiff.NewDecoder(f) - frames, err := d.Frames() + frames, err := d.SamplesFloat64() if err != nil { panic(err) } sampleRate = d.SampleRate sampleSize = int(d.BitDepth) - monoFrames = audio.ToMonoFrames(frames) + monoFrames = frames.StereoToMono() case "wav": - info, frames, err := wav.NewDecoder(f, nil).ReadFrames() + d := wav.NewDecoder(f) + frames, err := d.SamplesFloat64() if err != nil { panic(err) } - sampleRate = int(info.SampleRate) - sampleSize = int(info.BitsPerSample) - monoFrames = audio.ToMonoFrames(frames) + sampleRate = int(d.SampleRate) + sampleSize = int(d.BitDepth) + monoFrames = frames.StereoToMono() } fmt.Printf("undersampling -> %s file at %dHz to %d samples (%d)\n", codec, sampleRate, sampleRate / *factorFlag, sampleSize) @@ -127,10 +127,7 @@ func main() { log.Fatalf("input sample rate of %dHz not supported", sampleRate) } - amplitudesF := make([]float64, len(monoFrames)) - for i, f := range monoFrames { - amplitudesF[i] = float64(f[0]) - } + amplitudesF := monoFrames // low pass filter before we drop some samples to avoid aliasing s := &filters.Sinc{Taps: 62, SamplingFreq: sampleRate, CutOffFreq: float64(sampleRate / 2), Window: windows.Blackman} @@ -139,9 +136,9 @@ func main() { if err != nil { panic(err) } - frames := make([][]int, len(amplitudesF) / *factorFlag) + frames := make([]int, len(amplitudesF) / *factorFlag) for i := 0; i < len(frames); i++ { - frames[i] = []int{int(filtered[i**factorFlag])} + frames[i] = int(filtered[i**factorFlag]) } of, err := os.Create("resampled.aiff") @@ -150,8 +147,7 @@ func main() { } defer of.Close() aiffe := aiff.NewEncoder(of, sampleRate / *factorFlag, sampleSize, 1) - aiffe.Frames = frames - if err := aiffe.Write(); err != nil { + if err := aiffe.Write(frames); err != nil { panic(err) } } diff --git a/wav/decoder.go b/wav/decoder.go index aa62d94..45c0bd1 100644 --- a/wav/decoder.go +++ b/wav/decoder.go @@ -135,6 +135,20 @@ func (d *Decoder) SamplesInt() (frames audio.SamplesInt, err error) { return frames[:n], err } +// SamplesFloat64 returns the audio frames contained in reader. +// Notes that this method allocates a lot of memory (depending on the duration of the underlying file). +// Consider using the decoder clip and reading/decoding using a buffer. +func (d *Decoder) SamplesFloat64() (frames audio.SamplesFloat64, err error) { + pcm := d.PCM() + if pcm == nil { + return nil, fmt.Errorf("no PCM data available") + } + totalFrames := int(pcm.Size()) * int(d.NumChans) + frames = make(audio.SamplesFloat64, totalFrames) + n, err := pcm.Float64s(frames) + return frames[:n], err +} + // Duration returns the time duration for the current audio container func (d *Decoder) Duration() (time.Duration, error) { if d == nil || d.parser == nil { From e47b75d1c7c122256924dd8686a5bad622d3dec8 Mon Sep 17 00:00:00 2001 From: Matt Aimonetti Date: Thu, 21 Jul 2016 22:33:43 -0700 Subject: [PATCH 7/8] decimator: make sure to close the encoder --- decimator/main.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/decimator/main.go b/decimator/main.go index 3fcf5ef..193f5e6 100644 --- a/decimator/main.go +++ b/decimator/main.go @@ -73,6 +73,10 @@ func main() { if err := e.Write(frames); err != nil { panic(err) } + if err := e.Close(); err != nil { + panic(err) + } + fmt.Println("generated resampled file: resampled.aiff") return } @@ -150,6 +154,10 @@ func main() { if err := aiffe.Write(frames); err != nil { panic(err) } + if err := aiffe.Close(); err != nil { + panic(err) + } + fmt.Println("downsampled file saved as resampled.aiff") } func intMaxSignedValue(b int) int { From ade524de80e6e8dc16608896b636dbbd4e1436fd Mon Sep 17 00:00:00 2001 From: Matt Aimonetti Date: Thu, 21 Jul 2016 22:50:03 -0700 Subject: [PATCH 8/8] aiff: fix test suite but the Next PCM interface is weird --- aiff/pcm_test.go | 22 +++++++++++++--------- wav/encoder_test.go | 1 - 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/aiff/pcm_test.go b/aiff/pcm_test.go index 14a0d39..dd463c1 100644 --- a/aiff/pcm_test.go +++ b/aiff/pcm_test.go @@ -87,23 +87,27 @@ func TestClip_NextInts(t *testing.T) { } numChannels, _, _, _ := pcm.Info() - frames, err := pcm.NextInts(tc.samplesToRead) + samples, err := pcm.NextInts(tc.samplesToRead / numChannels) if err != nil { t.Fatal(err) } - if len(frames) != tc.samplesToRead { - t.Fatalf("expected to read %d samples but read %d", tc.samplesToRead, len(frames)) + if len(samples) != tc.samplesToRead { + t.Fatalf("expected to read %d samples but read %d", tc.samplesToRead, len(samples)) } - if len(frames) <= 0 { - t.Fatal("unexpected empty frames") + if len(samples) <= 0 { + t.Fatal("unexpected empty samples") } - for i := 0; i+numChannels < len(frames); { + if len(samples) != len(tc.output) { + t.Fatalf("length of samples (%d) != expected length (%d)", len(samples), len(tc.output)) + } + + for i := 0; i+numChannels < len(samples); { for j := 0; j < numChannels; j++ { - if frames[i] != tc.output[i] { - t.Logf("%#v\n", frames) + if samples[i] != tc.output[i] { + t.Logf("%#v\n", samples) t.Logf("%#v\n", tc.output) - t.Fatalf("frame value at position %d: %d didn't match expected: %d", i, frames[i], tc.output[i]) + t.Fatalf("frame value at position %d: %d didn't match expected: %d", i, samples[i], tc.output[i]) } i++ } diff --git a/wav/encoder_test.go b/wav/encoder_test.go index a32997f..6434c13 100644 --- a/wav/encoder_test.go +++ b/wav/encoder_test.go @@ -99,7 +99,6 @@ func TestEncoderRoundTrip(t *testing.T) { } for i := 0; i+nNumChannels < len(frames); { for j := 0; j < nNumChannels; j++ { - t.Logf("length: %d, nlength: %d, i: %d\n", len(frames), len(nframes), i) if frames[i] != nframes[i] { t.Fatalf("frame value at position %d: %d didn't match nframes position %d: %d", i, frames[i], i, nframes[i]) }