go-whisper-api/transcode/aac_decode.go
admin b5c083e06f
Some checks failed
CodeQL / Analyze (go) (push) Successful in 6m28s
Docker Image / build-docker (push) Failing after 13m26s
Lint and Testing / lint (push) Successful in 11m17s
Lint and Testing / test (push) Successful in 11m17s
Lint and Testing / golangci (push) Successful in 2m40s
first commit
2026-06-04 18:10:52 +07:00

133 lines
3.3 KiB
Go

package transcode
import (
"fmt"
"io"
"math"
"os"
"path/filepath"
"strings"
"github.com/olivier-w/climp-aac-decoder/aacfile"
)
// decodeAACPath opens the file at path, decodes it through aacfile and returns
// the audio as float64 samples together with the sample rate and channel count.
//
// ext is forwarded to aacOpenName to build the name handed to aacfile.Open.
// When the decoder reports more than one channel the samples are averaged down
// to a single channel and the returned channel count is 1.
//
// If aacfile.Open fails with an error recognised by isMP4SampleDeltaError, the
// result of decodeMP4AACRelaxed on the same open file is returned instead.
// Errors from opening, stat-ing or decoding are returned unchanged; a failure
// while reading the decoded PCM is wrapped with "read aac pcm".
func decodeAACPath(path, ext string) ([]float64, int, int, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, 0, 0, err
	}
	defer file.Close()

	info, err := file.Stat()
	if err != nil {
		return nil, 0, 0, err
	}

	// aacfile chooses its parser from the extension of the supplied name rather
	// than from the file content, so a container-appropriate name is passed
	// (for example .mp4 is presented as .m4a).
	openName := aacOpenName(path, ext)
	size := info.Size()

	dec, err := aacfile.Open(file, size, openName)
	if err != nil {
		if isMP4SampleDeltaError(err) {
			return decodeMP4AACRelaxed(file, size)
		}
		return nil, 0, 0, err
	}
	defer dec.Close()

	rate := dec.SampleRate()
	channels := dec.ChannelCount()

	raw, err := io.ReadAll(dec)
	if err != nil {
		return nil, 0, 0, fmt.Errorf("read aac pcm: %w", err)
	}

	// The decoded bytes are converted as 16-bit little-endian PCM.
	samples := pcm16LEToFloat(raw, channels)
	if channels > 1 {
		samples = interleavedToMono(samples, channels)
		channels = 1
	}
	return samples, rate, channels, nil
}
// aacContainerExt maps a file extension (with leading dot) to the container
// extension that should be presented to aacfile.
//
// Matching is case-insensitive and the result is always lower-case, so callers
// can compare it directly against a lower-cased extension:
//   - ".aac", ".m4a" and ".m4b" map to themselves (lower-cased);
//   - ".mp4", ".m4v", ".mov", ".3gp" and ".3g2" map to ".m4a";
//   - anything else, including the empty string, falls back to ".m4a".
func aacContainerExt(ext string) string {
	lower := strings.ToLower(ext)
	switch lower {
	case ".mp4", ".m4v", ".mov", ".3gp", ".3g2":
		return ".m4a"
	case ".aac", ".m4a", ".m4b":
		// Return the normalised spelling, not the caller's: ".AAC" must yield
		// ".aac" so the result is a canonical extension.
		return lower
	default:
		return ".m4a"
	}
}
// aacOpenName builds the file name handed to the AAC decoder for path.
//
// The container extension is derived from ext via aacContainerExt. If the base
// name of path already ends in that extension (compared after lower-casing the
// base name's extension) the base name is returned as is. Otherwise the base
// name's extension is replaced by the container extension; when the base name
// has no extension, or consists of nothing but an extension, the stem "audio"
// is used instead.
func aacOpenName(path, ext string) string {
	target := aacContainerExt(ext)
	if target == "" {
		target = ".m4a"
	}

	name := filepath.Base(path)
	suffix := filepath.Ext(name)
	if strings.ToLower(suffix) == target {
		return name
	}

	// filepath.Ext always returns a suffix of name, so slicing it off yields
	// the stem.
	stem := name[:len(name)-len(suffix)]
	if stem == "" || suffix == "" {
		stem = "audio"
	}
	return stem + target
}
// pcm16LEToFloat converts signed 16-bit little-endian PCM bytes into float64
// samples by dividing each value by 32768, giving a range of [-1, 1).
//
// channels is the number of interleaved channels; values below 1 are treated
// as 1. Only whole frames (2*channels bytes) are converted, so any trailing
// partial frame in pcm is ignored. The output keeps the input's interleaving.
func pcm16LEToFloat(pcm []byte, channels int) []float64 {
	if channels < 1 {
		channels = 1
	}

	frames := len(pcm) / (2 * channels)
	out := make([]float64, frames*channels)
	for i := range out {
		lo, hi := pcm[2*i], pcm[2*i+1]
		// Assemble the little-endian pair, then reinterpret it as signed.
		sample := int16(uint16(lo) | uint16(hi)<<8)
		out[i] = float64(sample) / 32768.0
	}
	return out
}
// interleavedToMono mixes interleaved multi-channel samples down to a single
// channel by averaging the channels of every frame.
//
// With channels <= 1 the input slice is returned unchanged (not copied).
// Samples beyond the last complete frame are ignored.
func interleavedToMono(samples []float64, channels int) []float64 {
	if channels <= 1 {
		return samples
	}

	mono := make([]float64, len(samples)/channels)
	for f := range mono {
		frame := samples[f*channels : (f+1)*channels]
		total := 0.0
		for _, s := range frame {
			total += s
		}
		mono[f] = total / float64(channels)
	}
	return mono
}
// resampleLinear converts samples from fromRate to toRate using linear
// interpolation between neighbouring input samples.
//
// The input slice is returned unchanged when either rate is non-positive, the
// rates are equal, or samples is empty. Otherwise the output has
// round(len(samples) * toRate / fromRate) elements, with a minimum of one.
// Output positions that map onto or past the final input sample take that
// final sample's value.
func resampleLinear(samples []float64, fromRate, toRate int) []float64 {
	switch {
	case fromRate <= 0, toRate <= 0, fromRate == toRate, len(samples) == 0:
		return samples
	}

	// step is how far the read position advances in the input per output sample.
	step := float64(fromRate) / float64(toRate)
	n := int(math.Round(float64(len(samples)) / step))
	if n < 1 {
		n = 1
	}

	last := len(samples) - 1
	out := make([]float64, n)
	for i := range out {
		pos := float64(i) * step
		idx := int(pos)
		if idx < last {
			frac := pos - float64(idx)
			out[i] = samples[idx]*(1-frac) + samples[idx+1]*frac
		} else {
			// No right-hand neighbour to interpolate with: hold the last value.
			out[i] = samples[last]
		}
	}
	return out
}