Some checks failed
CodeQL / Analyze (go) (push) Successful in 6m28s
Docker Image / build-docker (push) Failing after 13m26s
Lint and Testing / lint (push) Successful in 11m17s
Lint and Testing / test (push) Successful in 11m17s
Lint and Testing / golangci (push) Successful in 2m40s
289 lines
9.2 KiB
Go
289 lines
9.2 KiB
Go
package transcode
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
|
|
"github.com/Eyevinn/mp4ff/mp4"
|
|
"github.com/olivier-w/climp-aac-decoder/aacfile"
|
|
aacdec "github.com/skrashevich/go-aac/pkg/decoder"
|
|
)
|
|
|
|
// mp4AACSample locates one AAC frame inside the container.
type mp4AACSample struct {
	// offset is the byte position the frame is read from.
	offset int64
	// size is the number of bytes that make up the frame.
	size int
}
|
|
|
|
func isMP4SampleDeltaError(err error) bool {
|
|
var uf *aacfile.UnsupportedFeatureError
|
|
if !errors.As(err, &uf) {
|
|
return false
|
|
}
|
|
return uf.Feature == "MP4 sample delta" || uf.Feature == "MP4 sample delta layout"
|
|
}
|
|
|
|
// decodeMP4AACRelaxed demuxes MP4/M4A with mp4ff (ignoring stts sample deltas) and
|
|
// decodes raw AAC frames with go-aac. Used when climp-aac-decoder rejects stts
|
|
// entries whose delta is not exactly 1024 (common in ffmpeg/phone muxers).
|
|
func decodeMP4AACRelaxed(r io.ReaderAt, size int64) ([]float64, int, int, error) {
|
|
asc, samples, leading, err := demuxMP4AAC(r, size)
|
|
if err != nil {
|
|
return nil, 0, 0, err
|
|
}
|
|
dec := aacdec.New()
|
|
if err := dec.SetASC(asc); err != nil {
|
|
return nil, 0, 0, fmt.Errorf("aac config: %w", err)
|
|
}
|
|
ch := dec.Config.ChanConfig
|
|
if ch < 1 {
|
|
return nil, 0, 0, fmt.Errorf("aac config: invalid channel count %d", ch)
|
|
}
|
|
sr := dec.Config.SampleRate
|
|
if sr <= 0 {
|
|
return nil, 0, 0, fmt.Errorf("aac config: invalid sample rate %d", sr)
|
|
}
|
|
|
|
maxSize := 0
|
|
for _, s := range samples {
|
|
if s.size > maxSize {
|
|
maxSize = s.size
|
|
}
|
|
}
|
|
buf := make([]byte, maxSize)
|
|
var pcm []float32
|
|
for i, s := range samples {
|
|
if cap(buf) < s.size {
|
|
buf = make([]byte, s.size)
|
|
}
|
|
frame := buf[:s.size]
|
|
if _, err := r.ReadAt(frame, s.offset); err != nil {
|
|
return nil, 0, 0, fmt.Errorf("read mp4 aac sample %d: %w", i, err)
|
|
}
|
|
out, err := dec.DecodeFrame(frame)
|
|
if err != nil {
|
|
return nil, 0, 0, fmt.Errorf("decode mp4 aac sample %d: %w", i, err)
|
|
}
|
|
pcm = append(pcm, out...)
|
|
}
|
|
|
|
skipSamples := leading * ch
|
|
if skipSamples > len(pcm) {
|
|
skipSamples = len(pcm)
|
|
}
|
|
pcm = pcm[skipSamples:]
|
|
|
|
samplesF64 := make([]float64, len(pcm))
|
|
for i, v := range pcm {
|
|
samplesF64[i] = float64(v)
|
|
}
|
|
if ch > 1 {
|
|
samplesF64 = float32InterleavedToMono(samplesF64, ch)
|
|
ch = 1
|
|
}
|
|
return samplesF64, sr, ch, nil
|
|
}
|
|
|
|
// float32InterleavedToMono collapses interleaved multi-channel samples into
// one value per frame by averaging the channels of that frame.
//
// With channels <= 1 the input slice is returned as is. Otherwise a new slice
// of len(samples)/channels values is returned; trailing values that do not
// fill a complete frame are ignored.
func float32InterleavedToMono(samples []float64, channels int) []float64 {
	if channels <= 1 {
		return samples
	}
	mono := make([]float64, len(samples)/channels)
	for frame := range mono {
		start := frame * channels
		var total float64
		for _, v := range samples[start : start+channels] {
			total += v
		}
		mono[frame] = total / float64(channels)
	}
	return mono
}
|
|
|
|
func demuxMP4AAC(r io.ReaderAt, size int64) (asc []byte, samples []mp4AACSample, leading int, err error) {
|
|
file, err := mp4.DecodeFile(io.NewSectionReader(r, 0, size), mp4.WithDecodeMode(mp4.DecModeLazyMdat))
|
|
if err != nil {
|
|
return nil, nil, 0, fmt.Errorf("mp4 decode: %w", err)
|
|
}
|
|
if file.IsFragmented() {
|
|
return nil, nil, 0, fmt.Errorf("fragmented mp4 is not supported")
|
|
}
|
|
if file.Moov == nil {
|
|
return nil, nil, 0, fmt.Errorf("mp4: missing moov")
|
|
}
|
|
|
|
var audioTracks []*mp4.TrakBox
|
|
for _, trak := range file.Moov.Traks {
|
|
if trak != nil && trak.Mdia != nil && trak.Mdia.Hdlr != nil && trak.Mdia.Hdlr.HandlerType == "soun" {
|
|
audioTracks = append(audioTracks, trak)
|
|
}
|
|
}
|
|
if len(audioTracks) != 1 {
|
|
return nil, nil, 0, fmt.Errorf("mp4: expected one audio track, found %d", len(audioTracks))
|
|
}
|
|
trak := audioTracks[0]
|
|
if trak.Mdia == nil || trak.Mdia.Minf == nil || trak.Mdia.Minf.Stbl == nil || trak.Mdia.Minf.Stbl.Stsd == nil {
|
|
return nil, nil, 0, fmt.Errorf("mp4: incomplete audio track")
|
|
}
|
|
stsd := trak.Mdia.Minf.Stbl.Stsd
|
|
if len(stsd.Children) != 1 {
|
|
return nil, nil, 0, fmt.Errorf("mp4: multiple sample descriptions")
|
|
}
|
|
if stsd.Enca != nil {
|
|
return nil, nil, 0, fmt.Errorf("mp4: encrypted audio")
|
|
}
|
|
sampleEntry := stsd.Mp4a
|
|
if sampleEntry == nil {
|
|
return nil, nil, 0, fmt.Errorf("mp4: unsupported audio sample entry %s", stsd.Children[0].Type())
|
|
}
|
|
if sampleEntry.Sinf != nil {
|
|
return nil, nil, 0, fmt.Errorf("mp4: encrypted audio")
|
|
}
|
|
if sampleEntry.Esds == nil ||
|
|
sampleEntry.Esds.DecConfigDescriptor == nil ||
|
|
sampleEntry.Esds.DecConfigDescriptor.DecSpecificInfo == nil ||
|
|
len(sampleEntry.Esds.DecConfigDescriptor.DecSpecificInfo.DecConfig) == 0 {
|
|
return nil, nil, 0, fmt.Errorf("mp4: missing AudioSpecificConfig")
|
|
}
|
|
asc = append([]byte(nil), sampleEntry.Esds.DecConfigDescriptor.DecSpecificInfo.DecConfig...)
|
|
|
|
leading, _ = mp4LeadingTrimRelaxed(trak)
|
|
|
|
samples, err = buildMP4AACSamples(trak, size)
|
|
if err != nil {
|
|
return nil, nil, 0, err
|
|
}
|
|
if len(samples) == 0 {
|
|
return nil, nil, 0, fmt.Errorf("mp4: no audio samples")
|
|
}
|
|
return asc, samples, leading, nil
|
|
}
|
|
|
|
func mp4LeadingTrimRelaxed(trak *mp4.TrakBox) (int, error) {
|
|
if trak.Edts == nil || len(trak.Edts.Elst) == 0 {
|
|
return 0, nil
|
|
}
|
|
if len(trak.Edts.Elst) != 1 || len(trak.Edts.Elst[0].Entries) != 1 {
|
|
return 0, nil
|
|
}
|
|
entry := trak.Edts.Elst[0].Entries[0]
|
|
if entry.MediaRateInteger != 1 || entry.MediaRateFraction != 0 {
|
|
return 0, nil
|
|
}
|
|
if entry.MediaTime < 0 {
|
|
return 0, nil
|
|
}
|
|
return int(entry.MediaTime), nil
|
|
}
|
|
|
|
func buildMP4AACSamples(trak *mp4.TrakBox, size int64) ([]mp4AACSample, error) {
|
|
if trak.Mdia == nil || trak.Mdia.Minf == nil || trak.Mdia.Minf.Stbl == nil {
|
|
return nil, fmt.Errorf("mp4: incomplete sample table")
|
|
}
|
|
stbl := trak.Mdia.Minf.Stbl
|
|
if stbl.Stsc == nil || stbl.Stsz == nil {
|
|
return nil, fmt.Errorf("mp4: incomplete sample table")
|
|
}
|
|
if stbl.Stco == nil && stbl.Co64 == nil {
|
|
return nil, fmt.Errorf("mp4: missing chunk offsets")
|
|
}
|
|
if len(stbl.Stsc.Entries) == 0 {
|
|
return nil, fmt.Errorf("mp4: empty chunk map")
|
|
}
|
|
|
|
totalSamples := int(trak.GetNrSamples())
|
|
if totalSamples <= 0 {
|
|
return nil, fmt.Errorf("mp4: empty sample table")
|
|
}
|
|
|
|
sampleSizes, err := mp4AACSampleSizes(stbl.Stsz, totalSamples)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
chunkOffsets, err := mp4AACChunkOffsets(stbl)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
out := make([]mp4AACSample, 0, totalSamples)
|
|
sampleIndex := 0
|
|
entryIndex := 0
|
|
entry := stbl.Stsc.Entries[entryIndex]
|
|
|
|
for chunkIndex := 0; chunkIndex < len(chunkOffsets) && sampleIndex < totalSamples; chunkIndex++ {
|
|
chunkNr := uint32(chunkIndex + 1)
|
|
for entryIndex+1 < len(stbl.Stsc.Entries) && chunkNr >= stbl.Stsc.Entries[entryIndex+1].FirstChunk {
|
|
entryIndex++
|
|
entry = stbl.Stsc.Entries[entryIndex]
|
|
}
|
|
if entry.SamplesPerChunk == 0 {
|
|
return nil, fmt.Errorf("mp4: zero samples per chunk")
|
|
}
|
|
|
|
offset := chunkOffsets[chunkIndex]
|
|
samplesPerChunk := int(entry.SamplesPerChunk)
|
|
for i := 0; i < samplesPerChunk && sampleIndex < totalSamples; i++ {
|
|
sampleSize := sampleSizes[sampleIndex]
|
|
end := offset + int64(sampleSize)
|
|
if offset < 0 || end < offset || end > size {
|
|
return nil, fmt.Errorf("mp4: invalid sample bounds at sample %d", sampleIndex+1)
|
|
}
|
|
out = append(out, mp4AACSample{offset: offset, size: sampleSize})
|
|
offset = end
|
|
sampleIndex++
|
|
}
|
|
}
|
|
if sampleIndex != totalSamples {
|
|
return nil, fmt.Errorf("mp4: sample table mismatch")
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
func mp4AACSampleSizes(stsz *mp4.StszBox, totalSamples int) ([]int, error) {
|
|
if stsz == nil {
|
|
return nil, fmt.Errorf("mp4: missing sample sizes")
|
|
}
|
|
if int(stsz.GetNrSamples()) != totalSamples {
|
|
return nil, fmt.Errorf("mp4: sample size count mismatch")
|
|
}
|
|
sizes := make([]int, totalSamples)
|
|
if stsz.SampleUniformSize != 0 {
|
|
sz := int(stsz.SampleUniformSize)
|
|
for i := range sizes {
|
|
sizes[i] = sz
|
|
}
|
|
return sizes, nil
|
|
}
|
|
if len(stsz.SampleSize) != totalSamples {
|
|
return nil, fmt.Errorf("mp4: sample size table mismatch")
|
|
}
|
|
for i, sz := range stsz.SampleSize {
|
|
sizes[i] = int(sz)
|
|
}
|
|
return sizes, nil
|
|
}
|
|
|
|
func mp4AACChunkOffsets(stbl *mp4.StblBox) ([]int64, error) {
|
|
switch {
|
|
case stbl == nil:
|
|
return nil, fmt.Errorf("mp4: incomplete sample table")
|
|
case stbl.Stco != nil:
|
|
offsets := make([]int64, len(stbl.Stco.ChunkOffset))
|
|
for i, off := range stbl.Stco.ChunkOffset {
|
|
offsets[i] = int64(off)
|
|
}
|
|
return offsets, nil
|
|
case stbl.Co64 != nil:
|
|
offsets := make([]int64, len(stbl.Co64.ChunkOffset))
|
|
for i, off := range stbl.Co64.ChunkOffset {
|
|
if off > uint64(^uint64(0)>>1) {
|
|
return nil, fmt.Errorf("mp4: invalid chunk offset")
|
|
}
|
|
offsets[i] = int64(off)
|
|
}
|
|
return offsets, nil
|
|
default:
|
|
return nil, fmt.Errorf("mp4: missing chunk offsets")
|
|
}
|
|
}
|