go-whisper-api/transcode/ogg_decode.go
admin b5c083e06f
Some checks failed
CodeQL / Analyze (go) (push) Successful in 6m28s
Docker Image / build-docker (push) Failing after 13m26s
Lint and Testing / lint (push) Successful in 11m17s
Lint and Testing / test (push) Successful in 11m17s
Lint and Testing / golangci (push) Successful in 2m40s
first commit
2026-06-04 18:10:52 +07:00

155 lines
3.9 KiB
Go

package transcode
import (
"bytes"
"errors"
"fmt"
"io"
"os"
"strings"
"github.com/gopxl/beep"
"github.com/gopxl/beep/vorbis"
"github.com/pion/opus"
"github.com/pion/opus/pkg/oggreader"
)
// decodeOggFile decodes the Ogg file at path, choosing the decoder from the
// codec reported by sniffOggCodec.
//
// When the codec cannot be identified, Vorbis is tried first. If that attempt
// fails with an error that isVorbisInvalidHeader accepts, the file is retried
// as Opus; any other Vorbis error is returned unchanged.
func decodeOggFile(path string) (beep.Streamer, beep.Format, io.Closer, error) {
	codec := sniffOggCodec(path)
	if codec == "opus" {
		return decodeOggOpus(path)
	}
	if codec == "vorbis" {
		return decodeOggVorbis(path)
	}

	// Unknown codec: probe with the Vorbis decoder before falling back.
	streamer, format, closer, vorbisErr := decodeOggVorbis(path)
	switch {
	case vorbisErr == nil:
		return streamer, format, closer, nil
	case isVorbisInvalidHeader(vorbisErr):
		return decodeOggOpus(path)
	default:
		return nil, beep.Format{}, nil, vorbisErr
	}
}
// sniffOggCodec reads up to the first 8192 bytes of the file at path and
// reports which codec marker it finds: "opus" when "OpusHead" is present,
// "vorbis" when the byte 0x01 followed by "vorbis" is present, and "" when the
// file cannot be opened, does not begin with "OggS", or contains neither
// marker in that window. "OpusHead" takes precedence if both markers appear.
func sniffOggCodec(path string) string {
	const sniffLen = 8192

	file, err := os.Open(path)
	if err != nil {
		return ""
	}
	defer file.Close()

	head := make([]byte, sniffLen)
	// The read error is ignored on purpose: files shorter than the window
	// yield a short read, and only the bytes actually read are inspected.
	read, _ := io.ReadFull(file, head)
	head = head[:read]

	switch {
	case !bytes.HasPrefix(head, []byte("OggS")):
		return ""
	case bytes.Contains(head, []byte("OpusHead")):
		return "opus"
	case bytes.Contains(head, []byte("\x01vorbis")):
		// Vorbis ID packet: 0x01 + "vorbis".
		return "vorbis"
	default:
		return ""
	}
}
// isVorbisInvalidHeader reports whether err's message, compared
// case-insensitively, contains "invalid header" or "vorbis:". A nil error
// yields false.
func isVorbisInvalidHeader(err error) bool {
	if err == nil {
		return false
	}
	lowered := strings.ToLower(err.Error())
	for _, marker := range []string{"invalid header", "vorbis:"} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return false
}
// decodeOggVorbis opens the file at path and passes it to vorbis.Decode.
//
// On success the decoded streamer and format are returned together with the
// opened file as the io.Closer. If opening fails, the os.Open error is
// returned as is; if decoding fails, the file is closed and the error is
// wrapped with an "ogg/vorbis: " prefix.
func decodeOggVorbis(path string) (beep.Streamer, beep.Format, io.Closer, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, beep.Format{}, nil, openErr
	}

	stream, format, err := vorbis.Decode(file)
	if err == nil {
		return stream, format, file, nil
	}

	// The decoder rejected the file, so nothing downstream will own it.
	file.Close()
	return nil, beep.Format{}, nil, fmt.Errorf("ogg/vorbis: %w", err)
}
// decodeOggOpus decodes the Ogg/Opus file at path fully into memory and
// returns the result wrapped by newSamplesStreamer.
//
// The sample rate and channel count come from the Ogg header; values that are
// not positive fall back to 48000 and 1. Every packet is decoded before the
// function returns. Audio with more than one channel is passed through
// interleavedToMono, so the returned format always reports a single channel.
// On success the opened file is returned as the io.Closer; on any failure
// after the file was opened, the file is closed before returning the error.
func decodeOggOpus(path string) (beep.Streamer, beep.Format, io.Closer, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, beep.Format{}, nil, err
	}

	// fail closes the file and produces the shared error return.
	fail := func(cause error) (beep.Streamer, beep.Format, io.Closer, error) {
		file.Close()
		return nil, beep.Format{}, nil, cause
	}

	reader, header, err := oggreader.NewWith(file)
	if err != nil {
		return fail(fmt.Errorf("ogg/opus: %w", err))
	}

	rate := int(header.SampleRate)
	if rate <= 0 {
		rate = 48000
	}
	channels := int(header.Channels)
	if channels <= 0 {
		channels = 1
	}

	decoder, err := opus.NewDecoderWithOutput(rate, channels)
	if err != nil {
		return fail(fmt.Errorf("ogg/opus decoder: %w", err))
	}

	// Scratch buffer reused for every packet; sized for maxFrameSamples
	// samples on each channel.
	const maxFrameSamples = 5760
	frame := make([]float32, maxFrameSamples*channels)

	var pcm []float64
	for {
		packet, _, readErr := reader.ParseNextPacket()
		if errors.Is(readErr, io.EOF) {
			break
		}
		if readErr != nil {
			return fail(fmt.Errorf("ogg/opus read: %w", readErr))
		}

		// Empty packets and the OpusHead/OpusTags packets are skipped.
		switch {
		case len(packet) == 0,
			bytes.HasPrefix(packet, []byte("OpusHead")),
			bytes.HasPrefix(packet, []byte("OpusTags")):
			continue
		}

		decoded, decErr := decoder.DecodeToFloat32(packet, frame)
		if decErr != nil {
			return fail(fmt.Errorf("ogg/opus decode: %w", decErr))
		}
		if decoded <= 0 {
			continue
		}

		// Take decoded*channels values, clamped to the scratch buffer size.
		valid := frame
		if want := decoded * channels; want < len(frame) {
			valid = frame[:want]
		}
		for _, v := range valid {
			pcm = append(pcm, float64(v))
		}
	}

	if len(pcm) == 0 {
		return fail(errors.New("ogg/opus: no audio samples"))
	}

	outChannels := channels
	if outChannels > 1 {
		pcm = interleavedToMono(pcm, outChannels)
		outChannels = 1
	}

	format := beep.Format{
		SampleRate:  beep.SampleRate(rate),
		NumChannels: outChannels,
		Precision:   2,
	}
	return newSamplesStreamer(pcm, rate), format, file, nil
}