go-whisper-api/whisper/audio_load.go
admin b5c083e06f
Some checks failed
CodeQL / Analyze (go) (push) Successful in 6m28s
Docker Image / build-docker (push) Failing after 13m26s
Lint and Testing / lint (push) Successful in 11m17s
Lint and Testing / test (push) Successful in 11m17s
Lint and Testing / golangci (push) Successful in 2m40s
first commit
2026-06-04 18:10:52 +07:00

61 lines
1.6 KiB
Go

package whisper
import (
"fmt"
"os"
"path/filepath"
"go-whisper-api/config"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-audio/wav"
)
// LoadPCM16Mono decodes the WAV file at path into float32 samples (for
// diarization). It is the exported entry point for loadPCM16Mono, which
// accepts only single-channel audio whose sample rate equals
// whisper.SampleRate and returns an error for anything else.
func LoadPCM16Mono(path string) ([]float32, error) {
	samples, err := loadPCM16Mono(path)
	return samples, err
}
// loadPCM16Mono opens the WAV file at path and returns its samples as float32.
//
// The header is validated before any audio is decoded: the sample rate must
// equal whisper.SampleRate and the file must have exactly one channel.
// Rejecting an unsupported file up front avoids pulling its entire PCM payload
// into memory only to throw it away. Bit depth is not checked here.
//
// An error is returned when the file cannot be opened, its header cannot be
// parsed, its format is unsupported, or decoding the samples fails.
func loadPCM16Mono(path string) ([]float32, error) {
	fh, err := os.Open(path)
	if err != nil {
		// *os.PathError already names the operation and the path.
		return nil, err
	}
	defer fh.Close()

	dec := wav.NewDecoder(fh)

	// Parse only the header so SampleRate and NumChans are populated before
	// committing to a full decode.
	dec.ReadInfo()
	if err := dec.Err(); err != nil {
		return nil, fmt.Errorf("reading wav header of %q: %w", path, err)
	}
	if dec.SampleRate != whisper.SampleRate {
		return nil, fmt.Errorf("unsupported sample rate: %d", dec.SampleRate)
	}
	if dec.NumChans != 1 {
		return nil, fmt.Errorf("unsupported number of channels: %d", dec.NumChans)
	}

	buf, err := dec.FullPCMBuffer()
	if err != nil {
		return nil, fmt.Errorf("decoding wav %q: %w", path, err)
	}
	// Defensive: never dereference a nil buffer, even if the decoder reported
	// no error.
	if buf == nil {
		return nil, fmt.Errorf("decoding wav %q: no PCM data", path)
	}
	return buf.AsFloat32Buffer().Data, nil
}
// prepareAudioPCM returns float32 PCM samples for the audio file at sourcePath.
//
// It first tries to load sourcePath directly with loadPCM16Mono. If that fails
// for any reason, the file is converted with AudioToWav into "converted.wav"
// inside a directory obtained from config.MkdirTemp, and the converted file is
// loaded instead. The error from the direct attempt is not reported.
//
// The returned cleanup is never nil, so callers may call or defer it
// unconditionally, including when err is non-nil. On the conversion path it
// removes the temporary directory; otherwise it does nothing. On failure the
// temporary directory has already been removed before returning.
func prepareAudioPCM(sourcePath string) (data []float32, cleanup func(), err error) {
	// noop is handed back whenever nothing is left for the caller to remove.
	noop := func() {}

	// Fast path: the source is already loadable as-is.
	if data, err = loadPCM16Mono(sourcePath); err == nil {
		return data, noop, nil
	}

	dir, err := config.MkdirTemp("go-whisper-api-whisper-*")
	if err != nil {
		return nil, noop, fmt.Errorf("creating temp dir: %w", err)
	}
	// Removal is best-effort; a failure here must not mask the primary result.
	removeDir := func() { _ = os.RemoveAll(dir) }

	converted := filepath.Join(dir, "converted.wav")
	if err := AudioToWav(sourcePath, converted); err != nil {
		removeDir()
		return nil, noop, fmt.Errorf("converting %q to wav: %w", sourcePath, err)
	}

	data, err = loadPCM16Mono(converted)
	if err != nil {
		removeDir()
		return nil, noop, fmt.Errorf("loading converted audio %q: %w", converted, err)
	}
	return data, removeDir, nil
}