package whisper

import (
	"fmt"
	"os"
	"path/filepath"

	"go-whisper-api/config"

	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
	"github.com/go-audio/wav"
)

// LoadPCM16Mono reads 16 kHz mono WAV into float32 samples (for diarization).
//
// It returns an error if the file cannot be opened or decoded, if its sample
// rate differs from whisper.SampleRate, or if it has more or fewer than one
// channel.
func LoadPCM16Mono(path string) ([]float32, error) {
	return loadPCM16Mono(path)
}

// loadPCM16Mono decodes the WAV file at path and returns its samples as
// float32 values.
//
// The whole PCM payload is decoded first; the format is validated afterwards
// against whisper.SampleRate and a single channel. Errors from opening and
// decoding are returned unwrapped so callers can inspect them directly.
func loadPCM16Mono(path string) ([]float32, error) {
	fh, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer fh.Close()

	dec := wav.NewDecoder(fh)
	buf, err := dec.FullPCMBuffer()
	if err != nil {
		return nil, err
	}
	if dec.SampleRate != whisper.SampleRate {
		return nil, fmt.Errorf("unsupported sample rate: %d", dec.SampleRate)
	}
	if dec.NumChans != 1 {
		return nil, fmt.Errorf("unsupported number of channels: %d", dec.NumChans)
	}
	return buf.AsFloat32Buffer().Data, nil
}

// prepareAudioPCM returns the samples of sourcePath as float32 PCM.
//
// It first tries to load sourcePath directly. If that fails for any reason
// (not a WAV file, wrong sample rate, wrong channel count, ...), the file is
// converted with AudioToWav into a temporary directory and the converted file
// is loaded instead.
//
// The returned cleanup function is never nil, so it is always safe to call or
// defer, even when err is non-nil. On success via the conversion path it
// removes the temporary directory; in every other case it is a no-op (on
// failure the temporary directory has already been removed).
func prepareAudioPCM(sourcePath string) (data []float32, cleanup func(), err error) {
	noop := func() {}

	// Fast path: the source is already in the expected format.
	if data, err = loadPCM16Mono(sourcePath); err == nil {
		return data, noop, nil
	}

	// The direct-load error is intentionally dropped: any failure simply
	// means we fall back to converting the input.
	dir, err := config.MkdirTemp("go-whisper-api-whisper-*")
	if err != nil {
		return nil, noop, fmt.Errorf("creating temp dir for audio conversion: %w", err)
	}
	// Best-effort removal; there is nothing useful to do if it fails.
	removeDir := func() { _ = os.RemoveAll(dir) }

	converted := filepath.Join(dir, "converted.wav")
	if err := AudioToWav(sourcePath, converted); err != nil {
		removeDir()
		return nil, noop, fmt.Errorf("converting audio to wav: %w", err)
	}

	data, err = loadPCM16Mono(converted)
	if err != nil {
		removeDir()
		return nil, noop, fmt.Errorf("loading converted audio: %w", err)
	}
	return data, removeDir, nil
}