Some checks failed
CodeQL / Analyze (go) (push) Successful in 6m28s
Docker Image / build-docker (push) Failing after 13m26s
Lint and Testing / lint (push) Successful in 11m17s
Lint and Testing / test (push) Successful in 11m17s
Lint and Testing / golangci (push) Successful in 2m40s
61 lines
1.6 KiB
Go
61 lines
1.6 KiB
Go
package whisper
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
|
|
"go-whisper-api/config"
|
|
|
|
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
|
"github.com/go-audio/wav"
|
|
)
|
|
|
|
// LoadPCM16Mono reads 16 kHz mono WAV into float32 samples (for diarization).
|
|
func LoadPCM16Mono(path string) ([]float32, error) {
|
|
return loadPCM16Mono(path)
|
|
}
|
|
|
|
func loadPCM16Mono(path string) ([]float32, error) {
|
|
fh, err := os.Open(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer fh.Close()
|
|
dec := wav.NewDecoder(fh)
|
|
buf, err := dec.FullPCMBuffer()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if dec.SampleRate != whisper.SampleRate {
|
|
return nil, fmt.Errorf("unsupported sample rate: %d", dec.SampleRate)
|
|
}
|
|
if dec.NumChans != 1 {
|
|
return nil, fmt.Errorf("unsupported number of channels: %d", dec.NumChans)
|
|
}
|
|
return buf.AsFloat32Buffer().Data, nil
|
|
}
|
|
|
|
func prepareAudioPCM(sourcePath string) (data []float32, cleanup func(), err error) {
|
|
cleanup = func() {}
|
|
if data, err = loadPCM16Mono(sourcePath); err == nil {
|
|
return data, cleanup, nil
|
|
}
|
|
dir, err := config.MkdirTemp("go-whisper-api-whisper-*")
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
cleanup = func() { os.RemoveAll(dir) }
|
|
converted := filepath.Join(dir, "converted.wav")
|
|
if err := AudioToWav(sourcePath, converted); err != nil {
|
|
cleanup()
|
|
return nil, nil, err
|
|
}
|
|
data, err = loadPCM16Mono(converted)
|
|
if err != nil {
|
|
cleanup()
|
|
return nil, nil, err
|
|
}
|
|
return data, cleanup, nil
|
|
}
|