go-whisper-api/api/transcribe_opts.go
admin b5c083e06f
Some checks failed
CodeQL / Analyze (go) (push) Successful in 6m28s
Docker Image / build-docker (push) Failing after 13m26s
Lint and Testing / lint (push) Successful in 11m17s
Lint and Testing / test (push) Successful in 11m17s
Lint and Testing / golangci (push) Successful in 2m40s
first commit
2026-06-04 18:10:52 +07:00

78 lines
2.2 KiB
Go

package api
import (
"fmt"
"net/http"
"strings"
"go-whisper-api/config"
"go-whisper-api/whisper"
)
// sttOptions carries the per-request speech-to-text settings that
// parseSTTOptions derives from the HTTP request and the server configuration.
type sttOptions struct {
	// language is the whitespace-trimmed language value chosen for the request.
	language string
	// punctuate records whether punctuation should be applied; speakers records
	// whether speaker diarization was enabled for the request.
	punctuate, speakers bool
	// numClusters is the speaker cluster count to use; it stays 0 when neither
	// the request nor the configuration supplies a positive count.
	numClusters int
}
// parseSTTOptions assembles the speech-to-text options for a single request.
//
// The language comes from resolveLanguage (request value, falling back to the
// configured language), the punctuation flag from the punctuation config's
// ShouldApplyAPI, and the diarization settings from querySpeakers.
//
// When querySpeakers fails, its error is returned together with the options
// built so far (language and punctuate set, speaker fields left at zero).
func (s *Server) parseSTTOptions(r *http.Request) (sttOptions, error) {
	var parsed sttOptions
	parsed.language = resolveLanguage(r, s.cfg.Language)
	parsed.punctuate = s.punctCfg.ShouldApplyAPI(r, s.cfg.DefaultPunctuation)

	diarize, clusterCount, err := querySpeakers(r, s.cfg.DefaultSpeakers, s.diarCfg, s.diarizer.Active())
	if err != nil {
		return parsed, err
	}

	parsed.speakers, parsed.numClusters = diarize, clusterCount
	return parsed, nil
}
// resolveLanguage picks the language for a request: the "language" query
// parameter when it is non-blank, otherwise defaultLang. The returned value
// has surrounding whitespace removed in both cases.
func resolveLanguage(r *http.Request, defaultLang string) string {
	requested := strings.TrimSpace(r.URL.Query().Get("language"))
	if requested == "" {
		// Missing or whitespace-only parameter: use the configured default.
		return strings.TrimSpace(defaultLang)
	}
	return requested
}
// querySpeakers decides whether speaker diarization applies to the request and
// with how many clusters.
//
// Decision order:
//   - speaker_counter == -1 turns diarization off outright.
//   - A non-empty "speakers" parameter enables diarization only when it reads
//     as 1; when the parameter is absent or empty, defaultOn is used.
//   - If diarization ends up enabled, an error is returned when dc.Active() is
//     false or when diarizerActive is false.
//
// The cluster count is speaker_counter when positive, otherwise dc.NumClusters
// when positive, otherwise 0.
//
// NOTE(review): queryInt is defined elsewhere; it is presumably expected to
// return the supplied fallback when the parameter is missing or not an
// integer — confirm against its definition.
func querySpeakers(r *http.Request, defaultOn bool, dc config.Diarization, diarizerActive bool) (enabled bool, numClusters int, err error) {
	// Fallback passed to queryInt for speaker_counter; it is neither -1 nor
	// positive, so it triggers none of the branches below.
	const counterFallback = -999

	requested := queryInt(r, "speaker_counter", counterFallback)
	if requested == -1 {
		return false, 0, nil
	}

	want := defaultOn
	if r.URL.Query().Get("speakers") != "" {
		want = queryInt(r, "speakers", 0) == 1
	}

	// Cases are evaluated top to bottom, so the configuration and runtime
	// checks only run once diarization is known to be wanted.
	switch {
	case !want:
		return false, 0, nil
	case !dc.Active():
		return false, 0, fmt.Errorf("speaker diarization is disabled in config (diarization.enabled: true)")
	case !diarizerActive:
		return false, 0, fmt.Errorf("speaker diarization requires server built with -tags sherpa (make build-sherpa) and models (make download-diarization-models)")
	}

	clusters := 0
	switch {
	case requested > 0:
		clusters = requested
	case dc.NumClusters > 0:
		clusters = dc.NumClusters
	}
	return true, clusters, nil
}
// whisperRunOpts builds the whisper.RunOptions for a transcription run from
// the parsed request options and the supplied turns.
//
// Formatting values (pause gap, speaker label) are read from the transcript
// configuration after WithDefaults has been applied. Speaker output is turned
// on only when the request enabled speakers and at least one turn is present.
func (s *Server) whisperRunOpts(stt sttOptions, turns []whisper.Turn) whisper.RunOptions {
	transcriptCfg := s.cfg.Transcript.WithDefaults()
	labelSpeakers := stt.speakers && len(turns) > 0

	format := whisper.FormatOptions{
		PauseGap:     transcriptCfg.PauseGapDuration(),
		SpeakerLabel: transcriptCfg.SpeakerLabel,
		UseSpeakers:  labelSpeakers,
	}
	return whisper.RunOptions{Turns: turns, Format: format}
}