Some checks failed
CodeQL / Analyze (go) (push) Successful in 6m28s
Docker Image / build-docker (push) Failing after 13m26s
Lint and Testing / lint (push) Successful in 11m17s
Lint and Testing / test (push) Successful in 11m17s
Lint and Testing / golangci (push) Successful in 2m40s
252 lines
7.5 KiB
Go
252 lines
7.5 KiB
Go
package main
|
|
|
|
import (
|
|
"os"
|
|
"runtime"
|
|
"strconv"
|
|
"time"
|
|
|
|
"go-whisper-api/api"
|
|
"go-whisper-api/config"
|
|
|
|
_ "github.com/joho/godotenv/autoload"
|
|
"github.com/mattn/go-isatty"
|
|
"github.com/rs/zerolog"
|
|
"github.com/rs/zerolog/log"
|
|
"github.com/urfave/cli/v2"
|
|
)
|
|
|
|
// Version is the version string reported by the CLI application. Nothing in
// this file assigns it, so it is empty unless set from outside (presumably
// injected at build time via -ldflags "-X main.Version=..." — confirm against
// the build scripts).
var Version string
|
|
|
|
func main() {
|
|
isTerm := isatty.IsTerminal(os.Stdout.Fd())
|
|
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
|
log.Logger = log.Output(
|
|
zerolog.ConsoleWriter{
|
|
Out: os.Stderr,
|
|
NoColor: !isTerm,
|
|
},
|
|
)
|
|
zerolog.CallerMarshalFunc = func(pc uintptr, file string, line int) string {
|
|
short := file
|
|
for i := len(file) - 1; i > 0; i-- {
|
|
if file[i] == '/' {
|
|
short = file[i+1:]
|
|
break
|
|
}
|
|
}
|
|
file = short
|
|
return file + ":" + strconv.Itoa(line)
|
|
}
|
|
app := cli.NewApp()
|
|
app.Name = "go-whisper-api"
|
|
app.Usage = "HTTP API for speech-to-text (SPR + OpenAI-compatible)."
|
|
app.Copyright = "Copyright (c) " + strconv.Itoa(time.Now().Year()) + " Bo-Yi Wu"
|
|
app.Authors = []*cli.Author{
|
|
{
|
|
Name: "Bo-Yi Wu",
|
|
Email: "appleboy.tw@gmail.com",
|
|
},
|
|
}
|
|
app.Version = Version
|
|
app.Commands = []*cli.Command{
|
|
{
|
|
Name: "serve",
|
|
Aliases: []string{"s", "api"},
|
|
Usage: "start HTTP API server",
|
|
Flags: serveFlags(),
|
|
Action: runServe,
|
|
},
|
|
}
|
|
app.Flags = append(append([]cli.Flag{configFlag()}, punctuationFlags()...), serveFlags()...)
|
|
app.Action = runServe
|
|
if err := app.Run(os.Args); err != nil {
|
|
log.Fatal().Err(err).Msg("can't run app")
|
|
}
|
|
}
|
|
|
|
func configFlag() cli.Flag {
|
|
return &cli.StringFlag{
|
|
Name: "config",
|
|
Usage: "path to YAML config file (default: config.yaml if present)",
|
|
EnvVars: []string{"CONFIG_PATH", "GO_WHISPER_CONFIG"},
|
|
}
|
|
}
|
|
|
|
func punctuationFlags() []cli.Flag {
|
|
return []cli.Flag{
|
|
&cli.BoolFlag{
|
|
Name: "punctuation-enabled",
|
|
Usage: "master switch: enable or disable punctuation (YAML: punctuation.enabled)",
|
|
EnvVars: []string{"PUNCTUATION_ENABLED"},
|
|
},
|
|
&cli.BoolFlag{
|
|
Name: "punctuation-default-on",
|
|
Usage: "apply punctuation by default when query/flags omitted (YAML: punctuation.default_on)",
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "punctuation-engine",
|
|
Usage: "punctuation engine: off, heuristic, sherpa, sherpa-online, http, xlm",
|
|
EnvVars: []string{"PUNCTUATION_ENGINE"},
|
|
},
|
|
}
|
|
}
|
|
|
|
func vadFlags() []cli.Flag {
|
|
return []cli.Flag{
|
|
&cli.BoolFlag{
|
|
Name: "vad",
|
|
Usage: "enable voice activity detection (Silero VAD)",
|
|
EnvVars: []string{"API_VAD"},
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "vad-model",
|
|
Usage: "path to ggml Silero VAD model (e.g. models/ggml-silero-v6.2.0.bin)",
|
|
EnvVars: []string{"API_VAD_MODEL"},
|
|
},
|
|
&cli.Float64Flag{
|
|
Name: "vad-threshold",
|
|
Usage: "VAD speech threshold (0.0-1.0)",
|
|
EnvVars: []string{"API_VAD_THRESHOLD"},
|
|
},
|
|
&cli.IntFlag{
|
|
Name: "vad-min-speech-ms",
|
|
Usage: "minimum speech duration in ms",
|
|
EnvVars: []string{"API_VAD_MIN_SPEECH_MS"},
|
|
},
|
|
&cli.IntFlag{
|
|
Name: "vad-min-silence-ms",
|
|
Usage: "minimum silence between segments in ms",
|
|
EnvVars: []string{"API_VAD_MIN_SILENCE_MS"},
|
|
},
|
|
&cli.Float64Flag{
|
|
Name: "vad-max-speech-sec",
|
|
Usage: "maximum speech segment length in seconds (0 = unlimited)",
|
|
EnvVars: []string{"API_VAD_MAX_SPEECH_SEC"},
|
|
},
|
|
&cli.IntFlag{
|
|
Name: "vad-speech-pad-ms",
|
|
Usage: "padding around speech segments in ms",
|
|
EnvVars: []string{"API_VAD_SPEECH_PAD_MS"},
|
|
},
|
|
&cli.Float64Flag{
|
|
Name: "vad-samples-overlap",
|
|
Usage: "overlap between VAD segments in seconds",
|
|
EnvVars: []string{"API_VAD_SAMPLES_OVERLAP"},
|
|
},
|
|
}
|
|
}
|
|
|
|
func serveFlags() []cli.Flag {
|
|
flags := []cli.Flag{
|
|
&cli.StringFlag{
|
|
Name: "addr",
|
|
Usage: "HTTP listen address",
|
|
EnvVars: []string{"API_ADDR"},
|
|
Value: ":8080",
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "models-dir",
|
|
Usage: "directory with ggml *.bin whisper models",
|
|
EnvVars: []string{"API_MODELS_DIR"},
|
|
Value: "./models",
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "cache-dir",
|
|
Usage: "directory for async task cache (waiting/ready)",
|
|
EnvVars: []string{"API_CACHE_DIR"},
|
|
Value: "./cache",
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "language",
|
|
Usage: "default language for speech recognition",
|
|
EnvVars: []string{"API_LANGUAGE"},
|
|
Value: "auto",
|
|
},
|
|
&cli.UintFlag{
|
|
Name: "threads",
|
|
Usage: "number of threads for whisper",
|
|
EnvVars: []string{"API_THREADS"},
|
|
Value: uint(runtime.NumCPU()),
|
|
},
|
|
&cli.BoolFlag{
|
|
Name: "debug",
|
|
Usage: "enable debug mode",
|
|
EnvVars: []string{"API_DEBUG"},
|
|
},
|
|
&cli.BoolFlag{
|
|
Name: "speedup",
|
|
Usage: "speed up audio by x2",
|
|
EnvVars: []string{"API_SPEEDUP"},
|
|
},
|
|
&cli.BoolFlag{
|
|
Name: "translate",
|
|
Usage: "translate to english",
|
|
EnvVars: []string{"API_TRANSLATE"},
|
|
},
|
|
&cli.StringFlag{
|
|
Name: "prompt",
|
|
Usage: "initial prompt",
|
|
EnvVars: []string{"API_PROMPT"},
|
|
},
|
|
&cli.UintFlag{
|
|
Name: "max-context",
|
|
Usage: "maximum text context tokens",
|
|
EnvVars: []string{"API_MAX_CONTEXT"},
|
|
Value: 32,
|
|
},
|
|
&cli.UintFlag{
|
|
Name: "beam-size",
|
|
Usage: "beam size for beam search",
|
|
EnvVars: []string{"API_BEAM_SIZE"},
|
|
Value: 5,
|
|
},
|
|
&cli.Float64Flag{
|
|
Name: "entropy-thold",
|
|
Usage: "entropy threshold",
|
|
EnvVars: []string{"API_ENTROPY_THOLD"},
|
|
Value: 2.4,
|
|
},
|
|
&cli.BoolFlag{
|
|
Name: "default-punctuation",
|
|
Usage: "enable punctuation on STT by default",
|
|
EnvVars: []string{"API_DEFAULT_PUNCTUATION"},
|
|
},
|
|
}
|
|
return append(flags, vadFlags()...)
|
|
}
|
|
|
|
func runServe(c *cli.Context) error {
|
|
apiCfg, err := config.APIFromCLI(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if apiCfg.Debug {
|
|
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
|
log.Logger = log.With().Caller().Logger()
|
|
}
|
|
vad := apiCfg.VAD.WithDefaults()
|
|
if vad.Enabled {
|
|
vad.Model = vad.ResolveModelPath(apiCfg.ModelsDir)
|
|
if err := vad.Validate(); err != nil {
|
|
return err
|
|
}
|
|
apiCfg.VAD = vad
|
|
}
|
|
tc, err := config.TranscodeFromCLI(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
pc, err := config.PunctuationFromCLI(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
dc, err := config.DiarizationFromCLI(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return api.Run(c.Context, apiCfg, tc, pc, dc)
|
|
}
|