first commit
Some checks failed
Some checks failed
This commit is contained in:
parent
b5c083e06f
commit
318b736244
904
api/swagger.json
904
api/swagger.json
File diff suppressed because it is too large
Load Diff
131
internal/apidoc/swagger_test.go
Normal file
131
internal/apidoc/swagger_test.go
Normal file
@ -0,0 +1,131 @@
|
||||
package apidoc_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// swaggerPath returns the absolute path of api/swagger.json, resolved
// relative to the directory the test runs in (two levels up, then api/).
// The test is failed immediately if the path cannot be made absolute or
// the file cannot be stat'ed.
func swaggerPath(t *testing.T) string {
	t.Helper()
	abs, err := filepath.Abs(filepath.Join("..", "..", "api", "swagger.json"))
	if err == nil {
		// Only probe the file once the path itself is known to be good.
		_, err = os.Stat(abs)
	}
	if err != nil {
		t.Fatal(err)
	}
	return abs
}
|
||||
|
||||
// documentedPaths lists every (path, HTTP method) pair that the swagger
// document is expected to describe. Methods are written in lower case
// because they are looked up as keys of the swagger "paths" objects.
var documentedPaths = []struct {
	path   string
	method string
}{
	{path: "/swagger.json", method: "get"},
	{path: "/spr/models", method: "get"},
	{path: "/spr/hostname", method: "get"},
	{path: "/spr/queue", method: "get"},
	{path: "/spr/queue/{taskID}", method: "get"},
	{path: "/spr/queue/{taskID}", method: "delete"},
	{path: "/spr/stt/{id}", method: "post"},
	{path: "/spr/result/{taskID}", method: "get"},
	{path: "/spr/audio/{taskID}", method: "get"},
	{path: "/spr/waveform/{taskID}", method: "get"},
	{path: "/spr/import/{id}", method: "post"},
	{path: "/spr/export/{id}", method: "get"},
	{path: "/spr/delete/{id}", method: "delete"},
	{path: "/v1/models", method: "get"},
	{path: "/v1/audio/transcriptions", method: "post"},
	{path: "/v1/audio/transcriptions/", method: "post"},
}
|
||||
|
||||
func TestSwaggerJSON_validAndComplete(t *testing.T) {
|
||||
data, err := os.ReadFile(swaggerPath(t))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
var spec struct {
|
||||
Swagger string `json:"swagger"`
|
||||
Paths map[string]map[string]json.RawMessage `json:"paths"`
|
||||
Info struct {
|
||||
Title string `json:"title"`
|
||||
} `json:"info"`
|
||||
}
|
||||
if err := json.Unmarshal(data, &spec); err != nil {
|
||||
t.Fatalf("invalid JSON: %v", err)
|
||||
}
|
||||
if spec.Swagger != "2.0" {
|
||||
t.Fatalf("swagger version: got %q", spec.Swagger)
|
||||
}
|
||||
for _, want := range documentedPaths {
|
||||
methods, ok := spec.Paths[want.path]
|
||||
if !ok {
|
||||
t.Errorf("missing path %s", want.path)
|
||||
continue
|
||||
}
|
||||
if _, ok := methods[want.method]; !ok {
|
||||
t.Errorf("path %s missing method %s", want.path, want.method)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSwaggerJSON_sttQueryParamsMatchImplementation(t *testing.T) {
|
||||
data, err := os.ReadFile(swaggerPath(t))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
var spec struct {
|
||||
Paths map[string]struct {
|
||||
Post struct {
|
||||
Parameters []struct {
|
||||
Name string `json:"name"`
|
||||
In string `json:"in"`
|
||||
} `json:"parameters"`
|
||||
} `json:"post"`
|
||||
} `json:"paths"`
|
||||
}
|
||||
if err := json.Unmarshal(data, &spec); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
stt := spec.Paths["/spr/stt/{id}"]
|
||||
var queryNames []string
|
||||
for _, p := range stt.Post.Parameters {
|
||||
if p.In == "query" {
|
||||
queryNames = append(queryNames, p.Name)
|
||||
}
|
||||
}
|
||||
for _, required := range []string{"async", "language", "punctuation", "speakers", "speaker_counter"} {
|
||||
if !contains(queryNames, required) {
|
||||
t.Errorf("STT missing query param %q", required)
|
||||
}
|
||||
}
|
||||
for _, removed := range []string{"webhook", "toxicity", "normalization", "vad", "classifiers"} {
|
||||
if contains(queryNames, removed) {
|
||||
t.Errorf("STT documents unused param %q", removed)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSwaggerJSON_hasOpenAIPaths(t *testing.T) {
|
||||
data, err := os.ReadFile(swaggerPath(t))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
s := string(data)
|
||||
if !strings.Contains(s, "/v1/audio/transcriptions") {
|
||||
t.Fatal("missing OpenAI transcription path")
|
||||
}
|
||||
}
|
||||
|
||||
// contains reports whether s is an element of ss.
// A nil or empty slice never contains anything.
func contains(ss []string, s string) bool {
	for i := 0; i < len(ss); i++ {
		if ss[i] == s {
			return true
		}
	}
	return false
}
|
||||
@ -69,14 +69,6 @@ func heuristicEN(s string) string {
|
||||
return s
|
||||
}
|
||||
|
||||
func hasTerminalPunct(s string) bool {
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
return false
|
||||
}
|
||||
r, _ := utf8.DecodeLastRuneInString(s)
|
||||
return r == '.' || r == '?' || r == '!' || r == '…'
|
||||
}
|
||||
|
||||
func ensureTerminalPunct(s string) string {
|
||||
if hasTerminalPunct(s) {
|
||||
|
||||
70
punctuation/normalize.go
Normal file
70
punctuation/normalize.go
Normal file
@ -0,0 +1,70 @@
|
||||
package punctuation
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// terminalPunctRunes is the set of marks after which no additional
// sentence-final period is appended.
//
// Besides real sentence terminators it includes comma-like marks and
// closing brackets/quotes. The last row holds the CJK full stop and the
// full-width comma, question mark and exclamation mark; the full-width
// forms are written as escapes so they cannot be confused with (or
// silently normalised into) their ASCII look-alikes, which would produce
// duplicate map keys and fail to compile.
var terminalPunctRunes = map[rune]bool{
	'.': true, '?': true, '!': true, '…': true,
	',': true, ';': true, ':': true,
	')': true, ']': true, '"': true, '\'': true,
	'»': true, '”': true, '’': true,
	'。': true, '\uFF0C': true, '\uFF1F': true, '\uFF01': true,
}
|
||||
|
||||
// CleanExcessive collapses duplicate and conflicting punctuation marks.
//
// The input is trimmed of surrounding whitespace and then scanned rune by
// rune:
//   - a comma-like mark following another comma-like mark is dropped
//     ("привет,," -> "привет,");
//   - a sentence-end mark following another sentence-end mark is dropped
//     ("ok!!!" -> "ok!");
//   - a sentence-end mark following a comma-like mark is dropped
//     ("привет,." -> "привет,").
//
// Whitespace between two marks does not reset the comparison; any other
// non-punctuation rune does. Other punctuation (brackets, quotes, dashes,
// ...) is always kept, so sequences such as `")` or `("` survive intact.
// The result is trimmed again before it is returned.
func CleanExcessive(s string) string {
	s = strings.TrimSpace(s)
	if s == "" {
		return s
	}
	var b strings.Builder
	b.Grow(len(s))
	// prevClass is the punctClass of the most recent punctuation mark that
	// has not yet been followed by a non-space, non-punctuation rune.
	prevClass := 0 // 0 none, 1 comma-like, 2 end, 3 other punct
	for _, r := range s {
		cls := punctClass(r)
		switch {
		case (cls == 1 || cls == 2) && cls == prevClass:
			// Duplicate comma-like or sentence-end mark. Class 3 is
			// deliberately excluded: distinct brackets/quotes that happen to
			// be adjacent are not duplicates of each other.
			continue
		case cls == 2 && prevClass == 1:
			// drop sentence end right after comma-like (e.g. "привет,.")
			continue
		}
		b.WriteRune(r)
		switch {
		case cls != 0:
			prevClass = cls
		case !unicode.IsSpace(r):
			prevClass = 0
		}
	}
	return strings.TrimSpace(b.String())
}

// punctClass classifies r for CleanExcessive:
//
//	1 - comma-like separators (ASCII, full-width, ideographic and Arabic
//	    commas, semicolons, colon)
//	2 - sentence-end marks (ASCII and full-width '.', '?', '!', the
//	    ellipsis and the CJK full stop)
//	3 - any other rune for which unicode.IsPunct is true
//	0 - everything else
//
// Full-width forms are written as escapes so they stay distinct from their
// ASCII look-alikes; identical case labels would not compile.
func punctClass(r rune) int {
	switch r {
	case ',', '\uFF0C', '、', '،', ';', '؛', ':':
		return 1
	case '.', '?', '!', '…', '。', '\uFF1F', '\uFF01':
		return 2
	}
	if unicode.IsPunct(r) {
		return 3
	}
	return 0
}
|
||||
|
||||
func hasTerminalPunct(s string) bool {
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
return false
|
||||
}
|
||||
r, _ := utf8.DecodeLastRuneInString(s)
|
||||
return terminalPunctRunes[r]
|
||||
}
|
||||
46
punctuation/normalize_test.go
Normal file
46
punctuation/normalize_test.go
Normal file
@ -0,0 +1,46 @@
|
||||
package punctuation
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCleanExcessive(t *testing.T) {
|
||||
cases := []struct {
|
||||
in, want string
|
||||
}{
|
||||
{"привет,,", "привет,"},
|
||||
{"привет,.", "привет,"},
|
||||
{"hello..", "hello."},
|
||||
{"what??", "what?"},
|
||||
{"ok!!!", "ok!"},
|
||||
{"а. б. в.", "а. б. в."},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
got := CleanExcessive(tc.in)
|
||||
if got != tc.want {
|
||||
t.Errorf("CleanExcessive(%q) = %q, want %q", tc.in, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasTerminalPunct_comma(t *testing.T) {
|
||||
if !hasTerminalPunct("привет,") {
|
||||
t.Fatal("comma should count as terminal for heuristic")
|
||||
}
|
||||
if hasTerminalPunct("привет") {
|
||||
t.Fatal("bare word should not")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHeuristic_noCommaPeriod(t *testing.T) {
|
||||
h := Heuristic{}
|
||||
out, err := h.Restore(context.Background(), "привет, мир", "ru")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if strings.Contains(out, ",.") {
|
||||
t.Fatalf("unexpected comma+period: %q", out)
|
||||
}
|
||||
}
|
||||
@ -116,7 +116,11 @@ func Apply(ctx context.Context, r Restorer, enabled bool, text, language string)
|
||||
if text == "" {
|
||||
return text, nil
|
||||
}
|
||||
return r.Restore(ctx, text, language)
|
||||
out, err := r.Restore(ctx, text, language)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return CleanExcessive(out), nil
|
||||
}
|
||||
|
||||
func Close(r Restorer) {
|
||||
|
||||
@ -135,7 +135,8 @@ func min32(a, b float32) float32 {
|
||||
return b
|
||||
}
|
||||
|
||||
// PunctuateSegments runs punctuation on each segment separately (preserves line breaks).
|
||||
// PunctuateSegments runs punctuation per Whisper segment (legacy helper).
|
||||
// Prefer punctuating the full transcript after FormatSegments (see Engine.Result).
|
||||
func PunctuateSegments(segments []wpkg.Segment, restore func(text string) (string, error)) ([]wpkg.Segment, error) {
|
||||
out := make([]wpkg.Segment, len(segments))
|
||||
copy(out, segments)
|
||||
|
||||
@ -186,13 +186,12 @@ func (e *Engine) SetTranscriptText(text string) {
|
||||
|
||||
func (e *Engine) Result() TranscriptResult {
|
||||
segments := e.segments
|
||||
if e.runOpts.PunctuateRestore != nil {
|
||||
updated, err := PunctuateSegments(segments, e.runOpts.PunctuateRestore)
|
||||
if err == nil {
|
||||
segments = updated
|
||||
}
|
||||
}
|
||||
text := FormatSegments(segments, e.runOpts.Turns, e.runOpts.Format)
|
||||
if e.runOpts.PunctuateRestore != nil && text != "" {
|
||||
if updated, err := e.runOpts.PunctuateRestore(text); err == nil && strings.TrimSpace(updated) != "" {
|
||||
text = updated
|
||||
}
|
||||
}
|
||||
var words []Word
|
||||
for _, segment := range segments {
|
||||
words = append(words, segmentWords(segment)...)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user