go-whisper-api/punctuation/normalize_test.go
admin 318b736244
Some checks failed
Docker Image / build-docker (push) Failing after 1m26s
Lint and Testing / lint (push) Successful in 43s
Lint and Testing / test (push) Successful in 5m38s
Lint and Testing / golangci (push) Successful in 1m14s
CodeQL / Analyze (go) (push) Successful in 6m23s
first commit
2026-06-04 19:25:56 +07:00

47 lines
1.1 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package punctuation
import (
"context"
"strings"
"testing"
)
// TestCleanExcessive feeds CleanExcessive a table of inputs and compares each
// result with the exact string the table expects. Every mismatch is reported
// with t.Errorf, so one failing row does not hide the others.
func TestCleanExcessive(t *testing.T) {
	type sample struct {
		in, want string
	}

	table := []sample{
		// Runs of punctuation marks are expected to shrink to one mark.
		{in: "привет,,", want: "привет,"},
		{in: "привет,.", want: "привет,"},
		{in: "hello..", want: "hello."},
		{in: "what??", want: "what?"},
		{in: "ok!!!", want: "ok!"},
		// Input with single marks only is expected to come back unchanged.
		{in: "а. б. в.", want: "а. б. в."},
	}

	for i := range table {
		input, expected := table[i].in, table[i].want
		if actual := CleanExcessive(input); actual != expected {
			t.Errorf("CleanExcessive(%q) = %q, want %q", input, actual, expected)
		}
	}
}
// TestHasTerminalPunct_comma asserts that hasTerminalPunct returns true for a
// word followed by a comma and false for the same word with nothing after it.
// The checks run in order and the test stops at the first one that fails.
func TestHasTerminalPunct_comma(t *testing.T) {
	if withComma := hasTerminalPunct("привет,"); !withComma {
		t.Fatal("comma should count as terminal for heuristic")
	}
	if bareWord := hasTerminalPunct("привет"); bareWord {
		t.Fatal("bare word should not")
	}
}
// TestHeuristic_noCommaPeriod calls Restore on a zero-value Heuristic with
// text that already contains a comma and fails if Restore returns an error or
// if the returned text contains a comma immediately followed by a period.
func TestHeuristic_noCommaPeriod(t *testing.T) {
	// The character pair that must never appear in the restored text.
	const forbidden = ",."

	restorer := Heuristic{}
	restored, err := restorer.Restore(context.Background(), "привет, мир", "ru")

	switch {
	case err != nil:
		t.Fatal(err)
	case strings.Contains(restored, forbidden):
		t.Fatalf("unexpected comma+period: %q", restored)
	}
}