Some checks failed
CodeQL / Analyze (go) (push) Successful in 6m28s
Docker Image / build-docker (push) Failing after 13m26s
Lint and Testing / lint (push) Successful in 11m17s
Lint and Testing / test (push) Successful in 11m17s
Lint and Testing / golangci (push) Successful in 2m40s
87 lines
2.2 KiB
Go
87 lines
2.2 KiB
Go
package punctuation
|
|
|
|
import (
|
|
"context"
|
|
"regexp"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// Heuristic restores punctuation using fixed, rule-based heuristics.
// It carries no state, so the zero value is ready to use.
type Heuristic struct{}

// Active reports whether the restorer can be used; it always returns true.
func (Heuristic) Active() bool { return true }
|
|
|
|
func (Heuristic) Restore(ctx context.Context, text, language string) (string, error) {
|
|
_ = ctx
|
|
text = strings.TrimSpace(text)
|
|
if text == "" {
|
|
return text, nil
|
|
}
|
|
text = normalizeSpaces(text)
|
|
text = capitalizeFirst(text)
|
|
lang := strings.ToLower(strings.TrimSpace(language))
|
|
if lang == "ru" || lang == "rus" || lang == "russian" || lang == "auto" {
|
|
text = heuristicRU(text)
|
|
} else {
|
|
text = heuristicEN(text)
|
|
}
|
|
return ensureTerminalPunct(text), nil
|
|
}
|
|
|
|
// normalizeSpaces collapses every run of whitespace in s into a single space
// and drops leading and trailing whitespace.
func normalizeSpaces(s string) string {
	words := strings.Fields(s)
	return strings.Join(words, " ")
}
|
|
|
|
// capitalizeFirst returns s with its first rune mapped to title case.
//
// Title case rather than upper case is the correct mapping for a
// sentence-initial letter: the two differ for digraph letters such as 'ǆ'
// (title case 'ǅ', upper case 'Ǆ') and for scripts like Georgian whose
// upper-case forms are reserved for all-caps text.
//
// s is returned unchanged when it is empty, when it does not start with a
// valid UTF-8 sequence, or when the first rune has no distinct title-case form.
func capitalizeFirst(s string) string {
	first, size := utf8.DecodeRuneInString(s)
	if first == utf8.RuneError {
		// Empty input or an invalid leading byte: nothing sensible to capitalize.
		return s
	}
	title := unicode.ToTitle(first)
	if title == first {
		return s
	}
	return string(title) + s[size:]
}
|
|
|
|
// Question-detection patterns, compiled once at package initialization.
var (
	// reQuestionRU matches, case-insensitively, text in which a Russian
	// interrogative word (or the particle "ли") starts the text or follows
	// whitespace and is followed by at least one more word, with no
	// sentence-ending mark ('.', '?', '!' or '…') between that word and the
	// end of the text.
	reQuestionRU = regexp.MustCompile(`(?i)(^|.*\s)(как|что|где|когда|почему|зачем|кто|чей|какой|какая|какое|какие|сколько|откуда|куда|ли)(\s+[^.?!…]+)$`)

	// reQuestionEN matches, case-insensitively, text that begins with an
	// English interrogative word or auxiliary verb as a whole word.
	reQuestionEN = regexp.MustCompile(`(?i)^(who|what|when|where|why|how|which|whose|whom|is|are|am|was|were|do|does|did|can|could|would|will|shall|should)\b`)
)
|
|
|
|
func heuristicRU(s string) string {
|
|
if reQuestionRU.MatchString(s) && !strings.HasSuffix(s, "?") {
|
|
return s + "?"
|
|
}
|
|
if !hasTerminalPunct(s) && len(strings.Fields(s)) <= 24 {
|
|
return s + "."
|
|
}
|
|
return s
|
|
}
|
|
|
|
func heuristicEN(s string) string {
|
|
lower := strings.ToLower(s)
|
|
if reQuestionEN.MatchString(lower) && !strings.HasSuffix(s, "?") {
|
|
return s + "?"
|
|
}
|
|
if !hasTerminalPunct(s) && len(strings.Fields(s)) <= 24 {
|
|
return s + "."
|
|
}
|
|
return s
|
|
}
|
|
|
|
// hasTerminalPunct reports whether s, ignoring surrounding whitespace, ends
// with one of the sentence terminators '.', '?', '!' or '…'.
func hasTerminalPunct(s string) bool {
	// For empty or whitespace-only input the decoder yields utf8.RuneError,
	// which matches none of the cases below.
	last, _ := utf8.DecodeLastRuneInString(strings.TrimSpace(s))
	switch last {
	case '.', '?', '!', '…':
		return true
	default:
		return false
	}
}
|
|
|
|
func ensureTerminalPunct(s string) string {
|
|
if hasTerminalPunct(s) {
|
|
return s
|
|
}
|
|
return s + "."
|
|
}
|