package punctuation import ( "context" "regexp" "strings" "unicode" "unicode/utf8" ) type Heuristic struct{} func (Heuristic) Active() bool { return true } func (Heuristic) Restore(ctx context.Context, text, language string) (string, error) { _ = ctx text = strings.TrimSpace(text) if text == "" { return text, nil } text = normalizeSpaces(text) text = capitalizeFirst(text) lang := strings.ToLower(strings.TrimSpace(language)) if lang == "ru" || lang == "rus" || lang == "russian" || lang == "auto" { text = heuristicRU(text) } else { text = heuristicEN(text) } return ensureTerminalPunct(text), nil } func normalizeSpaces(s string) string { return strings.Join(strings.Fields(s), " ") } func capitalizeFirst(s string) string { r, size := utf8.DecodeRuneInString(s) if r == utf8.RuneError { return s } return string(unicode.ToUpper(r)) + s[size:] } var ( reQuestionRU = regexp.MustCompile(`(?i)(^|.*\s)(как|что|где|когда|почему|зачем|кто|чей|какой|какая|какое|какие|сколько|зачем|откуда|куда|ли)(\s+[^.?!]+)$`) reQuestionEN = regexp.MustCompile(`(?i)^(who|what|when|where|why|how|which|whose|whom|is|are|am|was|were|do|does|did|can|could|would|will|shall|should)\b`) ) func heuristicRU(s string) string { if reQuestionRU.MatchString(s) && !strings.HasSuffix(s, "?") { return s + "?" } if !hasTerminalPunct(s) && len(strings.Fields(s)) <= 24 { return s + "." } return s } func heuristicEN(s string) string { lower := strings.ToLower(s) if reQuestionEN.MatchString(lower) && !strings.HasSuffix(s, "?") { return s + "?" } if !hasTerminalPunct(s) && len(strings.Fields(s)) <= 24 { return s + "." } return s } func hasTerminalPunct(s string) bool { s = strings.TrimSpace(s) if s == "" { return false } r, _ := utf8.DecodeLastRuneInString(s) return r == '.' || r == '?' || r == '!' || r == '…' } func ensureTerminalPunct(s string) string { if hasTerminalPunct(s) { return s } return s + "." }