package punctuation

import (
    "strings"
    "unicode"
    "unicode/utf8"
)

// terminalPunctRunes is the set of runes after which no additional
// sentence-final period is appended. hasTerminalPunct looks up the last
// rune of a trimmed string in this set.
var terminalPunctRunes = map[rune]bool{
    // Sentence-ending marks.
    '.': true, '?': true, '!': true, '…': true,

    // Clause separators.
    ',': true, ';': true, ':': true,

    // Closing brackets and quotation marks.
    ')': true, ']': true,
    '"': true, '\'': true, '»': true, '”': true, '’': true,

    // CJK ideographic and full-width marks.
    '。': true, '，': true, '？': true, '！': true,
}

// Punctuation classes returned by punctClass. The numeric values (0..3) are
// part of punctClass's contract, since it returns a plain int.
const (
    // punctNone marks a rune that is not punctuation.
    punctNone = iota
    // punctCommaLike marks clause separators: commas, semicolons and colons.
    punctCommaLike
    // punctSentenceEnd marks sentence terminators: periods, question marks,
    // exclamation marks and the ellipsis.
    punctSentenceEnd
    // punctOther marks every remaining rune for which unicode.IsPunct is true
    // (brackets, quotes, dashes, ...).
    punctOther
)

// CleanExcessive collapses duplicate and conflicting punctuation marks in s.
//
// Surrounding whitespace is trimmed from the input and from the result.
// The following rules are applied rune by rune:
//
//   - A comma-like mark that follows another comma-like mark is dropped,
//     even when only whitespace separates them ("a,; b" -> "a, b").
//   - A sentence-ending mark that follows another sentence-ending mark is
//     dropped the same way ("what??!" -> "what?", "ok..." -> "ok.").
//   - A sentence-ending mark that follows a comma-like mark is dropped
//     ("привет,." -> "привет,").
//   - Any other punctuation rune is dropped only when it immediately repeats
//     the previous rune ("--" -> "-"). Distinct neighbours such as `("` or
//     `")`, and equal marks separated by other text or whitespace, are kept.
//
// Invalid UTF-8 bytes are written out as U+FFFD.
func CleanExcessive(s string) string {
    s = strings.TrimSpace(s)
    if s == "" {
        return s
    }
    var b strings.Builder
    b.Grow(len(s))
    prevClass := punctNone
    var lastWritten rune // most recent rune appended to b; 0 before the first write
    for _, r := range s {
        cls := punctClass(r)
        switch {
        case cls == punctOther:
            // Brackets, quotes and dashes are unrelated marks that merely
            // share a class, so only an exact, immediate repeat is a
            // duplicate. Collapsing by class would turn `("x")` into `(x"`.
            if r == lastWritten {
                continue
            }
        case cls != punctNone && cls == prevClass:
            // Second comma-like or second sentence-ending mark in a row.
            continue
        case cls == punctSentenceEnd && prevClass == punctCommaLike:
            // Sentence end right after a comma-like mark (e.g. "привет,.").
            continue
        }
        b.WriteRune(r)
        lastWritten = r
        switch {
        case cls != punctNone:
            prevClass = cls
        case !unicode.IsSpace(r):
            // Whitespace deliberately leaves prevClass untouched so that
            // ", ," and ". ." still count as duplicates.
            prevClass = punctNone
        }
    }
    return strings.TrimSpace(b.String())
}

// punctClass reports the punctuation class of r: punctCommaLike (1) for
// commas, semicolons and colons including their CJK and Arabic forms,
// punctSentenceEnd (2) for sentence terminators, punctOther (3) for any other
// rune for which unicode.IsPunct is true, and punctNone (0) otherwise.
func punctClass(r rune) int {
    switch r {
    case ',', '，', '、', '،', ';', '؛', ':':
        return punctCommaLike
    case '.', '?', '!', '…', '。', '？', '！':
        return punctSentenceEnd
    }
    if unicode.IsPunct(r) {
        return punctOther
    }
    return punctNone
}

// hasTerminalPunct reports whether s, ignoring surrounding whitespace, ends
// with a rune listed in terminalPunctRunes. Empty and whitespace-only
// strings yield false.
func hasTerminalPunct(s string) bool {
    if trimmed := strings.TrimSpace(s); len(trimmed) > 0 {
        last, _ := utf8.DecodeLastRuneInString(trimmed)
        return terminalPunctRunes[last]
    }
    return false
}