package garbage import ( "regexp" "strings" ) var spaceCollapse = regexp.MustCompile(`\s+`) // Word is a timed token for garbage filtering (mirrors whisper.Word JSON shape). type Word struct { Word string `json:"word"` Start int `json:"start"` Stop int `json:"stop"` } // FilterText removes configured artifact substrings and normalizes whitespace. func FilterText(text string, patterns []string) string { for _, p := range patterns { p = strings.TrimSpace(p) if p == "" { continue } text = strings.ReplaceAll(text, p, " ") } return strings.TrimSpace(spaceCollapse.ReplaceAllString(text, " ")) } // FilterWords drops tokens that match any garbage pattern. func FilterWords(words []Word, patterns []string) []Word { if len(words) == 0 { return words } out := make([]Word, 0, len(words)) for _, w := range words { if matchesGarbage(w.Word, patterns) { continue } out = append(out, w) } return out } func matchesGarbage(word string, patterns []string) bool { word = strings.TrimSpace(word) for _, p := range patterns { p = strings.TrimSpace(p) if p == "" { continue } if word == p || strings.Contains(word, p) { return true } } return false }