admin 318b736244
Some checks failed
Docker Image / build-docker (push) Failing after 1m26s
Lint and Testing / lint (push) Successful in 43s
Lint and Testing / test (push) Successful in 5m38s
Lint and Testing / golangci (push) Successful in 1m14s
CodeQL / Analyze (go) (push) Successful in 6m23s
first commit
2026-06-04 19:25:56 +07:00

156 lines
4.0 KiB
Go

package whisper
import (
"fmt"
"strings"
"time"
wpkg "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
)
// Turn describes one diarization turn: a time range, expressed in seconds,
// during which a single speaker is active.
type Turn struct {
	// Start and End bound the turn, in seconds.
	Start, End float32
	// Speaker identifies who is talking during the turn.
	Speaker int
}
// FormatOptions controls how FormatSegments joins Whisper segments into one
// string with newlines.
type FormatOptions struct {
// PauseGap is the silence between two consecutive segments at or above which
// FormatSegments starts a new line. Values <= 0 fall back to 1500 ms.
PauseGap time.Duration
// SpeakerLabel is the word printed before the speaker number. When it is
// blank after trimming, FormatSegments uses "Спикер".
SpeakerLabel string
// UseSpeakers enables speaker assignment from diarization turns and the
// speaker labels in the output.
UseSpeakers bool
}
// FormatSegments renders Whisper segments as a single transcript string.
//
// Segments whose trimmed text is empty are skipped. Between two kept segments
// the separator is chosen in this order:
//   - a blank line plus a "<label> <n>: " prefix when speakers are enabled and
//     the segment's known speaker differs from the last known speaker;
//   - a single newline when the gap since the previous kept segment is at
//     least opts.PauseGap (1500 ms when PauseGap <= 0);
//   - a single space otherwise.
//
// Speakers are assigned only when opts.UseSpeakers is set and turns is
// non-empty; the printed speaker number is the assigned index plus one. The
// label defaults to "Спикер" when opts.SpeakerLabel is blank. An empty
// segments slice yields "".
func FormatSegments(segments []wpkg.Segment, turns []Turn, opts FormatOptions) string {
	if len(segments) == 0 {
		return ""
	}

	gap := opts.PauseGap
	if gap <= 0 {
		gap = 1500 * time.Millisecond
	}
	prefix := strings.TrimSpace(opts.SpeakerLabel)
	if prefix == "" {
		prefix = "Спикер"
	}

	entries := make([]segmentLine, 0, len(segments))
	for _, s := range segments {
		entries = append(entries, segmentLine{
			Text:    strings.TrimSpace(s.Text),
			Start:   s.Start,
			End:     s.End,
			Speaker: -1, // unknown until diarization says otherwise
		})
	}
	if opts.UseSpeakers && len(turns) > 0 {
		assignSpeakers(entries, turns)
	}

	var (
		out         strings.Builder
		started     bool          // true once the first non-empty segment is written
		lastEnd     time.Duration // End of the previously written segment
		lastSpeaker = -2          // sentinel: differs from every real speaker and from -1
	)
	writeLabel := func(speaker int) {
		fmt.Fprintf(&out, "%s %d: ", prefix, speaker+1)
	}

	for _, e := range entries {
		if e.Text == "" {
			continue
		}
		tagged := opts.UseSpeakers && e.Speaker >= 0

		switch {
		case !started:
			if tagged {
				writeLabel(e.Speaker)
			}
		case tagged && e.Speaker != lastSpeaker:
			out.WriteString("\n\n")
			writeLabel(e.Speaker)
		case e.Start-lastEnd >= gap:
			out.WriteString("\n")
		default:
			// The builder always ends with trimmed, non-empty segment text
			// here, so a separating space is always required.
			out.WriteByte(' ')
		}

		out.WriteString(e.Text)
		if e.Speaker >= 0 {
			lastSpeaker = e.Speaker
		}
		lastEnd = e.End
		started = true
	}
	return strings.TrimSpace(out.String())
}
// segmentLine is the working record for one transcript segment: its text,
// its time range, and the speaker assigned to it.
type segmentLine struct {
	Text string
	// Start and End are the segment's time range.
	Start, End time.Duration
	// Speaker is the assigned speaker index; negative means no speaker.
	Speaker int
}
// assignSpeakers fills in lines[i].Speaker for every line using the
// diarization turns. A line that matches no turn gets -1.
func assignSpeakers(lines []segmentLine, turns []Turn) {
	for i := range lines {
		lines[i].Speaker = speakerForSpan(lines[i].Start, lines[i].End, turns)
	}
}

// speakerForSpan picks the speaker for a segment spanning [start, end].
//
// The first turn whose half-open range [Start, End) contains the segment
// midpoint wins. When the midpoint falls outside every turn (for example in a
// gap between turns, or exactly on a turn's End), the turn that overlaps the
// segment the most is used instead; on equal overlap the earlier turn in the
// slice is kept. It returns -1 when no turn overlaps the segment at all.
func speakerForSpan(start, end time.Duration, turns []Turn) int {
	mid := float32((start + (end-start)/2).Seconds())
	from, to := float32(start.Seconds()), float32(end.Seconds())

	best, bestOverlap := -1, float32(0)
	for _, tr := range turns {
		if mid >= tr.Start && mid < tr.End {
			return tr.Speaker
		}
		// Score against the whole segment: a zero-width interval at the
		// midpoint would always overlap by 0 and never select a turn.
		if ov := intervalOverlap(from, to, tr.Start, tr.End); ov > bestOverlap {
			best, bestOverlap = tr.Speaker, ov
		}
	}
	return best
}

// speakerAt returns the speaker of the first turn whose half-open range
// [Start, End) contains the instant t, or -1 when no turn contains it.
func speakerAt(t time.Duration, turns []Turn) int {
	// A zero-width span has no overlap with any turn, so only the
	// containment test can match.
	return speakerForSpan(t, t, turns)
}
// intervalOverlap returns the length of the intersection of the intervals
// [a0, a1] and [b0, b1], or 0 when they do not intersect or only touch.
func intervalOverlap(a0, a1, b0, b1 float32) float32 {
	lo, hi := max32(a0, b0), min32(a1, b1)
	if hi <= lo {
		return 0
	}
	return hi - lo
}
// max32 returns a when a > b and b otherwise.
func max32(a, b float32) float32 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
// min32 returns a when a < b and b otherwise.
func min32(a, b float32) float32 {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
// PunctuateSegments applies restore to the text of each Whisper segment
// (legacy helper). Prefer punctuating the full transcript after
// FormatSegments (see Engine.Result).
//
// The input slice is not modified: the result is a copy in which every
// segment with non-blank text has its Text replaced by a single leading space
// followed by the trimmed output of restore. Segments whose text is blank are
// copied unchanged and restore is not called for them. The first error
// returned by restore aborts processing and is returned with a nil slice.
func PunctuateSegments(segments []wpkg.Segment, restore func(text string) (string, error)) ([]wpkg.Segment, error) {
	result := append(make([]wpkg.Segment, 0, len(segments)), segments...)
	for idx := range result {
		seg := &result[idx]
		trimmed := strings.TrimSpace(seg.Text)
		if trimmed == "" {
			continue
		}
		restored, err := restore(trimmed)
		if err != nil {
			return nil, err
		}
		seg.Text = " " + strings.TrimSpace(restored)
	}
	return result, nil
}