vosk-recognize-dev/pkg/audio/pcm_buffer.go
2025-10-23 13:06:22 +07:00

390 lines
9.8 KiB
Go

package audio
import "math"
// PCMDataFormat identifies which numeric sample representation a PCMBuffer
// currently uses for its data.
type PCMDataFormat uint8
// Sample representations selectable through PCMBuffer.DataType.
const (
// DataTypeUnknown is the zero value; no sample slice is selected.
DataTypeUnknown PCMDataFormat = iota
// DataTypeI8 selects the int8 samples (PCMBuffer.I8).
DataTypeI8
// DataTypeI16 selects the int16 samples (PCMBuffer.I16).
DataTypeI16
// DataTypeI32 selects the int32 samples (PCMBuffer.I32).
DataTypeI32
// DataTypeF32 selects the float32 samples (PCMBuffer.F32).
DataTypeF32
// DataTypeF64 selects the float64 samples (PCMBuffer.F64).
DataTypeF64
)
// Compile-time assertion that *PCMBuffer satisfies the Buffer interface.
var _ Buffer = (*PCMBuffer)(nil)
// PCMBuffer holds PCM samples in one of several numeric representations.
// DataType says which of the typed slices is the active one; the methods of
// this type only read the slice selected by DataType.
type PCMBuffer struct {
// Format carries the channel count and sample rate. It may be nil.
Format *Format
// I8 holds the samples when DataType is DataTypeI8.
I8 []int8
// I16 holds the samples when DataType is DataTypeI16.
I16 []int16
// I32 holds the samples when DataType is DataTypeI32.
I32 []int32
// F32 holds the samples when DataType is DataTypeF32.
F32 []float32
// F64 holds the samples when DataType is DataTypeF64.
F64 []float64
// DataType selects which of the slices above is the active one.
DataType PCMDataFormat
// SourceBitDepth, when non-zero, is the bit depth used to scale integer
// samples to floats. When zero, the depth is estimated from the sample
// values instead.
SourceBitDepth uint8
}
// Len reports how many samples are stored in the slice selected by DataType.
// A nil buffer or an unrecognised DataType yields 0.
func (b *PCMBuffer) Len() int {
	if b == nil {
		return 0
	}

	var n int
	switch b.DataType {
	case DataTypeF64:
		n = len(b.F64)
	case DataTypeF32:
		n = len(b.F32)
	case DataTypeI32:
		n = len(b.I32)
	case DataTypeI16:
		n = len(b.I16)
	case DataTypeI8:
		n = len(b.I8)
	}
	return n
}
// PCMFormat returns the buffer's Format pointer, or nil when the receiver
// itself is nil.
func (b *PCMBuffer) PCMFormat() *Format {
	if b != nil {
		return b.Format
	}
	return nil
}
// NumFrames returns the number of frames in the buffer: the sample count
// divided by the number of channels. A channel count of 0 is treated as 1.
// It returns 0 when the receiver or its Format is nil.
func (b *PCMBuffer) NumFrames() int {
	if b == nil || b.Format == nil {
		return 0
	}
	if channels := b.Format.NumChannels; channels != 0 {
		return b.Len() / channels
	}
	// No channel count recorded: every sample counts as one frame.
	return b.Len()
}
// AsFloatBuffer returns the samples as a new FloatBuffer of float64 values.
//
// The data is whatever AsF64 returns, so for a buffer already stored as
// float64 the result shares its sample slice with b. When b has a Format, the
// result gets its own Format carrying the same NumChannels and SampleRate.
//
// A nil receiver yields an empty FloatBuffer (nil Data, nil Format) instead of
// panicking.
func (b *PCMBuffer) AsFloatBuffer() *FloatBuffer {
	out := &FloatBuffer{Data: b.AsF64()}
	// AsF64 tolerates a nil receiver, but dereferencing b.Format would not.
	if b != nil && b.Format != nil {
		out.Format = &Format{
			NumChannels: b.Format.NumChannels,
			SampleRate:  b.Format.SampleRate,
		}
	}
	return out
}
// AsFloat32Buffer returns the samples as a new Float32Buffer.
//
// The data is whatever AsF32 returns, so for a buffer already stored as
// float32 the result shares its sample slice with b. When b has a Format, the
// result gets its own Format carrying the same NumChannels and SampleRate.
//
// A nil receiver yields an empty Float32Buffer (nil Data, nil Format) instead
// of panicking.
func (b *PCMBuffer) AsFloat32Buffer() *Float32Buffer {
	out := &Float32Buffer{Data: b.AsF32()}
	// AsF32 tolerates a nil receiver, but dereferencing b.Format would not.
	if b != nil && b.Format != nil {
		out.Format = &Format{
			NumChannels: b.Format.NumChannels,
			SampleRate:  b.Format.SampleRate,
		}
	}
	return out
}
// AsIntBuffer returns the samples as a new IntBuffer.
//
// The data is whatever AsInt returns. When b has a Format, the result gets its
// own Format carrying the same NumChannels and SampleRate.
//
// A nil receiver yields an IntBuffer with no samples and a nil Format instead
// of panicking.
func (b *PCMBuffer) AsIntBuffer() *IntBuffer {
	out := &IntBuffer{Data: b.AsInt()}
	// AsInt tolerates a nil receiver, but dereferencing b.Format would not.
	if b != nil && b.Format != nil {
		out.Format = &Format{
			NumChannels: b.Format.NumChannels,
			SampleRate:  b.Format.SampleRate,
		}
	}
	return out
}
// AsI8 returns the samples as int8 values.
//
// A buffer already stored as int8 returns its own slice (not a copy). Any
// other representation is converted element by element into a fresh slice
// using a plain numeric conversion: no rescaling is applied, wider integers
// are truncated to their low 8 bits and floats lose their fractional part.
// A nil receiver or an unrecognised DataType yields nil.
func (b *PCMBuffer) AsI8() (out []int8) {
	if b == nil {
		return nil
	}
	switch b.DataType {
	case DataTypeI8:
		return b.I8
	case DataTypeI16:
		out = make([]int8, len(b.I16))
		for i, s := range b.I16 {
			out[i] = int8(s)
		}
	case DataTypeI32:
		out = make([]int8, len(b.I32))
		for i, s := range b.I32 {
			out[i] = int8(s)
		}
	case DataTypeF32:
		out = make([]int8, len(b.F32))
		for i, s := range b.F32 {
			out[i] = int8(s)
		}
	case DataTypeF64:
		out = make([]int8, len(b.F64))
		for i, s := range b.F64 {
			out[i] = int8(s)
		}
	}
	return out
}
// AsI16 returns the samples as int16 values.
//
// A buffer already stored as int16 returns its own slice (not a copy). Any
// other representation is converted element by element into a fresh slice
// using a plain numeric conversion: no rescaling is applied, int32 values are
// truncated to their low 16 bits and floats lose their fractional part.
// A nil receiver or an unrecognised DataType yields nil.
func (b *PCMBuffer) AsI16() (out []int16) {
	if b == nil {
		return nil
	}
	switch b.DataType {
	case DataTypeI16:
		return b.I16
	case DataTypeI8:
		out = make([]int16, len(b.I8))
		for i, s := range b.I8 {
			out[i] = int16(s)
		}
	case DataTypeI32:
		out = make([]int16, len(b.I32))
		for i, s := range b.I32 {
			out[i] = int16(s)
		}
	case DataTypeF32:
		out = make([]int16, len(b.F32))
		for i, s := range b.F32 {
			out[i] = int16(s)
		}
	case DataTypeF64:
		out = make([]int16, len(b.F64))
		for i, s := range b.F64 {
			out[i] = int16(s)
		}
	}
	return out
}
// AsI32 returns the samples as int32 values.
//
// A buffer already stored as int32 returns its own slice (not a copy). Any
// other representation is converted element by element into a fresh slice
// using a plain numeric conversion: no rescaling is applied and floats lose
// their fractional part.
// A nil receiver or an unrecognised DataType yields nil.
func (b *PCMBuffer) AsI32() (out []int32) {
	if b == nil {
		return nil
	}
	switch b.DataType {
	case DataTypeI32:
		return b.I32
	case DataTypeI8:
		out = make([]int32, len(b.I8))
		for i, s := range b.I8 {
			out[i] = int32(s)
		}
	case DataTypeI16:
		out = make([]int32, len(b.I16))
		for i, s := range b.I16 {
			out[i] = int32(s)
		}
	case DataTypeF32:
		out = make([]int32, len(b.F32))
		for i, s := range b.F32 {
			out[i] = int32(s)
		}
	case DataTypeF64:
		out = make([]int32, len(b.F64))
		for i, s := range b.F64 {
			out[i] = int32(s)
		}
	}
	return out
}
// AsInt returns the samples as int values, obtained by widening the result of
// AsI32. The returned slice is always freshly allocated and never nil, even
// when AsI32 returns nil (in which case it is empty).
func (b *PCMBuffer) AsInt() (out []int) {
	samples := b.AsI32()
	out = make([]int, len(samples))
	for i, s := range samples {
		out[i] = int(s)
	}
	return out
}
// AsF32 returns the samples as float32 values.
//
// A buffer already stored as float32 returns its own slice (not a copy), and
// float64 samples are narrowed into a fresh slice. Integer samples are divided
// by 2^(8*(bitDepth/8)-1), where bitDepth comes from calculateIntBitDepth and
// bitDepth/8 is an integer division (the depth is rounded down to whole
// bytes).
//
// A nil receiver or an unrecognised DataType yields nil.
func (b *PCMBuffer) AsF32() (out []float32) {
	if b == nil {
		return nil
	}
	switch b.DataType {
	case DataTypeI8:
		factor := math.Pow(2, 8*float64(b.calculateIntBitDepth()/8)-1)
		out = make([]float32, len(b.I8))
		for i, s := range b.I8 {
			out[i] = float32(float64(s) / factor)
		}
	case DataTypeI16:
		factor := math.Pow(2, 8*float64(b.calculateIntBitDepth()/8)-1)
		out = make([]float32, len(b.I16))
		for i, s := range b.I16 {
			out[i] = float32(float64(s) / factor)
		}
	case DataTypeI32:
		factor := math.Pow(2, 8*float64(b.calculateIntBitDepth()/8)-1)
		// Read from I32: this case previously sized and filled the output
		// from I16, producing empty or wrong results for 32-bit buffers.
		out = make([]float32, len(b.I32))
		for i, s := range b.I32 {
			out[i] = float32(float64(s) / factor)
		}
	case DataTypeF32:
		return b.F32
	case DataTypeF64:
		out = make([]float32, len(b.F64))
		for i, s := range b.F64 {
			out[i] = float32(s)
		}
	}
	return out
}
// AsF64 returns the samples as float64 values.
//
// A buffer already stored as float64 returns its own slice (not a copy), and
// float32 samples are widened into a fresh slice. Integer samples are divided
// by 2^(8*(bitDepth/8)-1), where bitDepth comes from calculateIntBitDepth and
// bitDepth/8 is an integer division (the depth is rounded down to whole
// bytes).
//
// A nil receiver or an unrecognised DataType yields nil.
func (b *PCMBuffer) AsF64() (out []float64) {
	if b == nil {
		return nil
	}
	switch b.DataType {
	case DataTypeI8:
		factor := math.Pow(2, 8*float64(b.calculateIntBitDepth()/8)-1)
		out = make([]float64, len(b.I8))
		for i, s := range b.I8 {
			out[i] = float64(s) / factor
		}
	case DataTypeI16:
		factor := math.Pow(2, 8*float64(b.calculateIntBitDepth()/8)-1)
		out = make([]float64, len(b.I16))
		for i, s := range b.I16 {
			out[i] = float64(s) / factor
		}
	case DataTypeI32:
		factor := math.Pow(2, 8*float64(b.calculateIntBitDepth()/8)-1)
		// Read from I32: this case previously sized and filled the output
		// from I16, producing empty or wrong results for 32-bit buffers.
		out = make([]float64, len(b.I32))
		for i, s := range b.I32 {
			out[i] = float64(s) / factor
		}
	case DataTypeF32:
		out = make([]float64, len(b.F32))
		for i, s := range b.F32 {
			out[i] = float64(s)
		}
	case DataTypeF64:
		return b.F64
	}
	return out
}
// Clone returns an independent copy of the buffer.
//
// Only the sample slice selected by DataType is copied; the other slices are
// left nil in the copy. DataType and SourceBitDepth are carried over so the
// copy scales integer samples exactly like the original. When a Format is
// set, the copy gets its own Format with the same NumChannels and SampleRate;
// a nil Format stays nil.
//
// A nil receiver yields a nil Buffer.
func (b *PCMBuffer) Clone() Buffer {
	if b == nil {
		return nil
	}
	newB := &PCMBuffer{
		DataType:       b.DataType,
		SourceBitDepth: b.SourceBitDepth,
	}
	switch b.DataType {
	case DataTypeI8:
		newB.I8 = make([]int8, len(b.I8))
		copy(newB.I8, b.I8)
	case DataTypeI16:
		newB.I16 = make([]int16, len(b.I16))
		copy(newB.I16, b.I16)
	case DataTypeI32:
		newB.I32 = make([]int32, len(b.I32))
		copy(newB.I32, b.I32)
	case DataTypeF32:
		newB.F32 = make([]float32, len(b.F32))
		copy(newB.F32, b.F32)
	case DataTypeF64:
		newB.F64 = make([]float64, len(b.F64))
		copy(newB.F64, b.F64)
	}
	// Format is optional elsewhere in this type, so it must not be
	// dereferenced unconditionally here.
	if b.Format != nil {
		newB.Format = &Format{
			NumChannels: b.Format.NumChannels,
			SampleRate:  b.Format.SampleRate,
		}
	}
	return newB
}
// SwitchPrimaryType converts the buffer in place so that its samples are
// stored in representation t.
//
// The samples are converted with the matching AsXxx method, every other
// sample slice is set to nil, and DataType is set to t. Nothing happens when
// the receiver is nil or the buffer already uses t. For a t that is not one of
// the known sample types, only DataType is changed and the slices are left
// untouched.
func (b *PCMBuffer) SwitchPrimaryType(t PCMDataFormat) {
	if b == nil || t == b.DataType {
		return
	}

	// Convert first, clear afterwards: the AsXxx methods read the slice
	// selected by the current DataType, so clearing that slice before the
	// conversion would throw the samples away.
	var (
		i8  []int8
		i16 []int16
		i32 []int32
		f32 []float32
		f64 []float64
	)
	switch t {
	case DataTypeI8:
		i8 = b.AsI8()
	case DataTypeI16:
		i16 = b.AsI16()
	case DataTypeI32:
		i32 = b.AsI32()
	case DataTypeF32:
		f32 = b.AsF32()
	case DataTypeF64:
		f64 = b.AsF64()
	default:
		b.DataType = t
		return
	}

	b.I8, b.I16, b.I32, b.F32, b.F64 = i8, i16, i32, f32, f64
	b.DataType = t
}
// calculateIntBitDepth returns the bit depth used to scale integer samples.
//
// A non-zero SourceBitDepth is returned as is. Otherwise the depth is
// estimated from the stored samples as the smallest of 8, 16, 24 or 32 bits
// whose signed range contains every sample. Both the most negative and the
// most positive sample are considered, so a signal whose negative peaks exceed
// its positive ones is not under-estimated. Because this is an estimate, a
// signal that never approaches full scale reports a smaller depth than its
// storage type.
//
// It returns 0 for a nil receiver and for non-integer data types when
// SourceBitDepth is unset. An empty integer buffer reports 8.
func (b *PCMBuffer) calculateIntBitDepth() uint8 {
	if b == nil {
		return 0
	}
	if b.SourceBitDepth != 0 {
		return b.SourceBitDepth
	}

	var lo, hi int64
	switch b.DataType {
	case DataTypeI8:
		// Every int8 value fits in 8 bits; no scan needed.
		return 8
	case DataTypeI16:
		var lo16, hi16 int16
		for _, s := range b.I16 {
			if s < lo16 {
				lo16 = s
			} else if s > hi16 {
				hi16 = s
			}
		}
		lo, hi = int64(lo16), int64(hi16)
	case DataTypeI32:
		var lo32, hi32 int32
		for _, s := range b.I32 {
			if s < lo32 {
				lo32 = s
			} else if s > hi32 {
				hi32 = s
			}
		}
		lo, hi = int64(lo32), int64(hi32)
	default:
		return 0
	}

	// Signed n-bit samples span [-2^(n-1), 2^(n-1)-1].
	switch {
	case lo >= -1<<7 && hi < 1<<7:
		return 8
	case lo >= -1<<15 && hi < 1<<15:
		return 16
	case lo >= -1<<23 && hi < 1<<23:
		return 24
	default:
		return 32
	}
}