stash/vendor/github.com/asticode/go-astikit/pcm.go
cj c1a096a1a6
Caption support (#2462)
Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
2022-05-06 11:59:28 +10:00

426 lines
10 KiB
Go

package astikit
import (
"fmt"
"math"
"sync"
"time"
)
// PCMLevel computes the level of PCM samples as their root mean square (RMS):
// the square root of the mean of the squared sample values.
//
// An empty or nil slice yields 0 rather than the NaN that dividing by a zero
// sample count would produce.
// https://dsp.stackexchange.com/questions/2951/loudness-of-pcm-stream
// https://dsp.stackexchange.com/questions/290/getting-loudness-of-a-track-with-rms?noredirect=1&lq=1
func PCMLevel(samples []int) float64 {
	// No samples: avoid 0/0
	if len(samples) == 0 {
		return 0
	}

	// Compute sum of square values
	var sum float64
	for _, s := range samples {
		v := float64(s)
		sum += v * v
	}

	// Square root of the mean
	return math.Sqrt(sum / float64(len(samples)))
}
// maxPCMSample returns the largest positive value of a signed sample of the
// given bit depth, i.e. 2^(bitDepth-1) - 1 (32767 for 16 bits).
func maxPCMSample(bitDepth int) int {
	return int(math.Pow(2, float64(bitDepth))/2.0) - 1
}

// PCMNormalize normalizes the PCM samples: every sample is scaled by the same
// factor so that the sample with the largest magnitude reaches the maximum
// value allowed by bitDepth.
//
// It returns nil for empty input. When every sample is zero there is no peak
// to scale against, so the returned slice is all zeros (the original code
// divided by zero in that case).
func PCMNormalize(samples []int, bitDepth int) (o []int) {
	// Nothing to do
	if len(samples) == 0 {
		return nil
	}

	// Get the peak, i.e. the largest absolute sample value
	var peak int
	for _, s := range samples {
		if s < 0 {
			s = -s
		}
		if s > peak {
			peak = s
		}
	}

	// The output always has one value per input sample
	o = make([]int, len(samples))

	// Pure silence cannot be scaled: keep it as is
	if peak == 0 {
		return o
	}

	// Get max for bit depth
	limit := maxPCMSample(bitDepth)

	// Scale samples, multiplying first so that integer division only truncates once
	for idx, s := range samples {
		o[idx] = s * limit / peak
	}
	return o
}
// ConvertPCMBitDepth rescales a single sample from srcBitDepth to dstBitDepth.
// Widening shifts the sample left by the depth difference, narrowing shifts
// it right (an arithmetic shift, so the sign is kept), and equal depths
// return the sample untouched. The returned error is always nil.
func ConvertPCMBitDepth(srcSample int, srcBitDepth, dstBitDepth int) (dstSample int, err error) {
	switch {
	case srcBitDepth < dstBitDepth:
		// More bits available: scale up
		dstSample = srcSample << uint(dstBitDepth-srcBitDepth)
	case srcBitDepth > dstBitDepth:
		// Fewer bits available: drop the least significant ones
		dstSample = srcSample >> uint(srcBitDepth-dstBitDepth)
	default:
		// Same depth: nothing to do
		dstSample = srcSample
	}
	return dstSample, nil
}
// PCMSampleFunc is a func that can process a sample.
// The converters in this file call it once per output sample; when it returns
// a non-nil error they wrap that error and return it from their Add method.
type PCMSampleFunc func(s int) error
// PCMSampleRateConverter converts a stream of interleaved PCM samples from one
// sample rate to another. Samples are pushed one at a time through Add and the
// converted samples are handed to the PCMSampleFunc given at creation.
type PCMSampleRateConverter struct {
	b                    [][]int // per-channel samples received since the last output
	dstSampleRate        int
	fn                   PCMSampleFunc
	numChannels          int
	numChannelsProcessed int // 1-based position, in the current frame, of the last sample received
	numSamplesOutputed   int // number of frames (one sample per channel) sent to fn
	numSamplesProcessed  int // number of complete frames received
	srcSampleRate        int
}

// NewPCMSampleRateConverter creates a new PCMSampleRateConverter converting
// numChannels interleaved channels from srcSampleRate to dstSampleRate and
// forwarding the result to fn.
func NewPCMSampleRateConverter(srcSampleRate, dstSampleRate, numChannels int, fn PCMSampleFunc) *PCMSampleRateConverter {
	c := &PCMSampleRateConverter{
		dstSampleRate: dstSampleRate,
		fn:            fn,
		numChannels:   numChannels,
		srcSampleRate: srcSampleRate,
	}
	c.b = make([][]int, numChannels)
	return c
}

// Reset drops buffered samples and zeroes all counters, so that the next
// sample added is treated as the first one of a new stream.
func (c *PCMSampleRateConverter) Reset() {
	c.b = make([][]int, c.numChannels)
	c.numChannelsProcessed, c.numSamplesOutputed, c.numSamplesProcessed = 0, 0, 0
}

// Add adds a new sample to the converter.
//
// When both rates are equal the sample is forwarded as is. Otherwise samples
// are buffered per channel until a frame is complete, at which point the frame
// is either merged with the previously buffered ones (source rate higher than
// destination rate) or repeated (source rate lower than destination rate).
// Errors returned by the sample func are wrapped and returned.
func (c *PCMSampleRateConverter) Add(i int) (err error) {
	// Same rate: forward sample
	if c.srcSampleRate == c.dstSampleRate {
		return c.forward(i)
	}

	// Move to the next channel, wrapping around at the end of a frame
	c.numChannelsProcessed++
	if c.numChannelsProcessed > c.numChannels {
		c.numChannelsProcessed = 1
	}

	// A frame only counts once its last channel has been received
	if c.numChannelsProcessed == c.numChannels {
		c.numSamplesProcessed++
	}

	// Buffer the sample in its channel
	ch := c.numChannelsProcessed - 1
	c.b[ch] = append(c.b[ch], i)

	// Nothing is output in the middle of a frame
	if c.numChannelsProcessed < c.numChannels {
		return nil
	}

	if c.srcSampleRate > c.dstSampleRate {
		return c.downsample()
	}
	return c.upsample()
}

// forward sends one sample to the sample func and wraps its error, if any.
func (c *PCMSampleRateConverter) forward(s int) error {
	if err := c.fn(s); err != nil {
		return fmt.Errorf("astikit: handling sample failed: %w", err)
	}
	return nil
}

// nextOutputPosition returns the position, in source frames, the next output
// frame is compared against.
func (c *PCMSampleRateConverter) nextOutputPosition() float64 {
	return 1.0 + float64(c.numSamplesOutputed)*float64(c.srcSampleRate)/float64(c.dstSampleRate)
}

// downsample throws data away: once enough source frames have been received,
// it outputs, for each channel, the integer average of the buffered samples.
// The very first frame is always output.
func (c *PCMSampleRateConverter) downsample() error {
	// Target frame has not been reached yet
	if c.numSamplesOutputed > 0 && float64(c.numSamplesProcessed) < c.nextOutputPosition() {
		return nil
	}

	for ch, pending := range c.b {
		// Merge samples
		total := 0
		for _, v := range pending {
			total += v
		}
		merged := total / len(pending)

		// Reset buffer
		c.b[ch] = []int{}

		if err := c.forward(merged); err != nil {
			return err
		}
	}
	c.numSamplesOutputed++
	return nil
}

// upsample repeats data: the frame that was just completed is output as many
// times as needed to catch up with the destination rate, then the buffers are
// reset.
func (c *PCMSampleRateConverter) upsample() error {
	for c.numSamplesOutputed == 0 || float64(c.numSamplesProcessed)+1.0 > c.nextOutputPosition() {
		for _, pending := range c.b {
			// Exactly one sample per channel is expected here
			if n := len(pending); n != 1 {
				return fmt.Errorf("astikit: invalid buffer item length %d", n)
			}
			if err := c.forward(pending[0]); err != nil {
				return err
			}
		}
		c.numSamplesOutputed++
	}

	// Reset buffer
	c.b = make([][]int, c.numChannels)
	return nil
}
// PCMChannelsConverter converts a stream of interleaved PCM samples from one
// number of channels to another. Samples are pushed one at a time through Add
// and the converted samples are handed to the PCMSampleFunc given at creation.
type PCMChannelsConverter struct {
	dstNumChannels int
	fn             PCMSampleFunc
	srcNumChannels int
	srcSamples     int // number of samples received in the current source frame
}

// NewPCMChannelsConverter creates a new PCMChannelsConverter converting frames
// of srcNumChannels samples into frames of dstNumChannels samples and
// forwarding the result to fn.
func NewPCMChannelsConverter(srcNumChannels, dstNumChannels int, fn PCMSampleFunc) *PCMChannelsConverter {
	c := &PCMChannelsConverter{fn: fn}
	c.srcNumChannels, c.dstNumChannels = srcNumChannels, dstNumChannels
	return c
}

// Reset makes the converter treat the next sample as the first one of a frame.
func (c *PCMChannelsConverter) Reset() {
	c.srcSamples = 0
}

// Add adds a new sample to the converter.
//
// With equal channel counts the sample is forwarded as is. With fewer
// destination channels, only the first dstNumChannels samples of each source
// frame are forwarded. With more destination channels, every sample is
// forwarded once and the last sample of each source frame is repeated to fill
// the missing channels. Errors returned by the sample func are wrapped and
// returned.
func (c *PCMChannelsConverter) Add(i int) (err error) {
	// Same layout: forward sample
	if c.srcNumChannels == c.dstNumChannels {
		return c.forward(i, 1)
	}

	// Start a new source frame once the previous one is complete
	if c.srcSamples == c.srcNumChannels {
		c.srcSamples = 0
	}
	c.srcSamples++

	switch {
	case c.srcNumChannels > c.dstNumChannels:
		// Throw away the channels that do not fit in the destination
		if c.srcSamples > c.dstNumChannels {
			return nil
		}
		return c.forward(i, 1)
	case c.srcSamples < c.srcNumChannels:
		// Not the last source channel yet: plain copy
		return c.forward(i, 1)
	default:
		// Last source channel: it also feeds every extra destination channel
		return c.forward(i, c.dstNumChannels-c.srcNumChannels+1)
	}
}

// forward sends the sample to the sample func the given number of times,
// stopping at, and wrapping, the first error.
func (c *PCMChannelsConverter) forward(s, times int) error {
	for n := 0; n < times; n++ {
		if err := c.fn(s); err != nil {
			return fmt.Errorf("astikit: handling sample failed: %w", err)
		}
	}
	return nil
}
// PCMSilenceDetector represents a PCM silence detector.
// Samples are pushed through Add, grouped into fixed-size analyses, and
// returned once they are surrounded by silences that are long enough.
type PCMSilenceDetector struct {
	analyses              []pcmSilenceDetectorAnalysis // analyses not yet returned or discarded, oldest first
	buf                   []int                        // samples waiting to fill a complete analysis
	m                     *sync.Mutex                  // Locks buf and analyses
	minAnalysesPerSilence int                          // number of consecutive silent analyses making a valid silence
	o                     PCMSilenceDetectorOptions
	samplesPerAnalysis    int // number of samples in one analysis
}

// pcmSilenceDetectorAnalysis is one step of samples along with its level.
type pcmSilenceDetectorAnalysis struct {
	level   float64
	samples []int
}

// PCMSilenceDetectorOptions represents a PCM silence detector options
type PCMSilenceDetectorOptions struct {
	MaxSilenceLevel    float64       `toml:"max_silence_level"`
	MinSilenceDuration time.Duration `toml:"min_silence_duration"`
	SampleRate         int           `toml:"sample_rate"`
	StepDuration       time.Duration `toml:"step_duration"`
}

// NewPCMSilenceDetector creates a new silence detector.
//
// A zero MinSilenceDuration defaults to one second and a zero StepDuration
// defaults to 30 milliseconds. The number of samples per analysis is
// SampleRate * StepDuration (in seconds), rounded down, and a silence is valid
// once it spans MinSilenceDuration / StepDuration analyses, rounded down.
func NewPCMSilenceDetector(o PCMSilenceDetectorOptions) (d *PCMSilenceDetector) {
	// Create
	d = &PCMSilenceDetector{
		m: &sync.Mutex{},
		o: o,
	}

	// Reset
	d.Reset()

	// Default option values
	if d.o.MinSilenceDuration == 0 {
		d.o.MinSilenceDuration = time.Second
	}
	if d.o.StepDuration == 0 {
		d.o.StepDuration = 30 * time.Millisecond
	}

	// Compute attributes depending on options
	d.samplesPerAnalysis = int(math.Floor(float64(d.o.SampleRate) * d.o.StepDuration.Seconds()))
	d.minAnalysesPerSilence = int(math.Floor(d.o.MinSilenceDuration.Seconds() / d.o.StepDuration.Seconds()))
	return
}

// Reset resets the silence detector: buffered samples and pending analyses
// are dropped.
func (d *PCMSilenceDetector) Reset() {
	// Lock
	d.m.Lock()
	defer d.m.Unlock()

	// Reset
	d.analyses = []pcmSilenceDetectorAnalysis{}
	d.buf = []int{}
}

// Add adds samples to the buffer and checks whether there are valid samples between silences.
//
// The buffer is cut into analyses of samplesPerAnalysis samples; an analysis
// is a silence when its PCMLevel is below MaxSilenceLevel. Each returned item
// is the concatenation of a valid leading silence (trimmed to at most
// minAnalysesPerSilence analyses), the non-silent analyses that follow it, and
// a valid trailing silence. That trailing silence is kept and acts as the
// leading silence of the next item. Non-silent analyses that are not preceded
// by a valid silence are discarded.
//
// Nothing is buffered or returned when samplesPerAnalysis is not positive
// (e.g. a zero SampleRate): no analysis could ever be built, and the
// analysis loop below would otherwise never terminate.
func (d *PCMSilenceDetector) Add(samples []int) (validSamples [][]int) {
	// Lock
	d.m.Lock()
	defer d.m.Unlock()

	// An analysis must contain at least one sample
	if d.samplesPerAnalysis <= 0 {
		return
	}

	// Append samples to buffer
	d.buf = append(d.buf, samples...)

	// Analyze samples by step
	for len(d.buf) >= d.samplesPerAnalysis {
		// Append analysis
		// Samples are copied so that the analysis does not alias the buffer
		d.analyses = append(d.analyses, pcmSilenceDetectorAnalysis{
			level:   PCMLevel(d.buf[:d.samplesPerAnalysis]),
			samples: append([]int(nil), d.buf[:d.samplesPerAnalysis]...),
		})

		// Remove samples from buffer
		d.buf = d.buf[d.samplesPerAnalysis:]
	}

	// Loop through analyses
	// Counters are expressed in number of analyses and describe the run that
	// starts at d.analyses[0]. Whenever analyses are removed from the front,
	// i is shifted by the same amount so that it keeps pointing at the same analysis.
	var leadingSilence, inBetween, trailingSilence int
	for i := 0; i < len(d.analyses); i++ {
		if d.analyses[i].level < d.o.MaxSilenceLevel {
			// This is a silence

			// This is a leading silence
			if inBetween == 0 {
				leadingSilence++

				// The leading silence is valid
				// We can trim its useless part
				if leadingSilence > d.minAnalysesPerSilence {
					d.analyses = d.analyses[leadingSilence-d.minAnalysesPerSilence:]
					i -= leadingSilence - d.minAnalysesPerSilence
					leadingSilence = d.minAnalysesPerSilence
				}
				continue
			}

			// This is a trailing silence
			trailingSilence++

			// Trailing silence is invalid
			if trailingSilence < d.minAnalysesPerSilence {
				continue
			}

			// Trailing silence is valid
			// Loop through analyses
			var ss []int
			for _, a := range d.analyses[:i+1] {
				ss = append(ss, a.samples...)
			}

			// Append valid samples
			validSamples = append(validSamples, ss)

			// Remove leading silence and non silence
			d.analyses = d.analyses[leadingSilence+inBetween:]
			i -= leadingSilence + inBetween

			// Reset counts
			// The trailing silence becomes the leading silence of the next run
			leadingSilence, inBetween, trailingSilence = trailingSilence, 0, 0
		} else {
			// This is not a silence

			// This is a leading non silence
			// We need to remove it
			if i == 0 {
				d.analyses = d.analyses[1:]
				i = -1
				continue
			}

			// This is the first in-between
			if inBetween == 0 {
				// The leading silence is invalid
				// We need to remove it as well as this first non silence
				if leadingSilence < d.minAnalysesPerSilence {
					d.analyses = d.analyses[i+1:]
					i = -1
					continue
				}
			}

			// This non-silence was preceded by a silence not big enough to be a valid trailing silence
			// We incorporate it in the in-between
			if trailingSilence > 0 {
				inBetween += trailingSilence
				trailingSilence = 0
			}

			// This is an in-between
			inBetween++
			continue
		}
	}
	return
}