/*
|
* vad.cpp
|
*
|
* This is a simple vad implementation. It isn't tuned and testing
|
* has been somewhat limited... but it seems to work ok. All numbers
|
* that are db are multiplied by 100 to keep them slightly more
|
* accurate and easy to use. (so -7450 is -74.50db) This code uses
|
* three functions not provided as part of this file : log10_32, bqInit
|
* and bqProcess. The log function calculates :
|
* 100 * 10 * log(x) - 9333
|
* which should be an approximation of the signal energy x 100.
|
* bqInit initializes a biquad data structure and bqProcess applies a
|
* biquad to a signal buffer. A biquad is used by the vad to filter
|
* out lower frequency background noise.
|
*
|
* This code operates under the assumption that it will get frames with
|
* lengths which are a multiple of 5ms. This is relatively easy to
|
* change since the only real dependency is the hangover count -- it's in
|
* frames now but could easily be changed to samples.
|
*
|
*/
|
|
#define VAD_CPP
|
|
#include "c_utils.h"
|
#include "c_vad.h"
|
|
/* Marks functions and data that are private to this translation unit. */
#define LOCAL static

/* Samples per processing chunk: 5 ms at 8 samples per ms. */
#define DATA_FRAME_LENGTH (5*8)

/*
 * Sample countdowns for the level trackers:
 *   1.5 sec * 1000 ms/sec * 8 samples/ms = 12000 samples.
 * The noise floor is raised by 1 dB each time its countdown runs out.
 *
 * Every replacement list below is fully parenthesized so that the cast
 * cannot bind to neighbouring operators at the point of use.
 */
#define VAD_NOISEFLOOR_CNT_INIT ((int)(8*1500))
#define VAD_SIGNALMAX_CNT_INIT  ((int)(8*1500))

/* Power thresholds, in dB */
#define VAD_NOISE_TH_BASE    ((float) 10.00)  /*  10.00 dB noise threshold         */
#define VAD_NOISE_FLOOR_INIT ((float)-74.00)  /* -74.00 dB initial noise floor     */
#define VAD_SIGNAL_MAX_INIT  ((float)-80.00)  /* -80.00 dB initial signal max      */
#define VAD_NOISE_TH_MIN     ((float)  1.00)  /*   1.00 dB minimum noise threshold */
|
|
/*
 * High-pass biquad (Butterworth design) applied to the input signal to
 * remove background noise before the energy calculations.
 * NOTE(review): the fixed-point scaling of these coefficients is defined
 * by the biquad implementation, which is not part of this file -- confirm
 * there before changing any value.
 *
 * Each replacement list is fully parenthesized so that the cast cannot
 * bind to neighbouring operators at the point of use.
 */
#define vhpfB0 ((S2byte) 14339)
#define vhpfB1 ((S2byte)-28678)
#define vhpfB2 ((S2byte) 14339)
#define vhpfA1 ((S2byte)-28422)
#define vhpfA2 ((S2byte) 12550)

/*
 * Hangover: number of samples of silence required before a silence
 * period is declared.
 *   8 samples/ms * 500 ms = 4000 samples.
 */
#define VAD_HANGOVER_CNT_INIT ((int)(8*500))
|
|
/* Decision state remembered by the detector between calls. */
typedef enum {
    VadState_Silence = 0,   /* silence has been declared             */
    VadState_Speech  = 1,   /* the last speech decision still stands */
    VadState_Unknown = 2    /* no decision has been made yet         */
} t_VadState;
|
|
typedef struct _vad {
|
|
boolean enabled;
|
|
/* Saved STA between input frames */
|
U4byte sta;
|
|
/* state == 1 if VOICE
|
* state == 0 if SILENCE */
|
t_VadState state;
|
|
/* Countdown of consecutive frames before we declare silence */
|
int hangoverCnt;
|
|
/* Threshold above which a signal is considered to be speech */
|
float noiseTH;
|
|
/* Countdown after which the noise floor is
|
* incremented by 1dB */
|
int noiseFloorCnt;
|
|
/* Noise floor in dB */
|
float noiseFloor;
|
|
/* Countdown after which the signal max is
|
* decremented by 1dB */
|
int signalMaxCnt;
|
|
/* Signal max in dB */
|
float signalMax;
|
|
/* STARise == 1 if sta is rising
|
* STARise == 0 if sta is falling */
|
int STARise;
|
|
int stateTxCount;
|
|
/* High Pass Filter for input signal */
|
t_biquad *bq;
|
|
} t_vad;
|
|
/* LOCAL */
|
LOCAL t_biquad vadbq;
|
LOCAL t_vad vadd;
|
|
void
|
vadInit()
|
{
|
|
vadd.enabled = TRUE;
|
vadd.bq = &vadbq;
|
vadd.sta = 10000;
|
vadd.noiseTH = VAD_NOISE_TH_BASE;
|
vadd.state = VadState_Unknown;
|
vadd.noiseFloorCnt = VAD_NOISEFLOOR_CNT_INIT;
|
vadd.noiseFloor = VAD_NOISE_FLOOR_INIT;
|
vadd.hangoverCnt = VAD_HANGOVER_CNT_INIT;
|
vadd.STARise = 1;
|
|
vadd.stateTxCount = 0;
|
|
vadd.signalMax = VAD_SIGNAL_MAX_INIT;
|
vadd.signalMaxCnt = VAD_SIGNALMAX_CNT_INIT;
|
|
bqInit(vadd.bq, vhpfB0, vhpfB1, vhpfB2, vhpfA1, vhpfA2);
|
}
|
|
LOCAL U4byte
|
computeSTA(S2byte *pdata, int length, U4byte *minSta)
|
{
|
int i;
|
S4byte acc0,acc1;
|
U4byte maxSta;
|
|
*minSta = vadd.sta;
|
maxSta = vadd.sta;
|
|
for (i = 0; i < length; i++)
|
{
|
|
/* q.15 * q.15 = q.30 */
|
acc1 = pdata[i] * pdata[i];
|
|
if ( vadd.STARise )
|
{
|
acc0 = -1 * (S4byte)(vadd.sta >> 6);
|
acc1 = acc1 >> 5;
|
}
|
else
|
{
|
acc0 = -1 * (S4byte)(vadd.sta >> 9);
|
acc1 = acc1 >> 8;
|
} /* if */
|
|
acc0 += acc1;
|
vadd.STARise = ( 0 >= acc0 ) ? 0 : 1;
|
vadd.sta += acc0;
|
|
if ( vadd.sta > maxSta )
|
{
|
maxSta = vadd.sta; // arijit - i added the cast
|
}
|
else if ( vadd.sta < *minSta )
|
{
|
*minSta = vadd.sta;
|
}
|
|
} /* for */
|
|
return maxSta;
|
|
}
|
|
LOCAL void
|
computeNFE(float minpower, float maxpower, int length)
|
{
|
|
if ( minpower <= vadd.noiseFloor )
|
{
|
vadd.noiseFloor = minpower;
|
vadd.noiseFloorCnt = VAD_NOISEFLOOR_CNT_INIT;
|
}
|
else
|
{
|
if ( vadd.noiseFloorCnt < length )
|
{
|
vadd.noiseFloor += 1;
|
vadd.noiseFloorCnt =
|
(VAD_NOISEFLOOR_CNT_INIT + vadd.noiseFloorCnt - length);
|
}
|
else
|
{
|
vadd.noiseFloorCnt -= length;
|
}
|
}
|
|
}
|
|
/* Wrap limit, in samples, for the running sample counter kept inside
 * vadSubProcess(): the counter is reset to 0 once it reaches this value.
 * Not declared LOCAL, so it has external linkage. */
unsigned long stopCount = 32000;
|
|
LOCAL boolean
|
vadSubProcess(S2byte *data, int length)
|
{
|
boolean SpeechDetected;
|
boolean FrameSpeechFlag;
|
S2byte tmpData[DATA_FRAME_LENGTH];
|
U4byte sta[2];
|
float power[2];
|
static unsigned long count = 0;
|
|
SpeechDetected = TRUE;
|
FrameSpeechFlag = FALSE;
|
|
bqProcess(vadd.bq, data, tmpData, length);
|
|
sta[1] = computeSTA(tmpData, length, &sta[0]);
|
|
calcPower(2, sta, power);
|
|
computeNFE(power[0], power[1], length);
|
|
count += length;
|
if (count >= stopCount)
|
{
|
count = 0;
|
}
|
|
if (power[1] > (vadd.noiseFloor + vadd.noiseTH))
|
{
|
FrameSpeechFlag = TRUE;
|
}
|
|
if ( FrameSpeechFlag == FALSE)
|
{
|
if ( vadd.hangoverCnt < length )
|
{
|
SpeechDetected = FALSE;
|
vadd.hangoverCnt = 0;
|
if ( vadd.state != VadState_Silence )
|
{
|
vadd.stateTxCount++;
|
}
|
vadd.state = VadState_Silence;
|
}
|
else
|
{
|
vadd.hangoverCnt -= length;
|
}
|
}
|
else
|
{
|
vadd.hangoverCnt = VAD_HANGOVER_CNT_INIT;
|
if ( vadd.state == VadState_Silence )
|
{
|
vadd.stateTxCount++;
|
}
|
vadd.state = VadState_Speech;
|
}
|
|
return SpeechDetected;
|
|
}
|
|
/*
|
* Returns: true for speech
|
* false for silence
|
*/
|
boolean
|
vadProcess(S2byte *data, int length)
|
{
|
|
/* vadProcess locals */
|
int idx;
|
int step;
|
boolean ret;
|
|
ret = FALSE;
|
|
if ( vadd.enabled == TRUE )
|
{
|
|
/* Cut up the frame into 5ms chunks for processing purposes */
|
for (idx = 0; length > 0; length -= step)
|
{
|
step = (length < DATA_FRAME_LENGTH) ? length : DATA_FRAME_LENGTH;
|
ret |= vadSubProcess(&data[idx], step);
|
idx += step;
|
}
|
|
}
|
else
|
{
|
ret = TRUE;
|
}
|
|
return ret;
|
|
}
|