268 lines
7.2 KiB
C
268 lines
7.2 KiB
C
/*
|
|
* FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
|
|
* Copyright (C) 2018-2020, Anthony Minessale II <anthm@freeswitch.org>
|
|
*
|
|
* Version: MPL 1.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Anthony Minessale II <anthm@freeswitch.org>
|
|
* Portions created by the Initial Developer are Copyright (C)
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*
|
|
* Seven Du <dujinfang@gmail.com>
|
|
* Chris Rienzo <chris@signalwire.com>
|
|
*
|
|
*
|
|
* switch_vad.c VAD code with optional libfvad
|
|
*
|
|
*/
|
|
|
|
#include <switch.h>
#include <limits.h>

#ifdef SWITCH_HAVE_FVAD
#include <fvad.h>
#endif
|
|
|
|
struct switch_vad_s {
|
|
// configs
|
|
int channels;
|
|
int sample_rate;
|
|
int debug;
|
|
int divisor;
|
|
int thresh;
|
|
int voice_samples_thresh;
|
|
int silence_samples_thresh;
|
|
|
|
// VAD state
|
|
int voice_samples;
|
|
int silence_samples;
|
|
switch_vad_state_t vad_state;
|
|
#ifdef SWITCH_HAVE_FVAD
|
|
Fvad *fvad;
|
|
#endif
|
|
};
|
|
|
|
/*
 * Translate a VAD state into its lowercase textual name.
 *
 * Returns a pointer to a string literal; any value that is not one of the
 * four known states is reported as "error".
 */
SWITCH_DECLARE(const char *) switch_vad_state2str(switch_vad_state_t state)
{
	const char *label = "error";

	switch (state) {
	case SWITCH_VAD_STATE_NONE:
		label = "none";
		break;
	case SWITCH_VAD_STATE_START_TALKING:
		label = "start_talking";
		break;
	case SWITCH_VAD_STATE_TALKING:
		label = "talking";
		break;
	case SWITCH_VAD_STATE_STOP_TALKING:
		label = "stop_talking";
		break;
	default:
		break;
	}

	return label;
}
|
|
|
|
/*
 * Allocate a VAD context and load its default settings.
 *
 * sample_rate: rate of the audio in Hz; any non-positive value selects 8000.
 * channels:    step between the samples examined by the energy detector;
 *              any non-positive value selects 1 so the detector always walks
 *              forward through the buffer.
 *
 * Defaults: 500 ms of silence to stop talking, 200 ms of voice to start
 * talking, energy threshold 100.
 *
 * Returns the new context, or NULL when memory cannot be allocated. The
 * caller releases it with switch_vad_destroy().
 */
SWITCH_DECLARE(switch_vad_t *) switch_vad_init(int sample_rate, int channels)
{
	/* calloc hands back zeroed memory, so every field starts out as 0/NULL */
	switch_vad_t *vad = calloc(1, sizeof(*vad));

	if (!vad) {
		return NULL;
	}

	/* a negative rate would yield negative thresholds that are exceeded by every frame */
	vad->sample_rate = sample_rate > 0 ? sample_rate : 8000;
	vad->channels = channels > 0 ? channels : 1;
	vad->silence_samples_thresh = 500 * (vad->sample_rate / 1000);
	vad->voice_samples_thresh = 200 * (vad->sample_rate / 1000);
	vad->thresh = 100;

	/* rates below 8000 Hz divide down to 0; keep the divisor usable */
	vad->divisor = vad->sample_rate / 8000;
	if (vad->divisor <= 0) {
		vad->divisor = 1;
	}

	switch_vad_reset(vad);

	return vad;
}
|
|
|
|
/*
 * Select the detection backend and, for libfvad, its aggressiveness.
 *
 * With libfvad compiled in:
 *   mode < 0   releases any libfvad instance so the energy detector is used;
 *   mode 0..3  creates the libfvad instance on first use and applies the mode;
 *   mode > 3   is clamped to 3.
 * Without libfvad the call only logs the request when debug is enabled.
 *
 * Returns 0 on success. Returns -1 when the libfvad instance cannot be
 * created, or when libfvad rejects the mode or the context's sample rate.
 */
SWITCH_DECLARE(int) switch_vad_set_mode(switch_vad_t *vad, int mode)
{
#ifdef SWITCH_HAVE_FVAD
	int ret = 0;

	if (mode < 0) {
		if (vad->fvad) {
			fvad_free(vad->fvad);
			vad->fvad = NULL;
		}

		return 0;
	}

	if (mode > 3) {
		mode = 3;
	}

	if (!vad->fvad) {
		vad->fvad = fvad_new();

		if (!vad->fvad) {
			/* nothing was started: report the failure instead of claiming success */
			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "libfvad init error\n");
			return -1;
		}
	}

	ret = fvad_set_mode(vad->fvad, mode);

	if (fvad_set_sample_rate(vad->fvad, vad->sample_rate) < 0) {
		/* the instance is kept, but the caller is told that it is misconfigured */
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "libfvad rejected sample rate %d\n", vad->sample_rate);
		ret = -1;
	}

	if (ret == 0) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "libfvad started, mode = %d\n", mode);
	}

	return ret;
#else
	if (vad->debug) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "set vad mode = %d\n", mode);
	}

	return 0;
#endif
}
|
|
|
|
/*
 * Change one tunable of the detector.
 *
 * Recognised keys:
 *   "silence_ms"    silence needed to stop talking, in ms (must be > 0)
 *   "voice_ms"      voice needed to start talking, in ms (must be > 0)
 *   "thresh"        score a frame must exceed to count as voice
 *   "debug"         logging verbosity
 *   "hangover_len"  deprecated; treated as silence_ms = val * 20
 *   "listen_hits"   deprecated; treated as voice_ms = val * 20
 *
 * A NULL vad or key, and any unrecognised key, leaves the context untouched.
 * The debug line "set <key> to <val>" is written only when a value was
 * actually stored.
 */
SWITCH_DECLARE(void) switch_vad_set_param(switch_vad_t *vad, const char *key, int val)
{
	int applied = 0;

	if (!vad || !key) {
		return;
	}

	/* deprecated aliases: convert old-style hits to ms assuming 20ms ptime, then delegate */
	if (!strcmp(key, "hangover_len")) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "hangover_len is deprecated, setting silence_ms to %d\n", 20 * val);
		switch_vad_set_param(vad, "silence_ms", val * 20);
		return;
	}

	if (!strcmp(key, "listen_hits")) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "listen_hits is deprecated, setting voice_ms to %d\n", 20 * val);
		switch_vad_set_param(vad, "voice_ms", 20 * val);
		return;
	}

	if (!strcmp(key, "silence_ms")) {
		if (val > 0) {
			vad->silence_samples_thresh = val * (vad->sample_rate / 1000);
			applied = 1;
		} else {
			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring invalid silence_ms of %d\n", val);
		}
	} else if (!strcmp(key, "voice_ms")) {
		if (val > 0) {
			vad->voice_samples_thresh = val * (vad->sample_rate / 1000);
			applied = 1;
		} else {
			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring invalid voice_ms of %d\n", val);
		}
	} else if (!strcmp(key, "thresh")) {
		vad->thresh = val;
		applied = 1;
	} else if (!strcmp(key, "debug")) {
		vad->debug = val;
		applied = 1;
	}

	if (applied && vad->debug) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "set %s to %d\n", key, val);
	}
}
|
|
|
|
/*
 * Return the detector to its idle state: state NONE, both run-length
 * counters at zero. The configured thresholds are left untouched.
 *
 * When a libfvad instance is active it is reset as well. fvad_reset() also
 * reverts libfvad's mode and sample rate to the library defaults, so the
 * sample rate of this context is applied again right away; otherwise libfvad
 * would be configured for a different rate than the frames it is fed.
 *
 * NOTE(review): the aggressiveness mode is not stored in the context, so it
 * cannot be restored here; callers that rely on a non-default mode need to
 * call switch_vad_set_mode() again after a reset.
 */
SWITCH_DECLARE(void) switch_vad_reset(switch_vad_t *vad)
{
#ifdef SWITCH_HAVE_FVAD
	if (vad->fvad) {
		fvad_reset(vad->fvad);

		if (fvad_set_sample_rate(vad->fvad, vad->sample_rate) < 0 && vad->debug) {
			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "libfvad rejected sample rate %d\n", vad->sample_rate);
		}
	}
#endif

	vad->vad_state = SWITCH_VAD_STATE_NONE;
	vad->voice_samples = 0;
	vad->silence_samples = 0;

	if (vad->debug) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "reset vad state\n");
	}
}
|
|
|
|
SWITCH_DECLARE(switch_vad_state_t) switch_vad_process(switch_vad_t *vad, int16_t *data, unsigned int samples)
|
|
{
|
|
int score = 0;
|
|
|
|
// Each frame has 2 possible outcomes- voice or not voice.
|
|
// The VAD has 2 real states- talking / not talking with
|
|
// begin talking and stop talking as events to mark transitions
|
|
|
|
|
|
// determine if this is a voice or non-voice frame
|
|
#ifdef SWITCH_HAVE_FVAD
|
|
if (vad->fvad) {
|
|
// fvad returns -1, 0, or 1
|
|
// -1: error
|
|
// 0: non-voice frame
|
|
// 1: voice frame
|
|
int ret = fvad_process(vad->fvad, data, samples);
|
|
|
|
// if voice frame set score > threshold
|
|
score = ret > 0 ? vad->thresh + 100 : 0;
|
|
} else {
|
|
#endif
|
|
int energy = 0, j = 0, count = 0;
|
|
for (energy = 0, j = 0, count = 0; count < samples; count++) {
|
|
energy += abs(data[j]);
|
|
j += vad->channels;
|
|
}
|
|
|
|
if (samples && vad->divisor && samples >= vad->divisor) {
|
|
score = (uint32_t)(energy / (samples / vad->divisor));
|
|
}
|
|
#ifdef SWITCH_HAVE_FVAD
|
|
}
|
|
#endif
|
|
|
|
if (vad->debug > 9) {
|
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "score: %d\n", score);
|
|
}
|
|
|
|
// clear the STOP/START TALKING events
|
|
if (vad->vad_state == SWITCH_VAD_STATE_STOP_TALKING) {
|
|
vad->vad_state = SWITCH_VAD_STATE_NONE;
|
|
} else if (vad->vad_state == SWITCH_VAD_STATE_START_TALKING) {
|
|
vad->vad_state = SWITCH_VAD_STATE_TALKING;
|
|
}
|
|
|
|
// adjust voice/silence run length counters
|
|
if (score > vad->thresh) {
|
|
vad->silence_samples = 0;
|
|
vad->voice_samples += samples;
|
|
} else {
|
|
vad->silence_samples += samples;
|
|
vad->voice_samples = 0;
|
|
}
|
|
|
|
// check for state transitions
|
|
if (vad->vad_state == SWITCH_VAD_STATE_TALKING && vad->silence_samples > vad->silence_samples_thresh) {
|
|
vad->vad_state = SWITCH_VAD_STATE_STOP_TALKING;
|
|
if (vad->debug) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "vad state STOP_TALKING\n");
|
|
} else if (vad->vad_state == SWITCH_VAD_STATE_NONE && vad->voice_samples > vad->voice_samples_thresh) {
|
|
vad->vad_state = SWITCH_VAD_STATE_START_TALKING;
|
|
if (vad->debug) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "vad state START_TALKING\n");
|
|
}
|
|
|
|
if (vad->debug > 9) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "vad state %s\n", switch_vad_state2str(vad->vad_state));
|
|
|
|
return vad->vad_state;
|
|
}
|
|
|
|
/*
 * Report the detector's current state without processing any audio.
 * The context is only read, never modified.
 */
SWITCH_DECLARE(switch_vad_state_t) switch_vad_get_state(switch_vad_t *vad)
{
	const switch_vad_state_t current = vad->vad_state;

	return current;
}
|
|
|
|
/*
 * Release a VAD context and clear the caller's handle.
 *
 * vad: address of the handle obtained from switch_vad_init(). A NULL address
 *      or a handle that is already NULL is a no-op, so the call is safe to
 *      repeat. On return *vad is NULL.
 */
SWITCH_DECLARE(void) switch_vad_destroy(switch_vad_t **vad)
{
	if (!vad || !*vad) {
		return;
	}

#ifdef SWITCH_HAVE_FVAD
	/* the libfvad instance is owned by the context and goes first */
	if ((*vad)->fvad) {
		fvad_free((*vad)->fvad);
	}
#endif

	free(*vad);
	*vad = NULL;
}
|