/* * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application * Copyright (C) 2018, Anthony Minessale II * * Version: MPL 1.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application * * The Initial Developer of the Original Code is * Anthony Minessale II * Portions created by the Initial Developer are Copyright (C) * the Initial Developer. All Rights Reserved. * * Contributor(s): * * Seven Du * * * switch_vad.c VAD code with optional libfvad * */ #include #ifdef SWITCH_HAVE_FVAD #include #endif struct switch_vad_s { // configs int channels; int sample_rate; int debug; int divisor; int thresh; int voice_samples_thresh; int silence_samples_thresh; // VAD state int voice_samples; int silence_samples; switch_vad_state_t vad_state; #ifdef SWITCH_HAVE_FVAD Fvad *fvad; #endif }; SWITCH_DECLARE(const char *) switch_vad_state2str(switch_vad_state_t state) { switch(state) { case SWITCH_VAD_STATE_NONE: return "none"; case SWITCH_VAD_STATE_START_TALKING: return "start_talking"; case SWITCH_VAD_STATE_TALKING: return "talking"; case SWITCH_VAD_STATE_STOP_TALKING: return "stop_talking"; default: return "error"; } } SWITCH_DECLARE(switch_vad_t *) switch_vad_init(int sample_rate, int channels) { switch_vad_t *vad = malloc(sizeof(switch_vad_t)); if (!vad) return NULL; memset(vad, 0, sizeof(*vad)); vad->sample_rate = sample_rate ? sample_rate : 8000; vad->channels = channels; vad->silence_samples_thresh = 500 * (vad->sample_rate / 1000); vad->voice_samples_thresh = 200 * (vad->sample_rate / 1000); vad->thresh = 100; vad->divisor = vad->sample_rate / 8000; if (vad->divisor <= 0) { vad->divisor = 1; } switch_vad_reset(vad); return vad; } SWITCH_DECLARE(int) switch_vad_set_mode(switch_vad_t *vad, int mode) { #ifdef SWITCH_HAVE_FVAD int ret = 0; if (mode < 0) { if (vad->fvad) fvad_free(vad->fvad); vad->fvad = NULL; return ret; } else if (mode > 3) { mode = 3; } if (!vad->fvad) { vad->fvad = fvad_new(); if (!vad->fvad) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "libfvad init error\n"); } } if (vad->fvad) { ret = fvad_set_mode(vad->fvad, mode); fvad_set_sample_rate(vad->fvad, vad->sample_rate); } switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "libfvad started, mode = %d\n", mode); return ret; #else if (vad->debug) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "set vad mode = %d\n", mode); return 0; #endif } SWITCH_DECLARE(void) switch_vad_set_param(switch_vad_t *vad, const char *key, int val) { if (!key) return; if (!strcmp(key, "hangover_len")) { /* convert old-style hits to samples assuming 20ms ptime */ switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "hangover_len is deprecated, setting silence_ms to %d\n", 20 * val); switch_vad_set_param(vad, "silence_ms", val * 20); } else if (!strcmp(key, "silence_ms")) { if (val > 0) { vad->silence_samples_thresh = val * (vad->sample_rate / 1000); } else { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring invalid silence_ms of %d\n", val); } } else if (!strcmp(key, "thresh")) { vad->thresh = val; } else if (!strcmp(key, "debug")) { vad->debug = val; } else if (!strcmp(key, "voice_ms")) { if (val > 0) { vad->voice_samples_thresh = val * (vad->sample_rate / 1000); } else { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Ignoring invalid voice_ms of %d\n", val); } } else if (!strcmp(key, "listen_hits")) { /* convert old-style hits to samples assuming 20ms ptime */ switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "listen_hits is deprecated, setting voice_ms to %d\n", 20 * val); switch_vad_set_param(vad, "voice_ms", 20 * val); } if (vad->debug) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "set %s to %d\n", key, val); } } SWITCH_DECLARE(void) switch_vad_reset(switch_vad_t *vad) { #ifdef SWITCH_HAVE_FVAD if (vad->fvad) { fvad_reset(vad->fvad); } #endif vad->vad_state = SWITCH_VAD_STATE_NONE; vad->voice_samples = 0; vad->silence_samples = 0; if (vad->debug) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "reset vad state\n"); } SWITCH_DECLARE(switch_vad_state_t) switch_vad_process(switch_vad_t *vad, int16_t *data, unsigned int samples) { int score = 0; // Each frame has 2 possible outcomes- voice or not voice. // The VAD has 2 real states- talking / not talking with // begin talking and stop talking as events to mark transitions // determine if this is a voice or non-voice frame #ifdef SWITCH_HAVE_FVAD if (vad->fvad) { int ret = fvad_process(vad->fvad, data, samples); // printf("%d ", ret); fflush(stdout); score = vad->thresh + ret - 1; } else { #endif int energy = 0, j = 0, count = 0; for (energy = 0, j = 0, count = 0; count < samples; count++) { energy += abs(data[j]); j += vad->channels; } score = (uint32_t) (energy / (samples / vad->divisor)); #ifdef SWITCH_HAVE_FVAD } #endif if (vad->debug > 9) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "score: %d\n", score); } // clear the STOP/START TALKING events if (vad->vad_state == SWITCH_VAD_STATE_STOP_TALKING) { vad->vad_state = SWITCH_VAD_STATE_NONE; } else if (vad->vad_state == SWITCH_VAD_STATE_START_TALKING) { vad->vad_state = SWITCH_VAD_STATE_TALKING; } // adjust voice/silence run length counters if (score > vad->thresh) { vad->silence_samples = 0; vad->voice_samples += samples; } else { vad->silence_samples += samples; vad->voice_samples = 0; } // check for state transitions if (vad->vad_state == SWITCH_VAD_STATE_TALKING && vad->silence_samples > vad->silence_samples_thresh) { vad->vad_state = SWITCH_VAD_STATE_STOP_TALKING; if (vad->debug) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "vad state STOP_TALKING\n"); } else if (vad->vad_state == SWITCH_VAD_STATE_NONE && vad->voice_samples > vad->voice_samples_thresh) { vad->vad_state = SWITCH_VAD_STATE_START_TALKING; if (vad->debug) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "vad state START_TALKING\n"); } if (vad->debug > 9) switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "vad state %s\n", switch_vad_state2str(vad->vad_state)); return vad->vad_state; } SWITCH_DECLARE(switch_vad_state_t) switch_vad_get_state(switch_vad_t *vad) { return vad->vad_state; } SWITCH_DECLARE(void) switch_vad_destroy(switch_vad_t **vad) { if (*vad) { #ifdef SWITCH_HAVE_FVAD if ((*vad)->fvad) fvad_free ((*vad)->fvad); #endif free(*vad); *vad = NULL; } }