diff --git a/scripts/lua/hangup_jitterbuffer_metrics.lua b/scripts/lua/hangup_jitterbuffer_metrics.lua
new file mode 100755
index 0000000000..8e3dd37623
--- /dev/null
+++ b/scripts/lua/hangup_jitterbuffer_metrics.lua
@@ -0,0 +1,97 @@
+-- Hangup hook: POST the jitter-buffer statistics of the finished call, as JSON, to the
+-- metrics collector at http://$LOCAL_IPV4:80/freeswitch_metrics. The rtp_jb_* channel
+-- variables read here are published by src/switch_jitterbuffer.c.
+local http = require("socket.http")
+-- required explicitly: do not rely on socket.http leaving a global `ltn12` behind
+local ltn12 = require("ltn12")
+local ip = os.getenv("LOCAL_IPV4")
+local response_body = {}
+
+-- Returns channel variable `name` unchanged when it is a non-negative integer, otherwise
+-- `default` (nil when omitted), so unset or malformed values never reach the JSON body.
+local function numeric_var(name, default)
+    local value = session:getVariable(name)
+    if type(value) == "string" and value:match("^%d+$") then
+        return value
+    end
+    return default
+end
+
+-- Returns `value` as a quoted JSON string (nil becomes ""); quotes, backslashes and
+-- control characters are written as \u00XX escapes.
+local function json_string(value)
+    local escaped = tostring(value or ""):gsub('[%c"\\]', function(c)
+        return string.format("\\u%04x", c:byte())
+    end)
+    return '"' .. escaped .. '"'
+end
+
+-- jitter buffer stats
+local size_max_ms = numeric_var("rtp_jb_size_max_ms")
+local size_est_ms = numeric_var("rtp_jb_size_est_ms")
+local acceleration_ms = numeric_var("rtp_jb_acceleration_ms")
+local expand_ms = numeric_var("rtp_jb_expand_ms")
+local jitter_max_ms = numeric_var("rtp_jb_jitter_max_ms")
+local jitter_est_ms = numeric_var("rtp_jb_jitter_est_ms")
+
+-- the reset counters may be unset: report 0 instead of failing on a nil concatenation
+local reset_count = numeric_var("rtp_jb_reset_count", "0")
+local reset_too_big = numeric_var("rtp_jb_reset_too_big", "0")
+local reset_missing_frames = numeric_var("rtp_jb_reset_missing_frames", "0")
+local reset_ts_jump = numeric_var("rtp_jb_reset_ts_jump", "0")
+local reset_error = numeric_var("rtp_jb_reset_error", "0")
+local call_id = session:getVariable("sip_call_id")
+-- either call id may be unset; json_string() then sends an empty string
+local out_call_id = session:getVariable("last_bridge_to")
+
+if size_max_ms == nil or size_est_ms == nil or acceleration_ms == nil or expand_ms == nil
+    or jitter_max_ms == nil or jitter_est_ms == nil then
+    session:consoleLog("info", "[metrics] jitter no data\n")
+    return
+end
+if ip == nil or ip == "" then
+    session:consoleLog("warning", "[metrics] jitter not sent: LOCAL_IPV4 is not set\n")
+    return
+end
+
+local fields = {
+    '{"in_call_id": ', json_string(call_id),
+    ', "out_call_id": ', json_string(out_call_id),
+    ', "jb":{"size_max_ms":', size_max_ms,
+    ',"size_est_ms":', size_est_ms,
+    ',"acceleration_ms":', acceleration_ms,
+    ',"expand_ms":', expand_ms,
+    ',"jitter_max_ms":', jitter_max_ms,
+    ',"jitter_est_ms":', jitter_est_ms,
+    ',"reset":', reset_count,
+    -- always reported, unlike the other reset reasons
+    ',"reset_too_big":', reset_too_big,
+}
+-- the remaining reset reasons are only reported when they occurred
+if reset_missing_frames ~= "0" then
+    fields[#fields + 1] = ',"reset_missing_frames":' .. reset_missing_frames
+end
+if reset_ts_jump ~= "0" then
+    fields[#fields + 1] = ',"reset_ts_jump":' .. reset_ts_jump
+end
+if reset_error ~= "0" then
+    fields[#fields + 1] = ',"reset_error":' .. reset_error
+end
+
+local v = table.concat(fields) .. '}}'
+
+local ok, err = http.request{
+    method = 'POST',
+    url = "http://" .. ip .. ":80/freeswitch_metrics",
+    headers = {
+        ["Content-Type"] = "application/json",
+        ["Content-Length"] = tostring(#v)
+    },
+    source = ltn12.source.string(v),
+    sink = ltn12.sink.table(response_body)
+}
+if not ok then
+    -- socket.http returns nil plus an error message when the request could not be made
+    session:consoleLog("warning", "[metrics] jitter post failed: " .. tostring(err) .. "\n")
+end
+session:consoleLog("info", "[metrics] jitter:" .. v .. "\n")
diff --git a/src/include/switch_jitterbuffer.h b/src/include/switch_jitterbuffer.h
index bee0fa02f8..f098ede2db 100644
--- a/src/include/switch_jitterbuffer.h
+++ b/src/include/switch_jitterbuffer.h
@@ -61,6 +61,7 @@ SWITCH_DECLARE(switch_status_t) switch_jb_get_packet(switch_jb_t *jb, switch_rtp
 SWITCH_DECLARE(uint32_t) switch_jb_pop_nack(switch_jb_t *jb);
 SWITCH_DECLARE(switch_status_t) switch_jb_get_packet_by_seq(switch_jb_t *jb, uint16_t seq, switch_rtp_packet_t *packet, switch_size_t *len);
 SWITCH_DECLARE(void) switch_jb_set_session(switch_jb_t *jb, switch_core_session_t *session);
+SWITCH_DECLARE(void) switch_jb_set_jitter_estimator(switch_jb_t *jb, double *jitter, uint32_t samples_per_frame, uint32_t samples_per_second);
 SWITCH_DECLARE(void) switch_jb_ts_mode(switch_jb_t *jb, uint32_t samples_per_frame, uint32_t samples_per_second);
 SWITCH_DECLARE(void) switch_jb_set_flag(switch_jb_t *jb, switch_jb_flag_t flag);
 SWITCH_DECLARE(void) switch_jb_clear_flag(switch_jb_t *jb, switch_jb_flag_t flag);
diff --git a/src/include/switch_types.h b/src/include/switch_types.h
index 82639c2ca6..94a4c62cca 100644
--- a/src/include/switch_types.h
+++ b/src/include/switch_types.h
@@ -2425,6 +2425,7 @@ typedef enum {
 	SCC_VIDEO_RESET,
 	SCC_AUDIO_PACKET_LOSS,
 	SCC_AUDIO_ADJUST_BITRATE,
+	SCC_AUDIO_VAD,
 	SCC_DEBUG,
 	SCC_CODEC_SPECIFIC
 } 
switch_codec_control_command_t;
diff --git a/src/mod/codecs/mod_opus/Makefile.am b/src/mod/codecs/mod_opus/Makefile.am
index 690c5fa471..70710898e7 100644
--- a/src/mod/codecs/mod_opus/Makefile.am
+++ b/src/mod/codecs/mod_opus/Makefile.am
@@ -4,7 +4,7 @@ MODNAME=mod_opus
 if HAVE_OPUS
 mod_LTLIBRARIES = mod_opus.la
-mod_opus_la_SOURCES = mod_opus.c
+mod_opus_la_SOURCES = mod_opus.c opus_parse.c
 mod_opus_la_CFLAGS = $(AM_CFLAGS) $(OPUS_CFLAGS)
 mod_opus_la_LIBADD = $(switch_builddir)/libfreeswitch.la $(OPUS_LIBS)
 mod_opus_la_LDFLAGS = -avoid-version -module -no-undefined -shared -lm -lz
diff --git a/src/mod/codecs/mod_opus/mod_opus.c b/src/mod/codecs/mod_opus/mod_opus.c
index 4a47b45df4..ce9aff52a3 100644
--- a/src/mod/codecs/mod_opus/mod_opus.c
+++ b/src/mod/codecs/mod_opus/mod_opus.c
@@ -33,6 +33,7 @@
 #include "switch.h"
 #include "opus.h"
+#include "opus_parse.h"
 #define SWITCH_OPUS_MIN_BITRATE 6000
 #define SWITCH_OPUS_MAX_BITRATE 510000
@@ -1169,6 +1170,33 @@ static switch_status_t switch_opus_keep_fec_enabled(switch_codec_t *codec)
 	}
 }
+/* Returns SWITCH_FALSE when the VAD flags of the encoded Opus packet report no voice */
+/* (vad_ms == 0); SWITCH_TRUE otherwise, including when the packet cannot be parsed. */
+static switch_bool_t switch_opus_vad(struct opus_context *context, void *encoded_data, uint32_t encoded_data_len) {
+	const uint8_t *payload = (const uint8_t *) encoded_data;
+	opus_packet_info_t opus_packet_info;
+	switch_bool_t debug = (globals.debug || context->debug > 1);
+	/* only log header bytes that exist: the payload may be NULL or a single byte long */
+	unsigned int byte0 = (payload && encoded_data_len > 0) ? payload[0] : 0;
+	unsigned int byte1 = (payload && encoded_data_len > 1) ? payload[1] : 0;
+
+	if (!switch_opus_packet_parse(payload, encoded_data_len, &opus_packet_info, debug)) {
+		if (debug) {
+			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "OPUS PACKET PARSING ERROR len:%d bytes:%02x %02x\n",
+							  (int)encoded_data_len, byte0, byte1);
+		}
+		return SWITCH_TRUE;
+	}
+	if (debug) {
+		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "OPUS EXTRACT PAYLOAD VAD len:%d vad_ms:%d bytes:%02x %02x\n",
+						  (int)encoded_data_len, opus_packet_info.vad_ms, byte0, byte1);
+	}
+	if (opus_packet_info.vad_ms == 0) {
+		return SWITCH_FALSE;
+	}
+	return SWITCH_TRUE;
+}
+
 static switch_status_t switch_opus_control(switch_codec_t *codec, 
switch_codec_control_command_t cmd,
 										   switch_codec_control_type_t ctype,
@@ -1260,6 +1282,18 @@ static switch_status_t switch_opus_control(switch_codec_t *codec,
 			context->old_plpct = plpct;
 		}
 		break;
+	case SCC_AUDIO_VAD:
+		{
+			void* encoded_data = (void *)cmd_data;
+			uint16_t* encoded_data_len = (uint16_t *)cmd_arg;
+			switch_bool_t *ret = ret_data ? (switch_bool_t *) *ret_data : NULL;
+
+			/* cmd_data: encoded payload, cmd_arg: its length, *ret_data: where the verdict is stored */
+			if (encoded_data && encoded_data_len && ret) {
+				*ret = switch_opus_vad(context, encoded_data, *encoded_data_len);
+			}
+		}
+		break;
 	case SCC_AUDIO_ADJUST_BITRATE:
 		{
 			const char *cmd = (const char *)cmd_data;
diff --git a/src/mod/codecs/mod_opus/opus_parse.c b/src/mod/codecs/mod_opus/opus_parse.c
new file mode 100644
index 0000000000..41e1ec6490
--- /dev/null
+++ b/src/mod/codecs/mod_opus/opus_parse.c
@@ -0,0 +1,372 @@
+/*
+ * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
+ * Copyright (C) 2005-2023, Anthony Minessale II
+ *
+ * Version: MPL 1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
+ *
+ * The Initial Developer of the Original Code is
+ * Anthony Minessale II
+ * Portions created by the Initial Developer are Copyright (C)
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *
+ * Claude Lamblin
+ * Julien Chavanton
+ *
+ */
+
+#include "switch.h"
+#include <opus.h>
+#include "opus_parse.h"
+/* Tables for LBRR_symbol decoding */
+
+static const opus_int16 silk_LBRR_flags_2_PDFCum[3] = {53, 106, 256}; /* 256 - silk_LBRR_flags_2_iCDF[i] ; silk_LBRR_flags_2_iCDF[ 3 ] = { 203, 150, 0 }; */
+static const opus_int16 silk_LBRR_flags_3_PDFCum[7] = {41, 61, 90, 131, 146, 174, 256}; /* 256 - silk_LBRR_flags_3_iCDF[i] ; silk_LBRR_flags_3_iCDF[ 7 ] = { 215, 195, 166, 125, 110, 82, 0 }; */
+
+/* get the number of VAD flags - i.e. number of 20 ms frame - from the config */
+/* in a silk-only or hybrid opus frame mono or stereo*/
+/* 5 MSB TOC byte (see table 2 of IETF RFC6716 clause 3.1) */
+/* if 10 ms frame (config=0, 4, 8, 12, 14) : return 1 */
+/* if CELT_only frame no VAD flag =>return 0 */
+static opus_int16 switch_opus_get_nb_flags_in_silk_frame(int16_t config)
+{
+	opus_int16 silk_frame_nb_flags;
+	if (config > 15) {
+		/* CELT_only frame no VAD flag nor LBRR flag */
+		silk_frame_nb_flags = 0;
+	} else {
+		silk_frame_nb_flags = 1; /*default*/
+		if (config < 12) {
+			/* silk-only NB, MB or WB */
+			/* The least two significant bits give the number of VAD flags inside the silk frame 1, 2 or 3 */
+			silk_frame_nb_flags = config & 0x3;
+			if (silk_frame_nb_flags == 0) { /* 0 => 10ms frame : one VAD flag */
+				silk_frame_nb_flags++;
+			}
+		}
+	}
+	return silk_frame_nb_flags;
+}
+
+/* get the time in ms corresponding to one VAD flag from the config */
+/* in a silk-only or hybrid opus frame mono or stereo*/
+/* 5 MSB TOC byte (see table 2 of IETF RFC6716 clause 3.1) */
+/* if CELT_only frame (config >15) no VAD flag =>return FALSE */
+/* if 10 ms frame (config=0, 4, 8, 12, 14) : return 10 */
+/* otherwise return 20 */
+static opus_int16 switch_opus_get_silk_frame_ms_per_flag(int16_t config, opus_int16 silk_frame_nb_flags)
+{
+	opus_int16 silk_size_frame_ms_per_flag;
+	if (config > 15) {
+		/* CELT_only frame no VAD flag nor LBRR flag */
+		/* switch_opus_get_silk_frame_ms_per_flag: code not written for CELT-only mode */
+		return FALSE;
+	}
+	silk_size_frame_ms_per_flag = 20; /* default*/
+	if (silk_frame_nb_flags == 1) { /* could be 10 or 20 ms */
+		if ((config &0x01) == 0) {
+			silk_size_frame_ms_per_flag = 10;
+		}
+	}
+	return silk_size_frame_ms_per_flag;
+}
+
+/* code written only for mono, silk-only or hybrid mode */
+/* for CELT-only frame no vad flags for LBRR flag the routine must not be called */
+/* for stereo : the mid frame VAD_flags and the LBRR_flag could be obtained */
+/* yet, to get the LBRR_flags of the mid frame the routine should be modified */
+/* to skip the side VAD flags and the side LBRR flag and to get the mid LBRR_symbol */
+static bool_t switch_opus_get_VAD_LBRR_flags(const uint8_t *buf, opus_int16 silk_frame_nb_flags,
+											  opus_int16 *VAD_flags, opus_int16 *LBRR_flags, opus_int16 *nb_VAD1, opus_int16 *nb_FEC)
+{
+	const opus_int16 *ptr_pdf_cum;
+	opus_int nb_pdf_symbol;
+	opus_uint16 LBRR_symbol;
+	opus_int16 val, nb_bit, compl_nb_bit, mask, mask2;
+	opus_int16 *ptr_flags;
+	opus_int16 LBRR_flag;
+	opus_int16 nb_vad, nb_fec;
+	int i;
+
+	nb_vad = 0;
+	nb_fec = 0;
+
+	/* get VAD_FLAGS & LBRR_FLAG */
+	/* silk_frame_nb_flags = 1 (10 or 20 ms), the two MSB of the first byte are the VAD flag and the LBRR flag */
+	/* silk_frame_nb_flags = 2 (40 ms), the three MSB of the first byte are the two VAD flags and the LBRR flag */
+	/* silk_frame_nb_flags = 3 (60 ms), the four MSB of the first byte are the three VAD flags and the LBRR flag */
+	/* compute the number of MSB to analyse */
+	nb_bit = silk_frame_nb_flags + 1;
+	/* number of right shifts to apply to the first byte to only have the bits of LBRR flag and of the VAD flags */
+	compl_nb_bit = 8 - nb_bit;
+	mask = (1 << nb_bit) - 1;
+
+	/* the bits of the silk_frame_nb_flags VAD flags and the LBRR flag are the MSB of the first byte */
+	/* silk_frame_nb_flags = 1 (10 or 20 ms), VAD_flags(0) | LBRR_flag */
+	/* silk_frame_nb_flags = 2 (40 ms), VAD_flags(0) | VAD_flags(1) | LBRR_flag */
+	/* silk_frame_nb_flags = 3 (60 ms), VAD_flags(0) | VAD_flags(1) | VAD_flags(2) |LBRR_flag */
+	val = (buf[0] >> compl_nb_bit) & mask;
+
+	LBRR_flag = val & 0x1; /* LBRR_FLAG LSB */
+
+	/* get VAD_flags */
+	ptr_flags = VAD_flags + silk_frame_nb_flags;
+	for (i=0; i < silk_frame_nb_flags; i++) {
+		LBRR_flags[i] = 0; /* init */
+		val >>= 1;
+		*(--ptr_flags) = val & 0x1;
+	}
+	if (LBRR_flag != 0) { /* there is at least one LBRR frame */
+		if (silk_frame_nb_flags == 1) {
+			LBRR_flags[0] = 1;
+			nb_fec = 1;
+		} else { /* get LBRR_symbol then LBRR_flags */
+			/* LBRR symbol is encoded with range encoder : range on 8 bits */
+			/* silk_frame_nb_flags = 2 ; 3 possible values for LBRR_flags(1) | LBRR_flags(0))= 01, 10, 11 */
+			/* silk_frame_nb_flags = 3 ; 7 possible values for LBRR_flags(2) | LBRR_flags(1) | LBRR_flags(0))= 001, 010, 011, 100, 101, 110, 111 */
+			mask2 = (1 << compl_nb_bit) - 1;
+			/* get next 8 bits: (8-nb_bit) LSB of the first byte and nb_bit MSB of the second byte */
+			val = (((buf[0]) & mask2) << nb_bit) | ((buf[1] >> compl_nb_bit) & mask);
+
+			if (silk_frame_nb_flags == 2) {
+				nb_pdf_symbol = 3;
+				ptr_pdf_cum = silk_LBRR_flags_2_PDFCum;
+			} else {
+				nb_pdf_symbol = 7;
+				ptr_pdf_cum = silk_LBRR_flags_3_PDFCum;
+			}
+			LBRR_symbol = 0;
+			for (i = 1; i <= nb_pdf_symbol; i++) {
+				if (val < *ptr_pdf_cum++) {
+					LBRR_symbol = i;
+					break;
+				}
+			}
+			for (i = 0; i < silk_frame_nb_flags; i++) {
+				LBRR_flags[i] = LBRR_symbol & 0x01;
+				LBRR_symbol >>= 1;
+				nb_fec += LBRR_flags[i];
+			}
+		}
+	}
+	for (i = 0; i < silk_frame_nb_flags; i++) {
+		nb_vad += VAD_flags[i];
+	}
+
+	*nb_VAD1 = nb_vad;
+	*nb_FEC = nb_fec;
+	return TRUE;
+}
+
+/* Parse the packet to retrieve informations about its content
+ * RFC6716: Definition of the Opus Audio Codec
+ * return: FALSE if there was a problem found parsing the packet, the info returned should be ignored.
+ * */
+bool_t switch_opus_packet_parse(const uint8_t *payload, int payload_length_bytes, opus_packet_info_t *packet_info, bool_t debug)
+{
+	int f;
+	int32_t samplerate;
+	int i, shift_silk, silk_frame_packet;
+	int16_t vad_flags_per_silk_frame, fec_flags_per_silk_frame;
+	opus_int16 frame_sizes[48];
+	const unsigned char *frame_data[48];
+	opus_int16 packet_LBBR_FLAGS[3 * 48], packet_VAD_FLAGS[3 * 48];
+	opus_int16 *ptr_LBBR_FLAGS, *ptr_VAD_FLAGS;
+	opus_int16 silk_frame_nb_flags, silk_size_frame_ms_per_flag;
+	opus_int16 silk_frame_nb_fec, silk_frame_nb_vad1;
+	opus_int sample_per_frame;
+	packet_info->config = 0;
+	packet_info->fec = 0;
+	packet_info->fec_ms = 0;
+	packet_info->vad = 0;
+	packet_info->vad_ms = 0;
+	packet_info->stereo = FALSE;
+	packet_info->frames = 0;
+	packet_info->channels = 1; /* as stereo is set to FALSE */
+	packet_info->ms_per_frame = 0;
+	packet_info->ptime_ts = 0;
+	packet_info->frames_silk = 0;
+	packet_info->vad_flags_per_silk_frame = 0;
+	packet_info->fec_flags_per_silk_frame = 0;
+	if (payload == NULL || payload_length_bytes <= 0) {
+		if (debug) {
+			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "opus_packet_parse: payload null.");
+		}
+		return FALSE;
+	}
+
+	/* In CELT_ONLY mode, packets should not have FEC. */
+	if (payload[0] & 0x80) {
+		/* opus_packet_parse: CELT_ONLY mode, we do not support this mode. */
+		return FALSE;
+	} else {
+		int mode = (payload[0] >> 3);
+		if (mode <= 3) {
+			samplerate = 8000;
+		} else if (mode <= 7) {
+			samplerate = 12000;
+		} else if (mode <= 11) {
+			samplerate = 16000;
+		} else if (mode <= 13) {
+			samplerate = 24000;
+		} else if (mode <= 15) {
+			samplerate = 48000;
+		} else {
+			/* opus_packet_parse: CELT_ONLY mode, we do not support this mode. */
+			return FALSE;
+		}
+		if (debug) {
+			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "opus_packet_parse: mode[%d]s[%d]c[%d] [%d]Hz\n", mode, (payload[0]>>2)&0x1 ,(payload[0])&0x3, samplerate);
+		}
+	}
+	if (payload[0] & 0x04) {
+		packet_info->stereo = TRUE;
+		packet_info->channels = 2;
+	}
+	packet_info->config = payload[0] >> 3;
+	sample_per_frame = opus_packet_get_samples_per_frame(payload, samplerate);
+	packet_info->ms_per_frame = sample_per_frame * 1000 / samplerate;
+	if (packet_info->ms_per_frame < 10 || packet_info->ms_per_frame > 120) {
+		if (debug) {
+			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "opus_packet_parse: invalid packet.");
+		}
+		return FALSE;
+	}
+
+	packet_info->frames = opus_packet_parse(payload, payload_length_bytes, NULL, frame_data, frame_sizes, NULL);
+	if (packet_info->frames <= 0) {
+		packet_info->frames = 0;
+		if (debug) {
+			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "opus_packet_parse: opus_packet_parse found no frame.\n");
+		}
+		return FALSE;
+	}
+	packet_info->ptime_ts = packet_info->frames * sample_per_frame;
+
+	/* the flags are read from the first two bytes of every frame, not only of the first one */
+	for (f = 0; f < packet_info->frames; f++) {
+		if (frame_sizes[f] <= 1) {
+			if (debug) {
+				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "opus_packet_parse: opus_packet_parse frame size too small.\n");
+			}
+			return FALSE;
+		}
+	}
+
+	/* +---------------+-----------+-----------+-------------------+ */
+	/* | Configuration | Mode | Bandwidth | Frame Sizes | */
+	/* | Number(s) | | | | */
+	/* +---------------+-----------+-----------+-------------------+ */
+	/* | 0...3 | SILK-only | NB | 10, 20, 40, 60 ms | */
+	/* | 4...7 | SILK-only | MB | 10, 20, 40, 60 ms | */
+	/* | 8...11 | SILK-only | WB | 10, 20, 40, 60 ms | */
+	/* | 12...13 | Hybrid | SWB | 10, 20 ms | */
+	/* | 14...15 | Hybrid | FB | 10, 20 ms | */
+	/* | 16...19 | CELT-only | NB | 2.5, 5, 10, 20 ms | */
+	/* | 20...23 | CELT-only | WB | 2.5, 5, 10, 20 ms | */
+	/* | 24...27 | CELT-only | SWB | 2.5, 5, 10, 20 ms | */
+	/* | 28...31 | CELT-only | FB | 2.5, 5, 10, 20 ms | */
+	/* +---------------+-----------+-----------+-------------------+ */
+
+	if (!packet_info->stereo) {
+		/* the routines opus_get_nb_flags_in_silk_frame and opus_get_silk_frame_ms_per_flag are also valid for stereo frames */
+		/* yet the routine opus_get_VAD_LBRR_flags is currently only for mono frame*/
+		silk_frame_nb_flags = switch_opus_get_nb_flags_in_silk_frame(packet_info->config); /* =1 for 10 or 20 ms frame; = 2 for 40 ms; = 3 for 60 ms */
+		if (!silk_frame_nb_flags) {
+			/* We should not go there as CELT_ONLY is already tested above */
+			return FALSE;
+		}
+
+		packet_info->frames_silk = silk_frame_nb_flags;
+		silk_size_frame_ms_per_flag = switch_opus_get_silk_frame_ms_per_flag(packet_info->config, silk_frame_nb_flags); /* 10 or 20 ms frame*/
+		if (!silk_size_frame_ms_per_flag) {
+			/* we should not go there as CELT_ONLY is already tested above */
+			return FALSE;
+		}
+
+		ptr_LBBR_FLAGS = packet_LBBR_FLAGS;
+		ptr_VAD_FLAGS = packet_VAD_FLAGS;
+
+		for (f = 0; f < packet_info->frames; f++) {
+			switch_opus_get_VAD_LBRR_flags(frame_data[f], silk_frame_nb_flags, ptr_VAD_FLAGS, ptr_LBBR_FLAGS,
+										   &silk_frame_nb_vad1, &silk_frame_nb_fec);
+			packet_info->vad += silk_frame_nb_vad1;
+			packet_info->fec += silk_frame_nb_fec;
+			packet_info->vad_ms += silk_frame_nb_vad1 * silk_size_frame_ms_per_flag;
+			packet_info->fec_ms += silk_frame_nb_fec * silk_size_frame_ms_per_flag;
+
+			ptr_VAD_FLAGS += silk_frame_nb_flags;
+			ptr_LBBR_FLAGS += silk_frame_nb_flags;
+		}
+		/* store the VAD & LBRR flags of all 20 ms silk-frames of the packet; LSB the first frame, MSB: the last */
+		vad_flags_per_silk_frame = 0;
+		fec_flags_per_silk_frame = 0;
+		silk_frame_packet = packet_info->frames * packet_info->frames_silk;
+		if (silk_frame_packet > 15) {
+			if (debug) {
+				switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "opus_packet_parse: more than %d 20-ms frames in the packet ; only first 15 silk-frames data will be stored (pb silkFastAccelerate)\n", silk_frame_packet);
+			}
+			silk_frame_packet = 15;
+		}
+		ptr_LBBR_FLAGS = packet_LBBR_FLAGS;
+		ptr_VAD_FLAGS = packet_VAD_FLAGS;
+		shift_silk = 0;
+		for (i=0; i < silk_frame_packet; i++) {
+			vad_flags_per_silk_frame += (*ptr_VAD_FLAGS) << shift_silk;
+			fec_flags_per_silk_frame += (*ptr_LBBR_FLAGS) << shift_silk;
+			shift_silk++;
+			ptr_LBBR_FLAGS++; ptr_VAD_FLAGS++;
+		}
+		packet_info->vad_flags_per_silk_frame = vad_flags_per_silk_frame;
+		packet_info->fec_flags_per_silk_frame = fec_flags_per_silk_frame;
+		return TRUE;
+	}
+
+	if (packet_info->config != 1 && packet_info->config != 5 && packet_info->config != 9) {
+		if (debug) {
+			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "opus_packet_parse: the current parser implementation does not support muliple SILK frames for VAD or FEC detection.\n");
+		}
+		return FALSE;
+	}
+	/*
+	 * Parse the VAD and LBRR flags in each Opus frame
+	 * */
+	for (f = 0; f < packet_info->frames; f++) {
+		if (frame_data[f][0] & 0x80) {
+			packet_info->vad++;
+		}
+		if (frame_data[f][0] & 0x40) {
+			packet_info->fec++;
+		}
+		if (debug) {
+			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "opus_packet_parse: LP layer opus_frame[%d] VAD[%d] FEC[%d]\n", f+1, (frame_data[f][0]&0x80)>>7, (frame_data[f][0]&0x40)>>6);
+		}
+	}
+	packet_info->vad_ms = packet_info->vad * packet_info->ms_per_frame;
+	packet_info->fec_ms = packet_info->fec * packet_info->ms_per_frame;
+	return TRUE;
+}
+
+/* For Emacs:
+ * Local Variables:
+ * mode:c
+ * indent-tabs-mode:t
+ * tab-width:4
+ * c-basic-offset:4
+ * End:
+ * For VIM:
+ * vim:set softtabstop=4 shiftwidth=4 tabstop=4 noet:
+ */
diff --git a/src/mod/codecs/mod_opus/opus_parse.h b/src/mod/codecs/mod_opus/opus_parse.h
new file mode 100644
index 0000000000..8dd61500cd
--- /dev/null
+++ b/src/mod/codecs/mod_opus/opus_parse.h
@@ -0,0 +1,66 @@
+/*
+ * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
+ * Copyright (C) 2005-2023, Anthony Minessale II
+ *
+ * Version: MPL 1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
+ *
+ * The Initial Developer of the Original Code is
+ * Anthony Minessale II
+ * Portions created by the Initial Developer are Copyright (C)
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *
+ * Julien Chavanton
+ *
+ */
+
+#ifndef SWITCH_OPUS_PARSE_H
+#define SWITCH_OPUS_PARSE_H
+
+typedef enum { false, true } bool_t;
+
+typedef struct opus_packet_info {
+	int16_t vad;
+	int16_t vad_ms;
+	int16_t fec;
+	int16_t fec_ms;
+	bool_t stereo;
+	int16_t frames; /* number of opus frames in the packet */
+	int16_t config;
+	int16_t channels;
+	int16_t ms_per_frame;
+	int32_t ptime_ts;
+	bool_t valid;
+	int16_t frames_silk; /* number of silk_frames in an opus frame */
+	/* VAD flag of all 20 ms silk-frames of the packet; LSB the first frame, MSB: the last */
+	int16_t vad_flags_per_silk_frame;
+	/* LBRR (FEC) flag of all 20 ms silk-frames of the packet; LSB the first frame, MSB: the last */
+	int16_t fec_flags_per_silk_frame;
+} opus_packet_info_t;
+
+bool_t switch_opus_packet_parse(const uint8_t *payload, int payload_length_bytes, opus_packet_info_t *packet_info, bool_t debug);
+#endif
+
+/* For Emacs:
+ * Local Variables:
+ * mode:c
+ * indent-tabs-mode:t
+ * tab-width:4
+ * c-basic-offset:4
+ * End:
+ * For VIM:
+ * vim:set softtabstop=4 shiftwidth=4 tabstop=4 noet:
+ */
diff --git a/src/switch_jitterbuffer.c b/src/switch_jitterbuffer.c
index d68b269024..d8d9df3a33 100644
--- 
a/src/switch_jitterbuffer.c
+++ b/src/switch_jitterbuffer.c
@@ -44,6 +44,8 @@
 struct switch_jb_s;
+static inline int check_jb_size(switch_jb_t *jb);
+
 typedef struct switch_jb_node_s {
 	struct switch_jb_s *parent;
 	switch_rtp_packet_t packet;
@@ -56,6 +58,29 @@ typedef struct switch_jb_node_s {
 	switch_bool_t complete_frame_mark;
 } switch_jb_node_t;
+typedef struct switch_jb_stats_s {
+	uint32_t reset_too_big;
+	uint32_t reset_missing_frames;
+	uint32_t reset_ts_jump;
+	uint32_t reset_error;
+	uint32_t reset;
+	uint32_t size_max;
+	uint32_t size_est;
+	uint32_t acceleration;
+	uint32_t expand;
+	uint32_t jitter_max_ms;
+	int estimate_ms;
+	int buffer_size_ms;
+} switch_jb_stats_t;
+
+typedef struct switch_jb_jitter_s {
+	double *estimate;
+	uint32_t samples_per_second;
+	uint32_t samples_per_frame;
+	uint32_t drop_gap;
+	switch_jb_stats_t stats;
+} switch_jb_jitter_t;
+
 struct switch_jb_s {
 	struct switch_jb_node_s *node_list;
 	uint32_t last_target_seq;
@@ -104,6 +129,7 @@ struct switch_jb_s {
 	switch_jb_flag_t flags;
 	switch_jb_type_t type;
 	switch_core_session_t *session;
+	switch_jb_jitter_t jitter;
 	switch_channel_t *channel;
 	uint32_t buffer_lag;
 	uint32_t flush;
@@ -112,6 +138,8 @@ struct switch_jb_s {
 	uint32_t period_len;
 	uint32_t nack_saved_the_day;
 	uint32_t nack_didnt_save_the_day;
+	switch_bool_t elastic;
+	switch_codec_t *codec;
 };
@@ -233,6 +261,7 @@ static inline switch_jb_node_t *new_node(switch_jb_t *jb)
 	if (jb->allocated_nodes > jb->max_frame_len * mult) {
 		jb_debug(jb, 2, "ALLOCATED FRAMES TOO HIGH! %d\n", jb->allocated_nodes);
+		jb->jitter.stats.reset_too_big++;
 		switch_jb_reset(jb);
 		switch_mutex_unlock(jb->list_mutex);
 		return NULL;
@@ -332,6 +361,32 @@ static inline void hide_nodes(switch_jb_t *jb)
 	switch_mutex_unlock(jb->list_mutex);
 }
+/* Asks the audio codec, through SCC_AUDIO_VAD, whether the encoded packet carries voice. */
+/* Returns SWITCH_TRUE when there is no payload or when the codec gives no answer. */
+static inline switch_bool_t packet_vad(switch_jb_t *jb, switch_rtp_packet_t *packet, switch_size_t len) {
+	void* payload;
+	uint16_t payload_len = len;
+
+	if (packet->ebody) {
+		payload = packet->ebody;
+	} else {
+		payload = packet->body;
+	}
+	if (payload && payload_len > 0) {
+		/* stays SWITCH_TRUE when the codec control call does not fill it in */
+		switch_bool_t ret = SWITCH_TRUE;
+		switch_bool_t *ret_p = &ret;
+		switch_codec_control_type_t ret_t;
+		switch_core_media_codec_control(jb->session, SWITCH_MEDIA_TYPE_AUDIO,
+										SWITCH_IO_WRITE, SCC_AUDIO_VAD,
+										SCCT_STRING, (void *)payload,
+										SCCT_INT, (void *)&payload_len,
+										&ret_t, (void *)&ret_p);
+		return ret;
+	}
+	return SWITCH_TRUE;
+}
+
 static inline void drop_ts(switch_jb_t *jb, uint32_t ts)
 {
 	switch_jb_node_t *np;
@@ -667,6 +719,7 @@ static inline void add_node(switch_jb_t *jb, switch_rtp_packet_t *packet, switch
 		if (((seq_diff >= 100) || (ts_diff > (900000 * 5)))) {
 			jb_debug(jb, 2, "CHANGE DETECTED, PUNT %u\n", abs(((int)ntohs(packet->header.seq) - ntohs(jb->highest_wrote_seq))));
+			jb->jitter.stats.reset_ts_jump++;
 			switch_jb_reset(jb);
 		}
 	}
@@ -732,6 +785,12 @@ static inline void increment_seq(switch_jb_t *jb)
 	jb->target_seq = htons((ntohs(jb->target_seq) + 1));
 }
+static inline void decrement_seq(switch_jb_t *jb)
+{
+	jb->last_target_seq = jb->target_seq;
+	jb->target_seq = htons((ntohs(jb->target_seq) - 1));
+}
+
 static inline void set_read_seq(switch_jb_t *jb, uint16_t seq)
 {
 	jb->last_target_seq = seq;
@@ -854,12 +913,136 @@ static inline switch_status_t jb_next_packet_by_ts(switch_jb_t *jb, switch_jb_no
 }
+/* Counts the visible nodes that are not older than target_seq, hides the older ones, */
+/* updates the size statistics and returns that count; every 50th target sequence the */
+/* statistics are also published as rtp_jb_* channel variables. */
+static inline int check_jb_size(switch_jb_t *jb)
+{
+	switch_jb_node_t *np;
+	uint16_t seq;
+	uint16_t target;
+	uint16_t l_seq=0;
+	uint16_t h_seq=0;
+	uint16_t count=0;
+	uint16_t old=0;
+	switch_mutex_lock(jb->list_mutex);
+
+	target = ntohs(jb->target_seq);
+	for (np = jb->node_list; np; np = np->next) {
+		if (!np->visible) {
+			continue;
+		}
+
+		seq = ntohs(np->packet.header.seq);
+		/* signed 16 bit distance: stays correct when the sequence number wraps at 65535 */
+		if ((int16_t)(uint16_t)(seq - target) < 0) {
+			hide_node(np, SWITCH_FALSE);
+			old++;
+			continue;
+		}
+		if (count == 0) {
+			l_seq = h_seq = seq;
+		}
+		count++;
+		if (seq < l_seq)
+			l_seq = seq;
+		if (seq > h_seq)
+			h_seq = seq;
+	}
+	if (count > jb->jitter.stats.size_max) {
+		jb->jitter.stats.size_max = count;
+	}
+	if (jb->jitter.stats.size_est == 0) {
+		jb->jitter.stats.size_est = count;
+	} else {
+		jb->jitter.stats.size_est = ((99*jb->jitter.stats.size_est)+(1*count))/100;
+	}
+	/* update the stats every x packets; skipped while no channel/estimator is attached or when the */
+	/* clock rate is below 1000 Hz, which would dereference NULL or divide by zero below */
+	if (target % 50 == 0 && jb->channel && jb->jitter.estimate && jb->jitter.samples_per_second >= 1000) {
+		int packet_ms = jb->jitter.samples_per_frame / (jb->jitter.samples_per_second / 1000);
+		jb->jitter.stats.estimate_ms = (*jb->jitter.estimate) / jb->jitter.samples_per_second * 1000;
+		switch_channel_set_variable_printf(jb->channel, "rtp_jb_size_max_ms", "%u", jb->jitter.stats.size_max*packet_ms);
+		switch_channel_set_variable_printf(jb->channel, "rtp_jb_size_est_ms", "%u", jb->jitter.stats.size_est*packet_ms);
+		switch_channel_set_variable_printf(jb->channel, "rtp_jb_acceleration_ms", "%u", jb->jitter.stats.acceleration*packet_ms);
+		switch_channel_set_variable_printf(jb->channel, "rtp_jb_expand_ms", "%u", jb->jitter.stats.expand*packet_ms);
+		if (jb->jitter.stats.jitter_max_ms < jb->jitter.stats.estimate_ms) {
+			jb->jitter.stats.jitter_max_ms = jb->jitter.stats.estimate_ms;
+		}
+		switch_channel_set_variable_printf(jb->channel, "rtp_jb_jitter_max_ms", "%u", jb->jitter.stats.jitter_max_ms);
+		switch_channel_set_variable_printf(jb->channel, "rtp_jb_jitter_est_ms", "%u", jb->jitter.stats.estimate_ms);
+	}
+	if (old) {
+		sort_free_nodes(jb);
+	}
+	switch_mutex_unlock(jb->list_mutex);
+	jb_debug(jb, SWITCH_LOG_INFO, "JITTER buffersize %u == %u old[%u] target[%u] seq[%u|%u]\n", count, h_seq-l_seq+1, old, ntohs(jb->target_seq), l_seq, h_seq);
+	return count;
+}
+
+static inline switch_status_t 
jb_next_packet_by_seq_with_acceleration(switch_jb_t *jb, switch_jb_node_t **nodep) +{ + switch_status_t status = jb_next_packet_by_seq(jb, nodep); + switch_rtp_packet_t *packet; + uint32_t len; + uint16_t seq = ntohs(jb->target_seq); + + /* When using a Codec that provides voice activity detection ex. Opus, use it to + select packet to drop/accelerate. */ + + if (jb->elastic && jb->jitter.estimate && (jb->visible_nodes*jb->jitter.samples_per_frame)>0 && jb->jitter.samples_per_second) { + int visible_not_old = check_jb_size(jb); + jb->jitter.stats.estimate_ms = (int)((*jb->jitter.estimate)/((jb->jitter.samples_per_second))*1000); + jb->jitter.stats.buffer_size_ms = (int)((visible_not_old*jb->jitter.samples_per_frame)/(jb->jitter.samples_per_second/1000)); + + // We try to accelerate in order to remove delay when the jitter buffer is 3x larger than the estimation. + if (jb->jitter.stats.buffer_size_ms > (3*jb->jitter.stats.estimate_ms) && jb->jitter.stats.buffer_size_ms > 60) { + if (status == SWITCH_STATUS_SUCCESS) { + packet = &(*nodep)->packet; + seq = ntohs((*nodep)->packet.header.seq); + len = (*nodep)->len; + } + if (jb->jitter.drop_gap > 0) { + jb_debug(jb, SWITCH_LOG_INFO, "JITTER estimation %dms buffersize %d/%d %dms seq:%u [drop-gap][%d]\n", + jb->jitter.stats.estimate_ms, jb->complete_frames , jb->frame_len, jb->jitter.stats.buffer_size_ms, seq, jb->jitter.drop_gap); + jb->jitter.drop_gap--; + } else { + if (status != SWITCH_STATUS_SUCCESS || packet_vad(jb, packet, len) == SWITCH_FALSE) { + jb->jitter.drop_gap = 3; + if (status != SWITCH_STATUS_SUCCESS) { + jb_debug(jb, SWITCH_LOG_INFO, "JITTER estimation n/a buffersize %d/%d %dms seq:%u [drop-missing/no-plc]\n", + jb->complete_frames , jb->frame_len, jb->jitter.stats.buffer_size_ms, seq); + } else { + jb_debug(jb, SWITCH_LOG_INFO, "JITTER estimation %dms buffersize %d/%d %dms seq:%u ACCELERATE [drop]\n", + jb->jitter.stats.estimate_ms, jb->complete_frames , jb->frame_len, jb->jitter.stats.buffer_size_ms, 
seq); + } + jb->jitter.stats.acceleration++; + return jb_next_packet_by_seq(jb, nodep); + } else { + jb_debug(jb, SWITCH_LOG_INFO, "JITTER estimation %dms buffersize %d/%d %dms seq:%u [drop-skip-vad]\n", + jb->jitter.stats.estimate_ms, jb->complete_frames , jb->frame_len, jb->jitter.stats.buffer_size_ms, seq); + } + } + } else { + jb_debug(jb, 2, "JITTER estimation %dms buffersize %d/%d %dms\n", + jb->jitter.stats.estimate_ms, jb->complete_frames , jb->frame_len, jb->jitter.stats.buffer_size_ms); + } + } + return status; +} + static inline switch_status_t jb_next_packet(switch_jb_t *jb, switch_jb_node_t **nodep) { if (jb->samples_per_frame) { return jb_next_packet_by_ts(jb, nodep); } else { - return jb_next_packet_by_seq(jb, nodep); + switch_status_t status; + if (jb->elastic && jb->jitter.estimate) { + status = jb_next_packet_by_seq_with_acceleration(jb, nodep); + } else { + status = jb_next_packet_by_seq(jb, nodep); + } + return status; } } @@ -877,13 +1052,50 @@ SWITCH_DECLARE(void) switch_jb_ts_mode(switch_jb_t *jb, uint32_t samples_per_fra switch_core_inthash_init(&jb->node_hash_ts); } +SWITCH_DECLARE(void) switch_jb_set_jitter_estimator(switch_jb_t *jb, double *jitter, uint32_t samples_per_frame, uint32_t samples_per_second) +{ + if (jb && jitter) { + memset(&jb->jitter,0,sizeof(switch_jb_jitter_t)); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_max_ms", "%u", 0); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_size_ms", "%u", 0); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_acceleration_ms", "%u", 0); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_expand_ms", "%u", 0); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_jitter_max_ms", "%u", 0); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_jitter_ms", "%u", 0); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_reset_count", "%u", 0); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_reset_too_big", "%u", 0); + 
switch_channel_set_variable_printf(jb->channel, "rtp_jb_reset_missing_frames", "%u", 0); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_reset_ts_jump", "%u", 0); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_reset_error", "%u", 0); + jb->jitter.estimate = jitter; + jb->jitter.samples_per_frame = samples_per_frame; + jb->jitter.samples_per_second = samples_per_second; + jb->jitter.drop_gap = 5; + } +} + SWITCH_DECLARE(void) switch_jb_set_session(switch_jb_t *jb, switch_core_session_t *session) { const char *var; if (session) { + jb->codec = switch_core_session_get_read_codec(session); jb->session = session; jb->channel = switch_core_session_get_channel(session); + if (!strcmp(jb->codec->implementation->iananame, "opus")) { + if (switch_true(switch_channel_get_variable(jb->channel, "rtp_jitter_buffer_accelerate"))) { + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "codec is %s, accelerate on\n", jb->codec->implementation->iananame); + jb->elastic = SWITCH_TRUE; + } else { + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "codec is %s, accelerate off\n", jb->codec->implementation->iananame); + jb->elastic = SWITCH_FALSE; + } + + } else { + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "codec not opus: %s\n", jb->codec->implementation->iananame); + jb->elastic = SWITCH_FALSE; + } + if (jb->type == SJB_VIDEO && !switch_test_flag(jb, SJB_QUEUE_ONLY) && (var = switch_channel_get_variable_dup(jb->channel, "jb_video_low_bitrate", SWITCH_FALSE, -1))) { int tmp = atoi(var); @@ -932,6 +1144,12 @@ SWITCH_DECLARE(void) switch_jb_debug_level(switch_jb_t *jb, uint8_t level) SWITCH_DECLARE(void) switch_jb_reset(switch_jb_t *jb) { + jb->jitter.stats.reset++; + switch_channel_set_variable_printf(jb->channel, "rtp_jb_reset_count", "%u", jb->jitter.stats.reset); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_reset_too_big", "%u", jb->jitter.stats.reset_too_big); + 
switch_channel_set_variable_printf(jb->channel, "rtp_jb_reset_missing_frames", "%u", jb->jitter.stats.reset_missing_frames); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_reset_ts_jump", "%u", jb->jitter.stats.reset_ts_jump); + switch_channel_set_variable_printf(jb->channel, "rtp_jb_reset_error", "%u", jb->jitter.stats.reset_error); if (jb->type == SJB_VIDEO) { switch_mutex_lock(jb->mutex); @@ -1257,6 +1475,7 @@ SWITCH_DECLARE(switch_status_t) switch_jb_put_packet(switch_jb_t *jb, switch_rtp if (got > want) { if (got - want > jb->max_frame_len && got - want > 17) { jb_debug(jb, 2, "Missing %u frames, Resetting\n", got - want); + jb->jitter.stats.reset_missing_frames++; switch_jb_reset(jb); } else { @@ -1434,6 +1653,7 @@ SWITCH_DECLARE(switch_status_t) switch_jb_get_packet(switch_jb_t *jb, switch_rtp switch(status) { case SWITCH_STATUS_RESTART: jb_debug(jb, 2, "%s", "Error encountered\n"); + jb->jitter.stats.reset_error++; switch_jb_reset(jb); switch_goto_status(SWITCH_STATUS_RESTART, end); case SWITCH_STATUS_NOTFOUND: @@ -1444,7 +1664,22 @@ SWITCH_DECLARE(switch_status_t) switch_jb_get_packet(switch_jb_t *jb, switch_rtp jb_debug(jb, 2, "%s", "Too many frames not found, RESIZE\n"); switch_goto_status(SWITCH_STATUS_RESTART, end); } else { - jb_debug(jb, 2, "%s", "Frame not found suggest PLC\n"); + if (jb->elastic) { + int visible_not_old = check_jb_size(jb); + jb->jitter.stats.estimate_ms = (int)((*jb->jitter.estimate)/((jb->jitter.samples_per_second))*1000); + jb->jitter.stats.buffer_size_ms = (int)((visible_not_old*jb->jitter.samples_per_frame)/(jb->jitter.samples_per_second/1000)); + /* When playing PLC, we take the opportunity to expand the buffer if the jitter buffer is smaller than 3x the estimated jitter. 
*/ + if (jb->jitter.stats.buffer_size_ms < (3*jb->jitter.stats.estimate_ms)) { + jb_debug(jb, SWITCH_LOG_INFO, "JITTER estimation %dms buffersize %d/%d %dms EXPAND [plc]\n", + jb->jitter.stats.estimate_ms, jb->complete_frames , jb->frame_len, jb->jitter.stats.buffer_size_ms); + jb->jitter.stats.expand++; + decrement_seq(jb); + } else { + jb_debug(jb, 2, "%s", "Frame not found suggest PLC\n"); + } + } else { + jb_debug(jb, 2, "%s", "Frame not found suggest PLC\n"); + } plc = 1; switch_goto_status(SWITCH_STATUS_NOTFOUND, end); } diff --git a/src/switch_rtp.c b/src/switch_rtp.c index 614d79bf78..05b0858313 100644 --- a/src/switch_rtp.c +++ b/src/switch_rtp.c @@ -4676,6 +4676,7 @@ SWITCH_DECLARE(switch_status_t) switch_rtp_activate_jitter_buffer(switch_rtp_t * READ_INC(rtp_session); status = switch_jb_create(&rtp_session->jb, SJB_AUDIO, queue_frames, max_queue_frames, rtp_session->pool); switch_jb_set_session(rtp_session->jb, rtp_session->session); + switch_jb_set_jitter_estimator(rtp_session->jb, &rtp_session->stats.rtcp.inter_jitter, samples_per_packet, samples_per_second); if (switch_true(switch_channel_get_variable_dup(switch_core_session_get_channel(rtp_session->session), "jb_use_timestamps", SWITCH_FALSE, -1))) { switch_jb_ts_mode(rtp_session->jb, samples_per_packet, samples_per_second); }