From bc6842a9508b0bc9d3fd674948162c1d68a778fc Mon Sep 17 00:00:00 2001 From: rucc Date: Fri, 17 Jan 2020 13:56:32 +0100 Subject: [PATCH] [mod_dptools:playback] fix audio speed change to preserve pitch When a file is being played with the playback application and the speed is changed with eg. uuid_fileman speed:+1 the audio pitch increases. This commit tries to preserve the pitch even when the speed is changed. How? + Use longer sample chunks (where original freq is preserved) + Search for the best cut position using cross-correlation + Crossfade adjacent chunks to each other This updated co-authored commit from the original attempts to optimize memory usage by allocating memory on the heap for samples of the previous frame, but using stack memory for the modified data that is flushed to the buffer. Co-authored-by: hari Co-authored-by: Joseph Nadiv --- src/switch_ivr_play_say.c | 110 +++++++++++++++++++++++++++++--------- 1 file changed, 85 insertions(+), 25 deletions(-) diff --git a/src/switch_ivr_play_say.c b/src/switch_ivr_play_say.c index 25486eee7f..53a4fbc2ac 100644 --- a/src/switch_ivr_play_say.c +++ b/src/switch_ivr_play_say.c @@ -1360,6 +1360,14 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_play_file(switch_core_session_t *sess for (cur = 0; switch_channel_ready(channel) && !done && cur < argc; cur++) { + const int sp_fadeLen = 32; + const int sp_cut_src_rng = 64; + int16_t sp_ovrlap[32]; + int sp_has_overlap = 0; + int sp_prev_idx = 0; + int sp_prev_cap = 0; + int16_t *sp_prev = NULL; + file = argv[cur]; eof = 0; @@ -1859,42 +1867,91 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_play_file(switch_core_session_t *sess if (!switch_test_flag(fh, SWITCH_FILE_NATIVE) && fh->speed && do_speed) { float factor = 0.25f * abs(fh->speed); - switch_size_t newlen, supplement, step; - short *bp = write_frame.data; - switch_size_t wrote = 0; + int16_t* bp = write_frame.data; + switch_size_t supplement = (int) (factor * olen); + switch_size_t newlen = (fh->speed > 0) ? olen - supplement : olen + supplement; + int src_rng = (fh->speed > 0 ? supplement : sp_cut_src_rng)* sp_has_overlap; + int datalen = newlen + src_rng + sp_fadeLen; + int extra = datalen - olen; + int16_t data[datalen * 2]; + int16_t* currp = NULL; + int best_cut_idx = 0; - supplement = (int) (factor * olen); - if (!supplement) { - supplement = 1; + memset(data, 0, sizeof(data)); + + if (!sp_prev) { + sp_prev_cap = olen * 3 + sp_prev_idx; + switch_zmalloc(sp_prev, sp_prev_cap); } - newlen = (fh->speed > 0) ? olen - supplement : olen + supplement; - - step = (fh->speed > 0) ? (newlen / supplement) : (olen / supplement); if (!fh->sp_audio_buffer) { switch_buffer_create_dynamic(&fh->sp_audio_buffer, 1024, 1024, 0); } - while ((wrote + step) < newlen) { - switch_buffer_write(fh->sp_audio_buffer, bp, step * 2); - wrote += step; - bp += step; - if (fh->speed > 0) { - bp++; + if (extra > 0) { + int cont = 0; + currp = data; + + if (extra > sp_prev_idx) { + // not enough data + switch_buffer_write(fh->sp_audio_buffer, bp, newlen * 2); + cont = 1; } else { - float f; - short s; - f = (float) (*bp + *(bp + 1) + *(bp - 1)); - f /= 3; - s = (short) f; - switch_buffer_write(fh->sp_audio_buffer, &s, 2); - wrote++; + memcpy(currp, sp_prev + sp_prev_idx - extra, extra * 2); + memcpy(currp + extra, bp, olen * 2); + } + + if (olen * 3 + sp_prev_idx > sp_prev_cap) { + sp_prev_cap = olen * 3 + sp_prev_idx; + sp_prev = realloc(sp_prev, sp_prev_cap); + sp_prev_idx = 0; + } + + if (sp_prev_idx > olen) { + memmove(sp_prev, sp_prev + sp_prev_idx - olen, olen * 2); + sp_prev_idx = olen; + } + + memcpy(sp_prev + sp_prev_idx, bp, olen * 2); + sp_prev_idx += olen; + + if (cont) { + continue; + } + } else { + sp_prev_idx = 0; + currp = bp; + } + + if (sp_has_overlap) { + double best = INT_MIN; + for (int idx_src = 0; idx_src < src_rng; idx_src++) { + double cc = 0; + for (int i = 0; i < sp_fadeLen; i++) { + cc += sp_ovrlap[i] * (*(currp + idx_src + i)); + } + if (cc > best) { + best = cc; + best_cut_idx = idx_src; + } } } - if (wrote < newlen) { - switch_size_t r = newlen - wrote; - switch_buffer_write(fh->sp_audio_buffer, bp, r * 2); + + currp += best_cut_idx; + switch_buffer_write(fh->sp_audio_buffer, currp, newlen * 2); + currp += newlen; + + if (sp_has_overlap) { + int16_t* ret = (int16_t*)((unsigned char*)switch_buffer_get_head_pointer(fh->sp_audio_buffer) + switch_buffer_inuse(fh->sp_audio_buffer) - 2 * newlen); + // crossfade + for (int i = 0; i < (newlen < sp_fadeLen ? newlen : sp_fadeLen); i++) { + double factor = ((double)i) / sp_fadeLen; + *(ret + i) = (int16_t)(sp_ovrlap[i] * (1 - factor) + *(ret + i) * factor); + } } + + memcpy(sp_ovrlap, currp, sp_fadeLen * 2); + sp_has_overlap = 1; last_speed = fh->speed; continue; } @@ -2056,6 +2113,9 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_play_file(switch_core_session_t *sess if (fh->sp_audio_buffer) { switch_buffer_destroy(&fh->sp_audio_buffer); } + + switch_safe_free(sp_prev); + } if (switch_core_codec_ready((&codec))) {