diff --git a/src/include/switch_core.h b/src/include/switch_core.h index b2d0c99165..ff8b47276d 100644 --- a/src/include/switch_core.h +++ b/src/include/switch_core.h @@ -895,6 +895,28 @@ SWITCH_DECLARE(switch_status_t) switch_core_speech_interpret_asr(switch_speech_h */ SWITCH_DECLARE(switch_status_t) switch_core_speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags); +/*! + \brief Flush TTS audio on a given handle + \param sh the speech handle +*/ +SWITCH_DECLARE(void) switch_core_speech_flush_tts(switch_speech_handle_t *sh); + +/*! + \brief Set a text parameter on a TTS handle + \param sh the speech handle + \param param the parameter + \param val the value +*/ +SWITCH_DECLARE(void) switch_core_speech_text_param_tts(switch_speech_handle_t *sh, char *param, char *val); + +/*! + \brief Set a numeric parameter on a TTS handle + \param sh the speech handle + \param param the parameter + \param val the value +*/ +SWITCH_DECLARE(void) switch_core_speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val); + /*! \brief Read rendered audio from the TTS module \param sh the speech handle to read diff --git a/src/include/switch_ivr.h b/src/include/switch_ivr.h index 69c02b872e..f0f75623ee 100644 --- a/src/include/switch_ivr.h +++ b/src/include/switch_ivr.h @@ -125,6 +125,15 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_record_file(switch_core_session_t *se unsigned int buflen); +SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text_handle(switch_core_session_t *session, + switch_speech_handle_t *sh, + switch_codec_t *codec, + switch_timer_t *timer, + switch_dtmf_callback_function_t dtmf_callback, + char *text, + void *buf, + unsigned int buflen); + /*! \brief Speak given text with given tts engine \param session the session to speak on @@ -139,14 +148,15 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_record_file(switch_core_session_t *se \return SWITCH_STATUS_SUCCESS if all is well */ SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *session, - char *tts_name, - char *voice_name, - char *timer_name, - uint32_t rate, - switch_dtmf_callback_function_t dtmf_callback, - char *text, - void *buf, - unsigned int buflen); + char *tts_name, + char *voice_name, + char *timer_name, + uint32_t rate, + switch_dtmf_callback_function_t dtmf_callback, + char *text, + void *buf, + unsigned int buflen); + /*! \brief Bridge Audio from one session to another diff --git a/src/include/switch_module_interfaces.h b/src/include/switch_module_interfaces.h index 2f86cb22a6..7f6920e6a4 100644 --- a/src/include/switch_module_interfaces.h +++ b/src/include/switch_module_interfaces.h @@ -327,6 +327,9 @@ struct switch_speech_interface { switch_size_t *datalen, uint32_t *rate, switch_speech_flag_t *flags); + void (*speech_flush_tts)(switch_speech_handle_t *sh); + void (*speech_text_param_tts)(switch_speech_handle_t *sh, char *param, char *val); + void (*speech_numeric_param_tts)(switch_speech_handle_t *sh, char *param, int val); const struct switch_speech_interface *next; }; @@ -338,7 +341,10 @@ struct switch_speech_handle { const switch_speech_interface_t *speech_interface; /*! flags to control behaviour */ uint32_t flags; - + /*! The Name*/ + char *name; + /*! The Rate*/ + uint32_t rate; /*! the handle's memory pool */ switch_memory_pool_t *memory_pool; /*! private data for the format module to store handle specific info */ diff --git a/src/mod/asr_tts/mod_cepstral/mod_cepstral.c b/src/mod/asr_tts/mod_cepstral/mod_cepstral.c index e8aaace236..ded36faac8 100644 --- a/src/mod/asr_tts/mod_cepstral/mod_cepstral.c +++ b/src/mod/asr_tts/mod_cepstral/mod_cepstral.c @@ -36,6 +36,8 @@ #include #include +#define MY_BUF_LEN 1024 * 512 + static const char modname[] = "mod_cepstral"; static swift_engine *engine; @@ -59,23 +61,42 @@ static swift_result_t write_audio(swift_event *event, swift_event_t type, void * cepstral_t *cepstral; swift_event_t rv = SWIFT_SUCCESS; void *buf = NULL; - int len = 0; + int len = 0, i = 0; cepstral = udata; assert(cepstral != NULL); + if (!cepstral->port || cepstral->done || cepstral->done_gen) { + return SWIFT_UNKNOWN_ERROR; + } + /* Only proceed when we have success */ if (!SWIFT_FAILED((rv = swift_event_get_audio(event, &buf, &len)))) { - switch_mutex_lock(cepstral->audio_lock); - - if (switch_buffer_write(cepstral->audio_buffer, buf, len) <= 0) { - rv = SWIFT_UNKNOWN_ERROR; + while(!cepstral->done) { + switch_mutex_lock(cepstral->audio_lock); + if (switch_buffer_write(cepstral->audio_buffer, buf, len) > 0) { + switch_mutex_unlock(cepstral->audio_lock); + break; + } + switch_mutex_unlock(cepstral->audio_lock); + if (!cepstral->done) { + for (i = 0; i < 10; i++) { + switch_yield(10000); + if (cepstral->done) { + break; + } + } + } + } - switch_mutex_unlock(cepstral->audio_lock); } else { cepstral->done = 1; } + if (cepstral->done) { + rv = SWIFT_UNKNOWN_ERROR; + } + return rv; } @@ -92,7 +113,7 @@ static switch_status_t cepstral_speech_open(switch_speech_handle_t *sh, char *vo return SWITCH_STATUS_MEMERR; } - if (switch_buffer_create(sh->memory_pool, &cepstral->audio_buffer, 1024 * 256) != SWITCH_STATUS_SUCCESS) { + if (switch_buffer_create(sh->memory_pool, &cepstral->audio_buffer, MY_BUF_LEN) != SWITCH_STATUS_SUCCESS) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Write Buffer Failed!\n"); return SWITCH_STATUS_MEMERR; } @@ -151,6 +172,11 @@ static switch_status_t cepstral_speech_close(switch_speech_handle_t *sh, switch_ cepstral = sh->private_info; assert(cepstral != NULL); + + cepstral->done = 1; + cepstral->done_gen = 1; + printf("CLOSE!!!\n"); + swift_port_stop(cepstral->port, SWIFT_ASYNC_ANY, SWIFT_EVENT_NOW); /* Close the Swift Port and Engine */ if (NULL != cepstral->port) swift_port_close(cepstral->port); //if (NULL != cepstral->engine) swift_engine_close(cepstral->engine); @@ -169,13 +195,33 @@ static switch_status_t cepstral_speech_feed_tts(switch_speech_handle_t *sh, char cepstral = sh->private_info; assert(cepstral != NULL); - swift_port_speak_text(cepstral->port, text, 0, NULL, &cepstral->tts_stream, NULL); - //swift_port_speak_text(cepstral->port, text, 0, NULL, NULL, NULL); + cepstral->done_gen = 0; + cepstral->done = 0; + + cepstral->tts_stream = NULL; + + swift_port_speak_text(cepstral->port, text, 0, NULL, &cepstral->tts_stream, NULL); - return SWITCH_STATUS_FALSE; } +static void cepstral_speech_flush_tts(switch_speech_handle_t *sh) +{ + cepstral_t *cepstral; + + cepstral = sh->private_info; + assert(cepstral != NULL); + + cepstral->done_gen = 1; + cepstral->done = 1; + if (cepstral->audio_buffer) { + switch_mutex_lock(cepstral->audio_lock); + switch_buffer_zero(cepstral->audio_buffer); + switch_mutex_unlock(cepstral->audio_lock); + } + swift_port_stop(cepstral->port, SWIFT_ASYNC_ANY, SWIFT_EVENT_NOW); +} + static switch_status_t cepstral_speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *datalen, @@ -193,18 +239,23 @@ static switch_status_t cepstral_speech_read_tts(switch_speech_handle_t *sh, while(!cepstral->done) { if (!cepstral->done_gen) { - int check = (SWIFT_STATUS_RUNNING == swift_port_status(cepstral->port, cepstral->tts_stream)); - if (!check) { + int check = swift_port_status(cepstral->port, cepstral->tts_stream); + + if (!check == SWIFT_STATUS_RUNNING) { cepstral->done_gen = 1; } } + switch_mutex_lock(cepstral->audio_lock); used = switch_buffer_inuse(cepstral->audio_buffer); - + switch_mutex_unlock(cepstral->audio_lock); + + if (!used && cepstral->done_gen) { + + status = SWITCH_STATUS_BREAK; break; } - /* wait for the right amount of data (unless there is no blocking flag) */ if (used < desired) { @@ -250,8 +301,8 @@ static const switch_speech_interface_t cepstral_speech_interface = { /*.speech_feed_asr*/ NULL, /*.speech_interpret_asr*/ NULL, /*.speech_feed_tts*/ cepstral_speech_feed_tts, - /*.speech_read_tts*/ cepstral_speech_read_tts - + /*.speech_read_tts*/ cepstral_speech_read_tts, + /*.speech_flush_tts*/ cepstral_speech_flush_tts }; static const switch_loadable_module_interface_t cepstral_module_interface = { diff --git a/src/switch_core.c b/src/switch_core.c index 929bd2dc7a..1c768c0e13 100644 --- a/src/switch_core.c +++ b/src/switch_core.c @@ -565,11 +565,11 @@ SWITCH_DECLARE(switch_status_t) switch_core_directory_close(switch_directory_han } SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t *sh, - char *module_name, - char *voice_name, - unsigned int rate, - switch_speech_flag_t *flags, - switch_memory_pool_t *pool) + char *module_name, + char *voice_name, + unsigned int rate, + switch_speech_flag_t *flags, + switch_memory_pool_t *pool) { switch_status_t status; @@ -587,7 +587,8 @@ SWITCH_DECLARE(switch_status_t) switch_core_speech_open(switch_speech_handle_t * } switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL); } - + sh->rate = rate; + sh->name = switch_core_strdup(pool, module_name); return sh->speech_interface->speech_open(sh, voice_name, rate, flags); } @@ -612,6 +613,33 @@ SWITCH_DECLARE(switch_status_t) switch_core_speech_feed_tts(switch_speech_handle return sh->speech_interface->speech_feed_tts(sh, text, flags); } +SWITCH_DECLARE(void) switch_core_speech_flush_tts(switch_speech_handle_t *sh) +{ + assert(sh != NULL); + + if (sh->speech_interface->speech_flush_tts) { + sh->speech_interface->speech_flush_tts(sh); + } +} + +SWITCH_DECLARE(void) switch_core_speech_text_param_tts(switch_speech_handle_t *sh, char *param, char *val) +{ + assert(sh != NULL); + + if (sh->speech_interface->speech_text_param_tts) { + sh->speech_interface->speech_text_param_tts(sh, param, val); + } +} + +SWITCH_DECLARE(void) switch_core_speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) +{ + assert(sh != NULL); + + if (sh->speech_interface->speech_numeric_param_tts) { + sh->speech_interface->speech_numeric_param_tts(sh, param, val); + } +} + SWITCH_DECLARE(switch_status_t) switch_core_speech_read_tts(switch_speech_handle_t *sh, void *data, switch_size_t *datalen, diff --git a/src/switch_ivr.c b/src/switch_ivr.c index 74d93e70e1..c5f859f458 100644 --- a/src/switch_ivr.c +++ b/src/switch_ivr.c @@ -495,44 +495,186 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_play_file(switch_core_session_t *sess -SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *session, - char *tts_name, - char *voice_name, - char *timer_name, - uint32_t rate, - switch_dtmf_callback_function_t dtmf_callback, - char *text, - void *buf, - unsigned int buflen) +SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text_handle(switch_core_session_t *session, + switch_speech_handle_t *sh, + switch_codec_t *codec, + switch_timer_t *timer, + switch_dtmf_callback_function_t dtmf_callback, + char *text, + void *buf, + unsigned int buflen) { switch_channel_t *channel; short abuf[960]; char dtmf[128]; + uint32_t len = 0; + switch_size_t ilen = 0; + switch_frame_t write_frame = {0}; + int x; + int stream_id; + int done = 0; + int lead_in_out = 10; + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_TTS; + uint32_t rate = 0, samples = 0; + + channel = switch_core_session_get_channel(session); + assert(channel != NULL); + + if (!sh) { + return SWITCH_STATUS_FALSE; + } + + switch_channel_answer(channel); + + write_frame.data = abuf; + write_frame.buflen = sizeof(abuf); + + samples = (uint32_t)(sh->rate / 50); + len = samples * 2; + + flags = 0; + switch_core_speech_feed_tts(sh, text, &flags); + write_frame.rate = sh->rate; + + memset(write_frame.data, 0, len); + write_frame.datalen = len; + write_frame.samples = len / 2; + write_frame.codec = codec; + + for( x = 0; !done && x < lead_in_out; x++) { + for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) { + if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Bad Write\n"); + done = 1; + break; + } + } + } + + ilen = len; + while(switch_channel_ready(channel)) { + flags = SWITCH_SPEECH_FLAG_BLOCKING; + status = switch_core_speech_read_tts(sh, + abuf, + &ilen, + &rate, + &flags); + + if (status != SWITCH_STATUS_SUCCESS) { + for( x = 0; !done && x < lead_in_out; x++) { + for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) { + if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Bad Write\n"); + done = 1; + break; + } + } + } + if (status == SWITCH_STATUS_BREAK) { + status = SWITCH_STATUS_SUCCESS; + } + done = 1; + } + + if (done) { + break; + } + + write_frame.datalen = (uint32_t)ilen; + write_frame.samples = (uint32_t)(ilen / 2); + + for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) { + if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Bad Write\n"); + done = 1; + break; + } + + if (done) { + break; + } + } + + if (dtmf_callback || buf) { + /* + dtmf handler function you can hook up to be executed when a digit is dialed during playback + if you return anything but SWITCH_STATUS_SUCCESS the playback will stop. + */ + if (switch_channel_has_dtmf(channel)) { + switch_channel_dequeue_dtmf(channel, dtmf, sizeof(dtmf)); + if (dtmf_callback) { + status = dtmf_callback(session, dtmf, buf, buflen); + } else { + switch_copy_string((char *)buf, dtmf, buflen); + status = SWITCH_STATUS_BREAK; + } + } + + if (status != SWITCH_STATUS_SUCCESS) { + done = 1; + break; + } + } + + if (timer) { + if ((x = switch_core_timer_next(timer)) < 0) { + break; + } + } else { /* time off the channel (if you must) */ + switch_frame_t *read_frame; + switch_status_t status = switch_core_session_read_frame(session, &read_frame, -1, 0); + + while (switch_channel_test_flag(channel, CF_HOLD)) { + switch_yield(10000); + } + + if (!SWITCH_READ_ACCEPTABLE(status)) { + break; + } + } + + } + + + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "done speaking text\n"); + flags = 0; + switch_core_speech_flush_tts(sh); + return status; +} + + +SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *session, + char *tts_name, + char *voice_name, + char *timer_name, + uint32_t rate, + switch_dtmf_callback_function_t dtmf_callback, + char *text, + void *buf, + unsigned int buflen) +{ + switch_channel_t *channel; int interval = 0; uint32_t samples = 0; uint32_t len = 0; - switch_size_t ilen = 0; switch_frame_t write_frame = {0}; switch_timer_t timer; switch_core_thread_session_t thread_session; switch_codec_t codec; switch_memory_pool_t *pool = switch_core_session_get_pool(session); char *codec_name; - int x; int stream_id; - int done = 0; - int lead_in_out = 10; - switch_status_t status = SWITCH_STATUS_SUCCESS; switch_speech_handle_t sh; switch_speech_flag_t flags = SWITCH_SPEECH_FLAG_TTS; - memset(&sh, 0, sizeof(sh)); - channel = switch_core_session_get_channel(session); assert(channel != NULL); + + memset(&sh, 0, sizeof(sh)); if (switch_core_speech_open(&sh, tts_name, voice_name, @@ -543,13 +685,8 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *ses switch_core_session_reset(session); return SWITCH_STATUS_FALSE; } - + switch_channel_answer(channel); - - write_frame.data = abuf; - write_frame.buflen = sizeof(abuf); - - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "OPEN TTS %s\n", tts_name); interval = 20; @@ -582,119 +719,19 @@ SWITCH_DECLARE(switch_status_t) switch_ivr_speak_text(switch_core_session_t *ses switch_core_codec_destroy(&codec); flags = 0; switch_core_speech_close(&sh, &flags); + switch_core_session_reset(session); return SWITCH_STATUS_GENERR; } switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "setup timer success %u bytes per %d ms!\n", len, interval); - } - flags = 0; - switch_core_speech_feed_tts(&sh, text, &flags); - write_frame.rate = rate; - - memset(write_frame.data, 0, len); - write_frame.datalen = len; - write_frame.samples = len / 2; - - for( x = 0; !done && x < lead_in_out; x++) { - for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) { - if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Bad Write\n"); - done = 1; - break; - } - } - } - - if (timer_name) { /* start a thread to absorb incoming audio */ for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) { switch_core_service_session(session, &thread_session, stream_id); } } - ilen = len; - while(switch_channel_ready(channel)) { - if (dtmf_callback || buf) { - - - /* - dtmf handler function you can hook up to be executed when a digit is dialed during playback - if you return anything but SWITCH_STATUS_SUCCESS the playback will stop. - */ - if (switch_channel_has_dtmf(channel)) { - switch_channel_dequeue_dtmf(channel, dtmf, sizeof(dtmf)); - if (dtmf_callback) { - status = dtmf_callback(session, dtmf, buf, buflen); - } else { - switch_copy_string((char *)buf, dtmf, buflen); - status = SWITCH_STATUS_BREAK; - } - } - - if (status != SWITCH_STATUS_SUCCESS) { - done = 1; - break; - } - } - - flags = SWITCH_SPEECH_FLAG_BLOCKING; - status = switch_core_speech_read_tts(&sh, - abuf, - &ilen, - &rate, - &flags); - - if (status != SWITCH_STATUS_SUCCESS) { - for( x = 0; !done && x < lead_in_out; x++) { - for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) { - if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Bad Write\n"); - done = 1; - break; - } - } - } - done = 1; - } - - if (done || ilen <= 0) { - break; - } - - write_frame.datalen = (uint32_t)ilen; - write_frame.samples = (uint32_t)(ilen / 2); - - for (stream_id = 0; stream_id < switch_core_session_get_stream_count(session); stream_id++) { - if (switch_core_session_write_frame(session, &write_frame, -1, stream_id) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Bad Write\n"); - done = 1; - break; - } - - if (done) { - break; - } - } - if (timer_name) { - if ((x = switch_core_timer_next(&timer)) < 0) { - break; - } - } else { /* time off the channel (if you must) */ - switch_frame_t *read_frame; - switch_status_t status = switch_core_session_read_frame(session, &read_frame, -1, 0); - - while (switch_channel_test_flag(channel, CF_HOLD)) { - switch_yield(10000); - } - - if (!SWITCH_READ_ACCEPTABLE(status)) { - break; - } - } - } - - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "done speaking text\n"); + switch_ivr_speak_text_handle(session, &sh, &codec, timer_name ? &timer : NULL, dtmf_callback, text, buf, buflen); flags = 0; switch_core_speech_close(&sh, &flags); switch_core_codec_destroy(&codec);