/* * mod_ssml for FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application * Copyright (C) 2013, Grasshopper * * Version: MPL 1.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is mod_ssml for FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application * * The Initial Developer of the Original Code is Grasshopper * Portions created by the Initial Developer are Copyright (C) * the Initial Developer. All Rights Reserved. * * Contributor(s): * Chris Rienzo * * mod_ssml.c -- SSML audio rendering format * */ #include #include SWITCH_MODULE_LOAD_FUNCTION(mod_ssml_load); SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_ssml_shutdown); SWITCH_MODULE_DEFINITION(mod_ssml, mod_ssml_load, mod_ssml_shutdown, NULL); #define MAX_VOICE_FILES 256 #define MAX_VOICE_PRIORITY 999 #define VOICE_NAME_PRIORITY 1000 #define VOICE_GENDER_PRIORITY 1000 #define VOICE_LANG_PRIORITY 1000000 struct ssml_parser; /** function to handle tag attributes */ typedef int (* tag_attribs_fn)(struct ssml_parser *, char **); /** function to handle tag CDATA */ typedef int (* tag_cdata_fn)(struct ssml_parser *, char *, size_t); /** * Tag definition */ struct tag_def { tag_attribs_fn attribs_fn; tag_cdata_fn cdata_fn; switch_bool_t is_root; switch_hash_t *children_tags; }; /** * Module configuration */ static struct { /** Mapping of mod-name-language-gender to voice */ switch_hash_t *voice_cache; /** Mapping of voice names */ switch_hash_t *say_voice_map; /** Mapping of voice names */ switch_hash_t *tts_voice_map; /** Mapping of interpret-as value to macro */ switch_hash_t *interpret_as_map; /** Mapping of ISO language code to say-module */ switch_hash_t *language_map; /** Mapping of tag name to definition */ switch_hash_t *tag_defs; /** module memory pool */ switch_memory_pool_t *pool; } globals; /** * A say language */ struct language { /** The ISO language code */ char *iso; /** The FreeSWITCH language code */ char *language; /** The say module name */ char *say_module; }; /** * A say macro */ struct macro { /** interpret-as name (cardinal...) */ char *name; /** language (en-US, en-UK, ...) */ char *language; /** type (number, items, persons, messages...) */ char *type; /** method (pronounced, counted, iterated...) */ char *method; }; /** * A TTS voice */ struct voice { /** higher priority = more likely to pick */ int priority; /** voice gender */ char *gender; /** voice name / macro */ char *name; /** voice language */ char *language; /** internal file prefix */ char *prefix; }; #define TAG_LEN 32 #define NAME_LEN 128 #define LANGUAGE_LEN 6 #define GENDER_LEN 8 /** * SSML voice state */ struct ssml_node { /** tag name */ char tag_name[TAG_LEN]; /** requested name */ char name[NAME_LEN]; /** requested language */ char language[LANGUAGE_LEN]; /** requested gender */ char gender[GENDER_LEN]; /** voice to use */ struct voice *tts_voice; /** say macro to use */ struct macro *say_macro; /** tag handling data */ struct tag_def *tag_def; /** previous node */ struct ssml_node *parent_node; }; /** * A file to play */ struct ssml_file { /** prefix to add to file handle */ char *prefix; /** the file to play */ const char *name; }; /** * SSML parser state */ struct ssml_parser { /** current attribs */ struct ssml_node *cur_node; /** files to play */ struct ssml_file *files; /** number of files */ int num_files; /** max files to play */ int max_files; /** memory pool to use */ switch_memory_pool_t *pool; /** desired sample rate */ int sample_rate; }; /** * SSML playback state */ struct ssml_context { /** handle to current file */ switch_file_handle_t fh; /** files to play */ struct ssml_file *files; /** number of files */ int num_files; /** current file being played */ int index; }; /** * Add a definition for a tag * @param tag the name * @param attribs_fn the function to handle the tag attributes * @param cdata_fn the function to handler the tag CDATA * @param children_tags comma-separated list of valid child tag names * @return the definition */ static struct tag_def *add_tag_def(const char *tag, tag_attribs_fn attribs_fn, tag_cdata_fn cdata_fn, const char *children_tags) { struct tag_def *def = switch_core_alloc(globals.pool, sizeof(*def)); switch_core_hash_init(&def->children_tags, globals.pool); if (!zstr(children_tags)) { char *children_tags_dup = switch_core_strdup(globals.pool, children_tags); char *tags[32] = { 0 }; int tag_count = switch_separate_string(children_tags_dup, ',', tags, sizeof(tags) / sizeof(tags[0])); if (tag_count) { int i; for (i = 0; i < tag_count; i++) { switch_core_hash_insert(def->children_tags, tags[i], tags[i]); } } } def->attribs_fn = attribs_fn; def->cdata_fn = cdata_fn; def->is_root = SWITCH_FALSE; switch_core_hash_insert(globals.tag_defs, tag, def); return def; } /** * Add a definition for a root tag * @param tag the name * @param attribs_fn the function to handle the tag attributes * @param cdata_fn the function to handler the tag CDATA * @param children_tags comma-separated list of valid child tag names * @return the definition */ static struct tag_def *add_root_tag_def(const char *tag, tag_attribs_fn attribs_fn, tag_cdata_fn cdata_fn, const char *children_tags) { struct tag_def *def = add_tag_def(tag, attribs_fn, cdata_fn, children_tags); def->is_root = SWITCH_TRUE; return def; } /** * Handle tag attributes * @param parser the parser * @param name the tag name * @param atts the attributes * @return IKS_OK if OK IKS_BADXML on parse failure */ static int process_tag(struct ssml_parser *parser, const char *name, char **atts) { struct tag_def *def = switch_core_hash_find(globals.tag_defs, name); if (def) { parser->cur_node->tag_def = def; if (def->is_root && parser->cur_node->parent_node == NULL) { /* no parent for ROOT tags */ return def->attribs_fn(parser, atts); } else if (!def->is_root && parser->cur_node->parent_node) { /* check if this child is allowed by parent node */ struct tag_def *parent_def = parser->cur_node->parent_node->tag_def; if (switch_core_hash_find(parent_def->children_tags, "ANY") || switch_core_hash_find(parent_def->children_tags, name)) { return def->attribs_fn(parser, atts); } } } return IKS_BADXML; } /** * Handle tag attributes that are ignored * @param parser the parser * @param atts the attributes * @return IKS_OK */ static int process_attribs_ignore(struct ssml_parser *parser, char **atts) { return IKS_OK; } /** * Handle CDATA that is ignored * @param parser the parser * @param data the CDATA * @param len the CDATA length * @return IKS_OK */ static int process_cdata_ignore(struct ssml_parser *parser, char *data, size_t len) { return IKS_OK; } /** * Handle CDATA that is not allowed * @param parser the parser * @param data the CDATA * @param len the CDATA length * @return IKS_BADXML */ static int process_cdata_bad(struct ssml_parser *parser, char *data, size_t len) { int i; for (i = 0; i < len; i++) { if (isgraph(data[i])) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Unexpected CDATA for <%s>\n", parser->cur_node->tag_name); return IKS_BADXML; } } return IKS_OK; } /** * Score the voice on how close it is to desired language, name, and gender * @param voice the voice to score * @param cur_node the desired voice attributes * @param lang_required if true, language must match * @return the score */ static int score_voice(struct voice *voice, struct ssml_node *cur_node, int lang_required) { /* language > gender,name > priority */ int score = voice->priority; if (!zstr_buf(cur_node->gender) && !strcmp(cur_node->gender, voice->gender)) { score += VOICE_GENDER_PRIORITY; } if (!zstr_buf(cur_node->name) && !strcmp(cur_node->name, voice->name)) { score += VOICE_NAME_PRIORITY; } if (!zstr_buf(cur_node->language) && !strcmp(cur_node->language, voice->language)) { score += VOICE_LANG_PRIORITY; } else if (lang_required) { score = 0; } return score; } /** * Search for best voice based on attributes * @param cur_node the desired voice attributes * @param map the map to search * @param type "say" or "tts" * @param lang_required if true, language must match * @return the voice or NULL */ static struct voice *find_voice(struct ssml_node *cur_node, switch_hash_t *map, char *type, int lang_required) { switch_hash_index_t *hi = NULL; struct voice *voice = (struct voice *)switch_core_hash_find(map, cur_node->name); char *lang_name_gender = NULL; int best_score = 0; /* check cache */ lang_name_gender = switch_mprintf("%s-%s-%s-%s", type, cur_node->language, cur_node->name, cur_node->gender); voice = (struct voice *)switch_core_hash_find(globals.voice_cache, lang_name_gender); if (voice) { /* that was easy! */ goto done; } /* find best language, name, gender match */ for (hi = switch_hash_first(NULL, map); hi; hi = switch_hash_next(hi)) { const void *key; void *val; struct voice *candidate; int candidate_score = 0; switch_hash_this(hi, &key, NULL, &val); candidate = (struct voice *)val; candidate_score = score_voice(candidate, cur_node, lang_required); if (candidate_score > 0 && candidate_score > best_score) { voice = candidate; best_score = candidate_score; } } /* remember for next time */ if (voice) { switch_core_hash_insert(globals.voice_cache, lang_name_gender, voice); } done: switch_safe_free(lang_name_gender); return voice; } /** * Search for best voice based on attributes * @param cur_node the desired voice attributes * @return the voice or NULL */ static struct voice *find_tts_voice(struct ssml_node *cur_node) { return find_voice(cur_node, globals.tts_voice_map, "tts", 0); } /** * Search for best voice based on attributes * @param cur_node the desired voice attributes * @return the voice or NULL */ static struct voice *find_say_voice(struct ssml_node *cur_node) { return find_voice(cur_node, globals.say_voice_map, "say", 1); } /** * open next file for reading * @param handle the file handle */ static switch_status_t next_file(switch_file_handle_t *handle) { struct ssml_context *context = handle->private_info; const char *file; top: context->index++; if (switch_test_flag((&context->fh), SWITCH_FILE_OPEN)) { switch_core_file_close(&context->fh); } if (context->index >= context->num_files) { return SWITCH_STATUS_FALSE; } file = context->files[context->index].name; context->fh.prefix = context->files[context->index].prefix; if (switch_test_flag(handle, SWITCH_FILE_FLAG_WRITE)) { /* unsupported */ return SWITCH_STATUS_FALSE; } if (switch_core_file_open(&context->fh, file, handle->channels, handle->samplerate, handle->flags, NULL) != SWITCH_STATUS_SUCCESS) { goto top; } handle->samples = context->fh.samples; handle->format = context->fh.format; handle->sections = context->fh.sections; handle->seekable = context->fh.seekable; handle->speed = context->fh.speed; handle->interval = context->fh.interval; if (switch_test_flag((&context->fh), SWITCH_FILE_NATIVE)) { switch_set_flag(handle, SWITCH_FILE_NATIVE); } else { switch_clear_flag(handle, SWITCH_FILE_NATIVE); } return SWITCH_STATUS_SUCCESS; } /** * Process xml:lang attribute */ static int process_xml_lang(struct ssml_parser *parsed_data, char **atts) { struct ssml_node *cur_node = parsed_data->cur_node; /* only allow language change in ,

, and */ if (atts) { int i = 0; while (atts[i]) { if (!strcmp("xml:lang", atts[i])) { if (!zstr(atts[i + 1])) { strncpy(cur_node->language, atts[i + 1], LANGUAGE_LEN); cur_node->language[LANGUAGE_LEN - 1] = '\0'; } } i += 2; } } cur_node->tts_voice = find_tts_voice(cur_node); return IKS_OK; } /** * Process */ static int process_voice(struct ssml_parser *parsed_data, char **atts) { struct ssml_node *cur_node = parsed_data->cur_node; if (atts) { int i = 0; while (atts[i]) { if (!strcmp("xml:lang", atts[i])) { if (!zstr(atts[i + 1])) { strncpy(cur_node->language, atts[i + 1], LANGUAGE_LEN); cur_node->language[LANGUAGE_LEN - 1] = '\0'; } } else if (!strcmp("name", atts[i])) { if (!zstr(atts[i + 1])) { strncpy(cur_node->name, atts[i + 1], NAME_LEN); cur_node->name[NAME_LEN - 1] = '\0'; } } else if (!strcmp("gender", atts[i])) { if (!zstr(atts[i + 1])) { strncpy(cur_node->gender, atts[i + 1], GENDER_LEN); cur_node->gender[GENDER_LEN - 1] = '\0'; } } i += 2; } } cur_node->tts_voice = find_tts_voice(cur_node); return IKS_OK; } /** * Process */ static int process_say_as(struct ssml_parser *parsed_data, char **atts) { struct ssml_node *cur_node = parsed_data->cur_node; if (atts) { int i = 0; while (atts[i]) { if (!strcmp("interpret-as", atts[i])) { char *interpret_as = atts[i + 1]; if (!zstr(interpret_as)) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "interpret-as: %s\n", atts[i + 1]); cur_node->say_macro = (struct macro *)switch_core_hash_find(globals.interpret_as_map, interpret_as); } break; } i += 2; } } cur_node->tts_voice = find_tts_voice(cur_node); return IKS_OK; } /** * Process - this is a period of silence */ static int process_break(struct ssml_parser *parsed_data, char **atts) { if (atts) { int i = 0; while (atts[i]) { if (!strcmp("time", atts[i])) { char *t = atts[i + 1]; if (!zstr(t) && parsed_data->num_files < parsed_data->max_files) { int timeout_ms = 0; char *unit; if ((unit = strstr(t, "ms"))) { *unit = '\0'; if (switch_is_number(t)) { timeout_ms = atoi(t); } } else if ((unit = strstr(t, "s"))) { *unit = '\0'; if (switch_is_number(t)) { timeout_ms = atoi(t) * 1000; } } if (timeout_ms > 0) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Adding : \"%s\"\n", t); parsed_data->files[parsed_data->num_files].name = switch_core_sprintf(parsed_data->pool, "silence_stream://%i", timeout_ms); parsed_data->files[parsed_data->num_files++].prefix = NULL; } } return IKS_OK; } i += 2; } } return IKS_OK; } /** * Process