/* * mod_rayo for FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application * Copyright (C) 2013, Grasshopper * * Version: MPL 1.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is mod_rayo for FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application * * The Initial Developer of the Original Code is Grasshopper * Portions created by the Initial Developer are Copyright (C) * the Initial Developer. All Rights Reserved. * * Contributor(s): * Chris Rienzo * * srgs.c -- Parses / converts / matches SRGS grammars * */ #include #include #include #include "srgs.h" #define MAX_RECURSION 100 /** function to handle tag attributes */ typedef int (* tag_attribs_fn)(struct srgs_grammar *, char **); /** function to handle tag CDATA */ typedef int (* tag_cdata_fn)(struct srgs_grammar *, char *, size_t); /** * Tag definition */ struct tag_def { tag_attribs_fn attribs_fn; tag_cdata_fn cdata_fn; switch_bool_t is_root; switch_hash_t *children_tags; }; /** * library configuration */ static struct { /** true if initialized */ switch_bool_t init; /** Mapping of tag name to definition */ switch_hash_t *tag_defs; /** library memory pool */ switch_memory_pool_t *pool; } globals; /** * SRGS node types */ enum srgs_node_type { /** anything */ SNT_ANY, /** */ SNT_GRAMMAR, /** */ SNT_RULE, /** */ SNT_ONE_OF, /** */ SNT_ITEM, /** unresolved reference to node */ SNT_UNRESOLVED_REF, /** resolved reference to node */ SNT_REF, /** string */ SNT_STRING, /** */ SNT_TAG, /** */ SNT_LEXICON, /** */ SNT_EXAMPLE, /** */ SNT_TOKEN, /** */ SNT_META, /** */ SNT_METADATA }; /** * value */ struct rule_value { char is_public; char *id; char *regex; }; /** * value */ struct item_value { int repeat_min; int repeat_max; const char *weight; }; /** * value */ union ref_value { struct srgs_node *node; char *uri; }; /** * A node in the SRGS parse tree */ struct srgs_node { /** Name of node */ const char *name; /** Type of node */ enum srgs_node_type type; /** True if node has been inspected for loops */ char visited; /** Node value */ union { char *root; const char *string; union ref_value ref; struct rule_value rule; struct item_value item; } value; /** parent node */ struct srgs_node *parent; /** child node */ struct srgs_node *child; /** sibling node */ struct srgs_node *next; /** number of child nodes */ int num_children; /** tag handling data */ struct tag_def *tag_def; }; /** * A parsed grammar */ struct srgs_grammar { /** grammar memory pool */ switch_memory_pool_t *pool; /** current node being parsed */ struct srgs_node *cur; /** rule names mapped to node */ switch_hash_t *rules; /** grammar encoding */ char *encoding; /** grammar language */ char *language; /** true if digit grammar */ int digit_mode; /** grammar parse tree root */ struct srgs_node *root; /** root rule */ struct srgs_node *root_rule; /** compiled grammar regex */ pcre *compiled_regex; /** grammar in regex format */ char *regex; /** grammar in JSGF format */ char *jsgf; /** grammar as JSGF file */ char *jsgf_file_name; /** synchronizes access to this grammar */ switch_mutex_t *mutex; /** optional uuid for logging */ const char *uuid; }; /** * The SRGS SAX parser */ struct srgs_parser { /** parser memory pool */ switch_memory_pool_t *pool; /** grammar cache */ switch_hash_t *cache; /** cache mutex */ switch_mutex_t *mutex; /** optional uuid for logging */ const char *uuid; }; /** * Convert entity name to node type * @param name of entity * @return the type or ANY */ static enum srgs_node_type string_to_node_type(char *name) { if (!strcmp("grammar", name)) { return SNT_GRAMMAR; } if (!strcmp("item", name)) { return SNT_ITEM; } if (!strcmp("one-of", name)) { return SNT_ONE_OF; } if (!strcmp("ruleref", name)) { return SNT_UNRESOLVED_REF; } if (!strcmp("rule", name)) { return SNT_RULE; } if (!strcmp("tag", name)) { return SNT_TAG; } if (!strcmp("lexicon", name)) { return SNT_LEXICON; } if (!strcmp("example", name)) { return SNT_EXAMPLE; } if (!strcmp("token", name)) { return SNT_TOKEN; } if (!strcmp("meta", name)) { return SNT_META; } if (!strcmp("metadata", name)) { return SNT_METADATA; } return SNT_ANY; } /** * Log node */ static void sn_log_node_open(struct srgs_node *node) { switch (node->type) { case SNT_ANY: case SNT_METADATA: case SNT_META: case SNT_TOKEN: case SNT_EXAMPLE: case SNT_LEXICON: case SNT_TAG: case SNT_ONE_OF: case SNT_GRAMMAR: switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG1, "<%s>\n", node->name); return; case SNT_RULE: switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG1, "\n", node->value.rule.id, node->value.rule.is_public ? "public" : "private"); return; case SNT_ITEM: switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG1, "\n", node->value.item.repeat_min); return; case SNT_UNRESOLVED_REF: switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG1, "value.ref.uri); return; case SNT_REF: switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG1, "\n", node->value.ref.node->value.rule.id); return; case SNT_STRING: switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG1, "%s\n", node->value.string); return; } } /** * Log node */ static void sn_log_node_close(struct srgs_node *node) { switch (node->type) { case SNT_GRAMMAR: case SNT_RULE: case SNT_ONE_OF: case SNT_ITEM: case SNT_REF: case SNT_TAG: case SNT_LEXICON: case SNT_EXAMPLE: case SNT_TOKEN: case SNT_META: case SNT_METADATA: case SNT_ANY: switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG1, "\n", node->name); return; case SNT_UNRESOLVED_REF: switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG1, "\n"); return; case SNT_STRING: return; } } /** * Create a new node * @param pool to use * @param name of node * @param type of node * @return the node */ static struct srgs_node *sn_new(switch_memory_pool_t *pool, const char *name, enum srgs_node_type type) { struct srgs_node *node = switch_core_alloc(pool, sizeof(*node)); node->name = switch_core_strdup(pool, name); node->type = type; return node; } /** * @param node to search * @return the last sibling of node */ static struct srgs_node *sn_find_last_sibling(struct srgs_node *node) { if (node && node->next) { return sn_find_last_sibling(node->next); } return node; } /** * Add child node * @param pool to use * @param parent node to add child to * @param name the child node name * @param type the child node type * @return the child node */ static struct srgs_node *sn_insert(switch_memory_pool_t *pool, struct srgs_node *parent, const char *name, enum srgs_node_type type) { struct srgs_node *sibling = parent ? sn_find_last_sibling(parent->child) : NULL; struct srgs_node *child = sn_new(pool, name, type); if (parent) { parent->num_children++; child->parent = parent; } if (sibling) { sibling->next = child; } else if (parent) { parent->child = child; } return child; } /** * Add string child node * @param pool to use * @param parent node to add string to * @param string to add - this function does not copy the string * @return the string child node */ static struct srgs_node *sn_insert_string(switch_memory_pool_t *pool, struct srgs_node *parent, char *string) { struct srgs_node *child = sn_insert(pool, parent, string, SNT_STRING); child->value.string = string; return child; } /** * Add a definition for a tag * @param tag the name * @param attribs_fn the function to handle the tag attributes * @param cdata_fn the function to handler the tag CDATA * @param children_tags comma-separated list of valid child tag names * @return the definition */ static struct tag_def *add_tag_def(const char *tag, tag_attribs_fn attribs_fn, tag_cdata_fn cdata_fn, const char *children_tags) { struct tag_def *def = switch_core_alloc(globals.pool, sizeof(*def)); switch_core_hash_init(&def->children_tags, globals.pool); if (!zstr(children_tags)) { char *children_tags_dup = switch_core_strdup(globals.pool, children_tags); char *tags[32] = { 0 }; int tag_count = switch_separate_string(children_tags_dup, ',', tags, sizeof(tags) / sizeof(tags[0])); if (tag_count) { int i; for (i = 0; i < tag_count; i++) { switch_core_hash_insert(def->children_tags, tags[i], tags[i]); } } } def->attribs_fn = attribs_fn; def->cdata_fn = cdata_fn; def->is_root = SWITCH_FALSE; switch_core_hash_insert(globals.tag_defs, tag, def); return def; } /** * Add a definition for a root tag * @param tag the name * @param attribs_fn the function to handle the tag attributes * @param cdata_fn the function to handler the tag CDATA * @param children_tags comma-separated list of valid child tag names * @return the definition */ static struct tag_def *add_root_tag_def(const char *tag, tag_attribs_fn attribs_fn, tag_cdata_fn cdata_fn, const char *children_tags) { struct tag_def *def = add_tag_def(tag, attribs_fn, cdata_fn, children_tags); def->is_root = SWITCH_TRUE; return def; } /** * Handle tag attributes * @param parser the parser * @param name the tag name * @param atts the attributes * @return IKS_OK if OK IKS_BADXML on parse failure */ static int process_tag(struct srgs_grammar *grammar, const char *name, char **atts) { struct srgs_node *cur = grammar->cur; if (cur->tag_def->is_root && cur->parent == NULL) { /* no parent for ROOT tags */ return cur->tag_def->attribs_fn(grammar, atts); } else if (!cur->tag_def->is_root && cur->parent) { /* check if this child is allowed by parent node */ struct tag_def *parent_def = cur->parent->tag_def; if (switch_core_hash_find(parent_def->children_tags, "ANY") || switch_core_hash_find(parent_def->children_tags, name)) { return cur->tag_def->attribs_fn(grammar, atts); } else { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "<%s> cannot be a child of <%s>\n", name, cur->parent->name); } } else if (cur->tag_def->is_root && cur->parent != NULL) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "<%s> must be the root element\n", name); } else { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "<%s> cannot be a root element\n", name); } return IKS_BADXML; } /** * Handle tag attributes that are ignored * @param grammar the grammar * @param atts the attributes * @return IKS_OK */ static int process_attribs_ignore(struct srgs_grammar *grammar, char **atts) { return IKS_OK; } /** * Handle CDATA that is ignored * @param grammar the grammar * @param data the CDATA * @param len the CDATA length * @return IKS_OK */ static int process_cdata_ignore(struct srgs_grammar *grammar, char *data, size_t len) { return IKS_OK; } /** * Handle CDATA that is not allowed * @param grammar the grammar * @param data the CDATA * @param len the CDATA length * @return IKS_BADXML if any printable characters */ static int process_cdata_bad(struct srgs_grammar *grammar, char *data, size_t len) { int i; for (i = 0; i < len; i++) { if (isgraph(data[i])) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Unexpected CDATA for <%s>\n", grammar->cur->name); return IKS_BADXML; } } return IKS_OK; } /** * Process attributes * @param grammar the grammar state * @param atts the attributes * @return IKS_OK if ok */ static int process_rule(struct srgs_grammar *grammar, char **atts) { struct srgs_node *rule = grammar->cur; rule->value.rule.is_public = 0; rule->value.rule.id = NULL; if (atts) { int i = 0; while (atts[i]) { if (!strcmp("scope", atts[i])) { rule->value.rule.is_public = !zstr(atts[i + 1]) && !strcmp("public", atts[i + 1]); } else if (!strcmp("id", atts[i])) { if (!zstr(atts[i + 1])) { rule->value.rule.id = switch_core_strdup(grammar->pool, atts[i + 1]); } } i += 2; } } if (zstr(rule->value.rule.id)) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Missing rule ID: %s\n", rule->value.rule.id); return IKS_BADXML; } if (switch_core_hash_find(grammar->rules, rule->value.rule.id)) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Duplicate rule ID: %s\n", rule->value.rule.id); return IKS_BADXML; } switch_core_hash_insert(grammar->rules, rule->value.rule.id, rule); return IKS_OK; } /** * Process attributes * @param grammar the grammar state * @param atts the attributes * @return IKS_OK if ok */ static int process_ruleref(struct srgs_grammar *grammar, char **atts) { struct srgs_node *ruleref = grammar->cur; if (atts) { int i = 0; while (atts[i]) { if (!strcmp("uri", atts[i])) { char *uri = atts[i + 1]; if (zstr(uri)) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Empty uri\n"); return IKS_BADXML; } /* only allow local reference */ if (uri[0] != '#' || strlen(uri) < 2) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Only local rule refs allowed\n"); return IKS_BADXML; } ruleref->value.ref.uri = switch_core_strdup(grammar->pool, uri); return IKS_OK; } i += 2; } } return IKS_OK; } /** * Process attributes * @param grammar the grammar state * @param atts the attributes * @return IKS_OK if ok */ static int process_item(struct srgs_grammar *grammar, char **atts) { struct srgs_node *item = grammar->cur; item->value.item.repeat_min = 1; item->value.item.repeat_max = 1; item->value.item.weight = NULL; if (atts) { int i = 0; while (atts[i]) { if (!strcmp("repeat", atts[i])) { /* repeats of 0 are not supported by this code */ char *repeat = atts[i + 1]; if (zstr(repeat)) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Empty repeat atribute\n"); return IKS_BADXML; } if (switch_is_number(repeat)) { /* single number */ int repeat_val = atoi(repeat); if (repeat_val < 1) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, " repeat must be >= 0\n"); return IKS_BADXML; } item->value.item.repeat_min = repeat_val; item->value.item.repeat_max = repeat_val; } else { /* range */ char *min = switch_core_strdup(grammar->pool, repeat); char *max = strchr(min, '-'); if (max) { *max = '\0'; max++; } else { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, " repeat must be a number or range\n"); return IKS_BADXML; } if (switch_is_number(min) && (switch_is_number(max) || zstr(max))) { int min_val = atoi(min); int max_val = zstr(max) ? INT_MAX : atoi(max); /* max must be >= min and > 0 min must be >= 0 */ if ((max_val <= 0) || (max_val < min_val) || (min_val < 0)) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, " repeat range invalid\n"); return IKS_BADXML; } item->value.item.repeat_min = min_val; item->value.item.repeat_max = max_val; } else { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, " repeat range is not a number\n"); return IKS_BADXML; } } } else if (!strcmp("weight", atts[i])) { const char *weight = atts[i + 1]; if (zstr(weight) || !switch_is_number(weight) || atof(weight) < 0) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, " weight is not a number >= 0\n"); return IKS_BADXML; } item->value.item.weight = switch_core_strdup(grammar->pool, weight); } i += 2; } } return IKS_OK; } /** * Process attributes * @param grammar the grammar state * @param atts the attributes * @return IKS_OK if ok */ static int process_grammar(struct srgs_grammar *grammar, char **atts) { if (grammar->root) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Only one tag allowed\n"); return IKS_BADXML; } grammar->root = grammar->cur; if (atts) { int i = 0; while (atts[i]) { if (!strcmp("mode", atts[i])) { char *mode = atts[i + 1]; if (zstr(mode)) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, " mode is missing\n"); return IKS_BADXML; } grammar->digit_mode = !strcasecmp(mode, "dtmf"); } else if(!strcmp("encoding", atts[i])) { char *encoding = atts[i + 1]; if (zstr(encoding)) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, " encoding is empty\n"); return IKS_BADXML; } grammar->encoding = switch_core_strdup(grammar->pool, encoding); } else if (!strcmp("language", atts[i])) { char *language = atts[i + 1]; if (zstr(language)) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, " language is empty\n"); return IKS_BADXML; } grammar->language = switch_core_strdup(grammar->pool, language); } else if (!strcmp("root", atts[i])) { char *root = atts[i + 1]; if (zstr(root)) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, " root is empty\n"); return IKS_BADXML; } grammar->cur->value.root = switch_core_strdup(grammar->pool, root); } i += 2; } } return IKS_OK; } /** * Process a tag */ static int tag_hook(void *user_data, char *name, char **atts, int type) { int result = IKS_OK; struct srgs_grammar *grammar = (struct srgs_grammar *)user_data; if (type == IKS_OPEN || type == IKS_SINGLE) { enum srgs_node_type ntype = string_to_node_type(name); grammar->cur = sn_insert(grammar->pool, grammar->cur, name, ntype); grammar->cur->tag_def = switch_core_hash_find(globals.tag_defs, name); if (!grammar->cur->tag_def) { grammar->cur->tag_def = switch_core_hash_find(globals.tag_defs, "ANY"); } result = process_tag(grammar, name, atts); sn_log_node_open(grammar->cur); } if (type == IKS_CLOSE || type == IKS_SINGLE) { sn_log_node_close(grammar->cur); grammar->cur = grammar->cur->parent; } return result; } /** * Process CDATA grammar tokens * @param grammar the grammar * @param data the CDATA * @param len the CDATA length * @return IKS_OK */ static int process_cdata_tokens(struct srgs_grammar *grammar, char *data, size_t len) { struct srgs_node *string = grammar->cur; int i; if (grammar->digit_mode) { for (i = 0; i < len; i++) { if (isdigit(data[i]) || data[i] == '#' || data[i] == '*') { char *digit = switch_core_alloc(grammar->pool, sizeof(char) * 2); digit[0] = data[i]; digit[1] = '\0'; string = sn_insert_string(grammar->pool, string, digit); sn_log_node_open(string); } } } else { char *data_dup = switch_core_alloc(grammar->pool, sizeof(char) * (len + 1)); char *start = data_dup; char *end = start + len - 1; memcpy(data_dup, data, len); /* remove start whitespace */ for (; start && *start && !isgraph(*start); start++) { } if (!zstr(start)) { /* remove end whitespace */ for (; end != start && *end && !isgraph(*end); end--) { *end = '\0'; } if (!zstr(start)) { string = sn_insert_string(grammar->pool, string, start); } } } return IKS_OK; } /** * Process cdata * @param user_data the grammar * @param data the CDATA * @param len the CDATA length * @return IKS_OK */ static int cdata_hook(void *user_data, char *data, size_t len) { struct srgs_grammar *grammar = (struct srgs_grammar *)user_data; if (!grammar) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Missing grammar\n"); return IKS_BADXML; } if (grammar->cur) { if (grammar->cur->tag_def) { return grammar->cur->tag_def->cdata_fn(grammar, data, len); } switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Missing definition for <%s>\n", grammar->cur->name); return IKS_BADXML; } return IKS_OK; } /** * Create a new parsed grammar * @param parser * @return the grammar */ struct srgs_grammar *srgs_grammar_new(struct srgs_parser *parser) { switch_memory_pool_t *pool = NULL; struct srgs_grammar *grammar = NULL; switch_core_new_memory_pool(&pool); grammar = switch_core_alloc(pool, sizeof (*grammar)); grammar->pool = pool; grammar->root = NULL; grammar->cur = NULL; grammar->uuid = (parser && !zstr(parser->uuid)) ? switch_core_strdup(pool, parser->uuid) : ""; switch_core_hash_init(&grammar->rules, pool); switch_mutex_init(&grammar->mutex, SWITCH_MUTEX_NESTED, pool); return grammar; } /** * Destroy a parsed grammar * @param grammar the grammar */ static void srgs_grammar_destroy(struct srgs_grammar *grammar) { switch_memory_pool_t *pool = grammar->pool; if (grammar->compiled_regex) { pcre_free(grammar->compiled_regex); } if (grammar->jsgf_file_name) { switch_file_remove(grammar->jsgf_file_name, pool); } switch_core_destroy_memory_pool(&pool); } /** * Create a new parser. * @param uuid optional uuid for logging * @return the created parser */ struct srgs_parser *srgs_parser_new(const char *uuid) { switch_memory_pool_t *pool = NULL; struct srgs_parser *parser = NULL; switch_core_new_memory_pool(&pool); if (pool) { parser = switch_core_alloc(pool, sizeof(*parser)); parser->pool = pool; parser->uuid = zstr(uuid) ? "" : switch_core_strdup(pool, uuid); switch_core_hash_init(&parser->cache, pool); switch_mutex_init(&parser->mutex, SWITCH_MUTEX_NESTED, pool); } return parser; } /** * Destroy the parser. * @param parser to destroy */ void srgs_parser_destroy(struct srgs_parser *parser) { switch_memory_pool_t *pool = parser->pool; switch_hash_index_t *hi = NULL; /* clean up all cached grammars */ for (hi = switch_core_hash_first(parser->cache); hi; hi = switch_core_hash_next(hi)) { struct srgs_grammar *grammar = NULL; const void *key; void *val; switch_core_hash_this(hi, &key, NULL, &val); grammar = (struct srgs_grammar *)val; switch_assert(grammar); srgs_grammar_destroy(grammar); } switch_core_destroy_memory_pool(&pool); } /** * Create regexes * @param grammar the grammar * @param node root node * @param stream set to NULL * @return 1 if successful */ static int create_regexes(struct srgs_grammar *grammar, struct srgs_node *node, switch_stream_handle_t *stream) { sn_log_node_open(node); switch (node->type) { case SNT_GRAMMAR: if (node->child) { int num_rules = 0; struct srgs_node *child = node->child; if (grammar->root_rule) { if (!create_regexes(grammar, grammar->root_rule, NULL)) { return 0; } grammar->regex = switch_core_sprintf(grammar->pool, "^%s$", grammar->root_rule->value.rule.regex); } else { switch_stream_handle_t new_stream = { 0 }; SWITCH_STANDARD_STREAM(new_stream); if (node->num_children > 1) { new_stream.write_function(&new_stream, "%s", "^(?:"); } else { new_stream.write_function(&new_stream, "%s", "^"); } for (; child; child = child->next) { if (!create_regexes(grammar, child, &new_stream)) { switch_safe_free(new_stream.data); return 0; } if (child->type == SNT_RULE && child->value.rule.is_public) { if (num_rules > 0) { new_stream.write_function(&new_stream, "%s", "|"); } new_stream.write_function(&new_stream, "%s", child->value.rule.regex); num_rules++; } } if (node->num_children > 1) { new_stream.write_function(&new_stream, "%s", ")$"); } else { new_stream.write_function(&new_stream, "%s", "$"); } grammar->regex = switch_core_strdup(grammar->pool, new_stream.data); switch_safe_free(new_stream.data); } switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_DEBUG, "document regex = %s\n", grammar->regex); } break; case SNT_RULE: if (node->value.rule.regex) { return 1; } else if (node->child) { struct srgs_node *item = node->child; switch_stream_handle_t new_stream = { 0 }; SWITCH_STANDARD_STREAM(new_stream); for (; item; item = item->next) { if (!create_regexes(grammar, item, &new_stream)) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_DEBUG, "%s regex failed = %s\n", node->value.rule.id, node->value.rule.regex); switch_safe_free(new_stream.data); return 0; } } node->value.rule.regex = switch_core_strdup(grammar->pool, new_stream.data); switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_DEBUG, "%s regex = %s\n", node->value.rule.id, node->value.rule.regex); switch_safe_free(new_stream.data); } break; case SNT_STRING: { int i; for (i = 0; i < strlen(node->value.string); i++) { switch (node->value.string[i]) { case '[': case '\\': case '^': case '$': case '.': case '|': case '?': case '*': case '+': case '(': case ')': /* escape special PCRE regex characters */ stream->write_function(stream, "\\%c", node->value.string[i]); break; default: stream->write_function(stream, "%c", node->value.string[i]); break; } } if (node->child) { if (!create_regexes(grammar, node->child, stream)) { return 0; } } break; } case SNT_ITEM: if (node->child) { struct srgs_node *item = node->child; if (node->value.item.repeat_min != 1 || node->value.item.repeat_max != 1) { stream->write_function(stream, "%s", "(?:"); } for(; item; item = item->next) { if (!create_regexes(grammar, item, stream)) { return 0; } } if (node->value.item.repeat_min != 1 || node->value.item.repeat_max != 1) { if (node->value.item.repeat_min != node->value.item.repeat_max) { if (node->value.item.repeat_min == 0 && node->value.item.repeat_max == INT_MAX) { stream->write_function(stream, ")*"); } else if (node->value.item.repeat_min == 0 && node->value.item.repeat_max == 1) { stream->write_function(stream, ")?"); } else if (node->value.item.repeat_min == 1 && node->value.item.repeat_max == INT_MAX) { stream->write_function(stream, ")+"); } else if (node->value.item.repeat_max == INT_MAX) { stream->write_function(stream, "){%i,1000}", node->value.item.repeat_min); } else { stream->write_function(stream, "){%i,%i}", node->value.item.repeat_min, node->value.item.repeat_max); } } else { stream->write_function(stream, "){%i}", node->value.item.repeat_min); } } } break; case SNT_ONE_OF: if (node->child) { struct srgs_node *item = node->child; if (node->num_children > 1) { stream->write_function(stream, "%s", "(?:"); } for (; item; item = item->next) { if (item != node->child) { stream->write_function(stream, "%s", "|"); } if (!create_regexes(grammar, item, stream)) { return 0; } } if (node->num_children > 1) { stream->write_function(stream, "%s", ")"); } } break; case SNT_REF: { struct srgs_node *rule = node->value.ref.node; if (!rule->value.rule.regex) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_DEBUG, "ruleref: create %s regex\n", rule->value.rule.id); if (!create_regexes(grammar, rule, NULL)) { return 0; } } if (!rule->value.rule.regex) { return 0; } stream->write_function(stream, "%s", rule->value.rule.regex); break; } case SNT_ANY: default: /* ignore */ return 1; } sn_log_node_close(node); return 1; } /** * Compile regex */ static pcre *get_compiled_regex(struct srgs_grammar *grammar) { int erroffset = 0; const char *errptr = ""; int options = 0; const char *regex; if (!grammar) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_CRIT, "grammar is NULL!\n"); return NULL; } switch_mutex_lock(grammar->mutex); if (!grammar->compiled_regex && (regex = srgs_grammar_to_regex(grammar))) { if (!(grammar->compiled_regex = pcre_compile(regex, options, &errptr, &erroffset, NULL))) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_WARNING, "Failed to compile grammar regex: %s\n", regex); } } switch_mutex_unlock(grammar->mutex); return grammar->compiled_regex; } /** * Resolve all unresolved references and detect loops. * @param grammar the grammar * @param node the current node * @param level the recursion level */ static int resolve_refs(struct srgs_grammar *grammar, struct srgs_node *node, int level) { sn_log_node_open(node); if (node->visited) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Loop detected.\n"); return 0; } node->visited = 1; if (level > MAX_RECURSION) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Recursion too deep.\n"); return 0; } if (node->type == SNT_GRAMMAR && node->value.root) { struct srgs_node *rule = (struct srgs_node *)switch_core_hash_find(grammar->rules, node->value.root); if (!rule) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Root rule not found: %s\n", node->value.root); return 0; } grammar->root_rule = rule; } if (node->type == SNT_UNRESOLVED_REF) { /* resolve reference to local rule- drop first character # from URI */ struct srgs_node *rule = (struct srgs_node *)switch_core_hash_find(grammar->rules, node->value.ref.uri + 1); if (!rule) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_INFO, "Local rule not found: %s\n", node->value.ref.uri); return 0; } /* link to rule */ node->type = SNT_REF; node->value.ref.node = rule; } /* travel through rule to detect loops */ if (node->type == SNT_REF) { if (!resolve_refs(grammar, node->value.ref.node, level + 1)) { return 0; } } /* resolve children refs */ if (node->child) { struct srgs_node *child = node->child; for (; child; child = child->next) { if (!resolve_refs(grammar, child, level + 1)) { return 0; } } } node->visited = 0; sn_log_node_close(node); return 1; } /** * Parse the document into rules to match * @param parser the parser * @param document the document to parse * @return the parsed grammar if successful */ struct srgs_grammar *srgs_parse(struct srgs_parser *parser, const char *document) { struct srgs_grammar *grammar = NULL; if (!parser) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "NULL parser!!\n"); return NULL; } if (zstr(document)) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(parser->uuid), SWITCH_LOG_INFO, "Missing grammar document\n"); return NULL; } /* check for cached grammar */ switch_mutex_lock(parser->mutex); grammar = (struct srgs_grammar *)switch_core_hash_find(parser->cache, document); if (!grammar) { int result = 0; iksparser *p; switch_log_printf(SWITCH_CHANNEL_UUID_LOG(parser->uuid), SWITCH_LOG_DEBUG, "Parsing new grammar\n"); grammar = srgs_grammar_new(parser); p = iks_sax_new(grammar, tag_hook, cdata_hook); if (iks_parse(p, document, 0, 1) == IKS_OK) { if (grammar->root) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(parser->uuid), SWITCH_LOG_DEBUG, "Resolving references\n"); if (resolve_refs(grammar, grammar->root, 0)) { result = 1; } } else { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(parser->uuid), SWITCH_LOG_INFO, "Nothing to parse!\n"); } } iks_parser_delete(p); if (result) { switch_core_hash_insert(parser->cache, document, grammar); } else { if (grammar) { srgs_grammar_destroy(grammar); grammar = NULL; } switch_log_printf(SWITCH_CHANNEL_UUID_LOG(parser->uuid), SWITCH_LOG_INFO, "Failed to parse grammar\n"); } } else { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(parser->uuid), SWITCH_LOG_DEBUG, "Using cached grammar\n"); } switch_mutex_unlock(parser->mutex); return grammar; } #define MAX_INPUT_SIZE 128 #define OVECTOR_SIZE 30 #define WORKSPACE_SIZE 1024 /** * Check if no more digits can be added to input and match * @param compiled_regex the regex used in the initial match * @param input the input to check * @return true if end of match (no more input can be added) */ static int is_match_end(pcre *compiled_regex, const char *input) { int ovector[OVECTOR_SIZE]; int input_size = strlen(input); char search_input[MAX_INPUT_SIZE + 2]; const char *search_set = "0123456789#*ABCD"; const char *search = strchr(search_set, input[input_size - 1]); /* start with last digit in input */ int i = 0; /* For each digit in search_set, check if input + search_set digit is a potential match. If so, then this is not a match end. */ sprintf(search_input, "%sZ", input); for (i = 0; i < 16; i++) { int result; if (!*search) { search = search_set; } search_input[input_size] = *search++; result = pcre_exec(compiled_regex, NULL, search_input, input_size + 1, 0, 0, ovector, sizeof(ovector) / sizeof(ovector[0])); if (result > 0) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "not match end\n"); return 0; } } switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "is match end\n"); return 1; } /** * Find a match * @param grammar the grammar to match * @param input the input to compare * @return the match result */ enum srgs_match_type srgs_grammar_match(struct srgs_grammar *grammar, const char *input) { int result = 0; int ovector[OVECTOR_SIZE]; int workspace[WORKSPACE_SIZE]; pcre *compiled_regex; if (zstr(input)) { return SMT_NO_MATCH; } if (strlen(input) > MAX_INPUT_SIZE) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "input too large: %s\n", input); return SMT_NO_MATCH; } if (!(compiled_regex = get_compiled_regex(grammar))) { return SMT_NO_MATCH; } result = pcre_dfa_exec(compiled_regex, NULL, input, strlen(input), 0, PCRE_PARTIAL, ovector, sizeof(ovector) / sizeof(ovector[0]), workspace, sizeof(workspace) / sizeof(workspace[0])); switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "match = %i\n", result); if (result > 0) { if (is_match_end(compiled_regex, input)) { return SMT_MATCH_END; } return SMT_MATCH; } if (result == PCRE_ERROR_PARTIAL) { return SMT_MATCH_PARTIAL; } return SMT_NO_MATCH; } /** * Generate regex from SRGS document. Call this after parsing SRGS document. * @param parser the parser * @return the regex or NULL */ const char *srgs_grammar_to_regex(struct srgs_grammar *grammar) { if (!grammar) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "grammar is NULL!\n"); return NULL; } switch_mutex_lock(grammar->mutex); if (!grammar->regex && !create_regexes(grammar, grammar->root, NULL)) { switch_mutex_unlock(grammar->mutex); return NULL; } switch_mutex_unlock(grammar->mutex); return grammar->regex; } /** * Create JSGF grammar * @param parser the parser * @param node root node * @param stream set to NULL * @return 1 if successful */ static int create_jsgf(struct srgs_grammar *grammar, struct srgs_node *node, switch_stream_handle_t *stream) { sn_log_node_open(node); switch (node->type) { case SNT_GRAMMAR: if (node->child) { struct srgs_node *child; switch_stream_handle_t new_stream = { 0 }; SWITCH_STANDARD_STREAM(new_stream); new_stream.write_function(&new_stream, "#JSGF V1.0"); if (!zstr(grammar->encoding)) { new_stream.write_function(&new_stream, " %s", grammar->encoding); if (!zstr(grammar->language)) { new_stream.write_function(&new_stream, " %s", grammar->language); } } new_stream.write_function(&new_stream, ";\ngrammar org.freeswitch.srgs_to_jsgf;\n" "public "); /* output root rule */ if (grammar->root_rule) { if (!create_jsgf(grammar, grammar->root_rule, &new_stream)) { switch_safe_free(new_stream.data); return 0; } } else { int num_rules = 0; int first = 1; for (child = node->child; child; child = child->next) { if (child->type == SNT_RULE && child->value.rule.is_public) { num_rules++; } } if (num_rules > 1) { new_stream.write_function(&new_stream, " ="); for (child = node->child; child; child = child->next) { if (child->type == SNT_RULE && child->value.rule.is_public) { if (!first) { new_stream.write_function(&new_stream, "%s", " |"); } first = 0; new_stream.write_function(&new_stream, " <%s>", child->value.rule.id); } } new_stream.write_function(&new_stream, ";\n"); } else { for (child = node->child; child; child = child->next) { if (child->type == SNT_RULE && child->value.rule.is_public) { grammar->root_rule = child; if (!create_jsgf(grammar, child, &new_stream)) { switch_safe_free(new_stream.data); return 0; } else { break; } } } } } /* output all rule definitions */ for (child = node->child; child; child = child->next) { if (child->type == SNT_RULE && child != grammar->root_rule) { if (!create_jsgf(grammar, child, &new_stream)) { switch_safe_free(new_stream.data); return 0; } } } grammar->jsgf = switch_core_strdup(grammar->pool, new_stream.data); switch_safe_free(new_stream.data); switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "document jsgf = %s\n", grammar->jsgf); } break; case SNT_RULE: if (node->child) { struct srgs_node *item = node->child; stream->write_function(stream, "<%s> =", node->value.rule.id); for (; item; item = item->next) { if (!create_jsgf(grammar, item, stream)) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "%s jsgf rule failed\n", node->value.rule.id); return 0; } } stream->write_function(stream, ";\n"); } break; case SNT_STRING: { int len = strlen(node->value.string); int i; stream->write_function(stream, " "); for (i = 0; i < len; i++) { switch (node->value.string[i]) { case '\\': case '*': case '+': case '/': case '(': case ')': case '[': case ']': case '{': case '}': case '=': case '<': case '>': case ';': case '|': stream->write_function(stream, "\\"); break; default: break; } stream->write_function(stream, "%c", node->value.string[i]); } if (node->child) { if (!create_jsgf(grammar, node->child, stream)) { return 0; } } break; } case SNT_ITEM: if (node->child) { struct srgs_node *item; if (node->value.item.repeat_min == 0 && node->value.item.repeat_max == 1) { /* optional item */ stream->write_function(stream, " ["); for(item = node->child; item; item = item->next) { if (!create_jsgf(grammar, item, stream)) { return 0; } } stream->write_function(stream, " ]"); } else { /* minimum repeats */ int i; for (i = 0; i < node->value.item.repeat_min; i++) { if (node->value.item.repeat_min != 1 && node->value.item.repeat_max != 1) { stream->write_function(stream, " ("); } for(item = node->child; item; item = item->next) { if (!create_jsgf(grammar, item, stream)) { return 0; } } if (node->value.item.repeat_min != 1 && node->value.item.repeat_max != 1) { stream->write_function(stream, " )"); } } if (node->value.item.repeat_max == INT_MAX) { stream->write_function(stream, "*"); } else { for (;i < node->value.item.repeat_max; i++) { stream->write_function(stream, " ["); for(item = node->child; item; item = item->next) { if (!create_jsgf(grammar, item, stream)) { return 0; } } stream->write_function(stream, " ]"); } } } } break; case SNT_ONE_OF: if (node->child) { struct srgs_node *item = node->child; if (node->num_children > 1) { stream->write_function(stream, " ("); } for (; item; item = item->next) { if (item != node->child) { stream->write_function(stream, " |"); } stream->write_function(stream, " ("); if (!create_jsgf(grammar, item, stream)) { return 0; } stream->write_function(stream, " )"); } if (node->num_children > 1) { stream->write_function(stream, " )"); } } break; case SNT_REF: { struct srgs_node *rule = node->value.ref.node; stream->write_function(stream, " <%s>", rule->value.rule.id); break; } case SNT_ANY: default: /* ignore */ return 1; } sn_log_node_close(node); return 1; } /** * Generate JSGF from SRGS document. Call this after parsing SRGS document. * @param grammar the grammar * @return the JSGF document or NULL */ const char *srgs_grammar_to_jsgf(struct srgs_grammar *grammar) { if (!grammar) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "grammar is NULL!\n"); return NULL; } switch_mutex_lock(grammar->mutex); if (!grammar->jsgf && !create_jsgf(grammar, grammar->root, NULL)) { switch_mutex_unlock(grammar->mutex); return NULL; } switch_mutex_unlock(grammar->mutex); return grammar->jsgf; } /** * Generate JSGF file from SRGS document. Call this after parsing SRGS document. * @param grammar the grammar * @param basedir the base path to use if file does not already exist * @param ext the extension to use * @return the path or NULL */ const char *srgs_grammar_to_jsgf_file(struct srgs_grammar *grammar, const char *basedir, const char *ext) { if (!grammar) { switch_log_printf(SWITCH_CHANNEL_UUID_LOG(grammar->uuid), SWITCH_LOG_CRIT, "grammar is NULL!\n"); return NULL; } switch_mutex_lock(grammar->mutex); if (!grammar->jsgf_file_name) { char file_name_buf[SWITCH_UUID_FORMATTED_LENGTH + 1]; switch_file_t *file; switch_size_t len; const char *jsgf = srgs_grammar_to_jsgf(grammar); switch_uuid_str(file_name_buf, sizeof(file_name_buf)); grammar->jsgf_file_name = switch_core_sprintf(grammar->pool, "%s%s%s.%s", basedir, SWITCH_PATH_SEPARATOR, file_name_buf, ext); if (!jsgf) { switch_mutex_unlock(grammar->mutex); return NULL; } /* write grammar to file */ if (switch_file_open(&file, grammar->jsgf_file_name, SWITCH_FOPEN_WRITE | SWITCH_FOPEN_TRUNCATE | SWITCH_FOPEN_CREATE, SWITCH_FPROT_OS_DEFAULT, grammar->pool) != SWITCH_STATUS_SUCCESS) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Failed to create jsgf file: %s!\n", grammar->jsgf_file_name); grammar->jsgf_file_name = NULL; switch_mutex_unlock(grammar->mutex); return NULL; } len = strlen(jsgf); switch_file_write(file, jsgf, &len); switch_file_close(file); } switch_mutex_unlock(grammar->mutex); return grammar->jsgf_file_name; } /** * Initialize SRGS parser. This function is not thread safe. */ int srgs_init(void) { if (globals.init) { return 1; } globals.init = SWITCH_TRUE; switch_core_new_memory_pool(&globals.pool); switch_core_hash_init(&globals.tag_defs, globals.pool); add_root_tag_def("grammar", process_grammar, process_cdata_bad, "meta,metadata,lexicon,tag,rule"); add_tag_def("ruleref", process_ruleref, process_cdata_bad, ""); add_tag_def("token", process_attribs_ignore, process_cdata_ignore, ""); add_tag_def("tag", process_attribs_ignore, process_cdata_ignore, ""); add_tag_def("one-of", process_attribs_ignore, process_cdata_tokens, "item"); add_tag_def("item", process_item, process_cdata_tokens, "token,ruleref,item,one-of,tag"); add_tag_def("rule", process_rule, process_cdata_tokens, "token,ruleref,item,one-of,tag,example"); add_tag_def("example", process_attribs_ignore, process_cdata_ignore, ""); add_tag_def("lexicon", process_attribs_ignore, process_cdata_bad, ""); add_tag_def("meta", process_attribs_ignore, process_cdata_bad, ""); add_tag_def("metadata", process_attribs_ignore, process_cdata_ignore, "ANY"); add_tag_def("ANY", process_attribs_ignore, process_cdata_ignore, "ANY"); return 1; } /* For Emacs: * Local Variables: * mode:c * indent-tabs-mode:t * tab-width:4 * c-basic-offset:4 * End: * For VIM: * vim:set softtabstop=4 shiftwidth=4 tabstop=4 noet */