Add framework for speech modules (asr/tts)
git-svn-id: http://svn.freeswitch.org/svn/freeswitch/trunk@457 d0543943-73ff-0310-b7d9-9358b9ac24b2
This commit is contained in:
parent
48ae14726b
commit
17e06cf6bc
|
@ -732,6 +732,72 @@ SWITCH_DECLARE(switch_status) switch_core_file_seek(switch_file_handle *fh, unsi
|
|||
SWITCH_DECLARE(switch_status) switch_core_file_close(switch_file_handle *fh);
|
||||
///\}
|
||||
|
||||
///\defgroup speech ASR/TTS Functions
|
||||
///\ingroup core1
|
||||
///\{
|
||||
/*!
|
||||
\brief Open a speech handle
|
||||
\param sh a speech handle to use
|
||||
\param module_name the speech module to use
|
||||
\param flags asr/tts flags
|
||||
\param pool the pool to use (NULL for new pool)
|
||||
\return SWITCH_STATUS_SUCCESS if the handle is opened
|
||||
*/
|
||||
SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh, char *module_name, unsigned int flags, switch_memory_pool *pool);
|
||||
|
||||
/*!
|
||||
\brief Feed data to the ASR module
|
||||
\param sh the speech handle to feed
|
||||
\param data the buffer of audio data
|
||||
\param len the in-use size of the buffer
|
||||
\param rate the rate of the audio (in hz)
|
||||
\param flags flags in/out for fine tuning
|
||||
\return SWITCH_STATUS_SUCCESS with possible new flags on success
|
||||
*/
|
||||
SWITCH_DECLARE(switch_status) switch_core_speech_feed_asr(switch_speech_handle *sh, void *data, unsigned int *len, int rate, unsigned int *flags);
|
||||
|
||||
/*!
|
||||
\brief Get text back from the ASR module
|
||||
\param sh the speech handle to read
|
||||
\param buf the buffer to insert the text into
|
||||
\param buflen the max size of the buffer
|
||||
\param flags flags in/out for fine tuning
|
||||
\return SWITCH_STATUS_SUCCESS with possible new flags on success
|
||||
*/
|
||||
SWITCH_DECLARE(switch_status) switch_core_speech_interpret_asr(switch_speech_handle *sh, char *buf, unsigned int buflen, unsigned int *flags);
|
||||
|
||||
/*!
|
||||
\brief Feed text to the TTS module
|
||||
\param sh the speech handle to feed
|
||||
\param text the buffer to write
|
||||
\param flags flags in/out for fine tuning
|
||||
\return SWITCH_STATUS_SUCCESS with possible new flags on success
|
||||
*/
|
||||
SWITCH_DECLARE(switch_status) switch_core_speech_feed_tts(switch_speech_handle *sh, char *text, unsigned int *flags);
|
||||
|
||||
/*!
|
||||
\brief Read rendered audio from the TTS module
|
||||
\param sh the speech handle to read
|
||||
\param data the buffer to read to
|
||||
\param datalen the max size / written size of the data
|
||||
\param rate the rate of the read audio
|
||||
\param flags flags in/out for fine tuning
|
||||
\return SWITCH_STATUS_SUCCESS with datalen adjusted to the bytes written if successful
|
||||
*/
|
||||
SWITCH_DECLARE(switch_status) switch_core_speech_read_tts(switch_speech_handle *sh,
|
||||
void *data,
|
||||
unsigned int *datalen,
|
||||
unsigned int *rate,
|
||||
unsigned int *flags);
|
||||
/*!
|
||||
\brief Close an open speech handle
|
||||
\param sh the speech handle to close
|
||||
\param flags flags in/out for fine tuning
|
||||
\return SWITCH_STATUS_SUCCESS if the speech handle was closed
|
||||
*/
|
||||
SWITCH_DECLARE(switch_status) switch_core_speech_close(switch_speech_handle *sh, unsigned int *flags);
|
||||
///\}
|
||||
|
||||
///\defgroup misc Misc
|
||||
///\ingroup core1
|
||||
///\{
|
||||
|
|
|
@ -71,6 +71,8 @@ struct switch_loadable_module_interface {
|
|||
const switch_api_interface *api_interface;
|
||||
/*! the table of file formats the module has implemented */
|
||||
const switch_file_interface *file_interface;
|
||||
/*! the table of speech interfaces the module has implemented */
|
||||
const switch_speech_interface *speech_interface;
|
||||
};
|
||||
|
||||
/*!
|
||||
|
@ -133,6 +135,14 @@ SWITCH_DECLARE(switch_api_interface *) switch_loadable_module_get_api_interface(
|
|||
*/
|
||||
SWITCH_DECLARE(switch_file_interface *) switch_loadable_module_get_file_interface(char *name);
|
||||
|
||||
/*!
|
||||
\brief Retrieve the speech interface by its registered name
|
||||
\param name the name of the speech interface
|
||||
\return the desired speech interface
|
||||
*/
|
||||
SWITCH_DECLARE(switch_speech_interface *) switch_loadable_module_get_speech_interface(char *name);
|
||||
|
||||
|
||||
/*!
|
||||
\brief Retrieve the list of loaded codecs into an array
|
||||
\param pool the memory pool to use for the hash index
|
||||
|
|
|
@ -280,6 +280,46 @@ struct switch_file_handle {
|
|||
};
|
||||
|
||||
|
||||
/*! \brief Abstract interface to a speech module */
|
||||
struct switch_speech_interface {
|
||||
/*! the name of the interface (used as the lookup key when the interface is registered) */
|
||||
const char *interface_name;
|
||||
/*! function to open the speech interface; receives the handle and the caller's asr/tts flags */
|
||||
switch_status (*speech_open)(switch_speech_handle *sh,
|
||||
unsigned int flags);
|
||||
/*! function to close the speech interface (note: takes only the handle, no flags) */
|
||||
switch_status (*speech_close)(switch_speech_handle *);
|
||||
/*! function to feed audio to the ASR (called by switch_core_speech_feed_asr) */
|
||||
switch_status (*speech_feed_asr)(switch_speech_handle *sh, void *data, unsigned int *len, int rate, unsigned int *flags);
|
||||
/*! function to read text from the ASR (called by switch_core_speech_interpret_asr) */
|
||||
switch_status (*speech_interpret_asr)(switch_speech_handle *sh, char *buf, unsigned int buflen, unsigned int *flags);
|
||||
/*! function to feed text to the TTS (called by switch_core_speech_feed_tts) */
|
||||
switch_status (*speech_feed_tts)(switch_speech_handle *sh, char *text, unsigned int *flags);
|
||||
/*! function to read audio from the TTS (called by switch_core_speech_read_tts) */
|
||||
switch_status (*speech_read_tts)(switch_speech_handle *sh,
|
||||
void *data,
|
||||
|
||||
unsigned int *datalen,
|
||||
unsigned int *rate,
|
||||
unsigned int *flags);
|
||||
|
||||
/*! the next speech interface in the module's list; the module loader follows this link until it is NULL */
const struct switch_speech_interface *next;
|
||||
};
|
||||
|
||||
|
||||
/*! an abstract representation of an asr/tts speech handle. */
|
||||
struct switch_speech_handle {
|
||||
/*! the interface of the module that implemented the current speech interface */
|
||||
const struct switch_speech_interface *speech_interface;
|
||||
/*! flags to control behaviour (switch_speech_flag bits, e.g. SWITCH_SPEECH_FLAG_FREE_POOL) */
|
||||
unsigned int flags;
|
||||
|
||||
/*! the handle's memory pool (supplied by the caller or created when the handle is opened) */
|
||||
switch_memory_pool *memory_pool;
|
||||
/*! private data for the speech module to store handle specific info */
|
||||
void *private;
|
||||
};
|
||||
|
||||
|
||||
/* nobody has more setting than speex so we will let them set the standard */
|
||||
/*! \brief Various codec settings (currently only relevant to speex) */
|
||||
struct switch_codec_settings {
|
||||
|
|
|
@ -49,12 +49,15 @@ extern "C" {
|
|||
\enum switch_ivr_option_t
|
||||
\brief Possible options related to ivr functions
|
||||
<pre>
|
||||
SWITCH_IVR_OPTION_SYNC - synchronous (do everyting in the forground)
|
||||
|
||||
SWITCH_IVR_OPTION_NONE - nothing whatsoever
|
||||
SWITCH_IVR_OPTION_ASYNC - Asynchronous (do things in the background when applicable)
|
||||
SWITCH_IVR_OPTION_FILE - string argument implies a filename
|
||||
</pre>
|
||||
*/
|
||||
typedef enum {
|
||||
SWITCH_IVR_OPTION_SYNC = (1 << 0)
|
||||
SWITCH_IVR_OPTION_NONE = 0,
|
||||
SWITCH_IVR_OPTION_ASYNC = (1 << 0),
|
||||
SWITCH_IVR_OPTION_FILE = (1 << 1)
|
||||
} switch_ivr_option_t;
|
||||
|
||||
/*!
|
||||
|
@ -215,6 +218,27 @@ typedef enum {
|
|||
|
||||
} switch_codec_flag;
|
||||
|
||||
|
||||
/*!
|
||||
\enum switch_speech_flag
|
||||
\brief Speech related flags
|
||||
<pre>
|
||||
SWITCH_SPEECH_FLAG_TTS = (1 << 0) - Interface can/should convert text to speech.
|
||||
SWITCH_SPEECH_FLAG_ASR = (1 << 1) - Interface can/should convert audio to text.
|
||||
SWITCH_SPEECH_FLAG_HASTEXT = (1 << 2) - Interface has text to read.
|
||||
SWITCH_SPEECH_FLAG_PEEK = (1 << 3) - Read data but do not erase it.
|
||||
SWITCH_SPEECH_FLAG_FREE_POOL = (1 << 4) - Free interface's pool on destruction.
|
||||
</pre>
|
||||
*/
|
||||
typedef enum {
|
||||
SWITCH_SPEECH_FLAG_TTS = (1 << 0),
|
||||
SWITCH_SPEECH_FLAG_ASR = (1 << 1),
|
||||
SWITCH_SPEECH_FLAG_HASTEXT = (1 << 2),
|
||||
SWITCH_SPEECH_FLAG_PEEK = (1 << 3),
|
||||
SWITCH_SPEECH_FLAG_FREE_POOL = (1 << 4),
|
||||
|
||||
} switch_speech_flag;
|
||||
|
||||
/*!
|
||||
\enum switch_codec_type
|
||||
\brief Codec types
|
||||
|
@ -352,6 +376,8 @@ typedef struct switch_io_event_hooks switch_io_event_hooks;
|
|||
typedef struct switch_buffer switch_buffer;
|
||||
typedef struct switch_codec_settings switch_codec_settings;
|
||||
typedef struct switch_config switch_config;
|
||||
typedef struct switch_speech_handle switch_speech_handle;
|
||||
typedef struct switch_speech_interface switch_speech_interface;
|
||||
typedef void (*switch_application_function)(switch_core_session *, char *);
|
||||
typedef void (*switch_event_callback_t)(switch_event *);
|
||||
typedef switch_caller_extension *(*switch_dialplan_hunt_function)(switch_core_session *);
|
||||
|
|
|
@ -388,7 +388,7 @@ SWITCH_DECLARE(switch_status) switch_core_file_open(switch_file_handle *fh, char
|
|||
if ((status = switch_core_new_memory_pool(&fh->memory_pool)) != SWITCH_STATUS_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
switch_set_flag(fh, SWITCH_TIMER_FLAG_FREE_POOL);
|
||||
switch_set_flag(fh, SWITCH_FILE_FLAG_FREE_POOL);
|
||||
}
|
||||
|
||||
return fh->file_interface->file_open(fh, file_path);
|
||||
|
@ -419,6 +419,68 @@ SWITCH_DECLARE(switch_status) switch_core_file_close(switch_file_handle *fh)
|
|||
return fh->file_interface->file_close(fh);
|
||||
}
|
||||
|
||||
/*
 * Open a speech handle backed by the named speech module.
 *
 * sh          - the speech handle to initialise (must not be NULL)
 * module_name - registered name of the speech module to look up
 * flags       - asr/tts flags; stored on the handle and passed to the module
 * pool        - memory pool for the handle, or NULL to create a new one
 *
 * Returns SWITCH_STATUS_GENERR when no speech module is registered under
 * module_name, the pool-creation status when a new pool cannot be created,
 * and otherwise whatever the module's speech_open callback returns.
 */
SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh,
													  char *module_name,
													  unsigned int flags,
													  switch_memory_pool *pool)
{
	switch_status status;

	/* same precondition the other switch_core_speech_* wrappers enforce */
	assert(sh != NULL);

	if (!(sh->speech_interface = switch_loadable_module_get_speech_interface(module_name))) {
		switch_console_printf(SWITCH_CHANNEL_CONSOLE, "invalid speech module [%s]!\n", module_name);
		return SWITCH_STATUS_GENERR;
	}

	sh->flags = flags;

	if (pool) {
		/* caller-owned pool: use it as-is and do not mark it for freeing */
		sh->memory_pool = pool;
	} else {
		if ((status = switch_core_new_memory_pool(&sh->memory_pool)) != SWITCH_STATUS_SUCCESS) {
			return status;
		}
		/* the pool was created here, so record on the handle that it owns it */
		switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL);
	}

	/* the module receives the caller's original flags, without FREE_POOL added */
	return sh->speech_interface->speech_open(sh, flags);
}
|
||||
|
||||
/*
 * Hand a buffer of audio to the ASR side of the handle's speech module.
 * Thin dispatcher: forwards every argument unchanged to the module's
 * speech_feed_asr callback and returns its status.
 */
SWITCH_DECLARE(switch_status) switch_core_speech_feed_asr(switch_speech_handle *sh, void *data, unsigned int *len, int rate, unsigned int *flags)
{
	const struct switch_speech_interface *module;

	assert(sh != NULL);
	module = sh->speech_interface;

	return module->speech_feed_asr(sh, data, len, rate, flags);
}
|
||||
|
||||
/*
 * Fetch recognised text from the ASR side of the handle's speech module.
 * Thin dispatcher: forwards every argument unchanged to the module's
 * speech_interpret_asr callback and returns its status.
 */
SWITCH_DECLARE(switch_status) switch_core_speech_interpret_asr(switch_speech_handle *sh, char *buf, unsigned int buflen, unsigned int *flags)
{
	const struct switch_speech_interface *module;

	assert(sh != NULL);
	module = sh->speech_interface;

	return module->speech_interpret_asr(sh, buf, buflen, flags);
}
|
||||
|
||||
/*
 * Hand text to the TTS side of the handle's speech module.
 * Thin dispatcher: forwards every argument unchanged to the module's
 * speech_feed_tts callback and returns its status.
 */
SWITCH_DECLARE(switch_status) switch_core_speech_feed_tts(switch_speech_handle *sh, char *text, unsigned int *flags)
{
	const struct switch_speech_interface *module;

	assert(sh != NULL);
	module = sh->speech_interface;

	return module->speech_feed_tts(sh, text, flags);
}
|
||||
|
||||
/*
 * Read rendered audio from the TTS side of the handle's speech module.
 * Thin dispatcher: forwards every argument unchanged to the module's
 * speech_read_tts callback and returns its status.
 */
SWITCH_DECLARE(switch_status) switch_core_speech_read_tts(switch_speech_handle *sh,
														  void *data,
														  unsigned int *datalen,
														  unsigned int *rate,
														  unsigned int *flags)
{
	const struct switch_speech_interface *module;

	assert(sh != NULL);
	module = sh->speech_interface;

	return module->speech_read_tts(sh, data, datalen, rate, flags);
}
|
||||
|
||||
|
||||
/*
 * Close an open speech handle by calling the module's speech_close callback.
 *
 * sh    - the speech handle to close (must not be NULL)
 * flags - accepted for API symmetry; currently unused, because the module's
 *         speech_close callback takes only the handle
 *
 * Returns the status reported by the module's speech_close callback.
 *
 * NOTE(review): a pool created by switch_core_speech_open (marked with
 * SWITCH_SPEECH_FLAG_FREE_POOL) is not released here; presumably it should
 * be destroyed after the module closes. Confirm against the core pool API.
 */
SWITCH_DECLARE(switch_status) switch_core_speech_close(switch_speech_handle *sh, unsigned int *flags)
{
	/* same precondition the other switch_core_speech_* wrappers enforce */
	assert(sh != NULL);

	return sh->speech_interface->speech_close(sh);
}
|
||||
|
||||
SWITCH_DECLARE(switch_status) switch_core_timer_init(switch_timer *timer, char *timer_name, int interval, int samples,
|
||||
switch_memory_pool *pool)
|
||||
|
|
|
@ -33,8 +33,7 @@
|
|||
#include <switch_ivr.h>
|
||||
|
||||
|
||||
/* TBD (Lots! there are only 2 functions in here lol) */
|
||||
|
||||
/* TBD (Lots! there are not very many functions in here lol) */
|
||||
|
||||
SWITCH_DECLARE(switch_status) switch_ivr_record_file(switch_core_session *session,
|
||||
char *file,
|
||||
|
|
|
@ -62,6 +62,7 @@ struct switch_loadable_module_container {
|
|||
switch_hash *application_hash;
|
||||
switch_hash *api_hash;
|
||||
switch_hash *file_hash;
|
||||
switch_hash *speech_hash;
|
||||
switch_memory_pool *pool;
|
||||
};
|
||||
|
||||
|
@ -218,6 +219,7 @@ SWITCH_DECLARE(switch_status) switch_loadable_module_init()
|
|||
switch_core_hash_init(&loadable_modules.application_hash, loadable_modules.pool);
|
||||
switch_core_hash_init(&loadable_modules.api_hash, loadable_modules.pool);
|
||||
switch_core_hash_init(&loadable_modules.file_hash, loadable_modules.pool);
|
||||
switch_core_hash_init(&loadable_modules.speech_hash, loadable_modules.pool);
|
||||
switch_core_hash_init(&loadable_modules.dialplan_hash, loadable_modules.pool);
|
||||
|
||||
while (apr_dir_read(&finfo, finfo_flags, module_dir_handle) == APR_SUCCESS) {
|
||||
|
@ -321,6 +323,15 @@ SWITCH_DECLARE(switch_status) switch_loadable_module_init()
|
|||
}
|
||||
}
|
||||
|
||||
if (new_module->interface->speech_interface) {
|
||||
const switch_speech_interface *ptr;
|
||||
|
||||
for (ptr = new_module->interface->speech_interface; ptr; ptr = ptr->next) {
|
||||
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Adding Speech interface '%s'\n", ptr->interface_name);
|
||||
switch_core_hash_insert(loadable_modules.speech_hash, (char *) ptr->interface_name, (void *) ptr);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -384,6 +395,11 @@ SWITCH_DECLARE(switch_file_interface *) switch_loadable_module_get_file_interfac
|
|||
return switch_core_hash_find(loadable_modules.file_hash, name);
|
||||
}
|
||||
|
||||
/*
 * Look up a speech interface by name in the loadable-module speech hash.
 * Returns whatever the hash lookup yields for that name.
 */
SWITCH_DECLARE(switch_speech_interface *) switch_loadable_module_get_speech_interface(char *name)
{
	switch_speech_interface *match = switch_core_hash_find(loadable_modules.speech_hash, name);

	return match;
}
|
||||
|
||||
SWITCH_DECLARE(int) switch_loadable_module_get_codecs(switch_memory_pool *pool, switch_codec_interface **array,
|
||||
int arraylen)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue