Add framework for speech modules (asr/tts)

git-svn-id: http://svn.freeswitch.org/svn/freeswitch/trunk@457 d0543943-73ff-0310-b7d9-9358b9ac24b2
This commit is contained in:
Anthony Minessale 2006-01-27 16:43:57 +00:00
parent 48ae14726b
commit 17e06cf6bc
7 changed files with 225 additions and 6 deletions

View File

@ -732,6 +732,72 @@ SWITCH_DECLARE(switch_status) switch_core_file_seek(switch_file_handle *fh, unsi
SWITCH_DECLARE(switch_status) switch_core_file_close(switch_file_handle *fh);
///\}
///\defgroup speeech ASR/TTS Functions
///\ingroup core1
///\{
/*!
\brief Open a speech handle on a named speech module
\param sh a speech handle to use
\param module_name the registered name of the speech module to use
\param flags asr/tts flags (stored on the handle and passed to the module's open function)
\param pool the pool to use (NULL for new pool; the handle is then flagged SWITCH_SPEECH_FLAG_FREE_POOL)
\return SWITCH_STATUS_GENERR if no speech module is registered under module_name,
the pool creation status if a new pool could not be created,
otherwise the status returned by the module's open function (SWITCH_STATUS_SUCCESS if the handle is opened)
*/
SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh, char *module_name, unsigned int flags, switch_memory_pool *pool);
/*!
\brief Feed data to the ASR module
\param sh the speech handle to feed (must not be NULL)
\param data the buffer of audio data
\param len pointer to the in-use size of the buffer
\param rate the rate of the audio (in hz)
\param flags flags in/out for fine tuning
\return the status returned by the module's feed function (SWITCH_STATUS_SUCCESS with possible new flags on success)
*/
SWITCH_DECLARE(switch_status) switch_core_speech_feed_asr(switch_speech_handle *sh, void *data, unsigned int *len, int rate, unsigned int *flags);
/*!
\brief Get text back from the ASR module
\param sh the speech handle to read (must not be NULL)
\param buf the buffer to insert the text into
\param buflen the max size of the buffer
\param flags flags in/out for fine tuning
\return the status returned by the module's interpret function (SWITCH_STATUS_SUCCESS with possible new flags on success)
*/
SWITCH_DECLARE(switch_status) switch_core_speech_interpret_asr(switch_speech_handle *sh, char *buf, unsigned int buflen, unsigned int *flags);
/*!
\brief Feed text to the TTS module
\param sh the speech handle to feed (must not be NULL)
\param text the buffer to write
\param flags flags in/out for fine tuning
\return the status returned by the module's feed function (SWITCH_STATUS_SUCCESS if successful)
*/
SWITCH_DECLARE(switch_status) switch_core_speech_feed_tts(switch_speech_handle *sh, char *text, unsigned int *flags);
/*!
\brief Read rendered audio from the TTS module
\param sh the speech handle to read (must not be NULL)
\param data the buffer to read to
\param datalen the max size / written size of the data
\param rate the rate of the read audio
\param flags flags in/out for fine tuning
\return the status returned by the module's read function (SWITCH_STATUS_SUCCESS with datalen adjusted to the bytes written if successful)
*/
SWITCH_DECLARE(switch_status) switch_core_speech_read_tts(switch_speech_handle *sh,
void *data,
unsigned int *datalen,
unsigned int *rate,
unsigned int *flags);
/*!
\brief Close an open speech handle
\param sh the speech handle to close
\param flags flags in/out for fine tuning (the current implementation does not forward these to the module)
\return the status returned by the module's close function (SWITCH_STATUS_SUCCESS if the speech handle was closed)
*/
SWITCH_DECLARE(switch_status) switch_core_speech_close(switch_speech_handle *sh, unsigned int *flags);
///\}
///\defgroup misc Misc
///\ingroup core1
///\{

View File

@ -71,6 +71,8 @@ struct switch_loadable_module_interface {
const switch_api_interface *api_interface;
/*! the table of file formats the module has implemented */
const switch_file_interface *file_interface;
/*! the table of speech interfaces the module has implemented */
const switch_speech_interface *speech_interface;
};
/*!
@ -133,6 +135,14 @@ SWITCH_DECLARE(switch_api_interface *) switch_loadable_module_get_api_interface(
*/
SWITCH_DECLARE(switch_file_interface *) switch_loadable_module_get_file_interface(char *name);
/*!
\brief Retrieve the speech interface by its registered name
\param name the name of the speech interface
\return the desired speech interface (switch_core_speech_open treats a NULL result as an unknown module)
*/
SWITCH_DECLARE(switch_speech_interface *) switch_loadable_module_get_speech_interface(char *name);
/*!
\brief Retrieve the list of loaded codecs into an array
\param pool the memory pool to use for the hash index

View File

@ -280,6 +280,46 @@ struct switch_file_handle {
};
/*! \brief Abstract interface to a speech (ASR/TTS) module; one entry per speech engine a module implements */
struct switch_speech_interface {
/*! the name of the interface (the key it is registered and looked up under) */
const char *interface_name;
/*! function to open the speech interface; receives the handle and the caller's asr/tts flags */
switch_status (*speech_open)(switch_speech_handle *sh,
unsigned int flags);
/*! function to close the speech interface */
switch_status (*speech_close)(switch_speech_handle *);
/*! function to feed audio to the ASR */
switch_status (*speech_feed_asr)(switch_speech_handle *sh, void *data, unsigned int *len, int rate, unsigned int *flags);
/*! function to read text from the ASR */
switch_status (*speech_interpret_asr)(switch_speech_handle *sh, char *buf, unsigned int buflen, unsigned int *flags);
/*! function to feed text to the TTS */
switch_status (*speech_feed_tts)(switch_speech_handle *sh, char *text, unsigned int *flags);
/*! function to read audio from the TTS */
switch_status (*speech_read_tts)(switch_speech_handle *sh,
void *data,
unsigned int *datalen,
unsigned int *rate,
unsigned int *flags);
/*! the next speech interface in the module's list (the module loader walks this chain until it reaches NULL) */
const struct switch_speech_interface *next;
};
/*! an abstract representation of an asr/tts speech handle. */
struct switch_speech_handle {
/*! the interface of the module that implemented the current speech interface */
const struct switch_speech_interface *speech_interface;
/*! flags to control behaviour (the flags given at open time, plus SWITCH_SPEECH_FLAG_FREE_POOL when the core created the pool) */
unsigned int flags;
/*! the handle's memory pool (either supplied by the caller or created when the handle is opened) */
switch_memory_pool *memory_pool;
/*! private data for the speech module to store handle specific info */
/* NOTE(review): `private` is a keyword in C++; if this header is meant to be included from C++ code, confirm this member name compiles there. */
void *private;
};
/* nobody has more setting than speex so we will let them set the standard */
/*! \brief Various codec settings (currently only relevant to speex) */
struct switch_codec_settings {

View File

@ -49,12 +49,15 @@ extern "C" {
\enum switch_ivr_option_t
\brief Possible options related to ivr functions
<pre>
SWITCH_IVR_OPTION_SYNC - synchronous (do everyting in the forground)
SWITCH_IVR_OPTION_NONE - nothing whatsoever
SWITCH_IVR_OPTION_ASYNC - Asynchronous (do things in the background when applicable)
SWITCH_IVR_OPTION_FILE - string argument implies a filename
</pre>
*/
typedef enum {
SWITCH_IVR_OPTION_SYNC = (1 << 0)
SWITCH_IVR_OPTION_NONE = 0,
SWITCH_IVR_OPTION_ASYNC = (1 << 0),
SWITCH_IVR_OPTION_FILE = (1 << 1)
} switch_ivr_option_t;
/*!
@ -215,6 +218,27 @@ typedef enum {
} switch_codec_flag;
/*!
  \enum switch_speech_flag
  \brief Speech related flags (bit values, may be OR'ed together)
<pre>
SWITCH_SPEECH_FLAG_TTS       = (1 << 0) - Interface can/should convert text to speech.
SWITCH_SPEECH_FLAG_ASR       = (1 << 1) - Interface can/should convert audio to text.
SWITCH_SPEECH_FLAG_HASTEXT   = (1 << 2) - Interface has text to read.
SWITCH_SPEECH_FLAG_PEEK      = (1 << 3) - Read data but do not erase it.
SWITCH_SPEECH_FLAG_FREE_POOL = (1 << 4) - Free interface's pool on destruction.
</pre>
*/
typedef enum {
	SWITCH_SPEECH_FLAG_TTS = (1 << 0),
	SWITCH_SPEECH_FLAG_ASR = (1 << 1),
	SWITCH_SPEECH_FLAG_HASTEXT = (1 << 2),
	SWITCH_SPEECH_FLAG_PEEK = (1 << 3),
	/* no trailing comma after the last enumerator: C89 and C++98 compilers reject it */
	SWITCH_SPEECH_FLAG_FREE_POOL = (1 << 4)
} switch_speech_flag;
/*!
\enum switch_codec_type
\brief Codec types
@ -352,6 +376,8 @@ typedef struct switch_io_event_hooks switch_io_event_hooks;
typedef struct switch_buffer switch_buffer;
typedef struct switch_codec_settings switch_codec_settings;
typedef struct switch_config switch_config;
typedef struct switch_speech_handle switch_speech_handle;
typedef struct switch_speech_interface switch_speech_interface;
typedef void (*switch_application_function)(switch_core_session *, char *);
typedef void (*switch_event_callback_t)(switch_event *);
typedef switch_caller_extension *(*switch_dialplan_hunt_function)(switch_core_session *);

View File

@ -388,7 +388,7 @@ SWITCH_DECLARE(switch_status) switch_core_file_open(switch_file_handle *fh, char
if ((status = switch_core_new_memory_pool(&fh->memory_pool)) != SWITCH_STATUS_SUCCESS) {
return status;
}
switch_set_flag(fh, SWITCH_TIMER_FLAG_FREE_POOL);
switch_set_flag(fh, SWITCH_FILE_FLAG_FREE_POOL);
}
return fh->file_interface->file_open(fh, file_path);
@ -419,6 +419,68 @@ SWITCH_DECLARE(switch_status) switch_core_file_close(switch_file_handle *fh)
return fh->file_interface->file_close(fh);
}
/*
 * Open a speech handle on the speech module registered as module_name.
 *
 * sh          - handle to initialise; must not be NULL
 * module_name - registered name of the speech module
 * flags       - asr/tts flags, stored on the handle and passed to the module
 * pool        - pool for the handle, or NULL to have a new one created (the
 *               handle is then marked SWITCH_SPEECH_FLAG_FREE_POOL)
 *
 * Returns SWITCH_STATUS_GENERR when the module is unknown, the pool creation
 * status when a new pool cannot be created, otherwise whatever the module's
 * speech_open() returns.
 */
SWITCH_DECLARE(switch_status) switch_core_speech_open(switch_speech_handle *sh,
													  char *module_name,
													  unsigned int flags,
													  switch_memory_pool *pool)
{
	switch_status status;

	/* Same precondition the feed/interpret/read entry points enforce. */
	assert(sh != NULL);

	if (!(sh->speech_interface = switch_loadable_module_get_speech_interface(module_name))) {
		switch_console_printf(SWITCH_CHANNEL_CONSOLE, "invalid speech module [%s]!\n", module_name);
		return SWITCH_STATUS_GENERR;
	}

	sh->flags = flags;

	if (pool) {
		sh->memory_pool = pool;
	} else {
		if ((status = switch_core_new_memory_pool(&sh->memory_pool)) != SWITCH_STATUS_SUCCESS) {
			return status;
		}
		/* Remember that the pool belongs to the handle, not to the caller. */
		switch_set_flag(sh, SWITCH_SPEECH_FLAG_FREE_POOL);
	}

	/*
	 * NOTE(review): if speech_open() fails after a pool was created above,
	 * that pool is not released in this function - confirm who cleans it up.
	 */
	return sh->speech_interface->speech_open(sh, flags);
}
/* Hand a buffer of audio to the ASR function of the module bound to this handle. */
SWITCH_DECLARE(switch_status) switch_core_speech_feed_asr(switch_speech_handle *sh, void *data, unsigned int *len, int rate, unsigned int *flags)
{
	const struct switch_speech_interface *engine;
	switch_status result;

	assert(sh != NULL);

	engine = sh->speech_interface;
	result = engine->speech_feed_asr(sh, data, len, rate, flags);

	return result;
}
/* Ask the module bound to this handle for recognised text, written into buf (at most buflen). */
SWITCH_DECLARE(switch_status) switch_core_speech_interpret_asr(switch_speech_handle *sh, char *buf, unsigned int buflen, unsigned int *flags)
{
	const struct switch_speech_interface *engine;
	switch_status result;

	assert(sh != NULL);

	engine = sh->speech_interface;
	result = engine->speech_interpret_asr(sh, buf, buflen, flags);

	return result;
}
/* Pass text to the TTS function of the module bound to this handle. */
SWITCH_DECLARE(switch_status) switch_core_speech_feed_tts(switch_speech_handle *sh, char *text, unsigned int *flags)
{
	const struct switch_speech_interface *engine;
	switch_status result;

	assert(sh != NULL);

	engine = sh->speech_interface;
	result = engine->speech_feed_tts(sh, text, flags);

	return result;
}
/* Fetch rendered audio from the TTS function of the module bound to this handle. */
SWITCH_DECLARE(switch_status) switch_core_speech_read_tts(switch_speech_handle *sh,
														  void *data,
														  unsigned int *datalen,
														  unsigned int *rate,
														  unsigned int *flags)
{
	const struct switch_speech_interface *engine;
	switch_status result;

	assert(sh != NULL);

	engine = sh->speech_interface;
	result = engine->speech_read_tts(sh, data, datalen, rate, flags);

	return result;
}
/*
 * Close a speech handle by calling the owning module's speech_close().
 *
 * sh    - the handle to close; must not be NULL
 * flags - not forwarded: the module's speech_close() callback only takes the
 *         handle
 *
 * Returns whatever the module's speech_close() returns.
 */
SWITCH_DECLARE(switch_status) switch_core_speech_close(switch_speech_handle *sh, unsigned int *flags)
{
	/* Same precondition the feed/interpret/read entry points enforce. */
	assert(sh != NULL);

	/*
	 * NOTE(review): a pool created by switch_core_speech_open() (handle marked
	 * SWITCH_SPEECH_FLAG_FREE_POOL) is not released here - confirm whether the
	 * module or the caller is expected to destroy it.
	 */
	return sh->speech_interface->speech_close(sh);
}
SWITCH_DECLARE(switch_status) switch_core_timer_init(switch_timer *timer, char *timer_name, int interval, int samples,
switch_memory_pool *pool)

View File

@ -33,8 +33,7 @@
#include <switch_ivr.h>
/* TBD (Lots! there are only 2 functions in here lol) */
/* TBD (Lots! there are not very many functions in here lol) */
SWITCH_DECLARE(switch_status) switch_ivr_record_file(switch_core_session *session,
char *file,

View File

@ -62,6 +62,7 @@ struct switch_loadable_module_container {
switch_hash *application_hash;
switch_hash *api_hash;
switch_hash *file_hash;
switch_hash *speech_hash;
switch_memory_pool *pool;
};
@ -218,6 +219,7 @@ SWITCH_DECLARE(switch_status) switch_loadable_module_init()
switch_core_hash_init(&loadable_modules.application_hash, loadable_modules.pool);
switch_core_hash_init(&loadable_modules.api_hash, loadable_modules.pool);
switch_core_hash_init(&loadable_modules.file_hash, loadable_modules.pool);
switch_core_hash_init(&loadable_modules.speech_hash, loadable_modules.pool);
switch_core_hash_init(&loadable_modules.dialplan_hash, loadable_modules.pool);
while (apr_dir_read(&finfo, finfo_flags, module_dir_handle) == APR_SUCCESS) {
@ -321,6 +323,15 @@ SWITCH_DECLARE(switch_status) switch_loadable_module_init()
}
}
if (new_module->interface->speech_interface) {
const switch_speech_interface *ptr;
for (ptr = new_module->interface->speech_interface; ptr; ptr = ptr->next) {
switch_console_printf(SWITCH_CHANNEL_CONSOLE, "Adding Speech interface '%s'\n", ptr->interface_name);
switch_core_hash_insert(loadable_modules.speech_hash, (char *) ptr->interface_name, (void *) ptr);
}
}
}
}
@ -384,6 +395,11 @@ SWITCH_DECLARE(switch_file_interface *) switch_loadable_module_get_file_interfac
return switch_core_hash_find(loadable_modules.file_hash, name);
}
/* Look up a speech interface by its registered name in the loadable-module speech hash. */
SWITCH_DECLARE(switch_speech_interface *) switch_loadable_module_get_speech_interface(char *name)
{
	switch_speech_interface *found = switch_core_hash_find(loadable_modules.speech_hash, name);

	return found;
}
SWITCH_DECLARE(int) switch_loadable_module_get_codecs(switch_memory_pool *pool, switch_codec_interface **array,
int arraylen)
{