Navit SVN

/work/compile/navit/src/navit/support/espeak/speak_lib.h

00001 #ifndef SPEAK_LIB_H
00002 #define SPEAK_LIB_H
00003 /***************************************************************************
00004  *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
00005  *   email: jonsd@users.sourceforge.net                                    *
00006  *                                                                         *
00007  *   This program is free software; you can redistribute it and/or modify  *
00008  *   it under the terms of the GNU General Public License as published by  *
00009  *   the Free Software Foundation; either version 3 of the License, or     *
00010  *   (at your option) any later version.                                   *
00011  *                                                                         *
00012  *   This program is distributed in the hope that it will be useful,       *
00013  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00014  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00015  *   GNU General Public License for more details.                          *
00016  *                                                                         *
00017  *   You should have received a copy of the GNU General Public License     *
00018  *   along with this program; if not, see:                                 *
00019  *               <http://www.gnu.org/licenses/>.                           *
00020  ***************************************************************************/
00021 
00022 
00023 /*************************************************************/
00024 /* This is the header file for the library version of espeak */
00025 /*                                                           */
00026 /*************************************************************/
00027 
00028 #include <stdio.h>
00029 
00030 #define ESPEAK_API_REVISION  5   /* API revision declared by this header; the per-revision history follows */
00031 /*
00032 Revision 2
00033    Added parameter "options" to espeak_Initialize()
00034 
00035 Revision 3
00036    Added espeakWORDGAP to  espeak_PARAMETER
00037 
00038 Revision 4
00039    Added flags parameter to espeak_CompileDictionary()
00040 
00041 Revision 5
00042    Added espeakCHARS_16BIT
00043 */
00044          /********************/
00045          /*  Initialization  */
00046          /********************/
00047 
00048 
00049 typedef enum {  // Kinds of event reported through espeak_EVENT.type
00050   espeakEVENT_LIST_TERMINATED = 0, // Retrieval mode: terminates the event list.
00051   espeakEVENT_WORD = 1,            // Start of word
00052   espeakEVENT_SENTENCE,            // Start of sentence
00053   espeakEVENT_MARK,                // <mark> element in the text
00054   espeakEVENT_PLAY,                // <audio> element; the calling program should play the named sound file
00055   espeakEVENT_END,                 // End of sentence or clause
00056   espeakEVENT_MSG_TERMINATED,      // End of message; this is the last event for a message
00057   espeakEVENT_PHONEME              // Phoneme; only reported if bit 0 of "options" was set in espeak_Initialize()
00058 } espeak_EVENT_TYPE;
00059 
00060 
00061 
00062 typedef struct {  // One event record, as passed to the synth callback in its "events" array
00063         espeak_EVENT_TYPE type;         // which kind of event this record describes
00064         unsigned int unique_identifier; // message identifier (or 0 for key or character)
00065         int text_position;    // the number of characters from the start of the text
00066         int length;           // word length, in characters (for espeakEVENT_WORD)
00067         int audio_position;   // the time in ms within the generated speech output data
00068         int sample;           // sample id (internal use)
00069         void* user_data;      // pointer supplied by the calling program
00070         union {
00071                 int number;        // used for WORD and SENTENCE events. For PHONEME events this is the phoneme mnemonic.
00072                 const char *name;  // used for MARK and PLAY events.  UTF8 string
00073         } id;                      // which member is meaningful depends on "type"
00074 } espeak_EVENT;
00075 /* 
00076    When a message is supplied to espeak_Synth, the request is buffered and espeak_Synth returns. When the message is really processed, the callback function will be repeatedly called.
00077 
00078 
00079    In RETRIEVAL mode, the callback function supplies to the calling program the audio data and an event list terminated by 0 (LIST_TERMINATED).
00080 
00081    In PLAYBACK mode, the callback function is called as soon as an event happens.
00082 
00083    For example suppose that the following message is supplied to espeak_Synth: 
00084    "hello, hello."
00085 
00086 
00087    * Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function :
00088 
00089    ** Block 1:
00090    <audio data> + 
00091    List of events: SENTENCE + WORD + LIST_TERMINATED
00092  
00093    ** Block 2:
00094    <audio data> +
00095    List of events: WORD + END + LIST_TERMINATED
00096 
00097    ** Block 3:
00098    no audio data
00099    List of events: MSG_TERMINATED + LIST_TERMINATED
00100 
00101 
00102    * Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function:
00103 
00104    ** SENTENCE
00105    ** WORD (call when the sounds are actually played)
00106    ** WORD
00107    ** END (call when the end of sentence is actually played.)
00108    ** MSG_TERMINATED
00109 
00110 
00111    The MSG_TERMINATED event is the last event. It can inform the calling program to clear the user data related to the message.
00112    So if the synthesis must be stopped, the callback function is called for each pending message with the MSG_TERMINATED event.
00113 
00114    A MARK event indicates a <mark> element in the text.
00115    A PLAY event indicates an <audio> element in the text, for which the calling program should play the named sound file.
00116 */
00117 
00118 
00119 
00120 typedef enum {  // Unit in which the "position" argument of espeak_Synth() is counted
00121         POS_CHARACTER = 1,      // position is a number of characters
00122         POS_WORD,               // position is a number of words
00123         POS_SENTENCE            // position is a number of sentences
00124 } espeak_POSITION_TYPE;
00125 
00126 
00127 typedef enum {  // Output mode, passed as "output" to espeak_Initialize()
00128         /* PLAYBACK mode: eSpeak plays the audio data itself and supplies events to the calling program */
00129         AUDIO_OUTPUT_PLAYBACK, 
00130 
00131         /* RETRIEVAL mode: supplies audio data and events to the calling program (via the synth callback) */
00132         AUDIO_OUTPUT_RETRIEVAL,
00133  
00134         /* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */
00135         AUDIO_OUTPUT_SYNCHRONOUS,
00136 
00137         /* Synchronous playback (presumably PLAYBACK that does not return until completed -- TODO confirm) */
00138         AUDIO_OUTPUT_SYNCH_PLAYBACK
00139 
00140 } espeak_AUDIO_OUTPUT;
00141 
00142 
00143 typedef enum {  // Status codes returned by the espeak_* functions
00144         EE_OK=0,                // operation achieved
00145         EE_INTERNAL_ERROR=-1,   // internal failure; espeak_Initialize() also returns this value (-1) on error
00146         EE_BUFFER_FULL=1,       // the command can not be buffered; try calling the function again after a while
00147         EE_NOT_FOUND=2          // NOTE(review): no function comment in this listing mentions this code -- confirm when it is returned
00148 } espeak_ERROR;
00149 
00150 
00151 #ifdef __cplusplus
00152 extern "C"
00153 #endif
00154 int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options);  // returns the sample rate in Hz, or -1 (EE_INTERNAL_ERROR)
00155 /* Must be called before any synthesis functions are called.
00156    output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.
00157 
00158    buflength:  The length in ms of sound buffers passed to the SynthCallback function.
00159 
00160    path: The directory which contains the espeak-data directory, or NULL for the default location.
00161 
00162    options: bit 0: 1=allow espeakEVENT_PHONEME events.
00163 
00164 
00165    Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR).
00166 */
00167 
00168 typedef int (t_espeak_callback)(short*, int, espeak_EVENT*);  // function type of the synth callback: (wav, numsamples, events); returns 0=continue, 1=abort
00169 
00170 #ifdef __cplusplus
00171 extern "C"
00172 #endif
00173 void espeak_SetSynthCallback(t_espeak_callback* SynthCallback);  // registers the function called when a buffer of speech sound data has been produced
00174 /* Must be called before any synthesis functions are called.
00175    This specifies a function in the calling program which is called when a buffer of
00176    speech sound data has been produced. 
00177 
00178 
00179    The callback function is of the form:
00180 
00181 int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);
00182 
00183    wav:  is the speech sound data which has been produced.
00184       NULL indicates that the synthesis has been completed.
00185 
00186    numsamples: is the number of entries in wav.  This number may vary, may be less than
00187       the value implied by the buflength parameter given in espeak_Initialize, and may
00188       sometimes be zero (which does NOT indicate end of synthesis).
00189 
00190    events: an array of espeak_EVENT items which indicate word and sentence events, and
00191       also the occurrence of <mark> and <audio> elements within the text.  The list of
00192       events is terminated by an event of type = 0.
00193 
00194 
00195    Callback returns: 0=continue synthesis,  1=abort synthesis.
00196 */
00197 
00198 #ifdef __cplusplus
00199 extern "C"
00200 #endif
00201 void espeak_SetUriCallback(int (*UriCallback)(int, const char*, const char*));  // registers the callback invoked for <audio> elements: UriCallback(type, uri, base)
00202 /* This function may be called before synthesis functions are used, in order to deal with
00203    <audio> tags.  It specifies a callback function which is called when an <audio> element is
00204    encountered and allows the calling program to indicate whether the sound file which
00205    is specified in the <audio> element is available and is to be played.
00206 
00207    The callback function is of the form:
00208 
00209 int UriCallback(int type, const char *uri, const char *base);
00210 
00211    type:  type of callback event.  Currently only 1= <audio> element
00212 
00213    uri:   the "src" attribute from the <audio> element
00214 
00215    base:  the "xml:base" attribute (if any) from the <speak> element
00216 
00217    Return: 1=don't play the sound, but speak the text alternative.
00218            0=place a PLAY event in the event list at the point where the <audio> element
00219              occurs.  The calling program can then play the sound at that point.
00220 */
00221 
00222 
00223          /********************/
00224          /*    Synthesis     */
00225          /********************/
00226 
00227 
00228 #define espeakCHARS_AUTO   0    // 8 bit or UTF8 (this is the default)
00229 #define espeakCHARS_UTF8   1    // UTF8 encoding
00230 #define espeakCHARS_8BIT   2    // the 8 bit ISO-8859 character set for the particular language
00231 #define espeakCHARS_WCHAR  3    // wide characters (wchar_t)
00232 #define espeakCHARS_16BIT  4    // added in API revision 5; presumably 16-bit character codes -- TODO confirm
00233 
00234 #define espeakSSML        0x10     // elements within < > are treated as SSML elements, or ignored if not recognised
00235 #define espeakPHONEMES    0x100    // text within [[ ]] is treated as phoneme codes
00236 #define espeakENDPAUSE    0x1000   // add a sentence pause at the end of the text
00237 #define espeakKEEP_NAMEDATA 0x2000 // NOTE(review): not described anywhere in this header -- confirm meaning
00238 
00239 #ifdef __cplusplus
00240 extern "C"
00241 #endif
00242 espeak_ERROR espeak_Synth(const void *text,   // text to be spoken, terminated by a zero character
00243         size_t size,                          // size of the text data in bytes (equal to or greater than)
00244         unsigned int position,                // where speaking starts; 0 = from the start of the text
00245         espeak_POSITION_TYPE position_type,   // whether "position" counts characters, words or sentences
00246         unsigned int end_position,            // character position at which speaking stops; 0 = no end position
00247         unsigned int flags,                   // one espeakCHARS_* value, optionally OR'd with espeakSSML / espeakPHONEMES / espeakENDPAUSE
00248         unsigned int* unique_identifier,      // message identifier, for identifying later data supplied to the callback
00249         void* user_data);                     // pointer which will be passed to the callback function
00250 /* Synthesize speech for the specified text.  The speech sound data is passed to the calling
00251    program in buffers by means of the callback function specified by espeak_SetSynthCallback(). The command is asynchronous: it is internally buffered and returns as soon as possible. If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound data are played by eSpeak.
00252 
00253    text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
00254       wide characters (wchar_t), or UTF8 encoding.  Which of these is determined by the "flags"
00255       parameter.
00256 
00257    size: Equal to (or greater than) the size of the text data, in bytes.  This is used in order
00258       to allocate internal storage space for the text.  This value is not used for
00259       AUDIO_OUTPUT_SYNCHRONOUS mode.
00260 
00261    position:  The position in the text where speaking starts. Zero indicates speak from the
00262       start of the text.
00263 
00264    position_type:  Determines whether "position" is a number of characters, words, or sentences.
00265       Values: POS_CHARACTER, POS_WORD, POS_SENTENCE (see espeak_POSITION_TYPE).
00266 
00267    end_position:  If set, this gives a character position at which speaking will stop.  A value
00268       of zero indicates no end position.
00269 
00270    flags:  These may be OR'd together:
00271       Type of character codes, one of:
00272          espeakCHARS_UTF8     UTF8 encoding
00273          espeakCHARS_8BIT     The 8 bit ISO-8859 character set for the particular language.
00274          espeakCHARS_AUTO     8 bit or UTF8  (this is the default)
00275          espeakCHARS_WCHAR    Wide characters (wchar_t)
00276 
00277       espeakSSML   Elements within < > are treated as SSML elements, or if not recognised are ignored.
00278 
00279       espeakPHONEMES  Text within [[ ]] is treated as phoneme codes (in espeak's Kirshenbaum encoding).
00280 
00281       espeakENDPAUSE  If set then a sentence pause is added at the end of the text.  If not set then
00282          this pause is suppressed.
00283 
00284    unique_identifier: message identifier; helpful for identifying later 
00285      data supplied to the callback.
00286 
00287    user_data: pointer which will be passed to the callback function.
00288 
00289    Return: EE_OK: operation achieved 
00290            EE_BUFFER_FULL: the command can not be buffered; 
00291              you may try after a while to call the function again.
00292            EE_INTERNAL_ERROR.
00293 */
00294 
00295 #ifdef __cplusplus
00296 extern "C"
00297 #endif
00298 espeak_ERROR espeak_Synth_Mark(const void *text,  // text to be spoken; see espeak_Synth()
00299         size_t size,                              // see espeak_Synth()
00300         const char *index_mark,                   // "name" attribute of the <mark> element where synthesis starts (UTF8 string)
00301         unsigned int end_position,                // see espeak_Synth()
00302         unsigned int flags,                       // see espeak_Synth()
00303         unsigned int* unique_identifier,          // see espeak_Synth()
00304         void* user_data);                         // see espeak_Synth()
00305 /* Synthesize speech for the specified text.  Similar to espeak_Synth() but the start position is
00306    specified by the name of a <mark> element in the text.
00307 
00308    index_mark:  The "name" attribute of a <mark> element within the text which specified the
00309       point at which synthesis starts.  UTF8 string.
00310 
00311    For the other parameters, see espeak_Synth()
00312 
00313    Return: EE_OK: operation achieved 
00314            EE_BUFFER_FULL: the command can not be buffered; 
00315              you may try after a while to call the function again.
00316            EE_INTERNAL_ERROR.
00317 */
00318 
00319 #ifdef __cplusplus
00320 extern "C"
00321 #endif
00322 espeak_ERROR espeak_Key(const char *key_name);  // speaks the name of a keyboard key
00323 /* Speak the name of a keyboard key.
00324    If key_name is a single character, it speaks the name of the character.
00325    Otherwise, it speaks key_name as a text string.
00326 
00327    Return: EE_OK: operation achieved 
00328            EE_BUFFER_FULL: the command can not be buffered; 
00329              you may try after a while to call the function again.
00330            EE_INTERNAL_ERROR.
00331 */
00332 
00333 #ifdef __cplusplus
00334 extern "C"
00335 #endif
00336 espeak_ERROR espeak_Char(wchar_t character);  // NOTE(review): in C, <stdio.h> (the only include here) is not guaranteed to declare wchar_t; C callers may need <stddef.h> or <wchar.h> first
00337 /* Speak the name of the given character 
00338 
00339    Return: EE_OK: operation achieved 
00340            EE_BUFFER_FULL: the command can not be buffered; 
00341              you may try after a while to call the function again.
00342            EE_INTERNAL_ERROR.
00343 */
00344 
00345 
00346 
00347 
00348          /***********************/
00349          /*  Speech Parameters  */
00350          /***********************/
00351 
00352 typedef enum {  // Parameter selectors for espeak_SetParameter() / espeak_GetParameter()
00353   espeakSILENCE=0, /* internal use */
00354   espeakRATE=1,        // speaking speed in words per minute
00355   espeakVOLUME=2,      // volume in range 0-100, 0=silence
00356   espeakPITCH=3,       // base pitch, range 0-100, 50=normal
00357   espeakRANGE=4,       // pitch range, range 0-100, 0=monotone, 50=normal
00358   espeakPUNCTUATION=5, // which punctuation characters to announce; value in espeak_PUNCT_TYPE
00359   espeakCAPITALS=6,    // how capital letters are announced: 0=none, 1=sound icon, 2=spelling, 3 or higher=pitch raise in Hz
00360   espeakWORDGAP=7,     // pause between words, units of 10 ms (at the default speed)
00361   espeakOPTIONS=8,   // reserved for misc. options.  not yet used
00362   espeakINTONATION=9,
00363 
00364   espeakRESERVED1=10,
00365   espeakRESERVED2=11,
00366   espeakEMPHASIS,   /* internal use */
00367   espeakLINELENGTH, /* internal use */
00368   espeakVOICETYPE,  // internal, 1=mbrola
00369   N_SPEECH_PARAM    /* last enum; its value (15) is the number of parameters above */
00370 } espeak_PARAMETER;
00371 
00372 typedef enum {  // Values for the espeakPUNCTUATION parameter
00373   espeakPUNCT_NONE=0,   // announce no punctuation characters
00374   espeakPUNCT_ALL=1,    // announce all punctuation characters
00375   espeakPUNCT_SOME=2    // announce only the characters given to espeak_SetPunctuationList()
00376 } espeak_PUNCT_TYPE;
00377 
00378 #ifdef __cplusplus
00379 extern "C"
00380 #endif
00381 espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);  // relative=0: "value" is absolute; relative=1: "value" is relative
00382 /* Sets the value of the specified parameter.
00383    relative=0   Sets the absolute value of the parameter.
00384    relative=1   Sets a relative value of the parameter.
00385 
00386    parameter:
00387       espeakRATE:    speaking speed in words per minute.
00388 
00389       espeakVOLUME:  volume in range 0-100    0=silence
00390 
00391       espeakPITCH:   base pitch, range 0-100.  50=normal
00392 
00393       espeakRANGE:   pitch range, range 0-100. 0-monotone, 50=normal
00394 
00395       espeakPUNCTUATION:  which punctuation characters to announce:
00396          value in espeak_PUNCT_TYPE (none, all, some), 
00397          see espeak_SetPunctuationList() to specify which characters are announced.
00398 
00399       espeakCAPITALS: announce capital letters by:
00400          0=none,
00401          1=sound icon,
00402          2=spelling,
00403          3 or higher, by raising pitch.  This value gives the amount in Hz by which the pitch
00404             of a word is raised to indicate it has a capital letter.
00405 
00406       espeakWORDGAP:  pause between words, units of 10 ms (at the default speed)
00407 
00408    Return: EE_OK: operation achieved 
00409            EE_BUFFER_FULL: the command can not be buffered; 
00410              you may try after a while to call the function again.
00411            EE_INTERNAL_ERROR.
00412 */
00413 
00414 #ifdef __cplusplus
00415 extern "C"
00416 #endif
00417 int espeak_GetParameter(espeak_PARAMETER parameter, int current);  // current=0: default value; current=1: current value
00418 /* current=0  Returns the default value of the specified parameter.
00419    current=1  Returns the current value of the specified parameter, as set by espeak_SetParameter()
00420 */
00421 
00422 #ifdef __cplusplus
00423 extern "C"
00424 #endif
00425 espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist);  // punctlist: character codes terminated by a zero character
00426 /* Specifies a list of punctuation characters whose names are to be spoken when the
00427    value of the Punctuation parameter is set to "some".
00428 
00429    punctlist:  A list of character codes, terminated by a zero character.
00430 
00431    Return: EE_OK: operation achieved 
00432            EE_BUFFER_FULL: the command can not be buffered; 
00433              you may try after a while to call the function again.
00434            EE_INTERNAL_ERROR.
00435 */
00436 
00437 #ifdef __cplusplus
00438 extern "C"
00439 #endif
00440 void espeak_SetPhonemeTrace(int value, FILE *stream);  // value: 0=none, 1=phoneme symbols, 2=symbols plus translation trace; stream=NULL uses stdout
00441 /* Controls the output of phoneme symbols for the text
00442    value=0  No phoneme output (default)
00443    value=1  Output the translated phoneme symbols for the text
00444    value=2  as (1), but also output a trace of how the translation was done (matching rules and list entries)
00445 
00446    stream   output stream for the phoneme symbols (and trace).  If stream=NULL then it uses stdout.
00447 */
00448 
00449 #ifdef __cplusplus
00450 extern "C"
00451 #endif
00452 void espeak_CompileDictionary(const char *path, FILE *log, int flags);  // compiles the pronunciation dictionary for the currently selected voice's language
00453 /* Compile pronunciation dictionary for a language which corresponds to the currently
00454    selected voice.  The required voice should be selected before calling this function.
00455 
00456    path:  The directory which contains the language's '_rules' and '_list' files.
00457           'path' should end with a path separator character ('/').
00458    log:   Stream for error reports and statistics information. If log=NULL then stderr will be used.
00459 
00460    flags:  Bit 0: include source line information for debug purposes (This is displayed with the
00461           -X command line option).
00462 */
00463          /***********************/
00464          /*   Voice Selection   */
00465          /***********************/
00466 
00467 
00468 // voice table entry: describes an available voice, or selection criteria for espeak_SetVoiceByProperties()
00469 typedef struct {
00470         const char *name;      // a given name for this voice. UTF8 string.
00471         const char *languages;       // list of pairs of (byte) priority + (string) language (and dialect qualifier); as selection criteria, a single language name with no priority byte
00472         const char *identifier;      // the filename for this voice within espeak-data/voices
00473         unsigned char gender;  // 0=none, 1=male, 2=female
00474         unsigned char age;     // 0=not specified, or age in years
00475         unsigned char variant; // only used when passed as a parameter to espeak_SetVoiceByProperties: index into the sorted candidate list, 0=best match
00476         unsigned char xx1;     // for internal use 
00477         int score;       // for internal use
00478         void *spare;     // for internal use
00479 } espeak_VOICE;
00480 
00481 /* Note: The espeak_VOICE structure is used for two purposes:
00482   1.  To return the details of the available voices.
00483   2.  As a parameter to  espeak_SetVoiceByProperties() in order to specify selection criteria.
00484 
00485    In (1), the "languages" field consists of a list of (UTF8) language names for which this voice
00486    may be used, each language name in the list is terminated by a zero byte and is also preceded by
00487    a single byte which gives a "priority" number.  The list of languages is terminated by an
00488    additional zero byte.
00489 
00490    A language name consists of a language code, optionally followed by one or more qualifier (dialect)
00491    names separated by hyphens (eg. "en-uk").  A voice might, for example, have languages "en-uk" and
00492    "en".  Even without "en" listed, voice would still be selected for the "en" language (because
00493    "en-uk" is related) but at a lower priority.
00494 
00495    The priority byte indicates how the voice is preferred for the language. A low number indicates a
00496    more preferred voice, a higher number indicates a less preferred voice.
00497 
00498    In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding
00499    priority byte.
00500 */
00501 
00502 #ifdef __cplusplus
00503 extern "C"
00504 #endif
00505 const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec);  // returns a NULL-terminated array of voice pointers; voice_spec=NULL lists all voices
00506 /* Reads the voice files from espeak-data/voices and creates an array of espeak_VOICE pointers.
00507    The list is terminated by a NULL pointer
00508 
00509    If voice_spec is NULL then all voices are listed.
00510    If voice_spec is given, then only the voices which are compatible with the voice_spec
00511    are listed, and they are listed in preference order.
00512 */
00513 
00514 #ifdef __cplusplus
00515 extern "C"
00516 #endif
00517 espeak_ERROR espeak_SetVoiceByName(const char *name);  // selects the voice whose "name" field matches (UTF8 string); language is not considered
00518 /* Searches for a voice with a matching "name" field.  Language is not considered.
00519    "name" is a UTF8 string.
00520 
00521    Return: EE_OK: operation achieved 
00522            EE_BUFFER_FULL: the command can not be buffered; 
00523              you may try after a while to call the function again.
00524            EE_INTERNAL_ERROR.
00525 */
00526 
00527 #ifdef __cplusplus
00528 extern "C"
00529 #endif
00530 espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec);  // selects a voice using the criteria set in voice_spec
00531 /* An espeak_VOICE structure is used to pass criteria to select a voice.  Any of the following
00532    fields may be set:
00533 
00534    name     NULL, or a voice name
00535 
00536    languages  NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"
00537 
00538    gender   0=not specified, 1=male, 2=female
00539 
00540    age      0=not specified, or an age in years
00541 
00542    variant  After a list of candidates is produced, scored and sorted, "variant" is used to index
00543             that list and choose a voice.
00544             variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc
00545 */
00546 
00547 #ifdef __cplusplus
00548 extern "C"
00549 #endif
00550 espeak_VOICE *espeak_GetCurrentVoice(void);  // returns the espeak_VOICE data for the currently selected voice
00551 /* Returns the espeak_VOICE data for the currently selected voice.
00552    This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>
00553 */
00554 
00555 #ifdef __cplusplus
00556 extern "C"
00557 #endif
00558 espeak_ERROR espeak_Cancel(void);  // stops synthesis and audio output of the current text
00559 /* Immediately stop synthesis and audio output of the current text. When this
00560    function returns, the audio output is fully stopped and the synthesizer is ready to
00561    synthesize a new message.
00562 
00563    Return: EE_OK: operation achieved 
00564            EE_INTERNAL_ERROR.
00565 */
00566 
00567 
00568 #ifdef __cplusplus
00569 extern "C"
00570 #endif
00571 int espeak_IsPlaying(void);  // 1 if audio is played, 0 otherwise
00572 /* Returns 1 if audio is played, 0 otherwise.
00573 */
00574 
00575 #ifdef __cplusplus
00576 extern "C"
00577 #endif
00578 espeak_ERROR espeak_Synchronize(void);  // returns when all data have been spoken
00579 /* This function returns when all data have been spoken.
00580    Return: EE_OK: operation achieved 
00581            EE_INTERNAL_ERROR.
00582 */
00583 
00584 #ifdef __cplusplus
00585 extern "C"
00586 #endif
00587 espeak_ERROR espeak_Terminate(void);  // the last espeak function to be called
00588 /* last function to be called.
00589    Return: EE_OK: operation achieved 
00590            EE_INTERNAL_ERROR.
00591 */
00592 
00593 
00594 #ifdef __cplusplus
00595 extern "C"
00596 #endif
00597 const char *espeak_Info(void);  // returns the version number string; takes no arguments
00598 /* Returns the version number string.
00599    NOTE: as declared here the function takes no arguments; the parameter reserved for future use (to be set to NULL) is not part of this prototype.
00600 */
00601 #endif