Navit SVN

/work/compile/navit/src/navit/support/espeak/translate.h

00001 /***************************************************************************
00002  *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
00003  *   email: jonsd@users.sourceforge.net                                    *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU General Public License as published by  *
00007  *   the Free Software Foundation; either version 3 of the License, or     *
00008  *   (at your option) any later version.                                   *
00009  *                                                                         *
00010  *   This program is distributed in the hope that it will be useful,       *
00011  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00012  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00013  *   GNU General Public License for more details.                          *
00014  *                                                                         *
00015  *   You should have received a copy of the GNU General Public License     *
00016  *   along with this program; if not, see:                                 *
00017  *               <http://www.gnu.org/licenses/>.                           *
00018  ***************************************************************************/
00019 
00020 
/*
 * Combine two characters into one integer for the translator name:
 * c1 goes into bits 8 and up, c2 is added into the low bits.
 *
 * Both arguments and the whole expansion are parenthesized so that the
 * macro behaves as a single operand next to any operator (for example
 * L(a,b) * 2 or x - L(a,b)) and so that an argument which is itself an
 * expression (for example a & b) is evaluated as a unit.
 */
#define L(c1,c2)  (((c1)<<8)+(c2))
00022 
00023 #define CTRL_EMBEDDED    0x01         // control character at the start of an embedded command (see also the ctrl_embedded variable)
00024 #define REPLACED_E       'E'          // 'e' replaced by silent e
00025 
00026 #define N_WORD_PHONEMES  160          // max phonemes in a word
00027 #define N_WORD_BYTES     160          // max bytes for the UTF8 characters in a word
00028 #define N_CLAUSE_WORDS   300          // max words in a clause
00029 #define N_RULE_GROUP2    120          // max num of two-letter rule chains; sizes Translator.groups2[] and groups2_name[]
00030 #define N_HASH_DICT     1024          // number of entries in Translator.dict_hashtab[]
00031 #define N_CHARSETS        20          // number of entries in charsets[]
00032 #define N_LETTER_GROUPS   26          // number of entries in Translator.letterGroups[]
00033 
00034 
00035 /* dictionary flags, word 1 (bit masks) */
00036 // bits 0-3  stressed syllable,  bit 6=unstressed
00037 #define FLAG_SKIPWORDS        0x80
00038 #define FLAG_PREPAUSE        0x100
00039 
00040 #define FLAG_ONLY            0x200
00041 #define FLAG_ONLY_S          0x400
00042 #define BITNUM_FLAG_ONLY         9  // bit number of FLAG_ONLY   (0x200 == 1 << 9)
00043 #define BITNUM_FLAG_ONLY_S      10  // bit number of FLAG_ONLY_S (0x400 == 1 << 10)
00044 
00045 #define FLAG_STRESS_END      0x800  /* full stress if at end of clause */
00046 #define FLAG_STRESS_END2    0x1000  /* full stress if at end of clause, or only followed by unstressed */
00047 #define FLAG_UNSTRESS_END   0x2000  /* reduce stress at end of clause */
00048 #define FLAG_ATEND          0x4000  /* use this pronunciation if at end of clause */
00049 #define FLAG_SPELLWORD      0x8000  // re-translate the word as individual letters, separated by spaces
00050 
00051 #define FLAG_DOT           0x10000  /* ignore '.' after word (abbreviation) */
00052 #define FLAG_ABBREV        0x20000  // spell as letters, even with a vowel, OR use specified pronunciation rather than split into letters
00053 #define FLAG_STEM          0x40000  // must have a suffix
00054 
00055 #define FLAG_DOUBLING      0x80000  // doubles the following consonant
00056 #define FLAG_ALT_TRANS    0x100000  // language specific
00057 #define FLAG_ALT2_TRANS   0x200000  // language specific
00058 
00059 #define FLAG_MAX3       0x08000000  // limit to 3 repeats
00060 #define FLAG_PAUSE1     0x10000000  // shorter prepause
00061 #define FLAG_TEXTMODE   0x20000000  // word translates to replacement text, not phonemes
00062 #define BITNUM_FLAG_TEXTMODE    29  // bit number of FLAG_TEXTMODE (0x20000000 == 1 << 29)
00063 
00064 #define FLAG_FOUND_ATTRIBUTES     0x40000000  // word was found in the dictionary list (has attributes)
00065 #define FLAG_FOUND      0x80000000  // pronunciation was found in the dictionary list (bit 31: the literal has unsigned type where int is 32 bits)
00066 
00067 // dictionary flags, word 2 (bit masks)
00068 #define FLAG_VERBF             0x1  /* verb follows */
00069 #define FLAG_VERBSF            0x2  /* verb follows, may have -s suffix */
00070 #define FLAG_NOUNF             0x4  /* noun follows */
00071 #define FLAG_PASTF             0x8  /* past tense follows */
00072 #define FLAG_VERB             0x10  /* pronunciation for verb */
00073 #define FLAG_NOUN             0x20  /* pronunciation for noun */
00074 #define FLAG_PAST             0x40  /* pronunciation for past tense */
00075 #define FLAG_VERB_EXT        0x100  /* extend the 'verb follows' */
00076 #define FLAG_CAPITAL         0x200  /* pronunciation if initial letter is upper case */
00077 #define FLAG_ALLCAPS         0x400  // only if the word is all capitals
00078 #define BITNUM_FLAG_ALLCAPS   0x2a  // 42 = 32 + 10, i.e. bit 10 of word 2 numbered after the 32 bits of word 1 (NOTE(review): confirm how the two words are combined)
00079 #define FLAG_ACCENT          0x800  // character name is base-character name + accent name
00080 #define FLAG_HYPHENATED     0x1000  // multiple-words, but needs hyphen between parts 1 and 2
00081 #define BITNUM_FLAG_HYPHENATED  0x2c  // 44 = 32 + 12, i.e. bit 12 of word 2 numbered after the 32 bits of word 1
00082 
00083 
00084 // wordflags, flags in source word (bit masks)
00085 #define FLAG_ALL_UPPER     0x1    /* no lower case letters in the word */
00086 #define FLAG_FIRST_UPPER   0x2    /* first letter is upper case */
00087 #define FLAG_UPPERS        0x3    // FLAG_ALL_UPPER | FLAG_FIRST_UPPER
00088 #define FLAG_HAS_PLURAL    0x4    /* upper-case word with s or 's lower-case ending */
00089 #define FLAG_PHONEMES      0x8    /* word is phonemes */
00090 #define FLAG_LAST_WORD     0x10   /* last word in clause */
00091 #define FLAG_EMBEDDED      0x40   /* word is preceded by embedded commands */
00092 #define FLAG_HYPHEN        0x80
00093 #define FLAG_NOSPACE       0x100  // word is not separated from previous word by a space
00094 #define FLAG_FIRST_WORD    0x200  // first word in clause
00095 #define FLAG_FOCUS         0x400   // the focus word of a clause
00096 #define FLAG_EMPHASIZED    0x800
00097 #define FLAG_EMPHASIZED2   0xc00  // FLAG_FOCUS | FLAG_EMPHASIZED
00098 #define FLAG_DONT_SWITCH_TRANSLATOR  0x1000
00099 #define FLAG_SUFFIX_REMOVED  0x2000
00100 #define FLAG_HYPHEN_AFTER    0x4000
00101 
00102 #define FLAG_NO_TRACE      0x10000   // passed to TranslateRules() to suppress dictionary lookup printout
00103 #define FLAG_NO_PREFIX     0x20000
00104 
00105 // prefix/suffix flags (bits 8 to 14, bits 16 to 22); don't use 0x8000, 0x800000
00106 #define SUFX_E        0x0100   // e may have been added
00107 #define SUFX_I        0x0200   // y may have been changed to i
00108 #define SUFX_P        0x0400   // prefix
00109 #define SUFX_V        0x0800   // suffix means use the verb form pronunciation
00110 #define SUFX_D        0x1000   // previous letter may have been doubled
00111 #define SUFX_F        0x2000   // verb follows
00112 #define SUFX_Q        0x4000   // don't retranslate
00113 #define SUFX_T        0x10000   // don't affect the stress position in the stem
00114 #define SUFX_B        0x20000  // break, this character breaks the word into stem and suffix (used with SUFX_P)
00115 
00116 #define FLAG_ALLOW_TEXTMODE  0x02  // allow dictionary to translate to text rather than phonemes
00117 #define FLAG_SUFX       0x04
00118 #define FLAG_SUFX_S     0x08
00119 #define FLAG_SUFX_E_ADDED 0x10
00120 
00121 
00122 // codes in dictionary rules (all values are below 32 except RULE_SPACE, which is the ascii space itself)
00123 #define RULE_PRE                        1
00124 #define RULE_POST                       2
00125 #define RULE_PHONEMES   3
00126 #define RULE_PH_COMMON  4       // At start of rule. Its phoneme string is used by subsequent rules
00127 #define RULE_CONDITION  5       // followed by condition number (byte)
00128 #define RULE_GROUP_START 6
00129 #define RULE_GROUP_END  7
00130 #define RULE_LINENUM            8  // next 2 bytes give a line number, for debugging purposes
00131 
00132 #define RULE_SPACE              32   // ascii space
00133 #define RULE_SYLLABLE   9
00134 #define RULE_STRESSED   10
00135 #define RULE_DOUBLE             11
00136 #define RULE_INC_SCORE  12
00137 #define RULE_DEL_FWD            13
00138 #define RULE_ENDING             14
00139 #define RULE_DIGIT              15   // D digit
00140 #define RULE_NONALPHA   16   // Z non-alpha
00141 #define RULE_LETTERGP   17   // A B C H F G Y   letter group number
00142 #define RULE_LETTERGP2  18   // L + letter group number
00143 #define RULE_CAPITAL    19   //    word starts with a capital letter
00144 #define RULE_REPLACEMENTS 20  // section for character replacements
00145 #define RULE_NO_SUFFIX  24   // N
00146 #define RULE_NOTVOWEL   25   // K
00147 #define RULE_IFVERB     26   // V
00148 #define RULE_ALT1       28   // T word has $alt attribute
00149 #define RULE_NOVOWELS   29   // X no vowels up to word boundary
00150 #define RULE_SPELLING   31   // W while spelling letter-by-letter
00151 #define RULE_LAST_RULE   31   // highest rule code defined here (same value as RULE_SPELLING)
00152 
00153 #define LETTERGP_A      0    // letter group numbers (cf. the "A B C H F G Y" list at RULE_LETTERGP)
00154 #define LETTERGP_B      1
00155 #define LETTERGP_C      2
00156 #define LETTERGP_H      3
00157 #define LETTERGP_F      4
00158 #define LETTERGP_G      5
00159 #define LETTERGP_Y      6
00160 #define LETTERGP_VOWEL2   7
00161 
00162 
// Punctuation types returned by ReadClause()
// bits 0-7 pause x 10mS, bits 12-14 intonation type,
// bit 19=sentence, bit 18=clause, bit 17=voice change
// bit 16 used to distinguish otherwise identical types
// bit 20=punctuation character can be inside a word (Armenian)
#define CLAUSE_BIT_SENTENCE  0x80000
#define CLAUSE_BIT_VOICE     0x20000
#define PUNCT_IN_WORD        0x100000

// Each CLAUSE_* value is (pause in 10mS units) + (type bits).
// The expansions are parenthesized so that each name is a single operand
// under any surrounding operator: without the parentheses, expressions such
// as `2 * CLAUSE_COMMA` or `x - CLAUSE_PERIOD` bind to only part of the sum.
#define CLAUSE_NONE        ( 0 + 0x04000)
#define CLAUSE_PARAGRAPH   (70 + 0x80000)
#define CLAUSE_EOF         (35 + 0x90000)
#define CLAUSE_VOICE       ( 0 + 0x24000)
#define CLAUSE_PERIOD      (35 + 0x80000)
#define CLAUSE_COMMA       (20 + 0x41000)
#define CLAUSE_SHORTCOMMA  ( 4 + 0x41000)
#define CLAUSE_SHORTFALL   ( 4 + 0x40000)
#define CLAUSE_QUESTION    (35 + 0x82000)
#define CLAUSE_EXCLAMATION (40 + 0x83000)
#define CLAUSE_COLON       (30 + 0x40000)
// The semicolon uses a different intonation type on RISC OS builds.
#ifdef PLATFORM_RISCOS
#define CLAUSE_SEMICOLON   (30 + 0x40000)
#else
#define CLAUSE_SEMICOLON   (30 + 0x41000)
#endif
00188 
00189 #define SAYAS_CHARS     0x12   // NOTE(review): the SAYAS_* codes are presumably the values held in option_sayas - confirm
00190 #define SAYAS_GLYPHS    0x13
00191 #define SAYAS_SINGLE_CHARS 0x14
00192 #define SAYAS_KEY       0x24
00193 #define SAYAS_DIGITS    0x40  // + number of digits
00194 #define SAYAS_DIGITS1   0xc1
00195 
00196 #define CHAR_EMPHASIS   0x0530  // this is an unused character code
00197 
00198 // Rule:  (the numbers are RULE_* codes: 1=RULE_PRE, 2=RULE_POST, 3=RULE_PHONEMES, 4=RULE_PH_COMMON; the trailing 0 ends the rule)
00199 // [4] [match] [1 pre] [2 post] [3 phonemes] 0
00200 //     match 1 pre 2 post 0     - use common phoneme string
00201 //     match 1 pre 2 post 3 0   - empty phoneme string
00202 
00203 typedef const char *  constcharptr;   // shorthand for a pointer to read-only characters
00204 
00205 typedef struct {   // NOTE(review): presumably one candidate rule match found by TranslateRules() - confirm
00206         int points;             // NOTE(review): presumably the score of this match (cf. RULE_INC_SCORE) - confirm
00207         const char *phonemes;
00208         int end_type;
00209         char *del_fwd;          // NOTE(review): presumably related to RULE_DEL_FWD - confirm
00210 } MatchRecord;
00211         
00212 
00213 // used to mark words within the source[] buffer; passed to TranslateWord() and LookupDictList()
00214 typedef struct{
00215         unsigned short start;
00216         unsigned short sourceix;
00217         unsigned short flags;     // 16 bits wide (NOTE(review): presumably the wordflags FLAG_* values that fit - confirm)
00218         unsigned char pre_pause;
00219         unsigned char wmark;
00220         unsigned char length;
00221 } WORD_TAB;
00222 
00223 // a clause translated into phoneme codes (first stage); an array of these is passed to ChangePhonemes_ru()
00224 typedef struct {
00225         unsigned char phcode;
00226         unsigned char stress;
00227         unsigned char tone_number; 
00228         unsigned char synthflags;
00229         unsigned short sourceix;
00230 } PHONEME_LIST2;
00231 
00232 
00233 typedef struct {   // one entry of param_stack[]: a type plus one value per speech parameter
00234         int type;
00235         int parameter[N_SPEECH_PARAM];   // N_SPEECH_PARAM is not defined in this header; it must be defined before this point
00236 } PARAM_STACK;
00237 
00238 extern PARAM_STACK param_stack[];
00239 extern const int param_defaults[N_SPEECH_PARAM];
00240 
00241 
00242 
00243 #define N_LOPTS      16   // size of LANGUAGE_OPTIONS.param[]; the LOPT_* values below run from 1 to 15
00244 #define LOPT_DIERESES        1
00245  // 1=remove [:] from unstressed syllables, 2= remove from unstressed or non-penultimate syllables
00246  // bit 4=0, if stress < 4,  bit 4=1, if not the highest stress in the word
00247 #define LOPT_IT_LENGTHEN        2
00248 
00249  // 1=german
00250 #define LOPT_PREFIXES        3
00251 
00252  // non-zero, change voiced/unvoiced to match last consonant in a cluster
00253  // bit 1=LANG=ru,  don't propagate over [v]
00254  // bit 2=don't propagate across word boundaries
00255  // bit 3=LANG=pl,  propagate over liquids and nasals
00256 #define LOPT_REGRESSIVE_VOICING  4
00257 
00258  // 0=default, 1=no check, other allow this character as an extra initial letter (default is 's')
00259 #define LOPT_UNPRONOUNCABLE  5
00260 
00261  // select length_mods tables,  (length_mod_tab) + (length_mod_tab0 * 100)
00262 #define LOPT_LENGTH_MODS    6
00263 
00264  // increase this to prevent sonorants being shortened before shortened (eg. unstressed) vowels
00265 #define LOPT_SONORANT_MIN    7
00266 
00267  // don't break vowels at word boundary
00268 #define LOPT_WORD_MERGE      8
00269 
00270  // max. amplitude for vowel at the end of a clause
00271 #define LOPT_MAXAMP_EOC      9
00272 
00273  // bit 0=reduce even if phonemes are specified in the **_list file
00274  // bit 1=don't reduce the strongest vowel in a word which is marked 'unstressed'
00275 #define LOPT_REDUCE  10
00276 
00277  // LANG=cs,sk  combine some prepositions with the following word, if the combination has N or fewer syllables
00278  // bits 0-3  N syllables
00279  // bit 4=only if the second word has $alt attribute
00280  // bit 5=not if the second word is end-of-sentence
00281 #define LOPT_COMBINE_WORDS 11
00282 
00283  // change [t] when followed by unstressed vowel
00284 #define LOPT_REDUCE_T 12
00285 
00286  // 1 = allow capitals inside a word
00287  // 2 = stressed syllable is indicated by capitals
00288 #define LOPT_CAPS_IN_WORD  13
00289 
00290  // bit 0=Italian "syntactic doubling" of consonants in the word after a word marked with $double attribute
00291  // bit 1=also after a word which ends with a stressed vowel
00292 #define LOPT_IT_DOUBLING    14
00293 
00294   // Call ApplySpecialAttributes() if $alt or $alt2 is set for a word
00295   // NOTE(review): this header declares ApplySpecialAttribute() and ApplySpecialAttribute2() - confirm which is meant
00296 #define LOPT_ALT  15
00296 
00297 
00298 typedef struct {   // per-language options; embedded in Translator as the member `langopts`
00299 // bits0-2  separate words with (1=pause_vshort, 2=pause_short, 3=pause, 4=pause_long 5=[?] phoneme)
00300 // bit 3=don't use linking phoneme
00301 // bit4=longer pause before STOP, VSTOP,FRIC
00302 // bit5=length of a final vowel doesn't depend on the next phoneme
00303         int word_gap;
00304         int vowel_pause;
00305         int stress_rule; // 1=first syllable, 2=penultimate,  3=last
00306 
00307 // bit0=don't stress monosyllables, except at end of clause
00308 // bit1=don't set diminished stress,
00309 // bit2=mark unstressed final syllables as diminished
00310 // bit4=don't allow secondary stress on last syllable
00311 // bit5=don't use automatic secondary stress
00312 // bit6=light syllable followed by heavy, move secondary stress to the heavy syllable. LANG=Finnish
00313 // bit8=stress last syllable if it doesn't end in a vowel
00314 // bit9=stress last syllable if it doesn't end in vowel or "s" or "n"  LANG=Spanish
00315 // bit12= In a 2-syllable word, if one has primary stress then give the other secondary stress
00316 // bit13= If there is only one syllable before the primary stress, give it a secondary stress
00317 // bit15= Give stress to the first unstressed syllable
00318 // bit16= Don't diminish consecutive syllables within a word.
00319 // bit17= "priority" stress reduces other primary stress to "unstressed" not "secondary"
00320 // bit18= don't lengthen short vowels more than long vowels at end-of-clause
00321 // bit19=stress on final syllable if it has a long vowel, but previous syllable has a short vowel
00322 
00323         int stress_flags; 
00324         int unstressed_wd1; // stress for $u word of 1 syllable
00325         int unstressed_wd2; // stress for $u word of >1 syllable
00326         int param[N_LOPTS];   // N_LOPTS entries (NOTE(review): presumably indexed by the LOPT_* constants - confirm)
00327         unsigned char *length_mods;
00328         unsigned char *length_mods0;
00329 
00330 #define NUM_ROMAN        0x20000    // bit17 of `numbers`
00331 #define NUM_ROMAN_UC     0x40000    // bit18 of `numbers`
00332 #define NUM_NOPAUSE      0x80000    // bit19 of `numbers`
00333 #define NUM_ROMAN_AFTER 0x200000    // bit21 of `numbers`
00334 #define NUM_VIGESIMAL   0x400000    // bit22 of `numbers`
00335 
00336         // bits0-1=which numbers routine to use.
00337         // bit2=  thousands separator must be space
00338         // bit3=  , decimal separator, not .
00339         // bit4=use three-and-twenty rather than twenty-three
00340         // bit5='and' between tens and units
00341         // bit6=add "and" after hundred or thousand
00342         // bit7=don't have "and" both after hundreds and also between tens and units
00343    // bit8=only one primary stress in tens+units
00344         // bit9=only one vowel between tens and units
00345         // bit10=omit "one" before "hundred"
00346         // bit11=say 19** as nineteen hundred
00347         // bit12=allow space as thousands separator (in addition to langopts.thousands_sep)
00348         // bits13-15  post-decimal-digits 0=single digits, 1=(LANG=it) 2=(LANG=pl) 3=(LANG=ro)
00349         // bit16=dot after number indicates ordinal
00350         // bit17=recognize roman numbers
00351         // bit18=Roman numbers only if upper case
00352         // bit19=don't add pause after a number
00353         // bit20='and' before hundreds
00354         // bit21= say "roman" after the number, not before
00355         // bit22= vigesimal number, if tens are not found
00356         int numbers;
00357 
00358 #define NUM2_100000     0x800   // numbers for 100,000 and 10,000,000
00359 #define NUM2_100000a    0xc00   // numbers for 100,000 and 1,000,000
00360         // bits 1-4  use variant form of numbers before thousands,millions,etc.
00361         // bit6=(LANG=pl) two forms of plural, M or MA
00362         // bit7=(LANG=ru) use MB for 1 thousand, million, etc
00363         // bit8=(LANG=cs,sk) two forms of plural, M or MA
00364         // bit9=(LANG=rw) say "thousand" and "million" before its number, not after
00365    // bit10=(LANG=sw) special word for 100,000 and 1,000,000
00366         // bit11=(LANG=hi) special word for 100,000 and 10,000,000
00367         int numbers2;
00368 
00369         int max_roman;
00370         int thousands_sep;
00371         int decimal_sep;
00372 
00373         // bit 0, accent name before the letter name, bit 1 "capital" after letter name
00374         int accents;
00375 
00376         int tone_language;          // 1=tone language
00377         int intonation_group;
00378         int long_stop;          // extra mS pause for a lengthened stop
00379         int phoneme_change;     // TEST, change phonemes, after translation
00380         char max_initial_consonants;
00381         char spelling_stress;   // 0=default, 1=stress first letter
00382         char tone_numbers;
00383         char ideographs;      // treat as separate words
00384         char textmode;          // the meaning of FLAG_TEXTMODE is reversed (to save data when *_list file is compiled)
00385         int testing;            // testing options: bit 1= specify stressed syllable in the form:  "outdoor/2"
00386         int listx;    // compile *_listx after *list
00387         const unsigned int *replace_chars;      // characters to be substituted
00388         const char *ascii_language;  // switch to this language for Latin characters
00389 } LANGUAGE_OPTIONS;
00390 
00391 
00392 // a parameter of ChangePhonemes() functions, e.g. ChangePhonemes_ru() declared in this header
00393 typedef struct {
00394         int flags;
00395         unsigned char stress;          // stress level of this vowel
00396         unsigned char stress_highest;  // the highest stress level of a vowel in this word
00397         unsigned char n_vowels;        // number of vowels in the word
00398         unsigned char vowel_this;      // syllable number of this vowel (counting from 1)
00399         unsigned char vowel_stressed;  // syllable number of the highest stressed vowel
00400 } CHANGEPH;
00401 
00402 
00403 
00404 #define NUM_SEP_DOT    0x0008    // . , for thousands and decimal separator  (bit3 of LANGUAGE_OPTIONS.numbers)
00405 #define NUM_SEP_SPACE  0x1000    // allow space as thousands separator (in addition to langopts.thousands_sep)  (bit12 of numbers)
00406 #define NUM_DEC_IT     0x2000    // (LANG=it) speak post-decimal-point digits as a combined number not as single digits  (value 1 in bits13-15 of numbers)
00407 
00408 typedef struct Translator
00409 {//=============
00410 
00411         LANGUAGE_OPTIONS langopts;
00412         int translator_name;     // NOTE(review): presumably the two-character code packed by the L() macro - confirm
00413         int transpose_offset;
00414         int transpose_max;
00415         int transpose_min;
00416 
00417         char phon_out[300];
00418         char phonemes_repeat[20];
00419         int phonemes_repeat_count;
00420 
00421         unsigned char stress_amps[8];
00422         unsigned char stress_amps_r[8];
00423         short stress_lengths[8];
00424         int dict_condition;    // conditional apply some pronunciation rules and dict.lookups
00425         const unsigned short *charset_a0;   // unicodes for characters 0xa0 to 0xff
00426         const wchar_t *char_plus_apostrophe;  // single chars + apostrophe treated as words
00427         const wchar_t *punct_within_word;   // allow these punctuation characters within words
00428 
00429 // holds properties of characters: vowel, consonant, etc for pronunciation rules
00430         unsigned char letter_bits[256];
00431         int letter_bits_offset;
00432         const wchar_t *letter_groups[8];
00433 
00434         /* index1=option, index2 by 0=. 1=, 2=?, 3=! 4=none */
00435 #define INTONATION_TYPES 8
00436 #define PUNCT_INTONATIONS 6
00437         unsigned char punct_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS];
00438 
00439         char *data_dictrules;     // language_1   translation rules file
00440         char *data_dictlist;      // language_2   dictionary lookup file
00441         char *dict_hashtab[N_HASH_DICT];   // hash table to index dictionary lookup file
00442         char *letterGroups[N_LETTER_GROUPS];
00443 
00444         // groups1 and groups2 are indexes into data_dictrules, set up by InitGroups()
00445         // the two-letter rules for each letter must be consecutive in the language_rules source
00446         
00447         char *groups1[256];         // translation rule lists, index by single letter
00448         char *groups2[N_RULE_GROUP2];   // translation rule lists, indexed by two-letter pairs
00449         unsigned int groups2_name[N_RULE_GROUP2];  // the two letter pairs for groups2[]
00450         int n_groups2;              // number of groups2[] entries used
00451         
00452         unsigned char groups2_count[256];    // number of 2 letter groups for this initial letter
00453         unsigned char groups2_start[256];    // index into groups2
00454         
00455         
00456         int expect_verb;
00457         int expect_past;    // expect past tense
00458         int expect_verb_s;
00459         int expect_noun;
00460         int prev_last_stress;
00461         char *clause_end;
00462 
00463         int word_vowel_count;     // number of vowels so far
00464         int word_stressed_count;  // number of vowels so far which could be stressed
00465         
00466         int clause_upper_count;   // number of upper case letters in the clause
00467         int clause_lower_count;   // number of lower case letters in the clause
00468 
00469         int prepause_timeout;
00470         int end_stressed_vowel;  // word ends with stressed vowel
00471         int prev_dict_flags;     // dictionary flags from previous word
00472 } Translator; //  end of struct Translator
00473 
00474 
00475 extern int option_tone2;
00476 #define OPTION_EMPHASIZE_ALLCAPS  0x100   // NOTE(review): these two masks are presumably bits of option_tone_flags - confirm
00477 #define OPTION_EMPHASIZE_PENULTIMATE 0x200
00478 extern int option_tone_flags;
00479 extern int option_waveout;
00480 extern int option_quiet;
00481 extern int option_phonemes;
00482 extern int option_phoneme_events;
00483 extern int option_linelength;     // treat lines shorter than this as end-of-clause
00484 extern int option_multibyte;
00485 extern int option_capitals;
00486 extern int option_punctuation;
00487 extern int option_endpause;
00488 extern int option_ssml;
00489 extern int option_phoneme_input;   // allow [[phonemes]] in input text
00490 extern int option_phoneme_variants;
00491 extern int option_sayas;
00492 extern int option_wordgap;
00493 
00494 extern int count_characters;
00495 extern int count_words;
00496 extern int count_sentences;
00497 extern int skip_characters;
00498 extern int skip_words;
00499 extern int skip_sentences;
00500 extern int skipping_text;
00501 extern int end_character_position;
00502 extern int clause_start_char;
00503 extern int clause_start_word;
00504 extern char *namedata;
00505 extern int pre_pause;
00506 
00507 
00508 
00509 #define N_MARKER_LENGTH 50   // max.length of a mark name; size of skip_marker[]
00510 extern char skip_marker[N_MARKER_LENGTH];
00511 
00512 #define N_PUNCTLIST  60   // size of option_punctlist[]
00513 extern wchar_t option_punctlist[N_PUNCTLIST];  // which punctuation characters to announce
00514 extern unsigned char punctuation_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS];   // same dimensions as Translator.punct_to_tone
00515 
00516 extern struct Translator *translator;
00517 extern struct Translator *translator2;
00518 extern const unsigned short *charsets[N_CHARSETS];
00519 extern char dictionary_name[40];
00520 extern char ctrl_embedded;    // to allow an alternative CTRL for embedded commands
00521 extern unsigned char *p_textinput;
00522 extern wchar_t *p_wchar_input;
00523 extern int dictionary_skipwords;
00524 
00525 extern int (* uri_callback)(int, const char *, const char *);   // function pointer variable, not a function
00526 extern int (* phoneme_callback)(const char *);   // function pointer variable, not a function
00527 extern void SetLengthMods(struct Translator *tr, int value);
00528 
00529 void LoadConfig(void);
00530 int TransposeAlphabet(char *text, int offset, int min, int max);
00531 int utf8_in(int *c, const char *buf);
00532 int utf8_in2(int *c, const char *buf, int backwards);
00533 int utf8_out(unsigned int c, char *buf);
00534 int utf8_nbytes(const char *buf);
00535 int lookupwchar(const unsigned short *list,int c);
00536 int Eof(void);
00537 char *strchr_w(const char *s, int c);
00538 int IsBracket(int c);
00539 void InitNamedata(void);
00540 void InitText(int flags);
00541 void InitText2(void);
00542 int IsDigit(unsigned int c);
00543 int IsAlpha(unsigned int c);
00544 int isspace2(unsigned int c);
00545 int towlower2(unsigned int c);
00546 void GetTranslatedPhonemeString(char *phon_out, int n_phon_out);
00547 
00548 struct Translator *SelectTranslator(const char *name);   // `struct Translator` and the typedef `Translator` used below name the same type
00549 int SetTranslator2(const char *name);
00550 void DeleteTranslator(struct Translator *tr);
00551 int Lookup(struct Translator *tr, const char *word, char *ph_out);
00552 
00553 int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, int wflags);
00554 int TranslateRoman(Translator *tr, char *word, char *ph_out);
00555 
00556 void ChangeWordStress(Translator *tr, char *word, int new_stress);
00557 void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars);
00558 int TranslateLetter(Translator *tr, char *letter, char *phonemes, int control, int word_length);
00559 void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf);
00560 void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf);
00561 
00562 int LoadDictionary(Translator *tr, const char *name, int no_error);
00563 int LookupDictList(Translator *tr, char **wordptr, char *ph_out, unsigned int *flags, int end_flags, WORD_TAB *wtab);
00564 
00565 void MakePhonemeList(Translator *tr, int post_pause, int new_sentence);
00566 int ChangePhonemes_ru(Translator *tr, PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch);   // PHONEME_TAB is not declared in this header
00567 void ApplySpecialAttribute(Translator *tr, char *phonemes, int dict_flags);
00568 void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);
00569 void AppendPhonemes(Translator *tr, char *string, int size, const char *ph);
00570 
00571 void CalcLengths(Translator *tr);
00572 void CalcPitches(Translator *tr, int clause_tone);
00573 
00574 int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy);
00575 int Unpronouncable(Translator *tr, char *word);
00576 void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int prev_stress);
00577 int TranslateRules(Translator *tr, char *p, char *phonemes, int size, char *end_phonemes, int end_flags, unsigned int *dict_flags);
00578 int TranslateWord(Translator *tr, char *word1, int next_pause, WORD_TAB *wtab);
00579 void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *tone, char **voice_change);   // FILE comes from <stdio.h>, which this header does not include itself
00580 int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type);   // the returned punctuation type uses the CLAUSE_* encoding defined in this header
00581 
00582 void SetVoiceStack(espeak_VOICE *v);   // espeak_VOICE is not declared in this header
00583 
00584 extern FILE *f_trans;           // for logging