#include <string.h> // strlen(), strcpy()
#include <stdio.h> // fwrite(), fprintf()
#include <stdlib.h> // exit()
#include "sam.h"
#include "phonem.h"
// Working buffer of parsed phonemes shared by every pass in this file
// (fixed capacity: 255 entries).
static struct phonem_t phonems[255];
static byte phonems_size; // fill level of phonems[] (number of entries in use)
// First character of every phoneme name, indexed by phoneme index (0..80).
// Adjacent string literals are concatenated, so this is one 81-character
// string followed by the terminating NUL.
static const char charInputLeadTable[] =
    " .?,-IIEAAAAUAIE"   //  0..15
    "UORLWYWRLWYMNNDQ"   // 16..31
    "SSFT//ZZVDC*J***"   // 32..47
    "EAOAOUB**D**G**G"   // 48..63
    "**P**T**K**K**UU"   // 64..79
    "U";                 // 80
// Second character of every phoneme name, indexed by phoneme index (0..80).
// A '*' marks a phoneme whose name is only one character long.
static const char charInputTailTable[] =
    "*****YHHEAHOHXXR"   //  0..15
    "XHXXXXH******XX*"   // 16..31
    "*H*HHX*H*HH*****"   // 32..47
    "YYYWWW*********X"   // 48..63
    "***********X**LM"   // 64..79
    "N";                 // 80
//loc_9F8C
// Primary flag byte of every phoneme, indexed by phoneme index (0..80).
// Bits as they are tested by the rules in this file:
//   128 vowel, 64 consonant, 32 ends with an IY sound, 16 diphthong,
//   4 voiced, 2 stop consonant, 1 unvoiced plosive.
// Bit 8 is tested in Code41240(); its meaning is not documented in this file.
static const byte flags1[81]={
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0xA4 , 0xA4 , 0xA4 , //  0..7
0xA4 , 0xA4 , 0xA4 , 0x84 , 0x84 , 0xA4 , 0xA4 , 0x84 , //  8..15
0x84 , 0x84 , 0x84 , 0x84 , 0x84 , 0x84 , 0x44 , 0x44 , // 16..23
0x44 , 0x44 , 0x44 , 0x4C , 0x4C , 0x4C , 0x48 , 0x4C , // 24..31
0x40 , 0x40 , 0x40 , 0x40 , 0x40 , 0x40 , 0x44 , 0x44 , // 32..39
0x44 , 0x44 , 0x48 , 0x40 , 0x4C , 0x44 , 0x00 , 0x00 , // 40..47
0xB4 , 0xB4 , 0xB4 , 0x94 , 0x94 , 0x94 , 0x4E , 0x4E , // 48..55
0x4E , 0x4E , 0x4E , 0x4E , 0x4E , 0x4E , 0x4E , 0x4E , // 56..63
0x4E , 0x4E , 0x4B , 0x4B , 0x4B , 0x4B , 0x4B , 0x4B , // 64..71
0x4B , 0x4B , 0x4B , 0x4B , 0x4B , 0x4B , 0x80 , 0xC1 , // 72..79
0xC1                                                     // 80
};
//??? flags overlap flags2
//loc_9FDA
// Secondary flag byte of every phoneme, indexed by phoneme index.
// Bits as they are tested by the rules in this file:
//   1 punctuation, 4 alveolar, 8 nasal, 16 liquid consonant, 32 fricative.
// Only 78 initialisers are given for the 81 elements, so entries 78..80
// are zero-initialised.
static const byte flags2[81] =
{
0x80 , 0xC1 , 0xC1 , 0xC1 , 0xC1 , 0x00 , 0x00 , 0x00 , //  0..7
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , //  8..15
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x10 , // 16..23
0x10 , 0x10 , 0x10 , 0x08 , 0x0C , 0x08 , 0x04 , 0x40 , // 24..31
0x24 , 0x20 , 0x20 , 0x24 , 0x00 , 0x00 , 0x24 , 0x20 , // 32..39
0x20 , 0x24 , 0x20 , 0x20 , 0x00 , 0x20 , 0x00 , 0x00 , // 40..47
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , // 48..55
0x00 , 0x04 , 0x04 , 0x04 , 0x00 , 0x00 , 0x00 , 0x00 , // 56..63
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x04 , 0x04 , 0x04 , // 64..71
0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00                 // 72..77
};
//tab45616???
// Length assigned by SetPhonemeLength() to a phoneme that carries a
// stress value (non-zero, top bit clear), indexed by phoneme index.
// Only 80 initialisers are given for the 81 elements, so entry 80 is
// zero-initialised.
static const byte phonemeStressedLengthTable[81] =
{
0x00 , 0x12 , 0x12 , 0x12 , 8 ,0xB , 9 ,0xB , //  0..7
0xE ,0xF ,0xB , 0x10 ,0xC , 6 , 6 ,0xE , //  8..15
0xC ,0xE ,0xC ,0xB , 8 , 8 ,0xB ,0xA , // 16..23
9 , 8 , 8 , 8 , 8 , 8 , 3 , 5 , // 24..31
2 , 2 , 2 , 2 , 2 , 2 , 6 , 6 , // 32..39
8 , 6 , 6 , 2 , 9 , 4 , 2 , 1 , // 40..47
0xE ,0xF ,0xF ,0xF ,0xE ,0xE , 8 , 2 , // 48..55
2 , 7 , 2 , 1 , 7 , 2 , 2 , 7 , // 56..63
2 , 2 , 8 , 2 , 2 , 6 , 2 , 2 , // 64..71
7 , 2 , 4 , 7 , 1 , 4 , 5 , 5 // 72..79
};
//tab45536???
// Length assigned by SetPhonemeLength() to a phoneme whose stress is zero
// or has the top bit set, indexed by phoneme index. Also used by
// Code41240() for the phonemes it inserts.
// Only 80 initialisers are given for the 81 elements, so entry 80 is
// zero-initialised.
static const byte phonemeLengthTable[81] =
{
0 , 0x12 , 0x12 , 0x12 , 8 , 8 , 8 , 8 , //  0..7
8 ,0xB , 6 ,0xC ,0xA , 5 , 5 ,0xB , //  8..15
0xA ,0xA ,0xA , 9 , 8 , 7 , 9 , 7 , // 16..23
6 , 8 , 6 , 7 , 7 , 7 , 2 , 5 , // 24..31
2 , 2 , 2 , 2 , 2 , 2 , 6 , 6 , // 32..39
7 , 6 , 6 , 2 , 8 , 3 , 1 , 0x1E , // 40..47
0xD ,0xC ,0xC ,0xC ,0xE , 9 , 6 , 1 , // 48..55
2 , 5 , 1 , 1 , 6 , 1 , 2 , 6 , // 56..63
1 , 2 , 8 , 2 , 2 , 4 , 2 , 2 , // 64..71
6 , 1 , 4 , 6 , 1 , 4 , 0xC7 , 0xFF // 72..79
};
/*
Ind | phoneme | flags |
-----|---------|----------|
0 | * | 00000000 |
1 | .* | 00000000 |
2 | ?* | 00000000 |
3 | ,* | 00000000 |
4 | -* | 00000000 |
VOWELS
5 | IY | 10100100 |
6 | IH | 10100100 |
7 | EH | 10100100 |
8 | AE | 10100100 |
9 | AA | 10100100 |
10 | AH | 10100100 |
11 | AO | 10000100 |
17 | OH | 10000100 |
12 | UH | 10000100 |
16 | UX | 10000100 |
15 | ER | 10000100 |
13 | AX | 10100100 |
14 | IX | 10100100 |
DIPHTONGS
48 | EY | 10110100 |
49 | AY | 10110100 |
50 | OY | 10110100 |
51 | AW | 10010100 |
52 | OW | 10010100 |
53 | UW | 10010100 |
21 | YX | 10000100 |
20 | WX | 10000100 |
18 | RX | 10000100 |
19 | LX | 10000100 |
37 | /X | 01000000 |
30 | DX | 01001000 |
22 | WH | 01000100 |
VOICED CONSONANTS
23 | R* | 01000100 |
24 | L* | 01000100 |
25 | W* | 01000100 |
26 | Y* | 01000100 |
27 | M* | 01001100 |
28 | N* | 01001100 |
29 | NX | 01001100 |
54 | B* | 01001110 |
57 | D* | 01001110 |
60 | G* | 01001110 |
44 | J* | 01001100 |
38 | Z* | 01000100 |
39 | ZH | 01000100 |
40 | V* | 01000100 |
41 | DH | 01000100 |
unvoiced CONSONANTS
32 | S* | 01000000 |
33 | SH | 01000000 |
34 | F* | 01000000 |
35 | TH | 01000000 |
66 | P* | 01001011 |
69 | T* | 01001011 |
72 | K* | 01001011 |
42 | CH | 01001000 |
36 | /H | 01000000 |
43 | ** | 01000000 |
45 | ** | 01000100 |
46 | ** | 00000000 |
47 | ** | 00000000 |
55 | ** | 01001110 |
56 | ** | 01001110 |
58 | ** | 01001110 |
59 | ** | 01001110 |
61 | ** | 01001110 |
62 | ** | 01001110 |
63 | GX | 01001110 |
64 | ** | 01001110 |
65 | ** | 01001110 |
67 | ** | 01001011 |
68 | ** | 01001011 |
70 | ** | 01001011 |
71 | ** | 01001011 |
73 | ** | 01001011 |
74 | ** | 01001011 |
75 | KX | 01001011 |
76 | ** | 01001011 |
77 | ** | 01001011 |
SPECIAL
78 | UL | 10000000 |
79 | UM | 11000001 |
80 | UN | 11000001 |
31 | Q* | 01001100 |
*/
// Debug helper: dumps the whole phoneme buffer to stderr, one line per
// entry, showing its position, phoneme index (decimal and hex), two-letter
// phoneme name, length and stress. Indices outside the name tables
// (>= 81, e.g. the 254 markers added by InsertBreath) are shown as "??".
static void PrintPhonems() {
  fprintf(stderr,"===========================================\n");
  fprintf(stderr,"Internal Phoneme presentation:\n");
  fprintf(stderr," # Index Phonem Laenge Akzent\n");
  fprintf(stderr,"------------------------------\n");
  for (const phonem_t *i = phonems; i < phonems + phonems_size; i++) {
    byte idx = i->index;
    // The pointer difference has type ptrdiff_t; it must be converted
    // explicitly because %i expects an int.
    fprintf(stderr,"%3i %3i(%02X) %c%c %3i %i\n",
            (int)(i - phonems), idx, idx,
            idx<81?charInputLeadTable[idx]:'?',
            idx<81?charInputTailTable[idx]:'?',
            i->length,
            i->stress);
  }
  fprintf(stderr,"===========================================\n\n");
}
// Debug helper: prints position, two-letter name and length of the phoneme
// at position X to stderr.
//
// X is validated against the fill level because callers pass expressions
// such as X-1 that can wrap around to 255, and the phoneme index is
// validated against the 81-entry name tables (same guard as PrintPhonems).
static void printPhonem(byte X) {
  if (X >= phonems_size) {
    fprintf(stderr,"phoneme %d (out of range)\n", X);
    return;
  }
  const phonem_t &ph = phonems[X];
  byte idx = ph.index;
  fprintf(stderr,"phoneme %d (%c%c) length %d\n", X,
          idx<81?charInputLeadTable[idx]:'?',
          idx<81?charInputTailTable[idx]:'?',
          ph.length);
}
//standard sam sound
static byte mouth = 128;
static byte throat = 128;
//static byte mem39;
//static byte mem44;
//static byte mem47;
//static byte mem49;
//static byte mem50;
//static byte mem51;
//static byte mem53;
//static byte mem59;
static byte A, X, Y;
// 168=pitches
// 169=frequency1
// 170=frequency2
// 171=frequency3
// 172=amplitude1
// 173=amplitude2
// 174=amplitude3
// Inserts a new phoneme in front of the entry currently at `position`,
// moving that entry and everything after it up by one slot.
//
//   position  slot the new entry will occupy (0..phonems_size)
//   idx       phoneme index of the new entry
//   l         length of the new entry
//   s         stress of the new entry
//
// Terminates the program with exit code 3 when `position` lies behind the
// end of the used part of the buffer, or when the buffer is already full.
// Without the second check the memmove() below would write past the end of
// phonems[] and the byte-sized fill level would wrap around to 0.
static void Insert(byte position, byte idx, byte l, byte s) {
  const size_t capacity = sizeof phonems / sizeof phonems[0];
  phonem_t*i=phonems+position;
  int elemsmove=phonems_size-position;
  if (elemsmove<0) {
    fprintf(stderr,"s2: internal error: inserting element outside array\n");
    exit(3);
  }
  if (phonems_size>=capacity) {
    fprintf(stderr,"s2: internal error: phoneme buffer full\n");
    exit(3);
  }
  // Source and destination overlap, hence memmove() rather than memcpy().
  memmove(i+1,i,elemsmove*sizeof(phonem_t));
  phonems_size++;
  i->index=idx;
  i->length=l;
  i->stress=s;
}
// The input[] buffer contains a string of phonemes and stress markers along
// the lines of:
//
// DHAX KAET IHZ AH5GLIY. NUL
//
// The byte NUL marks the end of the buffer. Some phonemes are 2 bytes
// long, such as "DH" and "AX". Others are 1 byte long, such as "T" and "Z".
// There are also stress markers, such as "5" and ".".
//
// The first character of the phonemes are stored in the table charInputLeadTable[].
// The second character of the phonemes are stored in the table charInputTailTable[].
// The stress markers are the digits '1' (lowest) to '8' (highest); no separate stress table is used.
//
// The following process is used to parse the input[] buffer:
//
// Repeat until the NUL character is reached:
//
// First, a search is made for a 2 character match for phonemes that do not
// end with the '*' (wildcard) character. On a match, the index of the phoneme
// is appended to phonems[] and the buffer position is advanced 2 bytes.
//
// If this fails, a search is made for a 1 character match against all
// phoneme names ending with a '*' (wildcard). If this succeeds, the
// phoneme is appended to phonems[] and the buffer position is advanced
// 1 byte.
//
// If this fails, the character is checked against the stress markers
// '1'..'8'. If it is one, the stress value is stored in the most recently
// added entry of phonems[], and the buffer position is advanced by 1 byte.
//
// If this fails, the function returns false.
//
// On success:
//
// 1. phonems[] will contain the index and stress of all the phonemes.
// input[] holds the string of phonemes, each one or two bytes wide
// charInputLeadTable[] holds the first character of each phoneme
// charInputTailTable[] holds the second character of each phoneme
// phonems[] holds the indexes of the phonemes after parsing input[]
//
// The parser scans through the input[], finding the names of the phonemes
// by searching charInputLeadTable[] and charInputTailTable[]. On a match, it
// copies the index of the phoneme into phonems[].
//
// The character NUL marks the end of text in input[]. When it is reached,
// the function returns true, indicating success.
static bool Parser1(const char*input) {
char sign1, sign2;
phonem_t*position = phonems;
unsigned X;
// THIS CODE MATCHES THE PHONEME LETTERS TO THE TABLE
for(X=0;sign1=input[X++];) {
const char*p,*q;
sign2 = input[X]; // GET THE NEXT CHARACTER FROM THE BUFFER
// NOW sign1 = FIRST CHARACTER OF PHONEME, AND sign2 = SECOND CHARACTER OF PHONEME
// TRY TO MATCH PHONEMES ON TWO TWO-CHARACTER NAME
// IGNORE PHONEMES IN TABLE ENDING WITH WILDCARDS
for(p=charInputLeadTable;q=strchr(p,sign1);p=q+1) {
size_t Y = q-charInputLeadTable;
// GET FIRST CHARACTER AT POSITION Y IN signInputTable
// --> should change name to PhonemeNameTable1
// FIRST CHARACTER MATCHES?
// GET THE CHARACTER FROM THE PhonemeSecondLetterTable
char A = charInputTailTable[Y];
// NOT A SPECIAL AND MATCHES SECOND CHARACTER?
if ((A != '*') && (A == sign2)) {
// STORE THE INDEX OF THE PHONEME INTO THE phomeneIndexTable
position->index=Y; // ADVANCE THE POINTER TO THE phonemeIndexTable
position->length=0;
position->stress=0;
position++;
X++; // CONTINUE PARSING
goto rept;
}
}
// REACHED END OF TABLE WITHOUT AN EXACT (2 CHARACTER) MATCH.
// THIS TIME, SEARCH FOR A 1 CHARACTER MATCH AGAINST THE WILDCARDS
// RESET THE INDEX TO POINT TO THE START OF THE PHONEME NAME TABLE
for (p=charInputTailTable;q=strchr(p,'*');p=q+1) {
size_t Y=q-charInputTailTable;
if (charInputLeadTable[Y] == sign1) {
position->index=Y; // ADVANCE THE POINTER TO THE phonemeIndexTable
position->length=0;
position->stress=0;
position++;
goto rept; // CONTINUE THROUGH THE LOOP
}
}
// FAILED TO MATCH WITH A WILDCARD. ASSUME THIS IS A STRESS
// CHARACTER. SEARCH THROUGH THE STRESS TABLE
if (sign1>='1' && sign1 <='8') {
position[-1].stress = sign1-'0';
}else if (sign1!='*') return false;
rept:;
} //while
phonems_size=position-phonems; //mark endpoint
return true; //all ok
}
// Rewrites the phonemes using the following rules:
//
// <DIPHTONG ENDING WITH WX> -> <DIPHTONG ENDING WITH WX> WX
// <DIPHTONG NOT ENDING WITH WX> -> <DIPHTONG NOT ENDING WITH WX> YX
// UL -> AX L
// UM -> AX M
// <STRESSED VOWEL> <SILENCE> <STRESSED VOWEL> -> <STRESSED VOWEL> <SILENCE> Q <VOWEL>
// T R -> CH R
// D R -> J R
// <VOWEL> R -> <VOWEL> RX
// <VOWEL> L -> <VOWEL> LX
// G S -> G Z
// K <VOWEL OR DIPHTONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPHTONG NOT ENDING WITH IY>
// G <VOWEL OR DIPHTONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPHTONG NOT ENDING WITH IY>
// S P -> S B
// S T -> S D
// S K -> S G
// S KX -> S GX
// <ALVEOLAR> UW -> <ALVEOLAR> UX
// CH -> CH CH' (CH requires two phonemes to represent it)
// J -> J J' (J requires two phonemes to represent it)
// <UNSTRESSED VOWEL> T <PAUSE> -> <UNSTRESSED VOWEL> DX <PAUSE>
// <UNSTRESSED VOWEL> D <PAUSE> -> <UNSTRESSED VOWEL> DX <PAUSE>
//void Code41397()
static void Parser2() {
if (debug) fprintf(stderr,"Parser2\n");
byte pos = 0; //mem66;
byte mem58 = 0, mem59=0;
// Loop through phonemes
while(1)
{
// SET X TO THE CURRENT POSITION
X = pos;
// If end of phonemes flag reached, exit routine
if (pos >= phonems_size) return;
// GET THE PHONEME AT THE CURRENT POSITION
A = phonems[pos].index;
// DEBUG: Print phoneme and index
if (debug) fprintf(stderr,"%d: %c%c\n", X, charInputLeadTable[A], charInputTailTable[A]);
// Is phoneme pause?
if (A == 0)
{
// Move ahead to the
pos++;
continue;
}
// Copy the current phoneme index to Y
Y = A;
// RULE:
// <DIPHTONG ENDING WITH WX> -> <DIPHTONG ENDING WITH WX> WX
// <DIPHTONG NOT ENDING WITH WX> -> <DIPHTONG NOT ENDING WITH WX> YX
// Example: OIL, COW
// Check for DIPHTONG
if ((flags1[A] & 16) == 0) goto pos41457;
// Not a diphthong. Get the stress
mem58 = phonems[pos].stress;
// End in IY sound?
A = flags1[Y] & 32;
// If ends with IY, use YX, else use WX
if (A == 0) A = 20; else A = 21; // 'WX' = 20 'YX' = 21
//pos41443:
// Insert at WX or YX following, copying the stress
if (debug) {
if (A==20) fprintf(stderr,"RULE: insert WX following diphtong NOT ending in IY sound\n");
if (A==21) fprintf(stderr,"RULE: insert YX following diphtong ending in IY sound\n");
}
Insert(pos+1, A, mem59, mem58);
X = pos;
// Jump to ???
goto pos41749;
pos41457:
// RULE:
// UL -> AX L
// Example: MEDDLE
// Get phoneme
A = phonems[X].index;
// Skip this rule if phoneme is not UL
if (A != 78) goto pos41487; // 'UL'
A = 24; // 'L' //change 'UL' to 'AX L'
if (debug) fprintf(stderr,"RULE: UL -> AX L\n");
pos41466:
// Get current phoneme stress
mem58 = phonems[X].stress;
// Change UL to AX
phonems[X].index = 13; // 'AX'
// Perform insert. Note code below may jump up here with different values
Insert(X+1, A, mem59, mem58);
pos++;
// Move to next phoneme
continue;
pos41487:
// RULE:
// UM -> AX M
// Example: ASTRONOMY
// Skip rule if phoneme != UM
if (A != 79) goto pos41495; // 'UM'
// Jump up to branch - replaces current phoneme with AX and continues
A = 27; // 'M' //change 'UM' to 'AX M'
if (debug) fprintf(stderr,"RULE: UM -> AX M\n");
goto pos41466;
pos41495:
// RULE:
// UN -> AX N
// Example: FUNCTION
// Skip rule if phoneme != UN
if (A != 80) goto pos41503; // 'UN'
// Jump up to branch - replaces current phoneme with AX and continues
A = 28; // 'N' //change UN to 'AX N'
if (debug) fprintf(stderr,"RULE: UN -> AX N\n");
goto pos41466;
pos41503:
// RULE:
// <STRESSED VOWEL> <SILENCE> <STRESSED VOWEL> -> <STRESSED VOWEL> <SILENCE> Q <VOWEL>
// EXAMPLE: AWAY EIGHT
Y = A;
// VOWEL set?
A = flags1[A] & 128;
// Skip if not a vowel
if (A)
{
// Get the stress
A = phonems[X].stress;
// If stressed...
if (A)
{
// Get the following phoneme
X++;
A = phonems[X].index;
// If following phoneme is a pause
if (!A)
{
// Get the phoneme following pause
X++;
Y = phonems[X].index;
// Check for end of buffer flag
if (Y == 255) //buffer overflow
// ??? Not sure about these flags
A = 65&128;
else
// And VOWEL flag to current phoneme's flags
A = flags1[Y] & 128;
// If following phonemes is not a pause
if (A)
{
// If the following phoneme is not stressed
A = phonems[X].stress;
if (A)
{
// Insert a glottal stop and move forward
if (debug) fprintf(stderr,"RULE: Insert glottal stop between two stressed vowels with space between them\n");
// 31 = 'Q'
Insert(X, 31, mem59, 0);
pos++;
continue;
}
}
}
}
}
// RULES FOR PHONEMES BEFORE R
// T R -> CH R
// Example: TRACK
// Get current position and phoneme
X = pos;
A = phonems[pos].index;
if (A != 23) goto pos41611; // 'R'
// Look at prior phoneme
X--;
A = phonems[pos-1].index;
//pos41567:
if (A == 69) // 'T'
{
// Change T to CH
if (debug) fprintf(stderr,"RULE: T R -> CH R\n");
phonems[pos-1].index = 42;
goto pos41779;
}
// RULES FOR PHONEMES BEFORE R
// D R -> J R
// Example: DRY
// Prior phonemes D?
if (A == 57) // 'D'
{
// Change D to J
phonems[pos-1].index = 44;
if (debug) fprintf(stderr,"RULE: D R -> J R\n");
goto pos41788;
}
// RULES FOR PHONEMES BEFORE R
// <VOWEL> R -> <VOWEL> RX
// Example: ART
// If vowel flag is set change R to RX
A = flags1[A] & 128;
if (debug) fprintf(stderr,"RULE: R -> RX\n");
if (A != 0) phonems[pos].index = 18; // 'RX'
// continue to next phoneme
pos++;
continue;
pos41611:
// RULE:
// <VOWEL> L -> <VOWEL> LX
// Example: ALL
// Is phoneme L?
if (A == 24) // 'L'
{
// If prior phoneme does not have VOWEL flag set, move to next phoneme
if ((flags1[phonems[pos-1].index] & 128) == 0) {pos++; continue;}
// Prior phoneme has VOWEL flag set, so change L to LX and move to next phoneme
if (debug) fprintf(stderr,"RULE: <VOWEL> L -> <VOWEL> LX\n");
phonems[X].index = 19; // 'LX'
pos++;
continue;
}
// RULE:
// G S -> G Z
//
// Can't get to fire -
// 1. The G -> GX rule intervenes
// 2. Reciter already replaces GS -> GZ
// Is current phoneme S?
if (A == 32) // 'S'
{
// If prior phoneme is not G, move to next phoneme
if (phonems[pos-1].index != 60) {pos++; continue;}
// Replace S with Z and move on
if (debug) fprintf(stderr,"RULE: G S -> G Z\n");
phonems[pos].index = 38; // 'Z'
pos++;
continue;
}
// RULE:
// K <VOWEL OR DIPHTONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPHTONG NOT ENDING WITH IY>
// Example: COW
// Is current phoneme K?
if (A == 72) // 'K'
{
// Get next phoneme
Y = phonems[pos+1].index;
// If at end, replace current phoneme with KX
if (pos+1>=phonems_size) phonems[pos].index = 75; // ML : prevents an index out of bounds problem
else
{
// VOWELS AND DIPHTONGS ENDING WITH IY SOUND flag set?
A = flags1[Y] & 32;
if (debug && A==0) fprintf(stderr,"RULE: K <VOWEL OR DIPHTONG NOT ENDING WITH IY> -> KX <VOWEL OR DIPHTONG NOT ENDING WITH IY>\n");
// Replace with KX
if (A == 0) phonems[pos].index = 75; // 'KX'
}
}
else
// RULE:
// G <VOWEL OR DIPHTONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPHTONG NOT ENDING WITH IY>
// Example: GO
// Is character a G?
if (A == 60) // 'G'
{
// Get the following character
byte index = phonems[pos+1].index;
// At end of buffer?
if (pos+1>=phonems_size) //prevent buffer overflow
{
pos++; continue;
}
// If diphtong ending with YX, move continue processing next phoneme
if (flags1[index] & 32) {pos++; continue;}
// replace G with GX and continue processing next phoneme
if (debug) fprintf(stderr,"RULE: G <VOWEL OR DIPHTONG NOT ENDING WITH IY> -> GX <VOWEL OR DIPHTONG NOT ENDING WITH IY>\n");
phonems[pos].index = 63; // 'GX'
pos++;
continue;
}
// RULE:
// S P -> S B
// S T -> S D
// S K -> S G
// S KX -> S GX
// Examples: SPY, STY, SKY, SCOWL
Y = phonems[pos].index;
//pos41719:
// Replace with softer version?
A = flags1[Y] & 1;
if (A == 0) goto pos41749;
A = phonems[pos-1].index;
if (A != 32) // 'S'
{
A = Y;
goto pos41812;
}
// Replace with softer version
if (debug) fprintf(stderr,"RULE: S* %c%c -> S* %c%c\n", charInputLeadTable[Y], charInputTailTable[Y],charInputLeadTable[Y-12], charInputTailTable[Y-12]);
phonems[pos].index = Y-12;
pos++;
continue;
pos41749:
// RULE:
// <ALVEOLAR> UW -> <ALVEOLAR> UX
//
// Example: NEW, DEW, SUE, ZOO, THOO, TOO
// UW -> UX
A = phonems[X].index;
if (A == 53) // 'UW'
{
// ALVEOLAR flag set?
Y = phonems[X-1].index;
A = flags2[Y] & 4;
// If not set, continue processing next phoneme
if (A == 0) {pos++; continue;}
if (debug) fprintf(stderr,"RULE: <ALVEOLAR> UW -> <ALVEOLAR> UX\n");
phonems[X].index = 16;
pos++;
continue;
}
pos41779:
// RULE:
// CH -> CH CH' (CH requires two phonemes to represent it)
// Example: CHEW
if (A == 42) // 'CH'
{
// pos41783:
if (debug) fprintf(stderr,"CH -> CH CH+1\n");
Insert(X+1, A+1, mem59, phonems[X].stress);
pos++;
continue;
}
pos41788:
// RULE:
// J -> J J' (J requires two phonemes to represent it)
// Example: JAY
if (A == 44) // 'J'
{
if (debug) fprintf(stderr,"J -> J J+1\n");
Insert(X+1, A+1, mem59, phonems[X].stress);
pos++;
continue;
}
// Jump here to continue
pos41812:
// RULE: Soften T following vowel
// NOTE: This rule fails for cases such as "ODD"
// <UNSTRESSED VOWEL> T <PAUSE> -> <UNSTRESSED VOWEL> DX <PAUSE>
// <UNSTRESSED VOWEL> D <PAUSE> -> <UNSTRESSED VOWEL> DX <PAUSE>
// Example: PARTY, TARDY
// Past this point, only process if phoneme is T or D
if (A != 69) // 'T'
if (A != 57) {pos++; continue;} // 'D'
//pos41825:
// If prior phoneme is not a vowel, continue processing phonemes
if ((flags1[phonems[X-1].index] & 128) == 0) {pos++; continue;}
// Get next phoneme
X++;
A = phonems[X].index;
//pos41841
// Is the next phoneme a pause?
if (A != 0)
{
// If next phoneme is not a pause, continue processing phonemes
if ((flags1[A] & 128) == 0) {pos++; continue;}
// If next phoneme is stressed, continue processing phonemes
// FIXME: How does a pause get stressed?
if (phonems[X].stress) {pos++; continue;}
//pos41856:
// Set phonemes to DX
if (debug) fprintf(stderr,"RULE: Soften T or D following vowel or ER and preceding a pause -> DX\n");
phonems[pos].index = 30; // 'DX'
} else
{
A = phonems[X+1].index;
if (X+1 >= phonems_size) //prevent buffer overflow
A = 65 & 128;
else
// Is next phoneme a vowel or ER?
A = flags1[A] & 128;
if (debug && A) fprintf(stderr,"RULE: Soften T or D following vowel or ER and preceding a pause -> DX\n");
if (A != 0) phonems[pos].index = 30; // 'DX'
}
pos++;
} // while
}
// Iterates through the phoneme buffer, copying the stress value from
// the following phoneme under the following circumstance:
// 1. The current phoneme is voiced, excluding plosives and fricatives
// 2. The following phoneme is voiced, excluding plosives and fricatives, and
// 3. The following phoneme is stressed
//
// In those cases, the stress value+1 from the following phoneme is copied.
//
// For example, the word LOITER is represented as LOY5TER, with as stress
// of 5 on the diphtong OY. This routine will copy the stress value of 6 (5+1)
// to the L that precedes it.
//void Code41883()
static void CopyStress() {
// loop thought all the phonemes to be output
byte pos=0; //mem66
while(1)
{
// get the phomene
Y = phonems[pos].index;
// exit at end of buffer
if (pos>=phonems_size) return;
// if CONSONANT_FLAG set, skip - only vowels get stress
if ((flags1[Y] & 64) == 0) {pos++; continue;}
// get the next phoneme
Y = phonems[pos+1].index;
if (pos+1>=phonems_size) //prevent buffer overflow
{
pos++; continue;
} else
// if the following phoneme is a vowel, skip
if ((flags1[Y] & 128) == 0) {pos++; continue;}
// get the stress value at the next position
Y = phonems[pos+1].stress;
// if next phoneme is not stressed, skip
if (Y == 0) {pos++; continue;}
// if next phoneme is not a VOWEL OR ER, skip
if ((Y & 128) != 0) {pos++; continue;}
// copy stress from prior phoneme to this one
phonems[pos].stress = Y+1;
// advance pointer
pos++;
}
}
//change phonemelength depedendent on stress
//void Code41203()
static void SetPhonemeLength() {
for (phonem_t*position=phonems; position!=phonems+phonems_size;position++) {
byte A = position->stress;
//41218: BMI 41229
position->length = (!A || A&128 ? phonemeLengthTable : phonemeStressedLengthTable) [position->index];
}
}
// Applies various rules that adjust the lengths of phonemes
//
// Lengthen <FRICATIVE> or <VOICED> between <VOWEL> and <PUNCTUATION> by 1.5
// <VOWEL> <RX | LX> <CONSONANT> - decrease <VOWEL> length by 1
// <VOWEL> <UNVOICED PLOSIVE> - decrease vowel by 1/8th
// <VOWEL> <UNVOICED CONSONANT> - increase vowel by 1/2 + 1
// <NASAL> <STOP CONSONANT> - set nasal = 5, consonant = 6
// <VOICED STOP CONSONANT> {optional silence} <STOP CONSONANT> - shorten both to 1/2 + 1
// <LIQUID CONSONANT> <DIPHTONG> - decrease by 2
//void Code48619()
static void AdjustLengths() {
// LENGTHEN VOWELS PRECEDING PUNCTUATION
//
// Search for punctuation. If found, back up to the first vowel, then
// process all phonemes between there and up to (but not including) the punctuation.
// If any phoneme is found that is a either a fricative or voiced, the duration is
// increased by (length * 1.5) + 1
byte index;
// iterate through the phoneme list
byte loopIndex=0;
// loop index
X = 0;
while(1)
{
// get a phoneme
index = phonems[X].index;
// exit loop if end on buffer token
if (X >= phonems_size) break;
// not punctuation?
if((flags2[index] & 1) == 0)
{
// skip
X++;
continue;
}
// hold index
loopIndex = X;
// Loop backwards from this point
pos48644:
// back up one phoneme
X--;
// stop once the beginning is reached
if(X == 0) break;
// get the preceding phoneme
index = phonems[X].index;
if (X < phonems_size) //inserted to prevent access overrun
if((flags1[index] & 128) == 0) goto pos48644; // if not a vowel, continue looping
//pos48657:
do
{
// test for vowel
index = phonems[X].index;
if (X < phonems_size)//inserted to prevent access overrun
// test for fricative/unvoiced or not voiced
if(((flags2[index] & 32) == 0) || ((flags1[index] & 4) != 0)) //nochmal �berpr�fen
{
//A = flags[Y] & 4;
//if(A == 0) goto pos48688;
// get the phoneme length
A = phonems[X].length;
// change phoneme length to (length * 1.5) + 1
A = (A >> 1) + A + 1;
if (debug) {
fprintf(stderr,"RULE: Lengthen <FRICATIVE> or <VOICED> between <VOWEL> and <PUNCTUATION> by 1.5\n");
fprintf(stderr,"PRE\n");
printPhonem(X);
}
phonems[X].length = A;
if (debug) {
fprintf(stderr,"POST\n");
printPhonem(X);
}
}
// keep moving forward
X++;
} while (X != loopIndex);
// if (X != loopIndex) goto pos48657;
X++;
} // while
// Similar to the above routine, but shorten vowels under some circumstances
// Loop throught all phonemes
loopIndex = 0;
//pos48697
while(1)
{
// get a phoneme
X = loopIndex;
index = phonems[X].index;
// exit routine at end token
if (X >= phonems_size) return;
// vowel?
A = flags1[index] & 128;
if (A != 0)
{
byte mem56;
// get next phoneme
X++;
index = phonems[X].index;
// get flags
if (X >= phonems_size)
mem56 = 65; // use if end marker
else
mem56 = flags1[index];
// not a consonant
if ((flags1[index] & 64) == 0)
{
// RX or LX?
if ((index == 18) || (index == 19)) // 'RX' & 'LX'
{
// get the next phoneme
X++;
index = phonems[X].index;
// next phoneme a consonant?
if ((flags1[index] & 64) != 0) {
// RULE: <VOWEL> RX | LX <CONSONANT>
if (debug) {
fprintf(stderr,"RULE: <VOWEL> <RX | LX> <CONSONANT> - decrease length by 1\n");
fprintf(stderr,"PRE\n");
printPhonem(loopIndex);
}
// decrease length of vowel by 1 frame
phonems[loopIndex].length--;
if (debug) {
fprintf(stderr,"POST\n");
printPhonem(loopIndex);
}
}
// move ahead
loopIndex++;
continue;
}
// move ahead
loopIndex++;
continue;
}
// Got here if not <VOWEL>
// not voiced
if ((mem56 & 4) == 0)
{
// Unvoiced
// *, .*, ?*, ,*, -*, DX, S*, SH, F*, TH, /H, /X, CH, P*, T*, K*, KX
// not an unvoiced plosive?
if((mem56 & 1) == 0) {
// move ahead
loopIndex++;
continue;
}
// P*, T*, K*, KX
// RULE: <VOWEL> <UNVOICED PLOSIVE>
// <VOWEL> <P*, T*, K*, KX>
// move back
X--;
if (debug) {
fprintf(stderr,"RULE: <VOWEL> <UNVOICED PLOSIVE> - decrease vowel by 1/8th\n");
fprintf(stderr,"PRE\n");
printPhonem(X);
}
// decrease length by 1/8th
phonems[X].length -= phonems[X].length >> 3;
if (debug) {
fprintf(stderr,"POST\n");
printPhonem(X);
}
// move ahead
loopIndex++;
continue;
}
// RULE: <VOWEL> <VOICED CONSONANT>
// <VOWEL> <WH, R*, L*, W*, Y*, M*, N*, NX, DX, Q*, Z*, ZH, V*, DH, J*, B*, D*, G*, GX>
if (debug) {
fprintf(stderr,"RULE: <VOWEL> <VOICED CONSONANT> - increase vowel by 1/2 + 1\n");
fprintf(stderr,"PRE\n");
printPhonem(X-1);
}
// decrease length
byte A = phonems[X-1].length;
phonems[X-1].length = (A >> 2) + A + 1; // 5/4*A + 1
if (debug) {
fprintf(stderr,"POST\n");
printPhonem(X-1);
}
// move ahead
loopIndex++;
continue;
}
// WH, R*, L*, W*, Y*, M*, N*, NX, Q*, Z*, ZH, V*, DH, J*, B*, D*, G*, GX
//pos48821:
// RULE: <NASAL> <STOP CONSONANT>
// Set punctuation length to 6
// Set stop consonant length to 5
// nasal?
if((flags2[index] & 8) != 0)
{
// M*, N*, NX,
// get the next phoneme
X++;
index = phonems[X].index;
// end of buffer?
if (X >= phonems_size)
A = 65&2; //prevent buffer overflow
else
A = flags1[index] & 2; // check for stop consonant
// is next phoneme a stop consonant?
if (A != 0)
// B*, D*, G*, GX, P*, T*, K*, KX
{
if (debug) {
fprintf(stderr,"RULE: <NASAL> <STOP CONSONANT> - set nasal = 5, consonant = 6\n");
fprintf(stderr,"PRE\n");
printPhonem(X);
printPhonem(X-1);
}
// set stop consonant length to 6
phonems[X].length = 6;
// set nasal length to 5
phonems[X-1].length = 5;
if (debug) {
fprintf(stderr,"POST\n");
printPhonem(X);
printPhonem(X-1);
}
}
// move to next phoneme
loopIndex++;
continue;
}
// WH, R*, L*, W*, Y*, Q*, Z*, ZH, V*, DH, J*, B*, D*, G*, GX
// RULE: <VOICED STOP CONSONANT> {optional silence} <STOP CONSONANT>
// Shorten both to (length/2 + 1)
// (voiced) stop consonant?
if((flags1[index] & 2) != 0)
{
// B*, D*, G*, GX
// move past silence
do
{
// move ahead
X++;
index = phonems[X].index;
} while(index == 0);
// check for end of buffer
if (X >= phonems_size) //buffer overflow
{
// ignore, overflow code
if ((65 & 2) == 0) {loopIndex++; continue;}
} else if ((flags1[index] & 2) == 0) {
// if another stop consonant, move ahead
loopIndex++;
continue;
}
// RULE: <UNVOICED STOP CONSONANT> {optional silence} <STOP CONSONANT>
if (debug) {
fprintf(stderr,"RULE: <UNVOICED STOP CONSONANT> {optional silence} <STOP CONSONANT> - shorten both to 1/2 + 1\n");
fprintf(stderr,"PRE\n");
printPhonem(X);
printPhonem(X-1);
}
// X gets overwritten, so hold prior X value for debug statement
int debugX = X;
// shorten the prior phoneme length to (length/2 + 1)
phonems[X].length = (phonems[X].length >> 1) + 1;
X = loopIndex;
// also shorten this phoneme length to (length/2 +1)
phonems[loopIndex].length = (phonems[loopIndex].length >> 1) + 1;
if (debug) {
fprintf(stderr,"POST\n");
printPhonem(X);
printPhonem(X-1);
}
// move ahead
loopIndex++;
continue;
}
// WH, R*, L*, W*, Y*, Q*, Z*, ZH, V*, DH, J*, **,
// RULE: <VOICED NON-VOWEL> <DIPHTONG>
// Decrease <DIPHTONG> by 2
// liquic consonant?
if ((flags2[index] & 16) != 0)
{
// R*, L*, W*, Y*
// get the prior phoneme
index = phonems[X-1].index;
// prior phoneme a stop consonant>
if((flags1[index] & 2) != 0)
// Rule: <LIQUID CONSONANT> <DIPHTONG>
if (debug) {
fprintf(stderr,"RULE: <LIQUID CONSONANT> <DIPHTONG> - decrease by 2\n");
fprintf(stderr,"PRE\n");
printPhonem(X);
}
// decrease the phoneme length by 2 frames (20 ms)
phonems[X].length -= 2;
if (debug) {
fprintf(stderr,"POST\n");
printPhonem(X);
}
}
// move to next phoneme
loopIndex++;
continue;
}
// goto pos48701;
}
// Expands every stop consonant (flags1 bit 2) into three phonemes by
// inserting the two table entries that follow it (index+1 and index+2)
// behind it. The inserted entries take their length from
// phonemeLengthTable and inherit the stress of the stop consonant.
//
// For an unvoiced plosive (flags1 bit 1) the expansion is skipped when the
// next non-silent phoneme has flags1 bit 8 set or is /H or /X.
//
// The scan for that next phoneme stops at the end of the buffer. It used
// to run on until the byte index wrapped around, so a plosive at the end
// of the utterance was judged by an unrelated earlier phoneme.
//
// Uses the file-scope scratch variables A and X.
static void Code41240() {
  byte pos=0;
  while (pos<phonems_size) {
    byte index; //register AC
    X = pos;
    index = phonems[pos].index;
    // not a stop consonant: nothing to do
    if (!(flags1[index]&2)) {
      pos++;
      continue;
    }
    if (flags1[index]&1) {
      // unvoiced plosive: find the next phoneme that is not a pause
      do{
        X++;
      }while(X < phonems_size && phonems[X].index == 0);
      if (X < phonems_size) {
        A = phonems[X].index;
        if (flags1[A] & 8) {pos++; continue;}
        if ((A == 36) || (A == 37)) {pos++; continue;} // '/H' '/X'
      }
    }
    Insert(pos+1, index+1, phonemeLengthTable[index+1], phonems[pos].stress);
    Insert(pos+2, index+2, phonemeLengthTable[index+2], phonems[pos].stress);
    // step over the stop consonant and the two inserted phonemes
    pos += 3;
  }
}
static void InsertBreath() {
byte mem54=255,mem55=0,index,mem66=0; //variable Y
while(1) {
//pos48440:
X = mem66;
index = phonems[X].index;
if (X >= phonems_size) return;
mem55 += phonems[X].length;
if (mem55 < 232) {
A = flags2[index]&1;
if (A) {
X++;
mem55 = 0;
Insert(X, 254, 0, 0); // 254 = Huh??
mem66+=2;
continue;
}
if (!index) mem54 = X;
mem66++;
continue;
}
X = mem54;
phonems[X].index = 31; // 'Q*' glottal stop
phonems[X].length = 4;
phonems[X].stress = 0;
X++;
mem55 = 0;
Insert(X, 254, 0, 0); // 254 = Huh??
X++;
mem66 = X;
}
}
void SAMMain(const char*input) {
phonems_size=0;
if (!Parser1(input)) return;
// TODO: Irgendwo geht ein letztes Phonem L"\x0202" mit dem Fragezeichen durch die Lappen!!
if (debug) PrintPhonems();
Parser2();
CopyStress();
SetPhonemeLength();
AdjustLengths();
Code41240();
InsertBreath();
// Aus irgendeinem Grund werden Nullphoneme mit herumgeschleppt.
// Diese müssen spätestens jetzt raus!
// Auch die rätselhaften 254-Tags!
phonem_t*d=phonems;
for (const phonem_t*s=phonems;s!=phonems+phonems_size;s++) {
if (s->index && s->index!=254) *d++=*s;
}
phonems_size=d-phonems;
if (debug) PrintPhonems();
fwrite(phonems,sizeof(phonem_t),phonems_size,stdout);
}
Detected encoding: ANSI (CP1252) | 4
|
|