/*****************************************************************************
* spttseng.idl *
*--------------*
*   Description:
*       This is the idl file for the Microsoft Text To Speech Driver.
*-----------------------------------------------------------------------------
*   Creation: 03/01/99
*   Copyright (C) Microsoft Corporation 1999
*   All rights reserved.
*
****************************************************************** EDC ******/
//--- Import base idl
import "oaidl.idl";
import "ocidl.idl";
import "sapiddk.idl";

//=== Forward References ======================================================
interface IMSVoiceData;
interface IMSTTSEngineInit;






//--- ENGPARTOFSPEECH ---------------------------------------------------------
//  Part-of-speech codes used by the engine.
//  The first group re-exports the SPPS_* values one-for-one under MS_ names
//  (SPPS_* is not declared in this file; presumably it comes from the
//  imported sapiddk.idl - confirm).
//  The remaining entries are engine-specific codes written as offsets from
//  SPPS_Noun, SPPS_Modifier and SPPS_Function. The offsets are deliberately
//  left as written: they are not contiguous (e.g. SPPS_Noun + 5..8 and
//  SPPS_Function + 2, 8, 10 are unused).
typedef enum ENGPARTOFSPEECH
{
    MS_NotOverriden  = SPPS_NotOverriden,
    MS_Unknown       = SPPS_Unknown,         // Probably from user lexicon
    MS_Noun          = SPPS_Noun,
    MS_Verb          = SPPS_Verb,
    MS_Modifier      = SPPS_Modifier,
    MS_Function      = SPPS_Function,
    MS_Interjection  = SPPS_Interjection,

    // MS Nouns (offsets from SPPS_Noun)
    MS_Pron         = ( SPPS_Noun + 1 ),
    MS_SubjPron     = ( SPPS_Noun + 2 ),
    MS_ObjPron      = ( SPPS_Noun + 3 ),
    MS_RelPron      = ( SPPS_Noun + 4 ),
    MS_PosNoun      = ( SPPS_Noun + 9 ),
    // MS Modifiers (offsets from SPPS_Modifier)
    MS_Adj          = ( SPPS_Modifier + 1 ),
    MS_Adv          = ( SPPS_Modifier + 2 ),
    // MS Function Words (offsets from SPPS_Function)
    MS_VAux         = ( SPPS_Function + 1 ),
    MS_Conj         = ( SPPS_Function + 3 ),
    MS_CConj        = ( SPPS_Function + 4 ),
    MS_Interr       = ( SPPS_Function + 5 ),
    MS_Det          = ( SPPS_Function + 6 ),
    MS_Contr        = ( SPPS_Function + 7 ),
    MS_Prep         = ( SPPS_Function + 9 ),
    // MS Punctuation: MS_Punctuation is itself SPPS_Function + 11 and serves
    // as the base for the five punctuation codes that follow it.
    MS_Punctuation  = ( SPPS_Function + 11 ),
    MS_GroupBegin   = ( MS_Punctuation + 1 ),
    MS_GroupEnd     = ( MS_Punctuation + 2 ),
    MS_EOSItem      = ( MS_Punctuation + 3 ),
    MS_MiscPunc     = ( MS_Punctuation + 4 ),
    MS_Quotation    = ( MS_Punctuation + 5 )
} ENGPARTOFSPEECH;





//--- TTSItemType -------------------------------------------------------------
//  Classification of a sentence item (stored in TTSItemInfo::Type).
//  Every value is one of two bases plus a small index:
//      eWORDLIST_NOT_VALID (0x0000) + n  - bracket / quote / punctuation items
//      eWORDLIST_IS_VALID  (0x1000) + n  - word, number, date, time ... items
//  NOTE(review): the base names suggest the 0x1000 range marks items whose
//  TTSSentItem word list is usable - confirm against the engine sources.
//  All values are explicit, so the order of the entries below is cosmetic.
typedef enum TTSItemType
{
    //--- Range bases
    eWORDLIST_NOT_VALID         = 0x0000,
    eWORDLIST_IS_VALID          = 0x1000,

    //--- eWORDLIST_NOT_VALID range: grouping characters
    eOPEN_PARENTHESIS           = eWORDLIST_NOT_VALID + 1,
    eOPEN_BRACKET               = eWORDLIST_NOT_VALID + 2,
    eOPEN_BRACE                 = eWORDLIST_NOT_VALID + 3,
    eCLOSE_PARENTHESIS          = eWORDLIST_NOT_VALID + 4,
    eCLOSE_BRACKET              = eWORDLIST_NOT_VALID + 5,
    eCLOSE_BRACE                = eWORDLIST_NOT_VALID + 6,
    eSINGLE_QUOTE               = eWORDLIST_NOT_VALID + 7,
    eDOUBLE_QUOTE               = eWORDLIST_NOT_VALID + 8,

    //--- eWORDLIST_NOT_VALID range: punctuation marks
    ePERIOD                     = eWORDLIST_NOT_VALID + 9,
    eEXCLAMATION                = eWORDLIST_NOT_VALID + 10,
    eQUESTION                   = eWORDLIST_NOT_VALID + 11,
    eCOMMA                      = eWORDLIST_NOT_VALID + 12,
    eSEMICOLON                  = eWORDLIST_NOT_VALID + 13,
    eCOLON                      = eWORDLIST_NOT_VALID + 14,
    eHYPHEN                     = eWORDLIST_NOT_VALID + 15,
    eELLIPSIS                   = eWORDLIST_NOT_VALID + 16,

    //--- eWORDLIST_IS_VALID range: words and abbreviations
    eUNMATCHED                  = eWORDLIST_IS_VALID + 1,
    eALPHA_WORD                 = eWORDLIST_IS_VALID + 2,
    eABBREVIATION               = eWORDLIST_IS_VALID + 3,
    eABBREVIATION_NORMALIZE     = eWORDLIST_IS_VALID + 4,
    eINITIALISM                 = eWORDLIST_IS_VALID + 5,

    //--- eWORDLIST_IS_VALID range: numbers
    eNUM_CARDINAL               = eWORDLIST_IS_VALID + 6,
    eNUM_ORDINAL                = eWORDLIST_IS_VALID + 7,
    eNUM_DECIMAL                = eWORDLIST_IS_VALID + 8,
    eNUM_PERCENT                = eWORDLIST_IS_VALID + 9,
    eNUM_DEGREES                = eWORDLIST_IS_VALID + 10,
    eNUM_SQUARED                = eWORDLIST_IS_VALID + 11,
    eNUM_CUBED                  = eWORDLIST_IS_VALID + 12,
    eNUM_CURRENCY               = eWORDLIST_IS_VALID + 13,
    eNUM_FRACTION               = eWORDLIST_IS_VALID + 14,
    eNUM_MIXEDFRACTION          = eWORDLIST_IS_VALID + 15,
    eNUM_ROMAN_NUMERAL          = eWORDLIST_IS_VALID + 16,
    eNUM_ROMAN_NUMERAL_ORDINAL  = eWORDLIST_IS_VALID + 17,
    eNUM_PHONENUMBER            = eWORDLIST_IS_VALID + 18,
    eNUM_ZIPCODE                = eWORDLIST_IS_VALID + 19,

    //--- eWORDLIST_IS_VALID range: dates and times
    eDATE_YEAR                  = eWORDLIST_IS_VALID + 20,
    eDATE                       = eWORDLIST_IS_VALID + 21,
    eDATE_LONGFORM              = eWORDLIST_IS_VALID + 22,
    eDECADE                     = eWORDLIST_IS_VALID + 23,
    eTIMEOFDAY                  = eWORDLIST_IS_VALID + 24,
    eTIME                       = eWORDLIST_IS_VALID + 25,

    //--- eWORDLIST_IS_VALID range: miscellaneous
    eSPELLOUT                   = eWORDLIST_IS_VALID + 26,
    eHYPHENATED_STRING          = eWORDLIST_IS_VALID + 27,
    eSTATE_AND_ZIPCODE          = eWORDLIST_IS_VALID + 28,
    eTIME_RANGE                 = eWORDLIST_IS_VALID + 29,
    eNUM_RANGE                  = eWORDLIST_IS_VALID + 30,

    //--- eWORDLIST_IS_VALID range: eTEMP_* values
    eTEMP_NUMBER                = eWORDLIST_IS_VALID + 31,
    eTEMP_PERCENT               = eWORDLIST_IS_VALID + 32,
    eTEMP_DEGREES               = eWORDLIST_IS_VALID + 33,
    eTEMP_NUM_FRACTION          = eWORDLIST_IS_VALID + 34,
    eTEMP_NUM_MIXEDFRACTION     = eWORDLIST_IS_VALID + 35,
    eTEMP_NUM_DECIMAL           = eWORDLIST_IS_VALID + 36,
    eTEMP_NUM_ORDINAL           = eWORDLIST_IS_VALID + 37,
    eTEMP_NUM_CURRENCY          = eWORDLIST_IS_VALID + 38,

    //--- eWORDLIST_IS_VALID range: later additions
    eNEWNUM_PHONENUMBER         = eWORDLIST_IS_VALID + 39,
    eNUM_CURRENCYRANGE          = eWORDLIST_IS_VALID + 40,
    eSUFFIX                     = eWORDLIST_IS_VALID + 41
} TTSItemType;

//--- TTSWord -----------------------------------------------------------------
//  One word of a sentence item; TTSSentItem::Words points at these.
//  Text fields are pointer + length pairs (lengths counted in WCHARs);
//  only the pronunciation is documented as NULL terminated.
typedef struct TTSWord
{
    const SPVSTATE* pXmlState;          // The XML State of the word
    LPCWSTR         pWordText;          // Pointer to the orthographic form of the word
    ULONG           ulWordLen;          // Length of the word, in WCHARs
    LPCWSTR         pLemma;             // Pointer to the orthographic form of the root word
    ULONG           ulLemmaLen;         // Length of the lemma, in WCHARs
    SPPHONEID*      pWordPron;          // Pointer to the NULL terminated pronunciation of the word
    ENGPARTOFSPEECH  eWordPartOfSpeech;  // The part of speech of the word - Is this needed???
} TTSWord;

// Extra information attached to a sentence item through
// TTSSentItem::pItemInfo. It currently carries only the item's type.
typedef struct TTSItemInfo
{
    TTSItemType Type;                   // Classification of the item
} TTSItemInfo;

//--- TTSSentItem -------------------------------------------------------------
//  One item of a sentence, as produced by IEnumSENTITEM::Next(). It keeps a
//  reference to the original source text alongside the normalized words.
//  NOTE(review): the unit of ulItemSrcLen / ulItemSrcOffset and what the
//  offset is relative to are not stated in this file - confirm.
typedef struct TTSSentItem
{
    LPCWSTR         pItemSrcText;       // Pointer to original text of the item
    ULONG           ulItemSrcLen;       // Length of the original text of the item
    ULONG           ulItemSrcOffset;    // Offset of the original text of the item
    TTSWord*        Words;              // The words of the item, post normalization
    ULONG           ulNumWords;         // The number of words of the item, post normalization
    ENGPARTOFSPEECH  eItemPartOfSpeech;  // The part of speech of the entire item
    TTSItemInfo*    pItemInfo;           // Additional item information (holds the TTSItemType)
} TTSSentItem;








//=== Constants ===============================================================
// Named integer limits.
// NOTE(review): presumably the upper bounds for MSVOICEINFO::LPCOrder and
// MSVOICEINFO::FFTSize - nothing in this file ties them together; confirm.
typedef enum INVCONST
{
    MAX_LPCORDER    = 30,
    MAX_FFTSIZE     = 512
} INVCONST;

//=== Interface definitions ===================================================

///// NOTE: This section to be moved to SAPI.IDL in SAPI6

//--- IEnumSENTITEM -----------------------------------------------------------
//  Enumerator over TTSSentItem entries. Instances are handed out by
//  IEnumSpSentence::Next() / Previous().
//  Declared [local]: MIDL generates no proxy/stub code for it.
[
    object,
    local,
    uuid(E0F4088D-CD08-11d2-B503-00C04F797396),
    helpstring("IEnumSENTITEM Interface"),
    pointer_default(unique)
]
interface IEnumSENTITEM : IUnknown
{
    // Writes one TTSSentItem into *pItemEnum.
    // NOTE(review): the HRESULT returned at the end of the enumeration is
    // not specified here - confirm against the implementation.
    HRESULT Next( [out] TTSSentItem *pItemEnum );
    // NOTE(review): presumably rewinds the enumerator to its first item - confirm.
    HRESULT Reset(void);
};

//--- IEnumSpSentence -------------------------------------------------------
//  Sentence-level enumerator. SetFragList() hands it an SPVTEXTFRAG list:
//  text fragments within the input stream together with the rendering
//  attributes described by their associated XML tags. Next() and Previous()
//  each return an IEnumSENTITEM.
//  NOTE(review): presumably one IEnumSENTITEM per sentence, enumerating that
//  sentence's items - confirm against the implementation.
//  Declared [local]: MIDL generates no proxy/stub code for it.
//
[
    object,
    local,
    uuid(299A9157-CD08-11d2-B503-00C04F797396),
    helpstring("IEnumSpSentence Interface"),
    pointer_default(unique)
]
interface IEnumSpSentence : IUnknown
{
    // Supplies the fragment list to enumerate, plus the speak flags.
    HRESULT SetFragList( [in] const SPVTEXTFRAG* pTextFragList, [in] DWORD dwSpeakFlags);
    // Returns an item enumerator through *ppSentItemEnum.
    HRESULT Next( [out]IEnumSENTITEM **ppSentItemEnum );
    // Same out-parameter shape as Next(); NOTE(review): presumably steps backwards - confirm.
    HRESULT Previous( [out]IEnumSENTITEM **ppSentItemEnum );
    // NOTE(review): presumably rewinds to the first sentence - confirm.
    HRESULT Reset(void);
};

///// End SAPI6 section

// Max number of POS per pronunciation
// (anonymous enum used as a named integer constant)
enum { POS_MAX = 4 };

// Source of a pronunciation.
// NOTE(review): LEX / LTS presumably stand for lexicon lookup and
// letter-to-sound rules - confirm.
typedef enum PRONSRC
{
    PRON_LEX = 0,
    PRON_LTS = 1
} PRONSRC;

//------------------------
// POS subset for prosody
//------------------------
// Declared as a typedef, like the other named enums in this file, so the
// type can be written as plain PROSODY_POS from C as well as C++.
// 'enum PROSODY_POS' remains valid; enumerator values are unchanged (0..3).
typedef enum PROSODY_POS
{
    POS_UNK,            // unknown
    POS_FUNC,           // any function word
    POS_CONTENT,        // any content word
    POS_AUX             // NOTE(review): presumably auxiliary verb - confirm
} PROSODY_POS;

// Reverb delay presets (selected through MSVOICEINFO::eReverbType).
// Values are spelled out; they match the implicit numbering 0..7.
typedef enum REVERBTYPE
{
    REVERB_TYPE_OFF     = 0,
    REVERB_TYPE_BATHTUB = 1,
    REVERB_TYPE_ROOM    = 2,
    REVERB_TYPE_HALL    = 3,
    REVERB_TYPE_CHURCH  = 4,
    REVERB_TYPE_STADIUM = 5,
    REVERB_TYPE_ECHO    = 6,
    REVERB_TYPE_ROBOSEQ = 7         // Robot with 'sequencer'
} REVERBTYPE;


// Single-bit flags that can be OR-ed together.
// NOTE(review): presumably the "position flags" carried in UNIT_CVT::flags - confirm.
typedef enum UNITFLAGS
{
    WORD_START_FLAG = (1L << 0),     // Word starts on this unit
    SENT_START_FLAG = (1L << 1),     // Sentence starts on this unit
}UNITFLAGS;

// Array bound for MSVOICEINFO::TapCoefficients.
typedef enum TAPS
{
    MAXTAPS  = 8
}TAPS;

// Limits and default of the user rate setting.
// Declared as a typedef, like the other named enums in this file, so the
// type can be written as plain USER_RATE_VALUE from C as well as C++.
// 'enum USER_RATE_VALUE' remains valid; the values are unchanged.
typedef enum USER_RATE_VALUE
{
    MIN_USER_RATE     = (-18),
    MAX_USER_RATE     = 18,
    DEFAULT_USER_RATE = 0       // None (NOTE(review): presumably "no rate adjustment" - confirm)
} USER_RATE_VALUE;

// Change to new rate if value is NOT this.
// Sentinel: one past MAX_USER_RATE, so it lies outside the
// MIN_USER_RATE..MAX_USER_RATE range.
enum { NO_RATE_CHANGE = MAX_USER_RATE + 1 };


/*** UNIT_CVT
*   Unit conversion record. IMSVoiceData::GetUnitIDs() takes a pointer to
*   these as an [in,out] parameter; the tags on the fields say which ones
*   are inputs and which ones are results.
*/
typedef struct UNIT_CVT
{
    ULONG   PhonID;             // [in]  Phoneme ID
    ULONG   flags;              // [in]  Position flags
    ULONG   UnitID;             // [out] Inventory table ID
    ULONG   SenoneID;           // [out] Context offset
    float   Dur;                // [out] Duration in seconds
    float   Amp;                // [out] Amplitude
    float   AmpRatio;           // [out] Amplitude gain
    CHAR    szUnitName[15];     // [out] Name string
} UNIT_CVT;

/*** MSVOICEINFO
*   Description of a voice data object, returned through
*   IMSVoiceData::GetVoiceInfo().
*   NOTE(review): NumOfTaps, LPCOrder and FFTSize are presumably bounded by
*   MAXTAPS, MAX_LPCORDER and MAX_FFTSIZE - confirm.
*/
typedef struct MSVOICEINFO
{
    WAVEFORMATEX    WaveFormatEx;               // Voice data format
    LCID            LangID;                     // Voice data language ID
    ULONG           Rate;                       // Words-per-minute
    ULONG           Pitch;                      // Average pitch in Hz
    REVERBTYPE      eReverbType;                // Reverb param
    ULONG           ProsodyGain;                // 0 = monotone
    ULONG           NumOfTaps;                  // BE: Whisper param
    float           TapCoefficients[MAXTAPS];   // BE: Whisper param
    float           VibratoFreq;                // Hertz
    ULONG           VibratoDepth;               // 0 - 100%
    ULONG           SampleRate;                 // 22050 typical
    ULONG           LPCOrder;                   // Number of LPC coefficients
    ULONG           FFTSize;                    // FFT window length
    float*          pWindow;                    // Hanning Window
} MSVOICEINFO;

/*** MSUNITDATA
*   This is the result of a unit fetch (IMSVoiceData::GetUnitData).
*   NOTE(review): the field meanings below are inferred from the names
*   only; buffer sizes and ownership are not stated in this file - confirm.
*/
typedef struct MSUNITDATA
{
    ULONG       cNumEpochs;     // presumably the number of entries in pEpoch
    ULONG       cNumSamples;    // presumably the number of samples in the unit
    ULONG       cOrder;         // presumably the LPC order used for pLPC
    float       *pEpoch;        // presumably epoch data
    float       *pLPC;          // presumably LPC coefficients
    float       *pRes;          // presumably residual data
    float       *pGain;         // presumably gain data
} MSUNITDATA;

// AlloToUnit() attributes: bit flag for the 'attributes' argument of
// IMSVoiceData::AlloToUnit(). Only bit 0 is defined here.
enum { ALLO_IS_STRESSED = (1 << 0) };

/*** IMSVoiceData
*   Private interface on TTS voice data objects. A voice data object encapsulates
*   the voice data with the necessary lookup logic.
*   Declared [local]: MIDL generates no proxy/stub code for it.
*   It is the default interface of the MSVoiceData coclass and is what
*   IMSTTSEngineInit::VoiceInit() receives.
*/
[
    object,
    local,
    uuid(6265B7E1-0340-11d3-B50C-00C04F797396),
    helpstring("IMSVoiceData Interface"),
    pointer_default(unique)
]
interface IMSVoiceData : IUnknown
{
    // Fills the caller-supplied MSVOICEINFO.
    HRESULT GetVoiceInfo( [out]MSVOICEINFO* pVoiceInfo );
    // Resolves unit IDs in place: UNIT_CVT marks which fields are inputs and
    // which are outputs. NOTE(review): cUnits is presumably the number of
    // UNIT_CVT entries pUnits points to - confirm.
    HRESULT GetUnitIDs( [in,out]UNIT_CVT* pUnits, [in]ULONG cUnits );
    // Fetches the data for one unit into *pUnitData.
    HRESULT GetUnitData( [in]ULONG unitID, [out]MSUNITDATA* pUnitData );
    // Maps 'allo' plus attribute flags (see ALLO_IS_STRESSED) to a unit ID.
    // Note the ID comes back as long here, while GetUnitData() takes a ULONG.
    HRESULT AlloToUnit( [in]short allo, [in]long attributes, [out]long* pUnitID );
};

/*** IMSTTSEngineInit
*   Private engine initialization interface used to connect the voice
*   object to the synthesizer.
*   Declared [local]: MIDL generates no proxy/stub code for it.
*   The MSTTSEngine coclass lists it next to its default ISpTTSEngine interface.
*/
[
    object,
    local,
    uuid(8A7C38EB-D8B0-11d2-B504-00C04F797396),
    helpstring("IMSTTSEngineInit Interface"),
    pointer_default(unique)
]
interface IMSTTSEngineInit : IUnknown
{
    // Hands the voice data object to the engine.
    HRESULT VoiceInit( [in]IMSVoiceData* pVoiceData );
};

//=== CoClass definitions =====================================================
//--- Type library: declares the two coclasses implemented by the engine.
[
    uuid(3F7C4D29-D007-11D2-B503-00C04F797396),
    version(1.0),
    helpstring("MS TTS Engine 1.0 Type Library")
]
library MSTTSENGINELib
{
    importlib("stdole32.tlb");
    importlib("stdole2.tlb");

    //--- Voice data object: loads the voice data files and exposes
    //    them to the driver through IMSVoiceData.
    [
        uuid(65DBDDEF-0725-11d3-B50C-00C04F797396),
        helpstring("MSVoiceData Class")
    ]
    coclass MSVoiceData
    {
        [default] interface IMSVoiceData;
    };

    //--- Synthesizer object: ISpTTSEngine is its default interface;
    //    IMSTTSEngineInit is the private hook that receives the voice data.
    [
        uuid(B93AE09F-D033-11D2-B503-00C04F797396),
        helpstring("MSTTSEngine Class")
    ]
    coclass MSTTSEngine
    {
        [default] interface ISpTTSEngine;
        interface IMSTTSEngineInit;
    };
};
