/*
*
* Copyright (c) 1998,1999 Microsoft Corporation. All rights reserved.
* EXEMPT: copyright change only, no build required
*
*/
#ifndef _XMLSTREAM_HXX
#define _XMLSTREAM_HXX
#pragma once

#include "bufferedstream.hxx"
#include "encodingstream.hxx"
#include "_rawstack.hxx"

class XMLParser;

// the XMLStream class uses the error code and token types defined in the xmlparser
#include <ole2.h>
#include <xmlparser.h>

//==============================================================================
// This enum and StateEntry struct are used in table driven parsing for DTD
// stuff - so that the parser isn't bloated by this stuff.  This is about 15%
// slower than a hand written parser.

typedef enum {
    OP_OWS,     // optional whitespace
    OP_WS,      // required whitespace
    OP_CHAR,    // char comparison, _pch[0] is char, _sArg1 is else goto state or error code
    OP_CHAR2,   // same os OP_CHAR - except it doesn't do _pInput->Mark.
    OP_PEEK,    // same as OP_CHAR - except it doesn't advance.
    OP_NAME,    // scan name
    OP_TOKEN,   // return token, _sArg1 = token
    OP_STRING,  // scan a string
    OP_EXTID,   // scan an external id.
    OP_STRCMP,  // string comparison.
    OP_POP,     // pop state
    OP_NWS,     // not whitespace conditional
    OP_SUBSET,  // skip an internal subset
    OP_PUBIDOPTION, // conditional for _fShortPubIdOption
    OP_NMTOKEN,
    OP_TABLE,   // push a new table. (pointer in _pch field).
    OP_STABLE,   // switch to new table. (pointer in _pch field).
    OP_COMMENT,
    OP_CONDSECT,
    OP_SNCHAR,  // conditional 'is start name char'
    OP_EQUALS,  // scan ' = '
    OP_ENCODING, // switch encoding.
    OP_CHARWS,   // match char or must be white space.
    OP_ATTRVAL, //parse attribute values.(_sArg1 = return PCDATA token or not)
    OP_PETEST,
    OP_ATTEXPAND,
    OP_NMSTRING, // unqualified name within quote
    OP_FAKESYSTEM,
} OPCODE;

typedef struct {
    OPCODE			_sOp;
    const WCHAR*	_pch;
    DWORD			_sGoto;
    DWORD			_sArg1;
    long			_lDelta; // for when we do a Mark(), or Token if OP_CHAR
} StateEntry;

//================================================================================
class XMLStream
{
public:
    XMLStream(XMLParser * pXMLParser);
    ~XMLStream();

    //------------------------------------------------------------------------
    // These are some more tokens that the XMLStream returns.
	// xiaoyu : only few are used in fusion manifest
    typedef enum
    {
        // ADDITIONAL TOKENS THAT THE PARSER PULLS UP
        XML_PENDING = 0,						// still parsing.
        XML_NUMENTITYREF = XML_LASTSUBNODETYPE, // &23;
        XML_HEXENTITYREF,						// &x0cf7;
        XML_BUILTINENTITYREF,					//&gt;
        XML_TAGEND,								// >
        XML_EMPTYTAGEND,						// /> (text = tag name)
        XML_ENDTAG,								// </ (text = tag name)
        XML_ENDPI,								// text = pi body minus '?>'
        XML_ENDXMLDECL,							// end of xml declaration
        XML_ENDDECL,							// '>'
        XML_CLOSEPAREN,
        XML_ENDCONDSECT,						// ']]>'
        XML_STARTDTDSUBSET,
        XML_ENDPROLOG,
        XML_DATAAVAILABLE,
        XML_DATAREALLOCATE,
    } XMLToken;

    HRESULT PushStream(
        /* [in] */ EncodingStream  *pStm,
        /* [in] */ bool fExternalPE);

    HRESULT AppendData(
        /* [in] */ const BYTE  *buffer,
        /* [in] */ long length,
        /* [in] */ BOOL lastBuffer);

    HRESULT Reset( void);

    HRESULT GetNextToken(
        /* [out] */ DWORD  *token,
        /* [out] */ const WCHAR  **text,
        /* [out] */ long  *length,
        /* [out] */ long  *nslen);

    ULONG GetLine();

    ULONG GetLinePosition();

    ULONG GetInputPosition();

    HRESULT GetLineBuffer(
        /* [out] */ const WCHAR  * *buf,
        /* [out] */ ULONG* len,
        /* [out] */ ULONG* startpos);

    void SetFlags(
        /* [in] */ unsigned short usFlags);

    unsigned short GetFlags();

    HRESULT ErrorCallback(HRESULT hr);

    WCHAR   getAttrValueQuoteChar() { return _chTerminator; }

private:
    HRESULT init();
    void _init();

    HRESULT firstAdvance();
    HRESULT parseContent();
    HRESULT parseElement();
    HRESULT parseEndTag();
	HRESULT parsePI();
    HRESULT parseComment();
    HRESULT parseName();
    HRESULT parseAttributes();
    HRESULT parseAttrValue();

    HRESULT parsePCData();
	HRESULT parseEntityRef();

	HRESULT parseCondSect();
    HRESULT parseCData();

	HRESULT parseTable();
	HRESULT parseEquals();

    HRESULT skipWhiteSpace();

    inline void mark(long back = 0) { _pInput->Mark(back); }

    typedef HRESULT (XMLStream::* StateFunc)();

    // The state machine consists of functions where each
    // function can determine for itself its own substates
    // so that when it is reactivated by a pop() it can pick
    // up where it left off.  The current substate is set
    // to zero on a push() and at pop() time it is restored
    // to whatever it was told to be in the push().
    HRESULT push(StateFunc f, short substate = 0);
    HRESULT pushTable(short substate = 0, const StateEntry* table = NULL, DWORD le = 0);
    HRESULT pop(bool boundary = true);
    HRESULT switchTo(StateFunc f); // pop & push

    // Advance and jump to state
    HRESULT AdvanceTo(short substate);

    HRESULT PopStream();

    HRESULT ScanHexDigits();
    HRESULT ScanDecimalDigits();

	bool    PreEntityText();

    // Always use this function instead of calling _pInput->getToken
    inline void getToken(const WCHAR** ppText, long* pLen) { _pInput->getToken(ppText,pLen); }

    BufferedStream* getCurrentStream();

    StateFunc   _fnState; // current function.
    short       _sSubState; // current substate.
    short       _sSavedState;

    struct StateInfo
    {
        StateFunc			_fnState;
        short				_sSubState;
        const StateEntry*	_pTable;
        //DWORD				_lEOFError;
        int					_cStreamDepth;
    };
    _rawstack<StateInfo>	_pStack;

    struct InputInfo
    {
        BufferedStream*		_pInput;
        WCHAR				_chLookahead;
        //bool				_fPE;
        //bool				_fExternalPE;
        //bool				_fInternalSubset;	// remember that we were in internal subset.
        StateFunc			_fnState;			// remember the state function when pushstream
												// it is used to check parameter entity replacement text
												// is properly nested with markup declarations.
    };
    _rawstack<InputInfo> _pStreams;

    // Cache the current value of _pStreams.used() which is used to making sure
    // a parameter entity doesn't pop out of the scope in which it was entered.
    int         _cStreamDepth;

    BufferedStream* _pInput;			// current input stream.

    WCHAR       _chNextLookahead;
    bool        _fWasUsingBuffer;
    long        _lParseStringLevel;

    DWORD       _nPreToken;
    DWORD       _nToken;
    long        _lLengthDelta;			// amount to adjust token length by
    long        _lMarkDelta;			// amount to adjust mark position by
    bool        _fDelayMark;
    bool        _fFoundFirstElement;	// special trick for EndProlog.

    WCHAR       _chLookahead;
    bool        _fWhitespace;			// found whitespace while parsing PCDATA
    WCHAR       _chTerminator;
    WCHAR       _chEndChar;				// for parseAttributes.
    bool        _fEOF;					// reached end of file.

    long        _lNslen; // namespace length
    long        _lNssep; // namespace separator length ':' or '::'.

	long        _lEntityPos; // for parsing entity references.
	bool        _fPCDataPending; // whether pcdata is pending during parseEntityRef.
	const WCHAR* _pchCDataState;
    int         _cAttrCount;
	int         _nEntityNSLen; // saved namespace info for entity references.

    // Switches.
    unsigned short _usFlags;
    // bool      _fFloatingAmp;		// used in ParseEntityRef()
    bool         _fShortEndTags;		// used in ParserEndTag()
    bool         _fCaseInsensitive;
    bool         _fNoNamespaces;		// used in parseName()	
    //bool       _fNoWhitespaceNodes;   // used in DTD data	
    //bool       _fIE4Quirks;			// xiaoyu : what it means?
    bool         _fNoDTDNodes;			// only used in GetDTDNextToken(). may be deleted later
    //bool       _fHandlePE;			// This flag is used to turn on and off parameter entity handling in DTD
										// xiaoyu: used in ParsePI(), ParseDTD(), parseComment(),
										// parsePEDecl(), parseIgnoSet()
							

    // for table driven parsing.
    const StateEntry* _pTable;
    //DWORD				_lEOFError;	// used in parsePEDecl(), pushTable(), parseTable(),

    // buffer used during whitespace normalization
    WCHAR*      _pchBuffer;
    long        _lBufLen;
    long        _lBufSize;
    bool        _fFoundWhitespace;
    bool        _fUsingBuffer;
    bool        _fFoundNonWhitespace;
    bool        _fCheckAttribute;	// need to check the attribute name
									// xiaoyu : used for complicate attribute type, such as "xml:lang", "xmlns"
	
    bool        _fDTD;				// xiaoyu whether xml contains DTD
    //bool      _fInternalSubset;   // xiaoyu used in ParseDTD
    //int         _cConditionalSection;
    //bool        _fFoundPEREf;
    //bool        _fWasDTD;
//    bool        _fParsingNames;

    bool        _fParsingAttDef;	// used in ParseAttrValue()
    //int       _cIgnoreSectLevel;
    //bool		_fResolved;			// used in ParseEntity();
    bool        _fReturnAttributeValue;
    //int       _cStreams;			// used to identify if PushStream was called.
									// used in parseEntity();	
    WCHAR       _wcEntityValue;	// used in parseEntityRef()
    XMLParser * _pXMLParser;		// regular pointer pointing back to the parser

    inline HRESULT PushChar(WCHAR ch)
    {
        if (_lBufLen < _lBufSize)
        {
            _pchBuffer[_lBufLen++] = ch; return S_OK;
        }
        else return _PushChar(ch);
    }
    HRESULT     _PushChar(WCHAR ch); // grow the buffer.
};

#endif // _XML_STREAM_HXX
