/**********************************************************************
  Cache Search Stuff (simple strstr)

  Marc Miller (t-marcmi) - 1998
 **********************************************************************/
#include "cachesrch.h"

// System page size, used as the size of the read buffer and of each read request.
//  0 means "not queried yet": _ReadNextBlock fills it in from GetSystemInfo the
//  first time it runs.
DWORD CacheSearchEngine::CacheStreamWrapper::s_dwPageSize = 0;

// Read the next block (at most one page) of the cache stream into _pbBuff.
//
//  The first call also queries the system page size (if nobody has yet),
//  allocates the one-page buffer and inspects the first two bytes for a
//  Unicode signature to decide how the stream is encoded.
//
//  On success the buffer cursor (_pbBuffPos/_pbBuffLast) is reset to cover the
//  freshly read bytes and _dwCacheStreamLoc is advanced past them.
//
//  Returns non-zero if at least one byte was read.  Returns FALSE once
//  _fEndOfFile is set, if the buffer cannot be allocated, or if
//  ReadUrlCacheEntryStream fails or delivers no data.
BOOL  CacheSearchEngine::CacheStreamWrapper::_ReadNextBlock() {
    if (_fEndOfFile)
        return FALSE;

    if (!s_dwPageSize) {
        SYSTEM_INFO sysInfo;
        GetSystemInfo(&sysInfo);
        s_dwPageSize = sysInfo.dwPageSize;
    }
    BOOL fNewRead = FALSE; // is this our first look at this file?
    if (!_pbBuff) {
        // Allocate a page of memory

        // Note: find out why this returned error code #87
        //_pbBuff  = (LPBYTE)(VirtualAlloc(NULL, s_dwPageSize, MEM_COMMIT, PAGE_READWRITE));
        _pbBuff = (LPBYTE)(LocalAlloc(LPTR, s_dwPageSize));
        if (!_pbBuff) {
            //DWORD dwError = GetLastError();
            return FALSE;
        }
        fNewRead          = TRUE;
        _dwCacheStreamLoc = 0;
    }

    BOOL  fSuccess;
    DWORD dwSizeRead = s_dwPageSize; // in: buffer size, out: bytes actually read
    if ((fSuccess = ReadUrlCacheEntryStream(_hCacheStream, _dwCacheStreamLoc,
                                            _pbBuff, &dwSizeRead, 0)) && dwSizeRead)
    {
        // A short read means there is nothing left after this block
        _fEndOfFile        = (dwSizeRead < s_dwPageSize);

        _dwCacheStreamLoc += dwSizeRead;
        _dwBuffSize        = dwSizeRead;
        _pbBuffPos         = _pbBuff;
        _pbBuffLast        = _pbBuff + dwSizeRead;

        if (fNewRead) {
            // determine data type -- once, from the head of the stream.
            //  The encoding is a property of the whole stream, so it must
            //  NOT be reset when later blocks are read; doing so would make
            //  everything after the first page of a Unicode file be decoded
            //  as ANSI.
            _dataType = ASCII_DATA; // default when no signature is present
            if (_dwBuffSize >= sizeof(USHORT)) {
                if      (*((USHORT *)_pbBuff) == UNICODE_SIGNATURE)
                    _dataType = UNICODE_DATA;
                else if (*((USHORT *)_pbBuff) == UNICODE_SIGNATURE_BACKWARDS)
                    _dataType = UNICODE_BACKWARDS_DATA;

                // The signature is not part of the text: step over it
                if (s_IsUnicode(_dataType))
                    _pbBuffPos += s_Charsize(_dataType);
            }
        }
    }
    else {
        // Either the API failed or it succeeded with zero bytes; both are
        //  reported to the caller as "no more data".
        fSuccess = FALSE;
        DWORD dwError = GetLastError();
        ASSERT(dwError != ERROR_INSUFFICIENT_BUFFER);
    }
    return fSuccess;
}

// Wrap an already-open cache stream handle and pre-load its first block.
//
//  hCacheStream - handle passed to ReadUrlCacheEntryStream for every read.
//                 NOTE(review): this class never closes it -- presumably the
//                 caller keeps ownership; confirm against callers.
//
//  A failed initial read is not reported here; it marks the stream as
//  exhausted so that the first GetNextChar call fails instead.
CacheSearchEngine::CacheStreamWrapper::CacheStreamWrapper(HANDLE hCacheStream) {
    // this class can be allocated on the stack, so nothing is pre-zeroed:
    //  give the members used by the read path a defined value up front.
    //  (_dataType in particular is read by GetNextChar even when the very
    //  first read fails and nothing else has assigned it.)
    _pbBuff           = NULL;
    _pbBuffPos        = NULL;
    _pbBuffLast       = NULL;
    _dwBuffSize       = 0;
    _dwCacheStreamLoc = 0;
    _dataType         = ASCII_DATA;
    _hCacheStream     = hCacheStream;
    _fEndOfFile       = FALSE;

    // Read in preliminary block of data --
    //  Die on next read to handle failure.
    //  On success leave _fEndOfFile exactly as _ReadNextBlock set it: if the
    //  whole stream fit in the first block it is already TRUE, and forcing
    //  it back to FALSE would only cause a pointless extra read past the end.
    if (!_ReadNextBlock())
        _fEndOfFile = TRUE;
}

// Release the read buffer, if one was ever allocated.
CacheSearchEngine::CacheStreamWrapper::~CacheStreamWrapper() {
    if (!_pbBuff)
        return;     // the buffer was never allocated (or allocation failed)

    // The buffer came from LocalAlloc, so LocalFree is the matching release
    //VirtualFree(_pbBuff);
    LocalFree(_pbBuff);
    _pbBuff = NULL;
}

// Fetch the next raw byte of the cache stream into b, pulling in a fresh
//  block first when the current one has been used up.
//  Returns non-zero on success; on failure b is left untouched.
BOOL CacheSearchEngine::CacheStreamWrapper::_GetNextByte(BYTE &b)
{
    BOOL fHaveByte;

    if (_pbBuffPos == _pbBuffLast) {
        // Current block exhausted (this also covers the case where both
        //  pointers are still NULL because the initial read failed).
        fHaveByte = _ReadNextBlock();
    }
    else {
        // A NULL cursor must never be dereferenced.
        fHaveByte = (_pbBuffPos != NULL) ? TRUE : FALSE;
    }

    if (!fHaveByte)
        return fHaveByte;

    b = *_pbBuffPos;
    ++_pbBuffPos;
    return fHaveByte;
}

// Decode the next character of the stream into wc.
//
//  Unicode streams: two bytes are consumed and stored into wc either in
//  stream order (UNICODE_DATA) or swapped (UNICODE_BACKWARDS_DATA).
//  Anything else: one byte is consumed, plus a second one when
//  IsDBCSLeadByte says the first is a lead byte, and the result is converted
//  with MultiByteToWideChar using CP_ACP.
//
//  Returns FALSE when the stream runs out of bytes or the conversion fails.
BOOL CacheSearchEngine::CacheStreamWrapper::GetNextChar(WCHAR &wc) {
    if (s_IsUnicode(_dataType)) {
        BYTE bFirst, bSecond;
        if (!(_GetNextByte(bFirst) && _GetNextByte(bSecond)))
            return FALSE;

        LPBYTE pbOut = (LPBYTE)&wc;
        if (_dataType == UNICODE_DATA) {
            pbOut[0] = bFirst;
            pbOut[1] = bSecond;
        }
        else if (_dataType == UNICODE_BACKWARDS_DATA) {
            pbOut[0] = bSecond;
            pbOut[1] = bFirst;
        }
        else {
            // s_IsUnicode accepted a type we do not know how to decode
            ASSERT(0);
        }
        return TRUE;
    }

    // Multi-byte path: gather one complete (possibly double-byte) character
    BYTE rgbChar[2];
    if (!_GetNextByte(rgbChar[0]))
        return FALSE;

    int cbChar = 1;
    if (IsDBCSLeadByte(rgbChar[0])) {
        // A lead byte without its trail byte cannot be converted
        if (!_GetNextByte(rgbChar[1]))
            return FALSE;
        cbChar = 2;
    }

    return (MultiByteToWideChar(CP_ACP, 0, (LPSTR)rgbChar, cbChar, &wc, 1) > 0);
}


// Build the normalized copy of the search string that SearchCharStream uses:
//  leading and trailing whitespace is dropped and every interior run of
//  whitespace becomes exactly one literal space.
//
//  The result is stored in _pwszPreparedSearchTarget (LocalAlloc'ed); it is
//  left NULL if the allocation fails.
//  NOTE(review): any buffer already held in _pwszPreparedSearchTarget is
//  overwritten without being freed -- presumably this runs once per object;
//  confirm against callers.
void CacheSearchEngine::StreamSearcher::_PrepareSearchTarget(LPCWSTR pwszSearchTarget)
{
    // The normalized form is never longer than the input, so input length
    //  plus terminator is always enough room.
    const UINT cchSource = lstrlenW(pwszSearchTarget);
    _pwszPreparedSearchTarget = (LPWSTR)LocalAlloc(LPTR, (cchSource + 1) * sizeof(WCHAR));
    if (!_pwszPreparedSearchTarget)
        return;

    LPWSTR pwszOut       = _pwszPreparedSearchTarget;
    BOOL   fPendingSpace = FALSE;

    for (LPCWSTR pwszIn = s_SkipWhiteSpace(pwszSearchTarget); *pwszIn; ) {
        if (s_IsWhiteSpace(*pwszIn)) {
            // Remember that a separator is owed, but only emit it once we
            //  know more text follows (this is what trims the tail).
            pwszIn        = s_SkipWhiteSpace(pwszIn);
            fPendingSpace = TRUE;
            continue;
        }

        if (fPendingSpace) {
            *pwszOut++    = L' ';
            fPendingSpace = FALSE;
        }
        *pwszOut++ = *pwszIn++;
    }
    *pwszOut = L'\0';
}

// Search a character stream for the prepared search target.
//  Does a case-insensitive (ChrCmpIW) substring search that is smart about
//  whitespace: any run of whitespace in the stream counts as a single
//  separator and matches a whitespace character in the target.
//  (Skipping of HTML tags is currently compiled out, so fIsHTML has no
//  effect.)
//
//  wsrs    - stream to read characters from; it is consumed up to the end of
//            the first match, or to its end if there is none.
//  fIsHTML - see above.
//
//  Returns TRUE if the target occurs in the stream.  Returns FALSE if it does
//  not, if there is no (or an empty) prepared target, or if the scratch table
//  cannot be allocated.
//
//  The stream can only be read forwards, so after a partial match fails we
//  cannot back up and retry from the next start position.  Simply restarting
//  at the beginning of the target misses overlapping occurrences (target
//  "ab" in "aab").  Instead a Knuth-Morris-Pratt fallback table records, for
//  each matched length, the longest shorter prefix of the target that is
//  still matched, so no stream character ever needs to be re-read.
//  This relies on the target being in prepared form (no leading, trailing
//  or adjacent whitespace), as produced by _PrepareSearchTarget.
BOOL CacheSearchEngine::StreamSearcher::SearchCharStream(CacheSearchEngine::IWideSequentialReadStream &wsrs,
                                                         BOOL fIsHTML/* = FALSE*/)
{
    if (!_pwszPreparedSearchTarget || !*_pwszPreparedSearchTarget)
        return FALSE;

    LPCWSTR    pwszTarget = _pwszPreparedSearchTarget;
    const UINT cchTarget  = lstrlenW(pwszTarget);

    // pcchFallback[i] = length of the longest proper prefix of
    //  pwszTarget[0..i] that is also a suffix of it.  LPTR zero-fills, which
    //  is already the right value for entry 0.
    UINT *pcchFallback = (UINT *)LocalAlloc(LPTR, cchTarget * sizeof(UINT));
    if (!pcchFallback)
        return FALSE;

    UINT cchPrefix = 0;
    for (UINT i = 1; i < cchTarget; ++i) {
        const BOOL fWsI = s_IsWhiteSpace(pwszTarget[i]);
        for (;;) {
            // Same equivalence as used against the stream below: whitespace
            //  matches whitespace, everything else compares ignoring case.
            const BOOL fWsK  = s_IsWhiteSpace(pwszTarget[cchPrefix]);
            const BOOL fSame = fWsI ? fWsK
                                    : (!fWsK && !ChrCmpIW(pwszTarget[cchPrefix], pwszTarget[i]));
            if (fSame) {
                ++cchPrefix;
                break;
            }
            if (!cchPrefix)
                break;
            cchPrefix = pcchFallback[cchPrefix - 1];
        }
        pcchFallback[i] = cchPrefix;
    }

    WCHAR wc;
    UINT  cchMatched = 0;       // how much of the target is currently matched
    BOOL  fInWsRun   = FALSE;   // was the previous stream character whitespace?
#if 0
    BOOL  fIgnoreHTMLTag = FALSE;
#endif

    while ((cchMatched < cchTarget) && wsrs.GetNextChar(wc)) {
#if 0
        if (fIsHTML && (wc == L'<'))
            fIgnoreHTMLTag = TRUE;
        else if (fIgnoreHTMLTag) {
            if (wc == L'>')
                fIgnoreHTMLTag = FALSE;
        }
        else
#endif
        {
            const BOOL fStreamWs = s_IsWhiteSpace(wc);

            // Only the first character of a whitespace run takes part in
            //  matching; the rest of the run is collapsed away.
            if (fStreamWs && fInWsRun)
                continue;
            fInWsRun = fStreamWs;

            for (;;) {
                const BOOL fTargetWs = s_IsWhiteSpace(pwszTarget[cchMatched]);
                const BOOL fSame     = fStreamWs ? fTargetWs
                                                 : (!fTargetWs && !ChrCmpIW(pwszTarget[cchMatched], wc));
                if (fSame) {
                    ++cchMatched;
                    break;
                }
                if (!cchMatched)
                    break;      // does not even start a match; drop it
                // Retry this character against the next-best partial match
                cchMatched = pcchFallback[cchMatched - 1];
            }
        }
    }

    const BOOL fFound = (cchMatched == cchTarget);
    LocalFree(pcchFallback);
    return fFound;
}

// Run a prepared searcher over the contents of a cache stream.
//
//  cse          - searcher to run.
//  hCacheStream - cache stream handle to read from; it is wrapped in a
//                 temporary CacheStreamWrapper for the duration of the call.
//  fIsHTML      - passed straight through to SearchCharStream.
//
//  Returns whatever SearchCharStream returns.
BOOL CacheSearchEngine::SearchCacheStream(CacheSearchEngine::StreamSearcher &cse, HANDLE hCacheStream,
                                          BOOL fIsHTML/* = FALSE*/)
{
    // The wrapper lives on the stack; its destructor frees the read buffer
    //  when this function returns.
    CacheStreamWrapper streamReader(hCacheStream);

    const BOOL fFound = cse.SearchCharStream(streamReader, fIsHTML);
    return fFound;
}
