// N.B. This is nothing to do with the Prototype JS library.

// Returns true if a string starts with the specified string
// (and false if not, obvsly).
String.prototype.startsWith = function (str) {
	return this.indexOf(str) === 0;
}

function tStripQuotes( str, replacement )
{
	return str.replace( /"/g, '+' ).replace( /'/g, '+' );
}

/**
 * SearchQueryParser
 * John McKerrell - 2008/08/06
 *
 * A singleton object that can be used to parse a search query.
 * Parsing the search query will return a normalised tree
 * structure stored as arrays of arrays. This tree can then 
 * be converted back to a search query using treeToString().
 * This is all encapsulated in the normalise method.
 * Example usage:
 * 
 */
var SearchQueryParser = {
    /**
     * The default binary operator that we want to insert if
     * the user doesn't specify.
     */
    _default: 'OR',

    /**
     * Takes a search query, parses it into a tree structure and then
     * outputs it back out as a normalised search query.
     */
    normalise: function( query ) {
        return SearchQueryParser.treeToString( SearchQueryParser.parse( query ) );
    },
    /**
     * Parses a search query into a normalised tree structure.
     */
    parse: function( query ) {
        /**
         * Converts a search query to an array of tokens, strings
         * surrounded by quotes will be counted as a single token.
         */
        var tokens = SearchQueryParser.tokenise( query );

        /**
         * As we go through the array of tokens we store the current
         * position in this one dimensional array. We use an array
         * so that we can essentially pass the value "by reference".
         */
        var start = [ 0 ];

        /**
         * parseTokens converts the series of tokens into the
         * tree structure.
         */
        return SearchQueryParser.parseTokens( tokens, start, false );
    },
    /**
     * Parses an array of tokens into a tree structure,
     * an array of arrays.
     *
     * Gets called recursively when it encounters brackets.
     */
    parseTokens: function( tokens, start, brackets ) {
        /**
         * Basic way to store the possible binary operators.
         * If adding entries be sure to have spaces at either
         * end too.
         */
        var operators = ' AND OR ';

        /**
         * Always holds the topmost node in the tree.
         */
        var node = [];
        for( var l = tokens.length; start[0] < l; ++start[0] ) {
            var token = tokens[start[0]]
            /**
             * If we're parsing a bracketed section and we've come
             * to a close brackets, then just return the tree-branch
             * we've built up so far.
             */
            if( brackets && token == ')' ) {
                return node;
            }
            /**
             * If this token is an open bracket then we need to create
             * a new branch until we reach a close bracket and insert
             * that whole branch at this point.
             */
            if( token == '(' ) {
                ++start[0];
                token = SearchQueryParser.parseTokens( tokens, start, true );
            }
            /**
             * If the token is 'NOT' then we need to create a two-part node
             * with 'NOT' as the first part and the following token as the
             * second part. If the following token is a bracket then we
             * we need to take the entire bracketed section.
             */
            if( token == 'NOT' ) {
                ++start[0];
                if( tokens[start[0]] == '(' ) {
                    ++start[0];
                    token = [ token, SearchQueryParser.parseTokens( tokens, start, true ) ];
                } else {
                    token = [ token, tokens[start[0]] ];
                }
            }
            /**
             * If this is a binary operator then ideally we must
             * already have a token in the node, see the following
             * comment for what happens when things go wrong.
             */
            if( node.length == 1 && operators.indexOf( ' '+token+' ' ) != -1 ) {
                if( node.length == 1 ) {
                    node.push( token );
                /**
                 * If you pass an operator at the wrong point then there's
                 * two options for how that can be handled. This option
                 * treats them as normal tokens if they're in the wrong place
                 * (i.e. if you pass "foo AND AND BAR" you get
                 *  "( ( foo AND AND ) OR bar )" )
                 * Taking the node.length check out from above and putting the
                 * following else back in would give "( ( foo AND ) AND bar )"
                 * or taking the node.length check out and leaving the else
                 * out too would ignore the second operator.
                } else {
                    console.log( 'HERE' );
                    node = [ node, token ];
                 */
                }
            /**
             * If we already have an element in this node, then add
             * the default operator and then this token.
             */
            } else if( node.length == 1 ) {
                node.push( SearchQueryParser._default );
                node.push( token );
            /**
             * This node must be empty, insert the token.
             */
            } else {
                node.push( token );
            }
            /**
             * If this node has three tokens already then we need
             * to create a new parent node with this node as the
             * first child.
             */
            if( node.length == 3 ) {
                node = [ node ];
            }
        }
        return node;
    },
    /**
     * Take the query and parse it into an array of tokens
     */
    tokenise: function( query ) {
        /**
         * We want to split on the interesting parts, easiest way to
         * do this is to surround those by spaces then split the
         * whole string on spaces.
         */
        query = query.replace( new RegExp( '(\\[|\\]|\\(|\\)|:|"|'+"'"+')', 'g' ), ' $1 ' );
        var tokens = query.split( / +/ );

        /**
         * Characters that a "quoted" string may start with.
         * If adding entries then make sure the start position
         * matches the end position in stringEnds.
         */
        var stringStarts = "'\"[";
        /**
         * Characters that a "quoted" string may end with.
         * If adding entries see above comment.
         */
        var stringEnds = "'\"]";

        /**
         * String-safe tokens, i.e. quoted strings are concatenated
         * into a single token.
         */
        var safeTokens = [];

        /**
         * If we're in a "quoted" string we need to know how it
         * started to know how it should end.
         */
        var inString = '';

        /**
         * If we're in a "quoted" string then we keep adding bits
         * to this array until we come to the end.
         */
        var bigString = [];

        /**
         * If we come to an "end quote", then this will contain
         * the position in stringEnds so we can match it to
         * the starting "quote"
         */
        var endIndex;
        for( var i = 0, l = tokens.length; i < l; ++i ) {
            /**
             * Discard empty tokens.
             */
            if( tokens[i] == '' ) {
            /**
             * If we're not already in a "quoted" string and we
             * come to a "quote" then store the type of "quote".
             */
            } else if( inString == ''
                    && stringStarts.indexOf( tokens[i] ) != -1 ) {
                inString = tokens[i];
            /**
             * If we're in a "quoted" string and we come to an
             * end quote...
             */
            } else if( inString != ''
                    && ( endIndex = stringEnds.indexOf( tokens[i] ) ) != -1 ) {
                /**
                 * Check whether the end "quote" matches the start "quote".
                 */
                if( inString == stringStarts.substr(endIndex,1) ) {
                    /**
                     * If so, merge big string into a single string and
                     * put the "quotes" back in.
                     */
                    safeTokens.push( inString + bigString.join( ' ' ) + tokens[i] );
                    bigString = [];
                    inString = '';
                } else {
                    /**
                     * If not, just add this token to bigString
                     */
                    bigString.push( tokens[i] );
                }
            /**
             * If we're in a "quoted" string, add the token to bigString.
             */
            } else if( inString != '' ) {
                bigString.push( tokens[i] );
            /**
             * Otherwise just add the token to the array of
             * "quoted" string-safe tokens.
             */
            } else {
                safeTokens.push( tokens[i] );
            }
        }

        /**
         * We need to make sure that terms with a colon in are
         * concatenated, so 'type:"fish wife"' remains as a
         * single token for example.
         */
        var colonTokens = [];
        while( safeTokens.length ) {
            var token = safeTokens.shift();
            if( token == ':' ) {
                token = colonTokens.pop()+token+safeTokens.shift()
            }
            colonTokens.push( token );
        }
        return colonTokens;
    },
    /**
     * Takes the tree structure and converts it into a search query.
     * Will put brackets in at all points where necessary.
     */
    treeToString: function( tree ) {
        if( tree instanceof Array ) {
            /**
             * Basically pass through nodes with only one entry.
             */
            if( tree.length == 1 ) {
                return SearchQueryParser.treeToString(tree[0]);
            } else {
                /**
                 * For each node with more than one entry iterate
                 * over the entries and call treeToString recursively.
                 * This will continue until it gets to a non-array.
                 */
                var bits = [];
                for( var i = 0, l = tree.length; i < l; ++i ) {
                    bits[i] = SearchQueryParser.treeToString(tree[i]);
                }
                /**
                 * Then join those resultant bits into a string,
                 * always surrounded by brackets to be safe.
                 */
                return '( '+bits.join( ' ' )+' )';
            }
        }
        return tree;
    },
    /**
     * This saves me having to keep track of which is the last
     * method to leave the comma off.
     */
    1:1
};

