/* eslint-disable no-useless-escape */
/* eslint-disable no-restricted-globals */
/* eslint-disable no-plusplus */
import { isEmpty } from '../../../../common/helpers/object';
/**
 * Token categories attached to every token object as its `type`.
 * Frozen so that an accidental assignment cannot silently change how tokens
 * are classified at runtime.
 */
const tokenTypes = Object.freeze({
  STRING: 'STRING',
  COMBINE_OPERATOR: 'COMBINE_OPERATOR',
  OPERATOR: 'OPERATOR',
  FIELD: 'FIELD',
  EOF: 'EOF',
  VALUE: 'VALUE',
  SUBEXPRESSION: 'SUBEXPRESSION',
  NOT_OPERATOR: 'NOT_OPERATOR',
});
/**
 * Comparison operators recognised by the lexer. A token is treated as an
 * operator when its upper-cased text appears in this list.
 * Frozen so that the shared lookup table cannot be mutated by accident.
 */
const operators = Object.freeze([
  '!=',
  'EQUAL',
  'IN',
  'LIKE',
  'BETWEEN',
  '<>',
  '<',
  '>',
  '=',
  '>=',
  '<=',
  'NOT IN',
  'NOT LIKE',
  'NOT EQUAL',
  'IS',
  'IS NOT',
]);

// Matches one token per hit, trying in order: a quoted string, a bracketed list,
// a parenthesised group (parentheses inside quotes do not close it), a multi-word
// operator, and finally any run of characters without whitespace, parentheses or "[".
// A bare "(" or ")" matches none of the alternatives, so it is never emitted as a token.
// NOTE(review): the multi-word alternatives have no trailing word boundary, so input
// such as "NOT INDEX" is split into "NOT IN" + "DEX".
const TOKEN_PATTERN =
  /('[^']*'|"[^"]*"|\[[^\[\]]*(?:,[^\[\]]*)*\]|\([^()]*((?<!')'[^']*'|"[^"]*"|[^()])*?\)|IS\s+NOT|NOT\s+IN|NOT\s+LIKE|NOT\s+EQUAL|[^\s()[]+)/g;

/** Splits raw text into lexeme strings; yields an empty array when nothing matches. */
const tokenize = (input) => input.match(TOKEN_PATTERN) || [];

/** True when the text starts with "(" and ends with ")". */
const isParenthesised = (text) => text.startsWith('(') && text.endsWith(')');

/** True when the text starts with "[" and ends with "]". */
const isBracketList = (text) => text.startsWith('[') && text.endsWith(']');

/** True when the text is wrapped in single or double quotes (a lone quote character does not count). */
const isQuoted = (text) =>
  text.length >= 2 &&
  ((text.startsWith("'") && text.endsWith("'")) || (text.startsWith('"') && text.endsWith('"')));

/** Collects the distinct property names found across all rows of `data`. */
const collectFieldNames = (data) =>
  new Set(isEmpty(data) ? [] : data.flatMap((row) => Object.keys(row)));

/** Maps one raw lexeme to a `{ type, lexeme }` pair. */
const classifyToken = (rawToken, fields) => {
  // Keywords are matched case-insensitively and with runs of whitespace collapsed,
  // so "NOT   IN" is recognised as the "NOT IN" operator.
  const canonical = rawToken.toUpperCase().replace(/\s+/g, ' ');

  if (canonical === 'AND' || canonical === 'OR') {
    return { type: tokenTypes.COMBINE_OPERATOR, lexeme: rawToken };
  }
  if (canonical === 'NOT') {
    return { type: tokenTypes.NOT_OPERATOR, lexeme: rawToken };
  }
  if (operators.includes(canonical)) {
    // Case is preserved so that validation can still reject lower-case operators.
    return { type: tokenTypes.OPERATOR, lexeme: rawToken.replace(/\s+/g, ' ') };
  }
  if (isParenthesised(rawToken)) {
    return { type: tokenTypes.SUBEXPRESSION, lexeme: rawToken };
  }
  if (fields.has(rawToken)) {
    return { type: tokenTypes.FIELD, lexeme: rawToken };
  }
  // Anything else is a value; enclosing single quotes are stripped.
  return { type: tokenTypes.VALUE, lexeme: rawToken.replace(/^'(.*)'$/, '$1') };
};

/** Precedence level: 1 comparison, 2 negated comparison, 3 AND, 4 OR, 0 everything else. */
const precedenceOf = ({ type, lexeme }) => {
  if (type === tokenTypes.COMBINE_OPERATOR) {
    return lexeme.toUpperCase() === 'AND' ? 3 : 4;
  }
  if (type === tokenTypes.OPERATOR) {
    return lexeme.toUpperCase().includes('NOT') ? 2 : 1;
  }
  return 0;
};

/** Tokenises `input` and decorates every token with its precedence and nested tokens. */
const buildTokens = (input, fields) =>
  tokenize(input).map((rawToken) => {
    const token = classifyToken(rawToken, fields);
    token.precedence = precedenceOf(token);
    // Only real parenthesised groups get nested tokens; a quoted literal such as
    // '(a)' stays a plain value.
    if (token.type === tokenTypes.SUBEXPRESSION) {
      token.subExpressions = buildTokens(rawToken.slice(1, -1), fields);
    }
    return token;
  });

/** Checks that "IN" is followed by a non-empty bracketed list of quoted values. */
const validateInList = (listToken) => {
  if (!listToken || !isBracketList(listToken.lexeme)) {
    throw new Error('Operator "IN" must be followed by values within square brackets.');
  }

  const valuesString = listToken.lexeme.slice(1, -1).trim();
  if (valuesString === '') {
    throw new Error('Operator "IN" must be followed by non-empty values within square brackets.');
  }

  valuesString
    .split(',')
    .map((value) => value.trim())
    .forEach((value) => {
      if (!isQuoted(value)) {
        throw new Error(
          `Each value within "IN" must be enclosed in single or double quotes. Invalid value: ${value}`
        );
      }
    });
};

/** Checks that "BETWEEN" is followed by exactly two bare numbers or quoted dates in brackets. */
const validateBetweenRange = (rangeToken) => {
  if (!rangeToken || !isBracketList(rangeToken.lexeme)) {
    throw new Error('Operator "BETWEEN" must be followed by a range within square brackets.');
  }

  const rangeString = rangeToken.lexeme.slice(1, -1).trim();
  if (rangeString === '') {
    throw new Error(
      'Operator "BETWEEN" must be followed by a non-empty range within square brackets.'
    );
  }

  const bounds = rangeString.split(',').map((value) => value.trim());
  if (bounds.length !== 2) {
    throw new Error(
      'Operator "BETWEEN" must be followed by exactly two values within square brackets.'
    );
  }

  bounds.forEach((value) => {
    const quoted = isQuoted(value);
    const bareValue = quoted ? value.slice(1, -1) : value;
    // Number('') is 0, so blank input must be excluded explicitly; otherwise a
    // missing bound such as "[1,]" would be accepted as numeric.
    const isNumeric = bareValue.trim() !== '' && !Number.isNaN(Number(bareValue));
    const isValidDate = !Number.isNaN(Date.parse(bareValue));

    if (quoted && isNumeric) {
      throw new Error(
        `Numeric values within "BETWEEN" should not be enclosed in quotes. Invalid value: ${value}`
      );
    }
    if (!isNumeric && (!quoted || !isValidDate)) {
      throw new Error(
        `Each date value within "BETWEEN" must be a valid date enclosed in single or double quotes. Invalid value: ${value}`
      );
    }
  });
};

/** Validates a comparison operator against its neighbouring tokens. */
const validateOperator = (token, prevToken, nextToken) => {
  if (!prevToken) {
    // The operator opens the expression, so there is no preceding lexeme to report.
    throw new Error(`Operator "${token.lexeme}" must be preceded by a field.`);
  }
  if (prevToken.type !== tokenTypes.FIELD) {
    throw new Error(
      `Invalid Field: "${prevToken.lexeme}". Operator "${token.lexeme}" must be preceded by a field.`
    );
  }

  const keyword = token.lexeme.toUpperCase();
  if (keyword === 'IN') {
    validateInList(nextToken);
  }
  if (keyword === 'BETWEEN') {
    validateBetweenRange(nextToken);
  }

  // Checked last so that operand errors for IN/BETWEEN are reported first.
  if (token.lexeme !== keyword) {
    throw new Error(`Operator must be uppercase: "${token.lexeme}"`);
  }
};

/** Walks the tokens left to right and throws on the first structural error. */
const validateTokens = (tokens) => {
  // Token types that may sit on either side of AND / OR.
  const operandTypes = [tokenTypes.FIELD, tokenTypes.VALUE, tokenTypes.SUBEXPRESSION];

  tokens.forEach((token, index) => {
    const prevToken = tokens[index - 1];
    const nextToken = tokens[index + 1];

    switch (token.type) {
      case tokenTypes.FIELD:
        // A token is only typed FIELD when it is a known field name; nothing to check.
        break;

      case tokenTypes.OPERATOR:
        validateOperator(token, prevToken, nextToken);
        break;

      case tokenTypes.VALUE:
        if (nextToken && nextToken.type === tokenTypes.VALUE) {
          throw new Error(
            `Value "${token.lexeme}" must not be followed by another value "${nextToken.lexeme}". Space-separated values should be enclosed in quotes.`
          );
        }
        break;

      case tokenTypes.COMBINE_OPERATOR:
        if (!prevToken || !operandTypes.includes(prevToken.type)) {
          throw new Error(
            `Combine operator "${token.lexeme}" must be preceded by a field, value, parenthesis, or subexpression.`
          );
        }
        if (!nextToken || !operandTypes.includes(nextToken.type)) {
          throw new Error(
            `Combine operator "${token.lexeme}" must be followed by a field, value, parenthesis, or subexpression.`
          );
        }
        break;

      case tokenTypes.NOT_OPERATOR: {
        // NOT must introduce a complete "field operator value" triple.
        const operatorToken = tokens[index + 2];
        const valueToken = tokens[index + 3];

        if (!nextToken || nextToken.type !== tokenTypes.FIELD) {
          throw new Error(`NOT operator "${token.lexeme}" must be followed by a field.`);
        }
        if (!operatorToken || operatorToken.type !== tokenTypes.OPERATOR) {
          throw new Error(`Field "${nextToken.lexeme}" must be followed by an operator after NOT.`);
        }
        if (!valueToken || valueToken.type !== tokenTypes.VALUE) {
          throw new Error(
            `Operator "${operatorToken.lexeme}" must be followed by a value after NOT.`
          );
        }
        break;
      }

      case tokenTypes.SUBEXPRESSION:
        if (token.subExpressions.length === 0) {
          throw new Error(`Subexpression "${token.lexeme}" must contain a valid expression.`);
        }
        // The nested tokens obey the same rules as the top-level expression.
        validateTokens(token.subExpressions);
        break;

      default:
        throw new Error(`Unexpected token: ${token.lexeme}`);
    }
  });
};

/**
 * Tokenises a filter expression and validates its structure.
 *
 * Each token is an object `{ type, lexeme, precedence }`. Tokens of type
 * SUBEXPRESSION additionally carry `subExpressions`, the tokens of the text
 * between their parentheses, built with the same rules.
 *
 * @param {string} inputString - The expression to tokenise.
 * @param {Array<Object>} data - Rows whose property names are the known fields.
 * @returns {Array<Object>} The validated token objects, in input order.
 * @throws {Error} When the token sequence breaks a structural rule (for example an
 *   operator without a preceding field, a malformed IN / BETWEEN operand, a
 *   lower-case operator, two adjacent values or an empty subexpression).
 */
const lexer = (inputString, data) => {
  const fields = collectFieldNames(data);
  const tokenObjects = buildTokens(inputString, fields);
  validateTokens(tokenObjects);
  return tokenObjects;
};
// Default export: the lexer entry point (tokenises and validates an expression).
export default lexer;
