"""
SQL WHERE clause validator and tokenizer.
Enforces specific rules for QGIS OBM Connect plugin.
"""

from dataclasses import dataclass
from typing import List, Dict, Optional, Tuple, Iterable, Set
from PyQt5 import QtWidgets, QtCore


def tr(message: str) -> str:
    """Return *message* translated in the 'ObmConnect' context via QCoreApplication."""
    translate = QtCore.QCoreApplication.translate
    return translate('ObmConnect', message)


class TokenKind:
    """String constants naming each category of token (stored in ``Token.kind``)."""

    # Quoted lexemes
    IDENT_DQ = "IDENT_DQ"  # double-quoted identifier
    STRING = "STRING"  # single-quoted string literal

    # Bare literals
    NUMBER = "NUMBER"
    BOOLEAN = "BOOLEAN"  # TRUE / FALSE

    # Words
    KEYWORD = "KEYWORD"  # reserved keywords
    BAREWORD = "BAREWORD"  # unquoted, non-keyword identifiers

    # Operators and punctuation
    OP = "OP"  # operator
    MINUS = "MINUS"
    LPAREN = "LPAREN"
    RPAREN = "RPAREN"
    COMMA = "COMMA"


@dataclass
class Token:
    """
    Represents a lexical token.

    ``start`` and ``end`` are offsets into the tokenized text; SqlTokenizer
    fills them so that ``text[start:end]`` is the token's source span.
    """
    # One of the TokenKind constants.
    kind: str
    # Token text. Quoted tokens keep their surrounding quotes; keywords and
    # booleans are stored upper-cased by SqlTokenizer.
    value: str
    # Offset of the token's first character.
    start: int
    # Offset just past the token's last character.
    end: int


@dataclass
class ValidationIssue:
    """Internal representation of a validation error found."""
    # Offset in the WHERE expression where the offending span starts.
    index: int
    # Number of characters in the offending span.
    length: int
    # Human-readable (translated) error message.
    message: str
    # Machine-readable category, e.g. "unquoted_string" or "missing_rhs".
    kind: str


@dataclass
class ValidatorResult:
    """Public result object returned by validation."""
    # True when no error was found.
    ok: bool
    # All error messages collected; empty when ``ok`` is True.
    errors: List[str]
    # Extra data; the validator fills the keys "length" (length of the
    # expression), "error_index" and "error_length".
    details: Dict[str, Optional[int]]
    # Offset, span length and category of the first reported issue. These
    # stay None on success and when only the deep SQL check failed.
    error_index: Optional[int] = None
    error_length: Optional[int] = None
    error_kind: Optional[str] = None


class SqlTokenizer:
    """
    Tokenizer for SQL WHERE clauses.

    Scans the input once, left to right, and collects ``Token`` objects in
    ``self.tokens``. Whitespace only separates tokens and is never emitted.
    Malformed input does not raise: an unterminated quoted token simply runs
    to the end of the text, and a character that starts no known token is
    emitted as a one-character ``OP`` token so later validation can report it.
    """

    KEYWORDS: Set[str] = {
        "AND", "OR", "NOT", "IN", "IIN", "IS", "NULL", "LIKE", "ILIKE",
        "EMPTY", "TRUE", "FALSE"
    }

    def __init__(self, text: str):
        self.text = text
        self.n = len(text)
        self.i = 0  # index of the next unread character
        self.tokens: List[Token] = []

    def tokenize(self) -> List[Token]:
        """Consume the remaining text and return the accumulated token list."""
        while self.i < self.n:
            char = self.text[self.i]

            if char.isspace():
                self.i += 1
            elif char == "'":
                self._read_string()
            elif char == '"':
                self._read_identifier()
            elif self._is_digit(char):
                self._read_number()
            elif self._is_alpha(char):
                self._read_word()
            elif not self._read_operator():
                # Unknown character: emit it as a one-char OP so the
                # validator can flag it instead of silently dropping it.
                self._add_token(TokenKind.OP, char, self.i, self.i + 1)
                self.i += 1

        return self.tokens

    def _add_token(self, kind: str, value: str, start: int, end: int):
        """Append a token covering ``text[start:end]``."""
        self.tokens.append(Token(kind, value, start, end))

    def _is_digit(self, char: str) -> bool:
        """
        Return True for ASCII digits only.

        ``str.isdigit()`` also accepts characters such as superscript or
        Arabic-Indic digits, which are not valid in a numeric literal.
        """
        return "0" <= char <= "9"

    def _is_alpha(self, char: str) -> bool:
        """Return True if *char* may start a word (letter or underscore)."""
        return char.isalpha() or char == "_"

    def _is_alnum(self, char: str) -> bool:
        """Return True if *char* may continue a word (letter, digit or underscore)."""
        return char.isalnum() or char == "_"

    def _read_quoted(self, quote: str, kind: str):
        """
        Read a token delimited by *quote*, starting at the opening quote.

        A doubled quote inside the token is an escaped quote and does not end
        it. If no closing quote exists the token extends to the end of the
        text. The token value keeps its quotes exactly as written.
        """
        start = self.i
        self.i += 1  # Skip opening quote
        while self.i < self.n:
            if self.text[self.i] != quote:
                self.i += 1
                continue
            if self.i + 1 < self.n and self.text[self.i + 1] == quote:
                self.i += 2  # Escaped quote, still inside the token
                continue
            self.i += 1  # Closing quote
            break

        self._add_token(kind, self.text[start:self.i], start, self.i)

    def _read_string(self):
        """Reads a single-quoted string literal."""
        self._read_quoted("'", TokenKind.STRING)

    def _read_identifier(self):
        """Reads a double-quoted identifier."""
        self._read_quoted('"', TokenKind.IDENT_DQ)

    def _read_number(self):
        """Reads a numeric literal: ASCII digits with at most one '.'."""
        start = self.i
        self.i += 1
        has_dot = False
        while self.i < self.n:
            char = self.text[self.i]
            if char == '.' and not has_dot:
                has_dot = True
            elif not self._is_digit(char):
                break
            self.i += 1

        self._add_token(TokenKind.NUMBER, self.text[start:self.i], start, self.i)

    def _read_word(self):
        """
        Reads keywords, boolean literals, or barewords.

        Keywords and booleans are matched case-insensitively and stored
        upper-cased; any other word keeps its original spelling as a
        BAREWORD. Function calls are not distinguished here: the validator
        recognises them by a BAREWORD directly followed by LPAREN.
        """
        start = self.i
        self.i += 1
        while self.i < self.n and self._is_alnum(self.text[self.i]):
            self.i += 1

        word = self.text[start:self.i]
        upper_word = word.upper()

        if upper_word in {'TRUE', 'FALSE'}:
            self._add_token(TokenKind.BOOLEAN, upper_word, start, self.i)
        elif upper_word in self.KEYWORDS:
            self._add_token(TokenKind.KEYWORD, upper_word, start, self.i)
        else:
            self._add_token(TokenKind.BAREWORD, word, start, self.i)

    def _read_operator(self) -> bool:
        """
        Attempts to read an operator or punctuation token at the cursor.

        Returns True and advances past the token on success; returns False
        (leaving the cursor untouched) if the character starts none.
        """
        pos = self.i

        # Two-character operators take precedence over their one-char prefix.
        # At the last character the slice is one char long and cannot match.
        pair = self.text[pos:pos + 2]
        if pair in {"!=", "<>", "<=", ">=", "||"}:
            self._add_token(TokenKind.OP, pair, pos, pos + 2)
            self.i = pos + 2
            return True

        char = self.text[pos]
        if char == '(':
            kind = TokenKind.LPAREN
        elif char == ')':
            kind = TokenKind.RPAREN
        elif char == ',':
            kind = TokenKind.COMMA
        elif char == '-':
            kind = TokenKind.MINUS
        elif char in "=<>+*/%":
            kind = TokenKind.OP
        else:
            return False

        self._add_token(kind, char, pos, pos + 1)
        self.i = pos + 1
        return True


class SqlWhereValidator(QtWidgets.QDialog):
    """
    Lightweight SQL WHERE expression validator built on SqlTokenizer.

    Inherits from QDialog to be compatible with potential Qt usages,
    though primarily a logic class.

    Key rules enforced:
      - Field names must be double-quoted: "fieldName"
      - String/date literals must be single-quoted: 'text' or 'YYYY-MM-DD'
      - Recognizes multi-word operators (NOT IN, IS NOT NULL, etc.)
      - IN/IIN must be followed by parenthesized lists
      - Binary predicates should have one identifier and one literal
    """

    # Keywords whose presence makes validate() skip the deep SQL check.
    NONSTD_FOR_DEEP: Set[str] = {"ILIKE", "IIN", "EMPTY"}

    # Allowed single/multi-char operator symbols
    OP_SYMBOLS: Set[str] = {"=", "!=", "<", "<=", ">", ">=", "<>"}

    # Normalized, allowed multi-word operators
    MW_OPERATORS: Set[str] = {
        "NOT IN", "NOT IIN", "NOT LIKE", "NOT ILIKE",
        "IS NULL", "IS NOT NULL", "IS EMPTY", "IS NOT EMPTY"
    }

    def validate(self,
                 where_expr: str,
                 field_names: Optional[Iterable[str]] = None,
                 case_sensitive_fields: bool = False,
                 deep_sql_check: bool = True) -> ValidatorResult:
        """
        Validate a WHERE expression and return a ValidatorResult.

        Pipeline:
          0) Tokenize (an expression with no tokens is valid)
          1) Structural checks (quotes, parens); the first problem found is
             returned immediately
          2) Unquoted strings on the right-hand side of comparisons
          3) Identifier checks
          4) Operator checks
          5) Binary predicate checks
          6) Adjacent values and AND/OR placement
          7) Unquoted date literals
          8) Deep SQL check (optional)

        Steps 2-7 all run and their messages are collected; the position
        fields of the result describe the first issue reported.

        Args:
            where_expr: The WHERE clause text, without the WHERE keyword.
            field_names: Known field names. When given and non-empty,
                double-quoted identifiers must be in this list.
            case_sensitive_fields: Compare field names case-sensitively.
            deep_sql_check: Also run sqlvalidator, unless the expression
                uses one of the NONSTD_FOR_DEEP keywords. If sqlvalidator
                cannot be imported the check fails and so does validation.
        """
        # 0) Tokenize
        tokens = SqlTokenizer(where_expr).tokenize()
        if not tokens:
            return self._create_result(True, [], where_expr)

        # 1) Structural checks
        issue = self._check_quotes_and_parens(where_expr, tokens)
        if issue is not None:
            return self._issue_to_result(issue, where_expr)

        errors: List[str] = []
        first_issue: Optional[ValidationIssue] = None

        # Collects every message but remembers only the first position.
        def add_error(msg: str, idx: int, length: int, kind: str):
            nonlocal first_issue
            errors.append(msg)
            if first_issue is None:
                first_issue = ValidationIssue(idx, length, msg, kind)

        # 2) RHS string literals
        self._check_rhs_string_literals(tokens, add_error)

        # 3) Identifiers
        self._check_identifiers(tokens, field_names, case_sensitive_fields, add_error)

        # 4) Operators
        self._check_operators(tokens, add_error)

        # 5) Binary predicates
        self._check_binary_predicates(tokens, add_error)

        # 6) Adjacent values and logic
        self._check_adjacent_values_and_logic(tokens, add_error)

        # 7) Date literals
        self._check_literals(tokens, add_error)

        if errors:
            return self._issue_to_result(first_issue, where_expr, errors)

        # 8) Deep SQL Check
        if deep_sql_check and not self._has_nonstandard_ops(tokens):
            ok, deep_errors = self._validate_deep(where_expr)
            if not ok:
                # The deep check gives no position, so the index fields stay None.
                return self._create_result(False, deep_errors, where_expr)

        return self._create_result(True, [], where_expr)

    # --- Structural Checks ---

    @staticmethod
    def _is_terminated(value: str, quote: str) -> bool:
        """
        Return True if *value* is a quoted token that is properly closed.

        The token must start and end with *quote*, and every quote between
        them must be part of a doubled (escaped) pair. Without the second
        condition an unterminated token ending in an escaped quote, such as
        'abc'', would look closed.
        """
        if len(value) < 2 or not (value.startswith(quote) and value.endswith(quote)):
            return False
        return quote not in value[1:-1].replace(quote * 2, "")

    def _check_quotes_and_parens(self, where_expr: str, tokens: List[Token]) -> Optional[ValidationIssue]:
        """
        Precise checks for unterminated quotes and parenthesis balance.

        Returns the first issue found, or None. ``where_expr`` is not used;
        it is kept so the call signature stays unchanged.

        For an unterminated quoted token that directly follows certain
        tokens with no gap (e.g. ``abc'``), the stray quote is reported as a
        missing *opening* quote and the span covers the preceding token too.
        """
        # Check Strings
        string_prev_kinds = {
            TokenKind.NUMBER, TokenKind.BAREWORD, TokenKind.IDENT_DQ, TokenKind.RPAREN
        }
        for i, t in enumerate(tokens):
            if t.kind != TokenKind.STRING or self._is_terminated(t.value, "'"):
                continue
            prev = tokens[i - 1] if i > 0 else None
            if prev is not None and prev.end == t.start and prev.kind in string_prev_kinds:
                return ValidationIssue(
                    prev.start, t.end - prev.start,
                    tr("Unterminated string literal in WHERE (missing opening ')."),
                    "single_quote_opening"
                )
            return ValidationIssue(
                t.start, 1,
                tr("Unterminated string literal in WHERE (missing closing ')."),
                "single_quote_closing"
            )

        # Check Identifiers
        for i, t in enumerate(tokens):
            if t.kind != TokenKind.IDENT_DQ or self._is_terminated(t.value, '"'):
                continue
            prev = tokens[i - 1] if i > 0 else None
            if prev is not None and prev.end == t.start and prev.kind == TokenKind.BAREWORD:
                return ValidationIssue(
                    prev.start, t.end - prev.start,
                    tr('Unterminated double-quoted identifier in WHERE (missing opening ").'),
                    "double_quote_opening"
                )
            return ValidationIssue(
                t.start, 1,
                tr('Unterminated double-quoted identifier in WHERE (missing closing ").'),
                "double_quote_closing"
            )

        # Check Parentheses
        open_parens: List[Token] = []
        for t in tokens:
            if t.kind == TokenKind.LPAREN:
                open_parens.append(t)
            elif t.kind == TokenKind.RPAREN:
                if not open_parens:
                    return ValidationIssue(t.start, 1, tr("Unbalanced parentheses in WHERE (too many ))."), "paren_closing")
                open_parens.pop()

        if open_parens:
            # Point at the outermost '(' that was never closed.
            first_unmatched = open_parens[0]
            return ValidationIssue(first_unmatched.start, 1, tr("Unbalanced parentheses in WHERE (missing ))."), "paren_opening")

        return None

    # --- Semantics Checks ---

    def _check_rhs_string_literals(self, tokens: List[Token], add_error):
        """
        Checks if unquoted barewords appear where strings are expected on RHS.

        After a comparison symbol, or LIKE/ILIKE, a run of consecutive
        barewords is reported as one unquoted string spanning the whole run.
        A run that leads into a function call (a bareword followed by '(')
        is not reported.
        """
        n = len(tokens)
        like_ops = {"LIKE", "ILIKE"}

        for i, t in enumerate(tokens):
            # Keyword operators are KEYWORD tokens, never OP, so they must
            # be matched by their own kind.
            is_symbol_op = t.kind == TokenKind.OP and t.value in self.OP_SYMBOLS
            is_like_op = t.kind == TokenKind.KEYWORD and t.value in like_ops
            if not (is_symbol_op or is_like_op):
                continue

            rhs_words: List[Token] = []
            j = i + 1
            while j < n and tokens[j].kind == TokenKind.BAREWORD:
                if j + 1 < n and tokens[j + 1].kind == TokenKind.LPAREN:
                    # Start of a function call: not a bare string value.
                    rhs_words = []
                    break
                rhs_words.append(tokens[j])
                j += 1

            if rhs_words:
                a, e = rhs_words[0], rhs_words[-1]
                add_error(
                    tr("String literal must be enclosed in single quotes (e.g., 'Abax ater')."),
                    a.start, e.end - a.start, "unquoted_string"
                )

    def _check_identifiers(self, tokens: List[Token], field_names: Optional[Iterable[str]],
                           case_sensitive: bool, add_error):
        """
        Checks double-quoted identifiers and unquoted barewords.

        With a non-empty ``field_names``, every double-quoted identifier
        must be a known field. A bareword that is not a function call is
        always an error: it is reported as an unquoted identifier when it
        names a known field or when no field list is available, and as an
        unquoted value otherwise.
        """
        # Prepare field lookup
        field_set: Optional[Set[str]] = None
        norm = str  # identity for str input
        if field_names:
            if case_sensitive:
                field_set = set(field_names)
            else:
                field_set = {f.lower() for f in field_names}
                norm = str.lower

        n = len(tokens)
        for i, t in enumerate(tokens):
            if t.kind == TokenKind.IDENT_DQ:
                if field_set is not None:
                    name = self._normalize_ident(t)
                    if norm(name) not in field_set:
                        add_error(
                            tr('Unknown field identifier "{}" (not in provided field list).').format(name),
                            t.start, t.end - t.start, "identifier_unknown"
                        )
                continue

            if t.kind != TokenKind.BAREWORD:
                continue

            # Function names are allowed unquoted. Keywords need no check
            # here: SqlTokenizer never emits them as BAREWORD.
            if i + 1 < n and tokens[i + 1].kind == TokenKind.LPAREN:
                continue

            if field_set is None or norm(t.value) in field_set:
                message = tr('Unquoted identifier "{}" found; fields must be in double quotes.').format(t.value)
                kind = "unquoted_identifier"
            else:
                message = tr('Value "{}" must be enclosed in single quotes.').format(t.value)
                kind = "unquoted_value"
            add_error(message, t.start, t.end - t.start, kind)

    def _check_operators(self, tokens: List[Token], add_error):
        """
        Checks operand presence around comparison symbols, the allowed
        multi-word operators, IN/IIN lists, and disallowed operator symbols.
        """
        n = len(tokens)

        # Token kinds that cannot stand where an operand is expected.
        # MINUS and BOOLEAN are absent from both sets, so a unary minus or
        # a TRUE/FALSE literal next to the operator is accepted.
        not_a_left_operand = {TokenKind.OP, TokenKind.LPAREN, TokenKind.COMMA, TokenKind.KEYWORD}
        not_a_right_operand = {TokenKind.OP, TokenKind.COMMA, TokenKind.RPAREN, TokenKind.KEYWORD}

        # 1. Basic Operand Checks
        for i, t in enumerate(tokens):
            if t.kind != TokenKind.OP or t.value not in self.OP_SYMBOLS:
                continue

            if i == 0 or tokens[i - 1].kind in not_a_left_operand:
                add_error(
                    tr('Missing left-hand operand before operator "{}".').format(t.value),
                    t.start, t.end - t.start, "missing_lhs"
                )

            has_next = i + 1 < n
            if not has_next or tokens[i + 1].kind in not_a_right_operand:
                # Point at the offending token, or just past the operator
                # when the expression ends there.
                pos = tokens[i + 1].start if has_next else t.end
                add_error(
                    tr('Missing right-hand operand after operator "{}".').format(t.value),
                    pos, 1, "missing_rhs"
                )

        # 2. Multi-word Operators and validations
        for i, t in enumerate(tokens):
            if t.kind == TokenKind.KEYWORD:
                if t.value == "IS":
                    # IS NULL / IS NOT NULL / ...
                    self._check_is_operator(tokens, i, add_error)
                elif t.value == "NOT":
                    # NOT IN / NOT LIKE ...
                    self._check_not_operator(tokens, i, add_error)
                elif t.value in {"IN", "IIN"}:
                    self._check_in_operator(tokens, i, add_error)
            elif t.kind == TokenKind.OP and t.value not in self.OP_SYMBOLS:
                # Symbolic operators the validator does not allow
                add_error(
                    tr('Unknown or disallowed operator "{}".').format(t.value),
                    t.start, t.end - t.start, "invalid_operator"
                )

    def _check_is_operator(self, tokens: List[Token], idx: int, add_error) -> Optional[Token]:
        """
        Checks an ``IS [NOT] NULL|EMPTY`` operator starting at ``tokens[idx]``.

        Reports an error if the assembled operator is not in MW_OPERATORS.
        Returns the operator's last token, or None when IS is not directly
        followed by ``[NOT] NULL`` or ``[NOT] EMPTY``; that case is not
        reported here.
        """
        n = len(tokens)
        targets = {"NULL", "EMPTY"}

        def is_keyword(pos: int) -> bool:
            return pos < n and tokens[pos].kind == TokenKind.KEYWORD

        j = idx + 1
        if not is_keyword(j):
            return None

        if tokens[j].value == "NOT":
            last = j + 1
            if not (is_keyword(last) and tokens[last].value in targets):
                return None
        elif tokens[j].value in targets:
            last = j
        else:
            return None

        op = "IS " + " ".join(tok.value for tok in tokens[j:last + 1])
        if op not in self.MW_OPERATORS:
            add_error(
                tr('Operator "{}" is not allowed.').format(op),
                tokens[idx].start, tokens[last].end - tokens[idx].start, "invalid_operator"
            )
        return tokens[last]

    def _check_not_operator(self, tokens: List[Token], idx: int, add_error):
        """
        Checks a ``NOT IN|IIN|LIKE|ILIKE`` operator starting at ``tokens[idx]``.

        Reports an error if the assembled operator is not in MW_OPERATORS.
        A NOT followed by anything else is left alone.
        """
        j = idx + 1
        if j >= len(tokens):
            return
        nxt = tokens[j]
        if nxt.kind != TokenKind.KEYWORD or nxt.value not in {"IN", "IIN", "LIKE", "ILIKE"}:
            return

        op = f"NOT {nxt.value}"
        if op not in self.MW_OPERATORS:
            add_error(
                tr('Operator "{}" is not allowed.').format(op),
                tokens[idx].start, nxt.end - tokens[idx].start, "invalid_operator"
            )

    def _check_in_operator(self, tokens: List[Token], idx: int, add_error):
        """
        Requires the IN/IIN keyword at ``tokens[idx]`` to be directly
        followed by an opening parenthesis.
        """
        j = idx + 1
        if j < len(tokens) and tokens[j].kind == TokenKind.LPAREN:
            return

        op_token = tokens[idx]
        add_error(
            tr('Operator "{}" must be followed by a parenthesized list: {} (...)').format(op_token.value, op_token.value),
            op_token.start, op_token.end - op_token.start, "missing_in_list"
        )

    def _check_binary_predicates(self, tokens: List[Token], add_error):
        """
        Checks that IS operators have no trailing operand and that binary
        operators (comparison symbols, LIKE, ILIKE) do not have two
        identifiers or two literals as their operands.
        """
        for i, t in enumerate(tokens):
            if t.kind == TokenKind.KEYWORD and t.value == "IS":
                self._check_is_trailing_operand(tokens, i, add_error)

            is_bin_op = (t.kind == TokenKind.OP and t.value in self.OP_SYMBOLS)
            is_like_op = (t.kind == TokenKind.KEYWORD and t.value in {"LIKE", "ILIKE"})
            if is_bin_op or is_like_op:
                self._validate_sides(tokens, i, t, add_error)

    def _check_is_trailing_operand(self, tokens: List[Token], idx: int, add_error):
        """
        Reports a string, number or quoted identifier that directly follows
        the IS operator starting at ``tokens[idx]``.

        The operator is taken to be ``IS NOT NULL|EMPTY``, ``IS NULL|EMPTY``
        or, failing both, the bare ``IS`` keyword.
        """
        n = len(tokens)
        targets = {"NULL", "EMPTY"}

        # Find the last token of the IS operator.
        end_idx = idx
        if idx + 2 < n and tokens[idx + 1].value == "NOT" and tokens[idx + 2].value in targets:
            end_idx = idx + 2
        elif idx + 1 < n and tokens[idx + 1].value in targets:
            end_idx = idx + 1

        next_idx = end_idx + 1
        if next_idx >= n:
            return
        next_t = tokens[next_idx]
        if next_t.kind not in {TokenKind.STRING, TokenKind.NUMBER, TokenKind.IDENT_DQ}:
            return

        if end_idx == idx:
            # Bare IS followed by an operand: there is no operator suffix to
            # name, so say what IS expects instead.
            message = tr(
                "Operator 'IS' cannot be followed directly by a value or identifier "
                "(use IS [NOT] NULL or IS [NOT] EMPTY)."
            )
        else:
            # "NULL", "NOT NULL", "EMPTY" or "NOT EMPTY"
            suffix = " ".join(tok.value for tok in tokens[idx + 1:end_idx + 1])
            message = tr("Operator 'IS {}' cannot have a trailing operand.").format(suffix)

        add_error(message, next_t.start, next_t.end - next_t.start, "trailing_operand")

    def _validate_sides(self, tokens: List[Token], op_idx: int, op_token: Token, add_error):
        """
        Reports a binary operator whose two operands are both identifiers
        or both literals.

        The operands are the tokens adjacent to the operator; a MINUS token
        next to the operator on either side is stepped over first. Nothing
        is reported when either operand is missing.
        """
        n = len(tokens)
        literal_kinds = {TokenKind.STRING, TokenKind.NUMBER, TokenKind.BOOLEAN}

        left_idx = op_idx - 1
        if left_idx >= 0 and tokens[left_idx].kind == TokenKind.MINUS:
            left_idx -= 1

        right_idx = op_idx + 1
        if right_idx < n and tokens[right_idx].kind == TokenKind.MINUS:
            right_idx += 1

        if left_idx < 0 or right_idx >= n:
            return

        left_t, right_t = tokens[left_idx], tokens[right_idx]
        span_start = left_t.start
        span_length = right_t.end - left_t.start

        if left_t.kind == TokenKind.IDENT_DQ and right_t.kind == TokenKind.IDENT_DQ:
            add_error(
                tr("Binary predicate cannot have identifiers on both sides of operator '{}'.").format(op_token.value),
                span_start, span_length, "both_identifiers"
            )

        if left_t.kind in literal_kinds and right_t.kind in literal_kinds:
            add_error(
                tr("Binary predicate cannot have literals on both sides of operator '{}'.").format(op_token.value),
                span_start, span_length, "both_literals"
            )

    def _check_adjacent_values_and_logic(self, tokens: List[Token], add_error):
        """
        Reports two value tokens with no operator between them, and AND/OR
        keywords that are not followed by the start of an expression.

        After AND/OR the accepted starts are NOT, '(', a quoted identifier,
        a string, number or boolean literal, a unary minus, or a function
        call (a bareword directly followed by '(').
        """
        n = len(tokens)
        val_kinds = {TokenKind.STRING, TokenKind.NUMBER, TokenKind.IDENT_DQ, TokenKind.BOOLEAN}

        for a, b in zip(tokens, tokens[1:]):
            if a.kind in val_kinds and b.kind in val_kinds:
                add_error(
                    tr("Missing operator between values (two literals/identifiers cannot be adjacent)."),
                    b.start, b.end - b.start, "adjacent_values"
                )

        expression_start_kinds = val_kinds | {TokenKind.LPAREN, TokenKind.MINUS}

        for i, t in enumerate(tokens):
            if t.kind != TokenKind.KEYWORD or t.value not in {"AND", "OR"}:
                continue

            if i == n - 1:
                add_error(
                    tr("Trailing logical operator '{}' without following expression.").format(t.value),
                    t.start, t.end - t.start, "dangling_logic"
                )
                continue

            nxt = tokens[i + 1]
            starts_with_not = nxt.kind == TokenKind.KEYWORD and nxt.value == "NOT"
            # Function calls are accepted here as in the identifier check.
            starts_with_call = (
                nxt.kind == TokenKind.BAREWORD
                and i + 2 < n
                and tokens[i + 2].kind == TokenKind.LPAREN
            )
            if starts_with_not or starts_with_call or nxt.kind in expression_start_kinds:
                continue

            add_error(
                tr("Logical operator '{}' must be followed by a valid expression.").format(t.value),
                nxt.start, nxt.end - nxt.start, "dangling_logic"
            )

    def _check_literals(self, tokens: List[Token], add_error):
        """
        Reports unquoted date-like literals: every window of five
        consecutive tokens of the form NUMBER MINUS NUMBER MINUS NUMBER.
        """
        for i in range(len(tokens) - 4):
            chunk = tokens[i:i + 5]
            if self._is_unquoted_date(chunk):
                a, e = chunk[0], chunk[-1]
                add_error(
                    tr("Date literal must be enclosed in single quotes (e.g., 'YYYY-MM-DD')."),
                    a.start, e.end - a.start, "unquoted_date"
                )

    def _is_unquoted_date(self, chunk: List[Token]) -> bool:
        """Return True if *chunk* is exactly NUMBER MINUS NUMBER MINUS NUMBER."""
        expected = (
            TokenKind.NUMBER, TokenKind.MINUS,
            TokenKind.NUMBER, TokenKind.MINUS,
            TokenKind.NUMBER,
        )
        if len(chunk) != len(expected):
            return False
        return all(tok.kind == kind for tok, kind in zip(chunk, expected))

    # --- Helpers ---

    def _normalize_ident(self, token: Token) -> str:
        """Strip outer double quotes and unescape doubled quotes; other values pass through."""
        raw = token.value
        if len(raw) >= 2 and raw.startswith('"') and raw.endswith('"'):
            return raw[1:-1].replace('""', '"')
        return raw

    def _has_nonstandard_ops(self, tokens: List[Token]) -> bool:
        """Return True if any keyword token is one of NONSTD_FOR_DEEP."""
        # This also covers "NOT ILIKE" / "NOT IIN" / "IS [NOT] EMPTY": the
        # non-standard keyword is itself a token in the list.
        return any(
            t.kind == TokenKind.KEYWORD and t.value in self.NONSTD_FOR_DEEP
            for t in tokens
        )

    def _create_result(self, ok: bool, errors: List[str], where_expr: str) -> ValidatorResult:
        """Build a result that carries no error position."""
        return ValidatorResult(
            ok=ok,
            errors=errors,
            details={"length": len(where_expr), "error_index": None, "error_length": None}
        )

    def _issue_to_result(self, issue: ValidationIssue, where_expr: str,
                         errors: Optional[List[str]] = None) -> ValidatorResult:
        """
        Build a failed result positioned at *issue*.

        ``errors`` is the full message list; when it is missing or empty
        the issue's own message is used.
        """
        all_errors = errors if errors else [issue.message]
        return ValidatorResult(
            ok=False,
            errors=all_errors,
            details={"length": len(where_expr), "error_index": issue.index, "error_length": issue.length},
            error_index=issue.index,
            error_length=issue.length,
            error_kind=issue.kind
        )

    def _validate_deep(self, where_expr: str) -> Tuple[bool, List[str]]:
        """
        Validate the expression with the sqlvalidator package.

        The expression is wrapped as ``SELECT 1 FROM t WHERE <expr>`` and
        parsed. Returns ``(ok, errors)``. A missing sqlvalidator package or
        any exception raised while parsing is reported as a failure, not
        raised.
        """
        try:
            import sqlvalidator
        except ImportError:
            return False, [tr("sqlvalidator is not available.")]

        sql = f"SELECT 1 FROM t WHERE {where_expr}"
        try:
            parsed = sqlvalidator.parse(sql)
            if parsed.is_valid():
                return True, []
            return False, list(parsed.errors)
        except Exception as e:
            # Boundary with a third-party parser: turn any failure into a message.
            return False, [tr("Validation error: {}").format(e)]
