from PyQt5 import QtWidgets, uic, QtGui, QtCore
from qgis.core import (
    QgsProject,
    QgsVectorLayer,
    QgsFields,
    QgsField,
    QgsFeature,
    QgsGeometry,
    QgsWkbTypes
)
from PyQt5.QtCore import QVariant, Qt
import json
import os
import requests
from sqlparse.sql import Identifier, Comparison, Parenthesis, Where, IdentifierList
from sqlparse.tokens import Keyword, Comparison as CompType, Operator, Name, Whitespace, Punctuation
import sqlparse
import sys
import html
import tempfile
import random
import string
import re
from typing import Union


sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from config import build_project_api_url, build_projects_url, build_unique_values_url, DEFAULT_OBM_SERVER




class ObmConnectFilterDialog(QtWidgets.QDialog):
    def tr(self, msg: str) -> str:
        """Return *msg* translated within the "ObmConnectFilter" context."""
        translate = QtCore.QCoreApplication.translate
        return translate("ObmConnectFilter", msg)

    def __init__(self, parent=None, debug=False):
        """Load the dialog UI and connect its widgets.

        Args:
            parent: Owning widget; stored as ``main_widget``. Its ``iface``
                attribute (or ``parent.main_widget.iface``) is reused when
                present.
            debug: Stored as ``self.DEBUG``; enables the debug code paths of
                the other methods.
        """
        super().__init__(parent)
        ui_file = os.path.join(os.path.dirname(__file__), "obm_connect_filter.ui")
        uic.loadUi(ui_file, self)

        # Plain state first; none of the connections below fire during construction.
        self.DEBUG = debug
        self.main_widget = parent
        self._highlighted_row = None
        self.value_list_full = []

        # Resolve iface either directly from the parent or from the widget it wraps.
        self.iface = None
        if parent is not None:
            if hasattr(parent, "iface"):
                self.iface = parent.iface
            else:
                wrapped = getattr(parent, "main_widget", None)
                if wrapped is not None and hasattr(wrapped, "iface"):
                    self.iface = wrapped.iface

        # OK / Cancel
        self.buttonBox.accepted.connect(self.on_ok_clicked)
        self.buttonBox.rejected.connect(self.on_cancel_clicked)

        # Field and value lists
        self.fieldListView.doubleClicked.connect(self._on_field_double_clicked)
        self.fieldListView.clicked.connect(self._set_unique_count_on_all_button)
        self.valueListView.doubleClicked.connect(self._on_value_double_clicked)

        # Operator buttons insert their token into the editor
        self._wire_operator_buttons()

        # The Test button is optional in the .ui file
        if hasattr(self, "pushButton_test"):
            self.pushButton_test.clicked.connect(self.test)

        # Value download buttons and client-side search box
        self.sampleButton.clicked.connect(self.on_sample_button_clicked)
        self.allButton.clicked.connect(self.on_all_button_clicked)
        self.lineEdit_search.textChanged.connect(self.on_search_text_changed)


    # ---------- UI helpers ----------
    def _wire_operator_buttons(self):
        """Connect operator buttons to insert their token at the cursor with spaces."""
        btn_map = {
            "pushButton_equal": " = ",
            "pushButton_less": " < ",
            "pushButton_greater": " > ",
            "pushButton_lessequal": " <= ",
            "pushButton_greaterequal": " >= ",
            "pushButton_notequal": " != ",
            "pushButton_like": " LIKE ",
            "pushButton_ilike": " ILIKE ",
            "pushButton_wildcard": "%",
            "pushButton_in": " IN (",
            "pushButton_notin": " NOT IN (",
            "pushButton_and": " AND ",
            "pushButton_or": " OR ",
            "pushButton_not": " NOT ",
            "pushButton_iin": " IIN (",
            "pushButton_notiin": " NOT IIN (",
            "pushButton_isnull": " IS NULL ",
            "pushButton_isnotnull": " IS NOT NULL ",
            }
        for obj_name, token in btn_map.items():
            btn = getattr(self, obj_name, None)
            if btn is not None:
                btn.clicked.connect(lambda checked=False, t=token: self._insert_token(t))

    def _on_field_double_clicked(self, index: QtCore.QModelIndex):
        """Insert the selected field name wrapped in double quotes."""
        field_name = str(index.data())
        self._insert_text(f"\"{field_name}\"")

    def _on_value_double_clicked(self, index: QtCore.QModelIndex):
        """Insert value into the filter editor.
        - Numeric values (int/float) and booleans are inserted without quotes.
        - Other values are inserted as SQL strings with proper escaping.
        """
        try:
            val = index.data()

            # None -> NULL
            if val is None:
                self._insert_text("NULL")
                return

            # 1) Type-based handling
            # Bool: insert postgres compatible boolean literal (true/false)
            if isinstance(val, bool):
                self._insert_text("true" if val else "false")
                return

            # Numbers: insert without quotes
            if isinstance(val, (int, float)):
                self._insert_text(str(val))
                return

            # 2) Normalize string input
            text = str(val).strip()

            # Empty string -> '' (not NULL)
            if text == "":
                self._insert_text("''")
                return

            # 2/a) Strings that represent booleans
            lower = text.lower()
            truthy = {"true", "t", "1", "yes", "y"}
            falsy  = {"false", "f", "0", "no", "n"}
            if lower in truthy:
                self._insert_text("true")
                return
            if lower in falsy:
                self._insert_text("false")
                return

            # 2/b) Strings that parse as numbers
            is_numeric = False
            try:
                int(text)
                is_numeric = True
            except ValueError:
                try:
                    float(text)
                    is_numeric = True
                except ValueError:
                    is_numeric = False

            if is_numeric:
                self._insert_text(text)
            else:
                # 3) String: SQL-escape single quote by doubling it
                text_escaped = text.replace("'", "''")
                self._insert_text(f"'{text_escaped}'")

        except Exception:
            pass

    def _insert_token(self, token: str):
        """Insert operator token with a leading and trailing space at the cursor."""
        self._insert_text(f"{token}")

    def _insert_text(self, text: str):
        """Insert arbitrary text at the current cursor position in the filter editor."""
        editor: QtWidgets.QPlainTextEdit = self.filterEditor
        cursor: QtGui.QTextCursor = editor.textCursor()
        cursor.insertText(text)  # inserts at cursor, replacing any selection
        editor.setTextCursor(cursor)
        editor.setFocus()

    def highlight_row(self, row_index, color='#7CBEE3'):
        """Mark one row of the field list with a background colour.

        The row highlighted by the previous call (if any) gets an empty brush
        again before the new row is coloured.

        Args:
            row_index: Row in the field list model to highlight.
            color: Colour specification passed to ``QtGui.QColor``.
        """
        field_model = self.fieldListView.model()

        old_row = self._highlighted_row
        if old_row is not None:
            old_item = field_model.item(old_row)
            if old_item is not None:
                old_item.setBackground(QtGui.QBrush())

        new_item = field_model.item(row_index)
        if new_item is not None:
            new_item.setBackground(QtGui.QColor(color))

        self._highlighted_row = row_index
        # NOTE(review): this repaints valueListView although the highlight is
        # applied to the field list model - confirm this is the intended view.
        self.valueListView.repaint()

    def _set_unique_count_on_all_button(self, cur=None, prev=None):
        """
        Find the selected field in parent._last_fields_json and update the "All" button
        to include the unique-value count (if available).
        """
        model = getattr(self, "fieldListView").model()
        if model is None or model.rowCount() == 0:
            raise RuntimeError(self.tr("No field list available"))
        sel = self.fieldListView.selectionModel().currentIndex()

        if sel is None or not sel.isValid():
            sel = model.index(0, 0)
        field_name = str(model.itemFromIndex(sel).text()).strip()
        if not field_name:
            raise RuntimeError(self.tr("No field selected"))
        
        parent = self.main_widget or self.parent()
        unique_values = None
        last_fields_json = getattr(parent, "_last_fields_json", None)
        for field in last_fields_json.get("fields", []):
            if field.get("name") == field_name:
                unique_values = field.get("unique_values")
                break
        # Fallback value if unknown
        if unique_values is None:
            unique_values = "?"
        # Update button label
        self.allButton.setText(self.tr(f"All ({unique_values})"))

    def on_sample_button_clicked(self):
        """Download a sample of unique values (limit 25) from the API and populate the list."""
        self._load_unique_values(limit=25, timeout=15)

    def on_all_button_clicked(self):
        """Download all unique values (large limit) from the API and populate the list."""
        self._load_unique_values(limit=1000000, timeout=60)

    def _load_unique_values(self, limit, timeout):
        """Fetch unique values of the selected field and show them in the value list.

        Shared implementation of the "Sample" and "All" buttons. The selected
        field (first row when nothing is selected) is highlighted, its values
        are requested from the unique-values endpoint, stored in
        ``self.value_list_full`` and rendered through
        ``apply_filter_to_valuelist``.

        Any failure is shown in an "API error" message box; with
        ``self.DEBUG`` set the exception is re-raised instead.

        Args:
            limit: Maximum number of values requested (``limit`` query parameter).
            timeout: HTTP timeout in seconds.
        """
        parent = self.main_widget or self.parent()
        try:
            obm_server = DEFAULT_OBM_SERVER
            if parent and hasattr(parent, "obmServerLineEdit"):
                obm_server = parent.obmServerLineEdit.text().strip() or DEFAULT_OBM_SERVER

            # Field name comes from the current selection of the field list.
            model = self.fieldListView.model()
            if model is None or model.rowCount() == 0:
                raise RuntimeError(self.tr("No field list available"))
            sel = self.fieldListView.selectionModel().currentIndex()
            if sel is None or not sel.isValid():
                # Fall back to the first row.
                sel = model.index(0, 0)
            field_name = str(model.itemFromIndex(sel).text()).strip()
            if not field_name:
                raise RuntimeError(self.tr("No field selected"))
            self.highlight_row(sel.row())

            project_name, schema, table_name, _fields = self._get_last_fields_info(parent)
            url = build_unique_values_url(obm_server, project_name, schema, table_name, field_name)

            headers = {}
            if parent and getattr(parent, "_access_token", None):
                headers["Authorization"] = f"{parent._access_token}"

            # Let requests build the query string so a URL that already has
            # query parameters stays well-formed.
            resp = requests.get(url, params={"limit": limit}, headers=headers, timeout=timeout)
            resp.raise_for_status()
            self.value_list_full = resp.json()

            self.apply_filter_to_valuelist()
        except Exception as e:
            if self.DEBUG:
                raise
            QtWidgets.QMessageBox.critical(self, self.tr("API error"), str(e))

    def on_search_text_changed(self):
        """Filter the currently loaded value list (client-side, no API call)."""
        self.apply_filter_to_valuelist()

    def apply_filter_to_valuelist(self):
        """Rebuild the value list from ``self.value_list_full`` using the search box.

        ``None`` entries are always dropped. When the search box contains text,
        only values whose string form contains it (case-insensitively) are kept.
        """
        needle = self.lineEdit_search.text().strip().lower()

        value_model = QtGui.QStandardItemModel()
        for value in self.value_list_full:
            if value is None:
                continue
            label = str(value)
            if needle and needle not in label.lower():
                continue
            value_model.appendRow(QtGui.QStandardItem(label))

        self.valueListView.setModel(value_model)

    def _get_last_fields_info(self, parent):
        """
        Return (project, schema, table_name, fields) using parent._last_fields_json.
        Safe defaults provided when information is missing.
        """
        last = getattr(parent, "_last_fields_json", None)
        project = None
        schema = "public"
        table_name = None
        fields = []

        if isinstance(last, dict):
            fields_json = last.get("fields", []) or []
            project = last.get("project", None)
            schema = last.get("schema", "public")
            table_name = last.get("name", None)
            fields = [item["name"] for item in fields_json if isinstance(item, dict) and "name" in item]
        return project, schema, table_name, fields

    def _build_graphql_spatial_payload(self, schema_name, table_name, geom_field, filters_dict, limit, offset=0, include_srid=True):
        fields_string = (
            "{"
            " total_count"
            " feature_collection {"
            " type"
            " features {"
            " type"
            " geometry { type coordinates srid }"
            " properties"
            " }"
            " }"
            " }"
        )
        query = (
            "query getFilteredObmData($filters: ObmDataFilterInput, $limit: Int, $offset: Int) { "
            f"spatialObmDataList(primaryGeometry: \"{geom_field}\" filters: $filters, limit: $limit, offset: $offset) "
            f"{fields_string} "
            "}"
        )
        return {
            "schema": schema_name or "public",
            "table_name": table_name,
            "query": query,
            "variables": {
                "filters": filters_dict,
                "limit": max(0, int(limit) if isinstance(limit, int) else 1),
                "offset": int(offset) if isinstance(offset, int) else 0
            }
        }

    def test(self):
        """Validate the current filter expression and report how many rows it matches.

        Steps:
        1. Check that the parent offers a valid token and that table fields are known.
        2. Validate the expression with ``SqlWhereValidator``.
        3. If valid, convert it to GraphQL filters, send a ``limit=0`` request
           and show the ``total_count`` returned by the server.
        4. If invalid, show the validator errors with a caret marking the
           offending slice.

        The expression is converted only after it passed validation, and
        conversion/request failures are reported in a message box (re-raised
        when ``self.DEBUG`` is set). With ``self.DEBUG`` an additional dialog
        shows the payload, the response and tokenizer details.
        """
        expr = self.filterEditor.toPlainText()
        parent = self.main_widget or self.parent()

        if not parent or not hasattr(parent, "ensure_valid_token"):
            QtWidgets.QMessageBox.warning(self, self.tr("Error"), self.tr("Main window or token validation function not found."))
            return

        if not parent.ensure_valid_token():
            QtWidgets.QMessageBox.warning(self, self.tr("Error"), self.tr("No valid token."))
            return

        project_name, schema_name, table_name, fields = self._get_last_fields_info(parent)
        if not fields:
            QtWidgets.QMessageBox.warning(self, self.tr("Error"), self.tr("Table fields are unknown."))
            return

        # Geometry field: third column of the currently selected row of the
        # parent's tables view; None when it cannot be determined.
        geom_field = None
        try:
            tv = getattr(parent, "tablesTableView", None)
            if tv is None or tv.model() is None:
                raise RuntimeError(self.tr("Tables view not available"))

            sel = tv.selectionModel().currentIndex()
            row = sel.row() if sel.isValid() else -1
            if row >= 0:
                item_geom = tv.model().item(row, 2)
                if item_geom is not None:
                    geom_field = item_geom.text().strip()
        except Exception:
            geom_field = None

        validator = SqlWhereValidator()
        result = validator.validate(
            expr,
            field_names=fields,
            case_sensitive_fields=False,
            deep_sql_check=True
        )

        graphql = None
        resp_json = None
        message = ""
        css_mono = "font-family: 'DejaVu Sans Mono','Consolas','Courier New',monospace; white-space: pre; margin:0"

        if result.ok:
            try:
                filters_dict = SqlWhereGraphQLConverter().sql_where_to_graphql(expr)
                # limit=0: only total_count is of interest here.
                graphql = self._build_graphql_spatial_payload(
                    schema_name=schema_name,
                    table_name=table_name,
                    geom_field=geom_field,
                    filters_dict=filters_dict,
                    limit=0,
                    offset=0
                )
                resp_json = self.call_graphql_request(parent=parent, project_name=project_name, graphql=graphql)
            except Exception as e:
                if self.DEBUG:
                    raise
                QtWidgets.QMessageBox.critical(
                    self,
                    self.tr("Error"),
                    self.tr("Failed to send GraphQL request: {err}").format(err=str(e))
                )
                return
            if resp_json is None:
                # call_graphql_request has already informed the user.
                return

            data = resp_json.get("data", {})
            slist = data.get("spatialObmDataList", {}) if isinstance(data, dict) else {}
            total_count = slist.get("total_count") if isinstance(slist, dict) else None
            if total_count is None:
                total_count = 0
            message += "<br>" + self.tr("Number of rows in the current filter: {count}").format(count=total_count)
        else:
            err_lines = ["- " + str(e) for e in result.errors]
            error_slice_html = ""
            if result.error_index is not None:
                # Show up to 20 characters of context on both sides of the error.
                err_len = result.error_length or 1
                start = max(0, result.error_index - 20)
                end = min(len(expr), (result.error_index + err_len + 20))
                snippet = expr[start:end]
                caret = " " * int(result.error_index - start) + "^" * max(1, int(err_len))
                error_slice_html = f"<pre style=\"{css_mono}\">{html.escape(snippet)}\n{html.escape(caret)}</pre>"

            message += "<br>" + self.tr("The WHERE expression is NOT valid SQL.") + "<br>"
            if error_slice_html:
                message += ("<br>" + self.tr("Errors near:") + "<br>" +
                    error_slice_html + "<br>"
                    )
            if err_lines:
                message += "<br>".join(html.escape(line) for line in err_lines)

        box = QtWidgets.QMessageBox(self)
        box.setWindowTitle(self.tr("Test"))
        box.setTextFormat(Qt.RichText)
        box.setText(message)
        box.exec_()

        if self.DEBUG:
            tokens = validator.tokenize(expr)
            debug_message = self.tr("Would run test with:\nFilter: {expr}").format(expr=expr)
            debug_message += self.tr("\nGraphQL equivalent:\n{graphql}").format(
                graphql=json.dumps(graphql, indent=2, ensure_ascii=False)
            )

            debug_message += self.tr("\n\nResponse:\n{graphql}").format(
                graphql=json.dumps(resp_json, indent=2, ensure_ascii=False)
            )

            debug_message += self.tr("\n\nSQL Validator WHERE expression analysis:\n")
            debug_message += self.tr("- Number of tokens: {count}").format(count=len(tokens))
            debug_message += self.tr("\n- tokens: {tokens}").format(tokens=tokens)

            debug_message += self.tr("\nOriginal error index: {error_index}").format(error_index=result.error_index)
            debug_message += self.tr("\nOriginal error length: {error_length}").format(error_length=result.error_length)

            QtWidgets.QMessageBox.information(
                self,
                self.tr("DEBUG"),
                debug_message
            )

    def call_graphql_request(self, parent, project_name: str, graphql: dict, timeout: int = 30) -> Union[dict, None]:
        """
        POST a GraphQL payload to the ``get-data`` endpoint of an OBM project.

        Args:
            parent: main window or component holding obmServerLineEdit and optional _access_token
            project_name: OBM project name
            graphql: GraphQL payload (dict)
            timeout: HTTP timeout in seconds

        Returns:
            dict: full JSON response, or None when the response reports GraphQL
            errors, is not a JSON object, or lacks 'data.spatialObmDataList'.
            In those cases a message box has already been shown.

        Raises:
            requests.RequestException: on connection problems or HTTP error
                status (from ``raise_for_status``).
        """
        obm_server = parent.obmServerLineEdit.text().strip() or DEFAULT_OBM_SERVER
        url = build_project_api_url(obm_server, project_name)
        if not url.endswith("/"):
            url += "/"
        url += "get-data"

        headers = {}
        if getattr(parent, "_access_token", None):
            headers["Authorization"] = f"{parent._access_token}"

        resp = requests.post(url, json=graphql, headers=headers, timeout=timeout)
        resp.raise_for_status()
        resp_json = resp.json() if resp.content else {}

        # A body that is valid JSON but not an object (list, string, number)
        # cannot carry the expected structure; report it like missing data.
        if not isinstance(resp_json, dict):
            QtWidgets.QMessageBox.warning(
                self,
                self.tr("No data"),
                self.tr("The response did not include 'data.spatialObmDataList'.")
            )
            return None

        # GraphQL response may contain 'errors' according to spec
        errors = resp_json.get("errors")
        if isinstance(errors, list) and errors:
            msgs = []
            for entry in errors:
                if isinstance(entry, dict):
                    msg = str(entry.get("message") or "GraphQL error")
                    locs = entry.get("locations")
                    if isinstance(locs, list):
                        # Skip malformed location entries instead of failing on them.
                        loc_strs = [
                            f"(line {loc.get('line')}, col {loc.get('column')})"
                            for loc in locs
                            if isinstance(loc, dict)
                            and loc.get("line") is not None
                            and loc.get("column") is not None
                        ]
                        if loc_strs:
                            msg += " " + " ".join(loc_strs)
                else:
                    msg = str(entry) if entry is not None else "GraphQL error"
                msgs.append(msg)

            if self.DEBUG:
                # Append request/response debug info
                msgs.append(self.tr("\n\nGraphQL request:\n{graphql}").format(
                    graphql=json.dumps(graphql, indent=2, ensure_ascii=False)
                ))
                msgs.append(self.tr("\n\nFull response:\n{response}").format(
                    response=json.dumps(resp_json, indent=2, ensure_ascii=False)
                ))
            QtWidgets.QMessageBox.critical(self, self.tr("API error"), "\n".join(msgs))
            return None

        # Ensure we have the expected data structure
        data = resp_json.get("data", {})
        if not isinstance(data, dict) or "spatialObmDataList" not in data:
            QtWidgets.QMessageBox.warning(
                self,
                self.tr("No data"),
                self.tr("The response did not include 'data.spatialObmDataList'.")
            )
            return None

        return resp_json

    def on_ok_clicked(self):
        """Run the filtered query and load the result as a layer, then accept the dialog.

        Steps:
        1. Require a parent with a valid token and known table metadata.
        2. Read the record limit (default 100, minimum 1) and the filter expression.
        3. Convert the expression to GraphQL filters, build the payload and
           send it through ``call_graphql_request``.
        4. Rename the returned ``feature_collection`` key to
           ``<table>-<geometry field>`` when a geometry field is known and hand
           the result to ``createLayerFromGeojson``.

        Failures during conversion, the request or layer creation are shown in
        a message box and leave the dialog open; with ``self.DEBUG`` they are
        re-raised and intermediate payloads are written to the temp directory.
        """
        parent = self.main_widget or self.parent()

        if not parent or not hasattr(parent, "ensure_valid_token"):
            QtWidgets.QMessageBox.warning(self, self.tr("Error"), self.tr("Main window or token validation function not found."))
            return

        if not parent.ensure_valid_token():
            QtWidgets.QMessageBox.warning(self, self.tr("Error"), self.tr("No valid token."))
            return

        limit_txt = self.lineEdit_recNum.text().strip() if hasattr(self, "lineEdit_recNum") else "100"
        try:
            limit = max(1, int(limit_txt))
        except (TypeError, ValueError):
            limit = 100

        expr = self.filterEditor.toPlainText().strip()
        project_name, schema_name, table_name, fields = self._get_last_fields_info(parent)

        # Without table metadata no meaningful request can be built; check
        # this in every mode, not only when debugging.
        last_fields_json = getattr(parent, "_last_fields_json", None)
        if not last_fields_json or not table_name:
            QtWidgets.QMessageBox.warning(self, self.tr("Error"), self.tr("Table fields are unknown."))
            return

        if self.DEBUG:
            QtWidgets.QMessageBox.information(
                self,
                self.tr("last_fields_json"),
                self.tr("last_fields_json:\n{last_fields_json}").format(last_fields_json=json.dumps(last_fields_json, ensure_ascii=False))
            )

        # Geometry field from the UI: third column of the selected tables row.
        geom_field = None
        try:
            tv = getattr(parent, "tablesTableView", None)
            if tv is None or tv.model() is None:
                raise RuntimeError(self.tr("Tables view not available"))

            sel = tv.selectionModel().currentIndex()
            row = sel.row() if sel.isValid() else -1
            if row >= 0:
                item_geom = tv.model().item(row, 2)
                if item_geom is not None:
                    geom_field = item_geom.text().strip()
        except Exception:
            geom_field = None

        layer_name = f"{table_name}-{geom_field}" if geom_field else table_name

        try:
            # Conversion sits inside the try so a malformed expression is
            # reported to the user like any other failure.
            filters_dict = SqlWhereGraphQLConverter().sql_where_to_graphql(expr)

            graphql = self._build_graphql_spatial_payload(
                schema_name=schema_name,
                table_name=table_name,
                geom_field=geom_field,
                filters_dict=filters_dict,
                limit=limit,
                offset=0
            )

            if self.DEBUG:
                QtWidgets.QMessageBox.information(
                    self,
                    self.tr("GraphQL payload"),
                    self.tr("GraphQL payload:\n{graphql}").format(graphql=json.dumps(graphql, indent=2, ensure_ascii=False))
                )

            resp_json = self.call_graphql_request(parent=parent, project_name=project_name, graphql=graphql)
            if resp_json is None:
                # call_graphql_request has already informed the user.
                return

            # Debug dump of the raw request and response
            if self.DEBUG:
                temp_dir = tempfile.gettempdir()
                with open(os.path.join(temp_dir, "response.geojson"), 'w', encoding='utf-8') as f:
                    json.dump(resp_json, f, ensure_ascii=False, indent=2)
                with open(os.path.join(temp_dir, "request.geojson"), 'w', encoding='utf-8') as f:
                    json.dump(graphql, f, ensure_ascii=False)

            data = resp_json.get("data", {})
            geojson_result = data.get("spatialObmDataList") or {}

            # Rename the collection key so it carries the layer name.
            if isinstance(geojson_result, dict) and geom_field:
                if "feature_collection" in geojson_result:
                    geojson_result[layer_name] = geojson_result.pop("feature_collection")

            if not geojson_result:
                QtWidgets.QMessageBox.information(
                    self,
                    self.tr("Empty result"),
                    self.tr("No features returned for the given filters.")
                )
                return

            if self.DEBUG:
                temp_dir = tempfile.gettempdir()
                with open(os.path.join(temp_dir, "geojson_result.geojson"), 'w', encoding='utf-8') as f:
                    json.dump(geojson_result, f, ensure_ascii=False, indent=2)

            self.createLayerFromGeojson(geojson_result, layer_name)

        except Exception as e:
            if self.DEBUG:
                raise
            QtWidgets.QMessageBox.critical(
                self,
                self.tr("Error"),
                self.tr("Failed to send GraphQL request: {err}").format(err=str(e))
            )
            return

        self.accept()

    def on_cancel_clicked(self):
        self.close()

    def createLayerFromGeojson(self, json_obj, layer_name):
        # Generate random folder name OBMCon-XXXXXX
        random_chars = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(6))
        custom_dir = f"OBMCon-{random_chars}"
        
        # Create unique temp folder
        temp_base = tempfile.gettempdir()
        temp_dir = os.path.join(temp_base, custom_dir)
        os.makedirs(temp_dir, exist_ok=True)
        
        temp_geojson_path = os.path.join(temp_dir, f"{layer_name}.geojson")
        if self.DEBUG:
            self.iface.messageBar().pushMessage(
                self.tr("DEBUG"), 
                self.tr("Creating temp geojson at: {path}").format(path=temp_geojson_path), 
                duration=5
            )

        with open(temp_geojson_path, 'w', encoding='utf-8') as f:
            json.dump(json_obj, f, ensure_ascii=False)
        if self.DEBUG:
            self.iface.messageBar().pushMessage(
                self.tr("DEBUG"), 
                self.tr("Temp geojson written."), 
                duration=5
            )
        
        try:
            vl = None
            try:
                if hasattr(self, "iface") and self.iface:
                    vl = self.iface.addVectorLayer(temp_geojson_path, layer_name, "ogr")
                else:
                    raise RuntimeError(self.tr("No iface available"))
            except Exception:
                vl = None

            # User canceled or failed to add: return silently
            if vl is None:
                return None

            # Loading error: clean up and raise
            if not vl.isValid():
                try:
                    if self.DEBUG:
                        self.iface.messageBar().pushMessage(
                            self.tr("DEBUG"), 
                            self.tr("Layer is not valid, removing temp geojson."), 
                            duration=5
                        )
                    os.unlink(temp_geojson_path)
                except Exception:
                    pass
                raise RuntimeError(self.tr("Layer failed to load"))

            # On success the layer is already in the project
            return vl

        except:
            try:
                os.unlink(temp_geojson_path)
            except Exception:
                pass



#-- PDS API to GeoJSON ------------------------------------------
    from qgis.core import QgsGeometry, QgsJsonExporter, QgsFeature

    def obm_json_to_geojson_qgis(self, obm_json_data):
        """
        Turn an OBM JSON response into a GeoJSON FeatureCollection via QgsGeometry.

        Each dict record whose 'obm_geometry' value parses to a non-empty
        geometry becomes one feature; all other keys of the record become its
        properties. Records that are not dicts, have no geometry value, or
        whose geometry is empty or cannot be converted are skipped (the latter
        two with a printed warning).

        Args:
            obm_json_data (dict or str): OBM API JSON response; a string is
                parsed as JSON first.

        Returns:
            dict: GeoJSON FeatureCollection

        Raises:
            ValueError: If the input is not a dict with a 'data' key or 'data'
                is not a list.
        """
        payload = json.loads(obm_json_data) if isinstance(obm_json_data, str) else obm_json_data

        if not (isinstance(payload, dict) and 'data' in payload):
            raise ValueError("Invalid OBM JSON structure. Expected dict with 'data' key.")

        records = payload['data']
        if not isinstance(records, list):
            raise ValueError("Expected 'data' to be a list of records.")

        features = []
        for record in records:
            if not isinstance(record, dict):
                continue

            # 'obm_geometry' is handed to QgsGeometry.fromWkt
            wkt_geometry = record.get('obm_geometry')
            if not wkt_geometry:
                continue

            try:
                qgs_geometry = QgsGeometry.fromWkt(wkt_geometry)
                if qgs_geometry.isEmpty():
                    print(f"Warning: Empty geometry for WKT: {wkt_geometry}")
                    continue
                # asJson() returns GeoJSON geometry text; parse it into a dict
                geojson_geometry = json.loads(qgs_geometry.asJson(precision=8))
            except Exception as e:
                print(f"Warning: Failed to parse geometry '{wkt_geometry}': {e}")
                continue

            features.append({
                "type": "Feature",
                "geometry": geojson_geometry,
                "properties": {
                    key: value for key, value in record.items() if key != 'obm_geometry'
                }
            })

        return {
            "type": "FeatureCollection",
            "features": features
        }
#--------------------------------------------------------------

    def graphql_to_geojson(self, graphql_response):
        """
        Convert a GraphQL response to GeoJSON FeatureCollection.
        Records without geometry are kept with null geometry.

        Args:
            graphql_response: dict or JSON string returned by the GraphQL API

        Returns:
            dict: GeoJSON FeatureCollection
        """
        # If a JSON string was provided, parse it
        if isinstance(graphql_response, str):
            graphql_response = json.loads(graphql_response)
        
        geojson = {
            "metadata": "converted",
            "type": "FeatureCollection",
            "features": []
        }
        
        try:
            data_list = graphql_response.get("data", {}).get("ObmDataList", [])
        except (AttributeError, KeyError):
            return geojson
        
        # Use stored geometry field (selected earlier) if available
        geom_field = getattr(self, "selected_geom_field", None)

        for item in data_list:
            if not isinstance(item, dict):
                continue

            # Extract geometry based on the selected geometry field
            geometry = None
            if geom_field:
                val = item.get(geom_field)
                if val and (isinstance(val, dict) or isinstance(val, str)):
                    geometry = val

            # Build properties excluding the chosen geometry field
            properties = {}
            for key, value in item.items():
                if key != geom_field:
                    properties[key] = value
         
            feature = {
                "type": "Feature",
                "geometry": geometry,  # may be None
                "properties": properties
             }
             
            geojson["features"].append(feature)
         
        return geojson


# obm_connect_filter.py
import sqlparse
from sqlparse.sql import Where, Identifier, Parenthesis, Comparison
from sqlparse.tokens import Keyword, Whitespace, Punctuation, Operator as T_Operator, Name as T_Name
from sqlparse.lexer import Lexer

class SqlWhereGraphQLConverter:
    def __init__(self):
        pass

    def register_custom_keywords(self):
        """Register the custom ``IIN`` operator as a keyword on sqlparse's default lexer."""
        # Custom keyword token mapping for specialized operators
        custom_keywords = {'IIN': Keyword}
        Lexer.get_default_instance().add_keywords(custom_keywords)

    def sql_where_to_graphql(self, where_string):
        self.register_custom_keywords()
        # Multiple whitespace -> a single space
        # where_string = ' '.join(where_string.split())
        where_string = re.sub(r'\s+(?=(?:[^"\']*["\'][^"\']*["\'])*[^"\']*$)', ' ', where_string) 

        parsed = sqlparse.parse(f"SELECT * FROM t WHERE {where_string}")[0]
        where_clause = next((token for token in parsed.tokens if isinstance(token, Where)), None)
        if where_clause is None:
            return {}
        return self.parse_where_tokens(where_clause.tokens[1:])

    # --- helpers for field tokens ---
    def _is_field_token(self, token):
        """Return True for an ``Identifier`` group or a token whose ttype is ``Name``."""
        if isinstance(token, Identifier):
            return True
        return getattr(token, "ttype", None) == T_Name

    def _field_name(self, token):
        """Return the field name carried by ``token`` with surrounding double quotes removed."""
        if not isinstance(token, Identifier):
            # Name token: plain identifier
            text = getattr(token, "value", "") or ""
            return text.strip('"')

        # Identifier may have quotes or aliases, prefer real name if available
        resolver = getattr(token, "get_real_name", None)
        real_name = resolver() if resolver is not None else None
        chosen = real_name or token.value
        return chosen.strip('"')

    # --- new: literal detection & operator flip ---
    def _is_literal_token(self, token):
        # Prefer token type checks from sqlparse
        ttype = getattr(token, 'ttype', None)
        if ttype is not None:
            tstr = str(ttype)
            if tstr.startswith('Token.Literal') or tstr.startswith('Token.String') or tstr.startswith('Token.Number'):
                return True
        # Fallback by value shape
        val = getattr(token, 'value', '')
        if isinstance(val, str):
            if (len(val) >= 2 and val[0] == val[-1] == "'"):
                return True
            v = val.strip()
            if v.lstrip('-').isdigit():
                return True
            try:
                float(v)
                return True
            except Exception:
                pass
        return False

    def _flip_operator(self, op):
        """Flip non-commutative comparison operators when sides are swapped."""
        if not isinstance(op, str):
            return op
        m = {
            '>': '<',
            '<': '>',
            '>=': '<=',
            '<=': '>='
        }
        op_up = op.strip().upper()
        return m.get(op_up, op_up)

    def parse_where_tokens(self, tokens):
        """
        Turn a flat list of sqlparse tokens from a WHERE clause into a filter dict.

        Whitespace and punctuation tokens are dropped first. The remaining
        tokens are scanned left to right; every recognised predicate is turned
        into a filter via ``build_graphql_filter`` and pushed on a stack.

        On an AND / OR keyword the method returns immediately, combining the
        most recently stacked filter (left side) with the recursive parse of
        ALL remaining tokens (right side). On a NOT that is not part of a
        recognised ``NOT field IN/IS ...`` form it returns ``{'NOT': <rest>}``.

        NOTE(review): because the right side always swallows the rest of the
        token list, ``a AND b OR c`` comes out as AND[a, OR[b, c]] and
        ``NOT a AND b`` as NOT[AND[a, b]]; filters stacked before the popped
        one are not part of the returned value. This differs from SQL
        precedence - confirm that it is intended.

        Args:
            tokens: sequence of sqlparse tokens (each must expose ``ttype``).

        Returns:
            dict: a single filter, ``{'AND': [...]}`` when several filters were
            collected without an explicit connective, or ``{}`` when nothing
            was recognised.
        """
        tokens = [t for t in tokens if t.ttype not in [Whitespace, Punctuation]]
        stack = []
        i = 0
        while i < len(tokens):
            token = tokens[i]

            # Parenthesis: either a nested condition group or the value list of IN / IIN
            if isinstance(token, Parenthesis):
                is_in_list = False
                if i > 0:
                    if hasattr(tokens[i-1], "match") and tokens[i-1].match(Keyword, ('IN', 'IIN')):
                        is_in_list = True
                    # This branch repeats the IN/IIN test of the branch above, so it
                    # cannot change the outcome; kept as written.
                    elif i > 1 and hasattr(tokens[i-2], "match") and tokens[i-2].match(Keyword, 'NOT') and hasattr(tokens[i-1], "match") and tokens[i-1].match(Keyword, ('IN', 'IIN')):
                        is_in_list = True
                if not is_in_list:
                    # Nested group: parse the content between the brackets recursively
                    group = self.parse_where_tokens(token.tokens[1:-1])
                    if group:
                        stack.append(group)
                    i += 1
                    continue

            if hasattr(token, "match") and token.match(Keyword, 'AND'):
                # left = last collected filter, right = everything after the keyword
                left = stack.pop() if stack else {}
                right = self.parse_where_tokens(tokens[i+1:])
                joins = []
                if left: joins.append(left)
                if right: joins.append(right)
                if len(joins) == 1:
                    return joins[0]
                return {'AND': joins} if joins else {}

            elif hasattr(token, "match") and token.match(Keyword, 'OR'):
                # Same shape as AND, only the connective key differs
                left = stack.pop() if stack else {}
                right = self.parse_where_tokens(tokens[i+1:])
                joins = []
                if left: joins.append(left)
                if right: joins.append(right)
                if len(joins) == 1:
                    return joins[0]
                return {'OR': joins} if joins else {}

            elif hasattr(token, "match") and token.match(Keyword, 'NOT'):
                # handle NOT IN / NOT IS NULL / NOT IS EMPTY
                if (i+2 < len(tokens) and self._is_field_token(tokens[i+1])
                    and hasattr(tokens[i+2], "match") and tokens[i+2].match(Keyword, ('IN', 'IIN'))
                    and i+3 < len(tokens) and isinstance(tokens[i+3], Parenthesis)):
                    field = self._field_name(tokens[i+1])
                    values = self.extract_values_from_parenthesis(tokens[i+3])
                    filt = self.build_graphql_filter(field, 'NOT ' + tokens[i+2].value.upper(), values)  # 'NOT IN' or 'NOT IIN'
                    if filt: stack.append(filt)
                    i += 4
                    continue
                elif (i+2 < len(tokens) and self._is_field_token(tokens[i+1])
                      and hasattr(tokens[i+2], "match") and tokens[i+2].match(Keyword, 'IS')
                      and i+3 < len(tokens)):
                    field = self._field_name(tokens[i+1])
                    if hasattr(tokens[i+3], "match") and tokens[i+3].match(Keyword, 'NULL'):
                        filt = self.build_graphql_filter(field, 'IS NOT NULL', None)
                        if filt: stack.append(filt)
                        i += 4
                        continue
                    if hasattr(tokens[i+3], "match") and tokens[i+3].match(Keyword, 'EMPTY'):
                        filt = self.build_graphql_filter(field, 'IS NOT EMPTY', None)
                        if filt: stack.append(filt)
                        i += 4
                        continue
                # Generic NOT: negate the parse of everything that follows
                right = self.parse_where_tokens(tokens[i+1:])
                return {'NOT': right} if right else {}

            elif isinstance(token, Comparison):
                # sqlparse already grouped "left op right" into one Comparison node
                field, op, value = self.parse_comparison(token)
                filt = self.build_graphql_filter(field, op, value)
                if filt: stack.append(filt)

            # field-first fallbacks
            elif self._is_field_token(token):
                # IS [NOT] NULL/EMPTY and one-token NOT NULL / NOT EMPTY variants
                if i+1 < len(tokens) and hasattr(tokens[i+1], "match") and tokens[i+1].match(Keyword, 'IS'):
                    field = self._field_name(token)
                    if i+2 < len(tokens):
                        third = tokens[i+2]
                        third_val = getattr(third, 'value', '').upper()

                        # One-token variant: NOT NULL / NOT EMPTY
                        if third_val in ('NOT NULL', 'NOT EMPTY'):
                            mapped = 'IS NOT NULL' if third_val == 'NOT NULL' else 'IS NOT EMPTY'
                            filt = self.build_graphql_filter(field, mapped, None)
                            if filt: stack.append(filt)
                            i += 3
                            continue

                        # Two-token variant: IS NOT NULL/EMPTY
                        if third_val == 'NOT' and i+3 < len(tokens):
                            fourth_val = getattr(tokens[i+3], 'value', '').upper()
                            if fourth_val in ('NULL', 'EMPTY'):
                                mapped = 'IS NOT NULL' if fourth_val == 'NULL' else 'IS NOT EMPTY'
                                filt = self.build_graphql_filter(field, mapped, None)
                                if filt: stack.append(filt)
                                i += 4
                                continue

                        # Simple: IS NULL / IS EMPTY
                        if third_val in ('NULL', 'EMPTY'):
                            mapped = 'IS NULL' if third_val == 'NULL' else 'IS EMPTY'
                            filt = self.build_graphql_filter(field, mapped, None)
                            if filt: stack.append(filt)
                            i += 3
                            continue

                # field NOT IN (...), field IN (...)
                if (i+3 < len(tokens) and hasattr(tokens[i+1], "match") and tokens[i+1].match(Keyword, 'NOT')
                    and hasattr(tokens[i+2], "match") and tokens[i+2].match(Keyword, ('IN', 'IIN'))
                    and isinstance(tokens[i+3], Parenthesis)):
                    field = self._field_name(token)
                    values = self.extract_values_from_parenthesis(tokens[i+3])
                    filt = self.build_graphql_filter(field, 'NOT ' + tokens[i+2].value.upper(), values)  # 'NOT IN' or 'NOT IIN'
                    if filt: stack.append(filt)
                    i += 4
                    continue

                if i+1 < len(tokens) and hasattr(tokens[i+1], "match") and tokens[i+1].match(Keyword, ('IN', 'IIN')):
                    field = self._field_name(token)
                    if i+2 < len(tokens) and isinstance(tokens[i+2], Parenthesis):
                        values = self.extract_values_from_parenthesis(tokens[i+2])
                        filt = self.build_graphql_filter(field, tokens[i+1].value.upper(), values)  # 'IN' or 'IIN'
                        if filt: stack.append(filt)
                        i += 3
                        continue

                # NOT LIKE / NOT ILIKE via field-based fallback
                if (i+3 < len(tokens) and hasattr(tokens[i+1], "match") and tokens[i+1].match(Keyword, 'NOT')
                    and getattr(tokens[i+2], 'value', '').upper() in ('LIKE', 'ILIKE')):
                    field = self._field_name(token)
                    op = f"NOT {tokens[i+2].value.upper()}"
                    val = self._convert_value_token(tokens[i+3])
                    filt = self.build_graphql_filter(field, op, val)
                    if filt: stack.append(filt)
                    i += 4
                    continue

                # Simple binary ops via field-based fallback (=, !=, >, <, >=, <=, LIKE, ILIKE)
                if i+2 < len(tokens):
                    op_tok = tokens[i+1]
                    op_val = getattr(op_tok, 'value', '').upper()
                    is_cmp = getattr(op_tok, 'ttype', None) == T_Operator.Comparison or op_val in ('=', '!=', '>', '<', '>=', '<=', 'LIKE', 'ILIKE')
                    if is_cmp:
                        field = self._field_name(token)
                        val = self._convert_value_token(tokens[i+2])
                        filt = self.build_graphql_filter(field, op_val, val)
                        if filt: stack.append(filt)
                        i += 3
                        continue

            # Literal-first fallbacks (swap sides and flip operator if needed).
            # This is a plain `if`, so it is also evaluated after any branch of the
            # chain above that did not `continue` or `return`.
            if self._is_literal_token(token) and i+2 < len(tokens):
                mid = tokens[i+1]
                rhs = tokens[i+2]

                # NOT LIKE / NOT ILIKE with literal first: 'pat' NOT LIKE "field" -> "field" NOT LIKE 'pat'
                if (hasattr(mid, "match") and mid.match(Keyword, 'NOT')
                    and getattr(rhs, 'value', '').upper() in ('LIKE', 'ILIKE')
                    and i+3 < len(tokens) and self._is_field_token(tokens[i+3])):
                    field = self._field_name(tokens[i+3])
                    op = f"NOT {rhs.value.upper()}"
                    value = self._convert_value_token(token)
                    filt = self.build_graphql_filter(field, op, value)
                    if filt: stack.append(filt)
                    i += 4
                    continue

                # LIKE/ILIKE with literal first: 'pat' LIKE "field" -> "field" LIKE 'pat'
                if (getattr(mid, 'value', '').upper() in ('LIKE', 'ILIKE')) and self._is_field_token(rhs):
                    field = self._field_name(rhs)
                    op = getattr(mid, 'value', '').upper()
                    value = self._convert_value_token(token)
                    filt = self.build_graphql_filter(field, op, value)
                    if filt: stack.append(filt)
                    i += 3
                    continue

                # Standard comparison with literal first: '5' > "age" -> "age" < 5
                mid_val = getattr(mid, 'value', '').upper()
                is_cmp = getattr(mid, 'ttype', None) == T_Operator.Comparison or mid_val in ('=', '!=', '>', '<', '>=', '<=')
                if is_cmp and self._is_field_token(rhs):
                    field = self._field_name(rhs)
                    op = self._flip_operator(mid_val)
                    value = self._convert_value_token(token)
                    filt = self.build_graphql_filter(field, op, value)
                    if filt: stack.append(filt)
                    i += 3
                    continue

            # Last-resort fallback: field <comparison operator> value
            if self._is_field_token(token) and i+2 < len(tokens) and getattr(tokens[i+1], 'ttype', None) == T_Operator.Comparison:
                field = self._field_name(token)
                op = tokens[i+1].value.upper()
                value = self._convert_value_token(tokens[i+2])
                filt = self.build_graphql_filter(field, op, value)
                if filt: stack.append(filt)
                i += 3
                continue

            # Nothing matched at this position: skip the token
            i += 1

        # No explicit connective was met: several collected filters are AND-ed together
        filtered_stack = [x for x in stack if x]
        if len(filtered_stack) == 1:
            return filtered_stack[0]
        elif len(filtered_stack) > 1:
            return {'AND': filtered_stack}
        else:
            return {}

    def parse_comparison(self, token):
        """
        Split a sqlparse ``Comparison`` node into ``(field, op, value)``.

        Recognises ``NOT IN`` / ``NOT IIN`` / ``NOT LIKE`` / ``NOT ILIKE``,
        ``IN`` / ``IIN`` and plain binary operators. When the literal is on the
        left and the field on the right, the sides are swapped and the operator
        is mirrored. Positive ``LIKE`` / ``ILIKE`` patterns are normalised here.

        Returns:
            tuple: ``(field, op, value)``; members are ``None`` when the node
            has too few parts to provide them.
        """
        parts = [child for child in token.tokens if child.ttype not in (Whitespace, Punctuation)]
        count = len(parts)
        first = parts[0] if parts else None
        field = first.value.strip('"') if parts else None

        def is_keyword(pos, words):
            candidate = parts[pos]
            return hasattr(candidate, "match") and candidate.match(Keyword, words)

        op = None
        operand = None
        negated = count >= 4 and is_keyword(1, 'NOT')
        if negated and (is_keyword(2, ('IN', 'IIN'))
                        or getattr(parts[2], 'value', '').upper() in ('LIKE', 'ILIKE')):
            # NOT IN / NOT IIN / NOT LIKE / NOT ILIKE
            op = f"NOT {parts[2].value.upper()}"
            operand = parts[3]
        elif count >= 3 and is_keyword(1, ('IN', 'IIN')):
            # IN / IIN
            op = parts[1].value.upper()
            operand = parts[2]
        else:
            # Plain binary form: <left> <op> <right>
            if count >= 2:
                op = parts[1].value.upper()
            if count >= 3:
                operand = parts[2]

        literal_on_left = (
            first is not None
            and operand is not None
            and self._is_literal_token(first)
            and self._is_field_token(operand)
        )
        if literal_on_left:
            # Swap sides so the field is always the filter key
            field = self._field_name(operand)
            op = self._flip_operator(op)
            value = self._convert_value_token(first)
        elif operand is None:
            value = None
        elif isinstance(operand, Parenthesis):
            value = self.extract_values_from_parenthesis(operand)
        else:
            value = self._convert_value_token(operand)

        # LIKE / ILIKE normalization for positive branches (stays here)
        if isinstance(op, str) and op in ('LIKE', 'ILIKE') and isinstance(value, str):
            op, value = self._normalize_like_pattern(field, op, value)

        return field, op, value

    def _normalize_like_pattern(self, field, op, value):
        op_up = str(op).upper() if op is not None else ""
        if not isinstance(value, str):
            return op_up, value
        s = value
        starts_pct = s.startswith('%')
        ends_pct = s.endswith('%')
        count_pct = s.count('%')

        # NEW: ha nincs egyetlen % sem, akkor pontos egyezés
        # LIKE -> '='  , ILIKE -> 'IEQUALS'  (később equals/iequals)
        if op_up in ('LIKE', 'ILIKE') and count_pct == 0:
            return ('IEQUALS' if op_up == 'ILIKE' else '='), value

        # composite: belső % (nem elején/végén) -> split az utolsó % szerint
        if count_pct >= 1 and not starts_pct and not ends_pct:
            idx = s.rfind('%')
            starts = s[:idx]
            ends = s[idx+1:].replace('%', '')
            if op_up == 'ILIKE':
                return 'ICOMPOSITE', {'starts_with': starts, 'ends_with': ends}
            return 'COMPOSITE', {'starts_with': starts, 'ends_with': ends}

        if starts_pct and ends_pct:
            stripped = s[1:-1]
            return ('ILIKE' if op_up == 'ILIKE' else 'LIKE', stripped)
        elif ends_pct:
            stripped = s[:-1]
            return ('ISTARTS_WITH' if op_up == 'ILIKE' else 'STARTS_WITH', stripped)
        elif starts_pct:
            stripped = s[1:]
            return ('IENDS_WITH' if op_up == 'ILIKE' else 'ENDS_WITH', stripped)

        return op_up, value

    def build_graphql_filter(self, field, op, value):
        # Normalize operator: collapse whitespace and uppercase for lookup
        if isinstance(op, str):
            op_norm = ' '.join(op.strip().upper().split())
        else:
            op_norm = op

        # Normalize NOT LIKE / NOT ILIKE using the positive normalizer
        if isinstance(op_norm, str) and op_norm in ('NOT LIKE', 'NOT ILIKE') and isinstance(value, str):
            inner = 'ILIKE' if op_norm == 'NOT ILIKE' else 'LIKE'
            norm_op, norm_val = self._normalize_like_pattern(field, inner, value)

            if norm_op in ('COMPOSITE', 'ICOMPOSITE') and isinstance(norm_val, dict):
                sw = norm_val.get('starts_with', '')
                ew = norm_val.get('ends_with', '')
                key_sw = 'not_istarts_with' if inner == 'ILIKE' else 'not_starts_with'
                key_ew = 'not_iends_with'  if inner == 'ILIKE' else 'not_ends_with'
                return {'OR': [{field: {key_sw: sw}}, {field: {key_ew: ew}}]}

            elif norm_op in ('ISTARTS_WITH', 'STARTS_WITH', 'IENDS_WITH', 'ENDS_WITH'):
                op_norm = f'NOT {norm_op}'
                value = norm_val

            # NEW: %-mentes NOT LIKE/NOT ILIKE → not_equals/not_iequals
            elif norm_op == '=':
                return {field: {'not_equals': norm_val}}
            elif norm_op == 'IEQUALS':
                return {field: {'not_iequals': norm_val}}
            else:
                op_norm = f'NOT {norm_op}'
                value = norm_val

        GRAPHQL_MAP = {
            '=':                 'equals',
            '!=':                'not_equals',
            '>':                 'greater_than',
            '<':                 'less_than',
            '>=':                'greater_than_or_equals',
            '<=':                'less_than_or_equals',
            'LIKE':              'like',
            'ILIKE':             'ilike',
            'NOT LIKE':          'not_like',
            'NOT ILIKE':         'not_ilike',
            'IN':                'in',
            'NOT IN':            'not_in',
            'IIN':               'iin',
            'NOT IIN':           'not_iin',
            'IS NULL':           'is_null',
            'IS NOT NULL':       'is_not_null',
            'IS EMPTY':          'is_empty',
            'IS NOT EMPTY':      'is_not_empty',
            'STARTS_WITH':       'starts_with',
            'ENDS_WITH':         'ends_with',
            'ISTARTS_WITH':      'istarts_with',
            'IENDS_WITH':        'iends_with',
            'COMPOSITE':         'composite',
            'ICOMPOSITE':        'icomposite',
            'NOT STARTS_WITH':   'not_starts_with',
            'NOT ENDS_WITH':     'not_ends_with',
            'NOT ISTARTS_WITH':  'not_istarts_with',
            'NOT IENDS_WITH':    'not_iends_with',
            'IEQUALS':           'iequals',
            'NOT IEQUALS':       'not_iequals',
        }

        mapped = GRAPHQL_MAP.get(op_norm, None)
        if mapped is None:
            mapped = op_norm.lower().replace(' ', '_') if isinstance(op_norm, str) else op_norm

        # Special: IS NULL/IS NOT NULL/IS EMPTY/IS NOT EMPTY -> True flag
        if mapped in ('is_null', 'is_not_null', 'is_empty', 'is_not_empty'):
            return {field: {mapped: True}}

        # IN/NOT IN: accept list or parenthesis-string
        if mapped in ('in', 'not_in'):
            vals = value
            if isinstance(vals, str):
                vals = self.extract_values_from_parenthesis(vals)
            if not isinstance(vals, list):
                vals = [vals]
            return {field: {mapped: vals}}

        if mapped in ('composite', 'icomposite') and isinstance(value, dict):
            if mapped == 'icomposite':
                return {'AND': [
                    {field: {'istarts_with': value.get('starts_with', '')}},
                    {field: {'iends_with':  value.get('ends_with',  '')}}
                ]}
            else:
                return {'AND': [
                    {field: {'starts_with': value.get('starts_with', '')}},
                    {field: {'ends_with':  value.get('ends_with',  '')}}
                ]}

        if mapped and field is not None:
            return {field: {mapped: value}}
        return None

    def extract_values_from_parenthesis(self, token):
        """
        Extract values from Parenthesis token or parenthesis-like string.
        Always returns list for IN.
        """
        s = token.value.strip() if hasattr(token, 'value') else str(token)
        s = s.strip('()').strip()

        if not s:
            return []

        # If there's actual token objects passed (like sqlparse tokens separated), attempt split by comma
        if isinstance(token, Parenthesis):
            # Get interior tokens as text and split on commas
            inner = ''.join(tok.value for tok in token.tokens[1:-1] if tok.ttype is not Whitespace)
            parts = [p.strip() for p in inner.split(',') if p.strip()]
            return [self._convert_value_token(p if not isinstance(p, str) else p) for p in parts]

        if ',' in s:
            parts = [part.strip() for part in s.split(',')]
            return [self._convert_value_token(p) for p in parts]

        return [self._convert_value_token(s)]

    def _convert_value_token(self, token):
        """
        Convert a token or string to Python value: string, int, float, bool, None.
        """
        if isinstance(token, (int, float, bool)):
            return token
        if token is None:
            return None

        raw = token.value if hasattr(token, "value") else str(token)
        s = raw.strip()

        # Strip quotes
        if (s.startswith("'") and s.endswith("'")) or (s.startswith('"') and s.endswith('"')):
            return s[1:-1]

        # Special values
        low = s.lower()
        if low == 'null':
            return None
        if low == 'true':
            return True
        if low == 'false':
            return False

        # Numbers
        try:
            if s.lstrip('-').isdigit():
                return int(s)
            if '.' in s:
                return float(s)
        except (ValueError, AttributeError):
            pass

        # Else string
        return s


from dataclasses import dataclass
from typing import List, Dict, Optional, Tuple, Iterable, Sequence, Set


@dataclass
class ValidatorResult:
    """Outcome of validating a WHERE expression, with an optional error location."""
    ok: bool  # False when a problem was found
    errors: List[str]  # human-readable error messages
    details: Dict[str, Optional[int]]  # includes indices if any (e.g. "length", "error_index", "error_length")
    error_index: Optional[int] = None  # start offset of the offending span in the expression
    error_length: Optional[int] = None  # number of characters in the offending span
    # Kinds set by the quote/parenthesis check include 'single_quote_opening',
    # 'single_quote_closing', 'double_quote_opening', 'double_quote_closing'
    # and 'paren_closing'; other checks may use further values.
    error_kind: Optional[str] = None


@dataclass
class Token:
    """One lexical token produced by ``SqlWhereValidator.tokenize``."""
    kind: str        # 'IDENT_DQ' | 'STRING' | 'NUMBER' | 'BOOLEAN' | 'OP' | 'LPAREN' | 'RPAREN' | 'COMMA' | 'KEYWORD' | 'BAREWORD' | 'MINUS'
    value: str       # token text; upper-cased for KEYWORD and BOOLEAN tokens
    start: int       # index of the first character in the tokenized string
    end: int        # exclusive


class SqlWhereValidator(QtWidgets.QDialog):
    """
    Lightweight SQL WHERE expression tokenizer and validator.

    Key rules enforced:
      - Field names must be double-quoted: "fieldName"
      - String/date literals must be single-quoted: 'text' or 'YYYY-MM-DD'
      - Recognizes multi-word operators (NOT IN, IS NOT NULL, etc.)
      - IN/IIN must be followed by parenthesized lists
      - Binary predicates should have one identifier and one literal
      - Negative numbers allowed via unary minus
      - Detects quote and parenthesis mismatches for precise error positions
    """

    # Keywords used in operators or logic. tokenize() compares the upper-cased
    # word against this set; TRUE / FALSE are emitted as BOOLEAN tokens, every
    # other member as a KEYWORD token.
    KEYWORDS: Set[str] = {
        "AND", "OR", "NOT", "IN", "IIN", "IS", "NULL", "LIKE", "ILIKE", "EMPTY", "TRUE", "FALSE"
    }

    # Non-standard keywords that are problematic from a deep-parse perspective
    # (looked up by _has_nonstandard_ops_for_deep)
    NONSTD_FOR_DEEP: Set[str] = {"ILIKE", "IIN", "EMPTY"}

    # Allowed single/multi-char operator symbols
    OP_SYMBOLS: Set[str] = {"=", "!=", "<", "<=", ">", ">=", "<>"}

    # Normalized, allowed multi-word operators
    MW_OPERATORS: Set[str] = {
        "NOT IN", "NOT IIN", "NOT LIKE", "NOT ILIKE",
        "IS NULL", "IS NOT NULL", "IS EMPTY", "IS NOT EMPTY"
    }

    def _has_nonstandard_ops_for_deep(self, tokens: List[Token]) -> bool:
        """Return True when any KEYWORD token is one of ``NONSTD_FOR_DEEP``.

        A ``NOT`` directly followed by such a keyword needs no separate test:
        the scan reaches the following keyword itself and reports it.
        """
        nonstandard = self.NONSTD_FOR_DEEP
        return any(tok.kind == "KEYWORD" and tok.value in nonstandard for tok in tokens)

    def _is_alpha_(self, ch: str) -> bool:
        return ch.isalpha() or ch == "_"

    def _is_alnum_(self, ch: str) -> bool:
        return ch.isalnum() or ch == "_"

    def tokenize(self, s: str) -> List[Token]:
        """
        Split a WHERE expression into ``Token`` objects.

        Token kinds produced:
          - STRING    single-quoted text, ``''`` counts as an escaped quote
          - IDENT_DQ  double-quoted text, ``""`` counts as an escaped quote
          - NUMBER    digits with at most one dot
          - KEYWORD / BOOLEAN  words found in ``KEYWORDS`` (value upper-cased)
          - BAREWORD  any other word
          - OP        comparison/arithmetic symbols and unknown characters
          - LPAREN / RPAREN / COMMA / MINUS

        An unterminated quoted run extends to the end of the input. Whitespace
        produces no token. ``start`` / ``end`` are offsets into ``s``.
        """
        result: List[Token] = []
        length = len(s)
        bracket_kinds = {"(": "LPAREN", ")": "RPAREN", ",": "COMMA"}
        two_char_ops = ("!=", "<>", "<=", ">=", "||")

        def emit(kind, text, begin, stop):
            result.append(Token(kind=kind, value=text, start=begin, end=stop))

        def quoted_end(begin, quote):
            # Index just past the closing quote, or the input length when unterminated
            cursor = begin + 1
            while cursor < length:
                if s[cursor] != quote:
                    cursor += 1
                elif cursor + 1 < length and s[cursor + 1] == quote:
                    cursor += 2  # skip the doubled (escaped) quote
                else:
                    return cursor + 1
            return cursor

        pos = 0
        while pos < length:
            ch = s[pos]

            # whitespace
            if ch.isspace():
                pos += 1
                continue

            # string literal ('...') or double-quoted identifier ("...")
            if ch == "'" or ch == '"':
                stop = quoted_end(pos, ch)
                emit("STRING" if ch == "'" else "IDENT_DQ", s[pos:stop], pos, stop)
                pos = stop
                continue

            # number (simple int/float)
            if ch.isdigit():
                stop = pos + 1
                seen_dot = False
                while stop < length:
                    nxt = s[stop]
                    if nxt == '.' and not seen_dot:
                        seen_dot = True
                    elif not nxt.isdigit():
                        break
                    stop += 1
                emit("NUMBER", s[pos:stop], pos, stop)
                pos = stop
                continue

            # identifiers / keywords / function names
            if self._is_alpha_(ch):
                stop = pos + 1
                while stop < length and self._is_alnum_(s[stop]):
                    stop += 1
                word = s[pos:stop]
                upper = word.upper()
                if upper not in self.KEYWORDS:
                    # A word followed by '(' (function name) is a bareword as well
                    emit("BAREWORD", word, pos, stop)
                elif upper in ("TRUE", "FALSE"):
                    emit("BOOLEAN", upper, pos, stop)
                else:
                    emit("KEYWORD", upper, pos, stop)
                pos = stop
                continue

            # multi-char operator symbols (the slice is a single char at the end of input)
            pair = s[pos:pos + 2]
            if pair in two_char_ops:
                emit("OP", pair, pos, pos + 2)
                pos += 2
                continue

            # single-character tokens: brackets, comma, minus; any other
            # character (known operator symbol or not) is an OP
            if ch in bracket_kinds:
                kind = bracket_kinds[ch]
            elif ch == "-":
                kind = "MINUS"
            else:
                kind = "OP"
            emit(kind, ch, pos, pos + 1)
            pos += 1

        return result

    def _normalize_ident(self, ident_dq_token: Token) -> str:
        """Return the identifier text without its outer double quotes, with ``""`` unescaped to ``"``.

        Text that is not wrapped in a pair of double quotes is returned as is.
        """
        text = ident_dq_token.value
        wrapped = len(text) >= 2 and text.startswith('"') and text.endswith('"')
        if not wrapped:
            return text
        return text[1:-1].replace('""', '"')

    def _post_token_quote_paren_check(self, where_expr: str, tokens: List[Token]) -> Optional[ValidatorResult]:
        # Precise checks for unterminated quotes and parenthesis balance
        # Unterminated single-quoted string
        for idx, t in enumerate(tokens):
            if t.kind == "STRING":
                val = t.value
                properly_closed = (len(val) >= 2 and val[0] == "'" and val[-1] == "'")
                if properly_closed:
                    continue
                prev = tokens[idx-1] if idx > 0 else None
                prev_is_content = prev and prev.end == t.start and prev.kind in {"NUMBER", "BAREWORD", "IDENT_DQ", "RPAREN"}
                if prev_is_content:
                    msg = "Unterminated string literal in WHERE (missing opening ')."
                    details = {"length": len(where_expr), "error_index": prev.start, "error_length": t.end - prev.start}
                    return ValidatorResult(ok=False, errors=[msg], details=details,
                                        error_index=prev.start, error_length=t.end - prev.start, error_kind="single_quote_opening")
                msg = "Unterminated string literal in WHERE (missing closing ')."
                details = {"length": len(where_expr), "error_index": t.start, "error_length": 1}
                return ValidatorResult(ok=False, errors=[msg], details=details,
                                    error_index=t.start, error_length=1, error_kind="single_quote_closing")

        # Unterminated double-quoted identifier
        for idx, t in enumerate(tokens):
            if t.kind == "IDENT_DQ":
                val = t.value
                properly_closed = (len(val) >= 2 and val[0] == '"' and val[-1] == '"')
                if properly_closed:
                    continue
                prev = tokens[idx-1] if idx > 0 else None
                prev_adjacent_word = prev and prev.end == t.start and prev.kind in {"BAREWORD"}
                if prev_adjacent_word:
                    msg = 'Unterminated double-quoted identifier in WHERE (missing opening ").'
                    details = {"length": len(where_expr), "error_index": prev.start, "error_length": t.end - prev.start}
                    return ValidatorResult(ok=False, errors=[msg], details=details,
                                        error_index=prev.start, error_length=t.end - prev.start, error_kind="double_quote_opening")
                msg = 'Unterminated double-quoted identifier in WHERE (missing closing ").'
                details = {"length": len(where_expr), "error_index": t.start, "error_length": 1}
                return ValidatorResult(ok=False, errors=[msg], details=details,
                                    error_index=t.start, error_length=1, error_kind="double_quote_closing")

        # Parenthesis balance
        stack: List[Token] = []
        for t in tokens:
            if t.kind == "LPAREN":
                stack.append(t)
            elif t.kind == "RPAREN":
                if not stack:
                    msg = "Unbalanced parentheses in WHERE (too many ))."
                    details = {"length": len(where_expr), "error_index": t.start, "error_length": 1}
                    return ValidatorResult(ok=False, errors=[msg], details=details,
                                           error_index=t.start, error_length=1, error_kind="paren_closing")
                stack.pop()

        if stack:
            first_unmatched_open = stack[0]
            msg = "Unbalanced parentheses in WHERE (missing ))."
            details = {"length": len(where_expr), "error_index": first_unmatched_open.start, "error_length": 1}
            return ValidatorResult(ok=False, errors=[msg], details=details,
                                   error_index=first_unmatched_open.start, error_length=1, error_kind="paren_opening")

        return None

    def _validate_identifiers(self,
                            tokens: List[Token],
                            field_names: Optional[Iterable[str]],
                            case_sensitive: bool,
                            errors: List[str]) -> Optional[Tuple[int, int, str]]:
        first_err: Optional[Tuple[int, int, str]] = None

        # Prepare field lookup
        field_set = None
        if field_names:
            if case_sensitive:
                field_set = set(field_names)
                def norm(x: str) -> str: return x
            else:
                field_set = {f.lower() for f in field_names}
                def norm(x: str) -> str: return x.lower()
        else:
            def norm(x: str) -> str: return x

        # Validate double-quoted identifiers against field list
        if field_set is not None:
            for t in tokens:
                if t.kind == "IDENT_DQ":
                    name = norm(self._normalize_ident(t))
                    if name not in field_set:
                        msg = f'Unknown field identifier "{self._normalize_ident(t)}" (not in provided field list).'
                        errors.append(msg)
                        if not first_err:
                            first_err = (t.start, t.end - t.start, "identifier_unknown")

        # Flag barewords
        n = len(tokens)
        for idx, t in enumerate(tokens):
            if t.kind != "BAREWORD":
                continue

            is_function = (idx + 1 < n and tokens[idx+1].kind == "LPAREN")
            upper = t.value.upper()
            if upper in self.KEYWORDS or is_function:
                continue

            if field_set is None:
                msg = f'Unquoted identifier "{t.value}" found; fields must be in double quotes.'
                errors.append(msg)
                if not first_err:
                    first_err = (t.start, t.end - t.start, "unquoted_identifier")
                continue

            if norm(t.value) in field_set:
                msg = f'Unquoted identifier "{t.value}" found; fields must be in double quotes.'
                errors.append(msg)
                if not first_err:
                    first_err = (t.start, t.end - t.start, "unquoted_identifier")
            else:
                msg = f'Value "{t.value}" must be enclosed in single quotes.'
                errors.append(msg)
                if not first_err:
                    first_err = (t.start, t.end - t.start, "unquoted_value")

        return first_err

    def _validate_operators(self, tokens: List[Token], errors: List[str]) -> Optional[Tuple[int, int, str]]:
        first_err: Optional[Tuple[int, int, str]] = None

        n = len(tokens)
        
        # Check basic operator operand presence
        for i, t in enumerate(tokens):
            if t.kind != "OP":
                continue
            if t.value in self.OP_SYMBOLS:
                # bal oldali operandus hiányzik
                if i == 0 or tokens[i-1].kind in {"OP", "LPAREN", "COMMA", "KEYWORD"}:
                    # Except if preceded by MINUS (unary minus before operator is invalid syntax)
                    if i > 0 and tokens[i-1].kind == "MINUS":
                        continue
                    errors.append(f'Missing left-hand operand before operator "{t.value}".')
                    if not first_err:
                        first_err = (t.start, t.end - t.start, "missing_lhs")
                    break
                # jobb oldali operandus hiányzik
                if i+1 >= n or tokens[i+1].kind in {"OP", "COMMA", "RPAREN"} or (tokens[i+1].kind == "KEYWORD"):
                    # Allow MINUS as unary operator after comparison
                    if i+1 < n and tokens[i+1].kind == "MINUS":
                        continue
                    if i+1 < n and tokens[i+1].kind == "BOOLEAN":
                        pass  # allow boolean literals as rhs
                    else:
                        errors.append(f'Missing right-hand operand after operator "{t.value}".')
                        if not first_err:
                            pos = tokens[i+1].start if i+1 < n else t.end
                            first_err = (pos, 1, "missing_rhs")
                        break

        if first_err:
            return first_err

        # Validate multi-word operators and IN/IIN
        idx = 0
        while idx < n:
            t = tokens[idx]

            if t.kind == "KEYWORD":
                val = t.value

                # IS NULL / IS NOT NULL / IS EMPTY / IS NOT EMPTY
                if val == "IS":
                    j = idx + 1
                    while j < n and tokens[j].kind == "COMMA":
                        j += 1
                    if j < n and tokens[j].kind == "KEYWORD":
                        if tokens[j].value == "NOT":
                            k = j + 1
                            if k < n and tokens[k].kind == "KEYWORD":
                                if tokens[k].value == "NULL":
                                    op = "IS NOT NULL"
                                    if op not in self.MW_OPERATORS:
                                        msg = f'Operator "{op}" is not allowed.'
                                        errors.append(msg)
                                        if not first_err:
                                            first_err = (t.start, tokens[k].end - t.start, "invalid_operator")
                                elif tokens[k].value == "EMPTY":
                                    op = "IS NOT EMPTY"
                                    if op not in self.MW_OPERATORS:
                                        msg = f'Operator "{op}" is not allowed.'
                                        errors.append(msg)
                                        if not first_err:
                                            first_err = (t.start, tokens[k].end - t.start, "invalid_operator")
                        elif tokens[j].value == "NULL":
                            op = "IS NULL"
                            if op not in self.MW_OPERATORS:
                                msg = f'Operator "{op}" is not allowed.'
                                errors.append(msg)
                                if not first_err:
                                    first_err = (t.start, tokens[j].end - t.start, "invalid_operator")
                        elif tokens[j].value == "EMPTY":
                            op = "IS EMPTY"
                            if op not in self.MW_OPERATORS:
                                msg = f'Operator "{op}" is not allowed.'
                                errors.append(msg)
                                if not first_err:
                                    first_err = (t.start, tokens[j].end - t.start, "invalid_operator")

                # NOT IN / NOT IIN / NOT LIKE / NOT ILIKE
                if val == "NOT":
                    j = idx + 1
                    if j < n and tokens[j].kind == "KEYWORD" and tokens[j].value in {"IN", "IIN", "LIKE", "ILIKE"}:
                        op = f"NOT {tokens[j].value}"
                        if op not in self.MW_OPERATORS:
                            msg = f'Operator "{op}" is not allowed.'
                            errors.append(msg)
                            if not first_err:
                                first_err = (t.start, tokens[j].end - t.start, "invalid_operator")

                # IN / IIN operators must be followed by (...)
                if val in {"IN", "IIN"}:
                    j = idx + 1
                    # Skip whitespace-like tokens
                    while j < n and tokens[j].kind in {"COMMA"}:
                        j += 1
                    if j >= n or tokens[j].kind != "LPAREN":
                        msg = f'Operator "{val}" must be followed by a parenthesized list: {val} (...)'
                        errors.append(msg)
                        if not first_err:
                            first_err = (t.start, t.end - t.start, "missing_in_list")

                idx += 1
                continue

            # symbolic operators
            if t.kind == "OP":
                if t.value in self.OP_SYMBOLS:
                    idx += 1
                    continue
                errors.append(f'Unknown or disallowed operator "{t.value}".')
                if not first_err:
                    first_err = (t.start, t.end - t.start, "invalid_operator")
                idx += 1
                continue

            idx += 1

        return first_err

    def _validate_binary_predicates(self, tokens: List[Token], errors: List[str]) -> Optional[Tuple[int, int, str]]:
        """
        Validates that binary predicates have exactly one identifier and one literal on opposite sides.
        Also checks that IS [NOT] NULL/EMPTY predicates don't have trailing operands.
        """
        first_err: Optional[Tuple[int, int, str]] = None
        n = len(tokens)
        i = 0

        while i < n:
            t = tokens[i]

            # Check IS [NOT] NULL/EMPTY - no trailing operands allowed
            if t.kind == "KEYWORD" and t.value == "IS":
                j = i + 1
                # Skip to next keyword
                while j < n and tokens[j].kind not in {"KEYWORD"}:
                    j += 1
                if j < n:
                    kw = tokens[j].value
                    end_idx = j
                    # IS NOT NULL/EMPTY
                    if kw == "NOT":
                        k = j + 1
                        while k < n and tokens[k].kind not in {"KEYWORD"}:
                            k += 1
                        if k < n and tokens[k].value in {"NULL", "EMPTY"}:
                            end_idx = k
                    # IS NULL/EMPTY
                    elif kw in {"NULL", "EMPTY"}:
                        end_idx = j
                    
                    # Check no trailing operand after IS [NOT] NULL/EMPTY
                    next_idx = end_idx + 1
                    if next_idx < n:
                        next_t = tokens[next_idx]
                        if next_t.kind in {"STRING", "NUMBER", "IDENT_DQ"}:
                            msg = f"Operator 'IS {kw}' cannot have a trailing operand."
                            errors.append(msg)
                            if not first_err:
                                first_err = (next_t.start, next_t.end - next_t.start, "trailing_operand")
                            i = next_idx + 1
                            continue
                i += 1
                continue

            # Check binary operators (comparison, LIKE, ILIKE, etc.)
            if t.kind == "OP" and t.value in self.OP_SYMBOLS:
                # Find left operand (skip MINUS for unary minus)
                left_idx = i - 1
                if left_idx >= 0 and tokens[left_idx].kind == "MINUS":
                    left_idx -= 1
                
                # Find right operand (skip MINUS for unary minus)
                right_idx = i + 1
                if right_idx < n and tokens[right_idx].kind == "MINUS":
                    right_idx += 1

                if left_idx < 0 or right_idx >= n:
                    i += 1
                    continue

                left_t = tokens[left_idx]
                right_t = tokens[right_idx]

                left_is_ident = left_t.kind == "IDENT_DQ"
                left_is_literal = left_t.kind in {"STRING", "NUMBER", "BOOLEAN"}
                right_is_ident = right_t.kind == "IDENT_DQ"
                right_is_literal = right_t.kind in {"STRING", "NUMBER", "BOOLEAN"}

                # Both identifiers
                if left_is_ident and right_is_ident:
                    msg = f"Binary predicate cannot have identifiers on both sides of operator '{t.value}'."
                    errors.append(msg)
                    if not first_err:
                        first_err = (left_t.start, right_t.end - left_t.start, "both_identifiers")
                    i += 1
                    continue

                # Both literals
                if left_is_literal and right_is_literal:
                    msg = f"Binary predicate cannot have literals on both sides of operator '{t.value}'."
                    errors.append(msg)
                    if not first_err:
                        first_err = (left_t.start, right_t.end - left_t.start, "both_literals")
                    i += 1
                    continue

            # Check LIKE/ILIKE operators
            if t.kind == "KEYWORD" and t.value in {"LIKE", "ILIKE"}:
                # Find left and right operands similarly
                left_idx = i - 1
                if left_idx >= 0 and tokens[left_idx].kind == "MINUS":
                    left_idx -= 1
                
                right_idx = i + 1
                if right_idx < n and tokens[right_idx].kind == "MINUS":
                    right_idx += 1

                if left_idx < 0 or right_idx >= n:
                    i += 1
                    continue

                left_t = tokens[left_idx]
                right_t = tokens[right_idx]

                left_is_ident = left_t.kind == "IDENT_DQ"
                left_is_literal = left_t.kind in {"STRING", "NUMBER", "BOOLEAN"}
                right_is_ident = right_t.kind == "IDENT_DQ"
                right_is_literal = right_t.kind in {"STRING", "NUMBER", "BOOLEAN"}

                if left_is_ident and right_is_ident:
                    msg = f"Binary predicate cannot have identifiers on both sides of operator '{t.value}'."
                    errors.append(msg)
                    if not first_err:
                        first_err = (left_t.start, right_t.end - left_t.start, "both_identifiers")

                if left_is_literal and right_is_literal:
                    msg = f"Binary predicate cannot have literals on both sides of operator '{t.value}'."
                    errors.append(msg)
                    if not first_err:
                        first_err = (left_t.start, right_t.end - left_t.start, "both_literals")

            i += 1

        return first_err

    def _looks_like_unquoted_date_seq(self, seq: List[Token]) -> bool:
        # pattern NUMBER - NUMBER - NUMBER, e.g. 2025-10-29
        if len(seq) != 5:
            return False
        a, b, c, d, e = seq
        return (a.kind == "NUMBER" and b.kind == "MINUS" and
                c.kind == "NUMBER" and d.kind == "MINUS" and
                e.kind == "NUMBER")

    def _validate_literals(self, tokens: List[Token], errors: List[str]) -> Optional[Tuple[int, int, str]]:
        first_err: Optional[Tuple[int, int, str]] = None
        n = len(tokens)
        i = 0
        while i < n:
            t = tokens[i]

            # detect unquoted date pattern
            if i + 4 < n and self._looks_like_unquoted_date_seq(tokens[i:i+5]):
                a, _, _, _, e = tokens[i], tokens[i+1], tokens[i+2], tokens[i+3], tokens[i+4]
                msg = "Date literal must be enclosed in single quotes (e.g., 'YYYY-MM-DD')."
                errors.append(msg)
                if not first_err:
                    first_err = (a.start, e.end - a.start, "unquoted_date")
                i += 5
                continue

            i += 1

        return first_err

    def _validate_rhs_string_literals(self, tokens, errors):
        COMPARE_OPS = self.OP_SYMBOLS | self.KEYWORDS
        n = len(tokens)
        first_err = None

        i = 0
        while i < n:
            t = tokens[i]
            if t.kind == "OP" and t.value in COMPARE_OPS:
                j = i + 1
                if j >= n:
                    break
                start_j = j
                rhs_words = []
                while j < n and tokens[j].kind == "BAREWORD":
                    if tokens[j].value.upper() in self.KEYWORDS:
                        rhs_words = []
                        break
                    if j + 1 < n and tokens[j+1].kind == "LPAREN":
                        rhs_words = []
                        break
                    rhs_words.append(tokens[j])
                    j += 1

                if rhs_words:
                    a, e = rhs_words[0], rhs_words[-1]
                    msg = "String literal must be enclosed in single quotes (e.g., 'Abax ater')."
                    errors.append(msg)
                    if not first_err:
                        first_err = (a.start, e.end - a.start, "unquoted_string")
                    break
            i += 1
        return first_err

    def _validate_adjacent_values_and_dangling_logic(self, tokens: List[Token], errors: List[str]) -> Optional[Tuple[int,int,str]]:
        first_err = None
        n = len(tokens)

        def is_value_like(t: Token) -> bool:
            return t.kind in {"STRING", "NUMBER", "IDENT_DQ", "BOOLEAN"}

        # 1) Prohibited: two values/identifiers in a row without an operator
        # Exception: There is a COMMA between the items in the IN/IIN list in parentheses, not a direct sequence.
        for i in range(n - 1):
            a, b = tokens[i], tokens[i+1]
            if is_value_like(a) and is_value_like(b):
                msg = "Missing operator between values (two literals/identifiers cannot be adjacent)."
                errors.append(msg)
                if not first_err:
                    first_err = (b.start, b.end - b.start, "adjacent_values")
                break

        # 2) Prohibited: dangling AND/OR at the end, or without an expression
        # Accepted continuation after AND/OR: NOT | LPAREN | IDENT_DQ | STRING | NUMBER
        for i, t in enumerate(tokens):
            if t.kind == "KEYWORD" and t.value in {"AND", "OR"}:
                if i == n - 1:
                    errors.append(f"Trailing logical operator '{t.value}' without following expression.")
                    if not first_err:
                        first_err = (t.start, t.end - t.start, "dangling_logic")
                    break
                nxt = tokens[i+1]
                if not (nxt.kind == "KEYWORD" and nxt.value == "NOT" or
                        nxt.kind in {"LPAREN", "IDENT_DQ", "STRING", "NUMBER"}):
                    errors.append(f"Logical operator '{t.value}' must be followed by a valid expression.")
                    if not first_err:
                        first_err = (nxt.start, nxt.end - nxt.start, "dangling_logic")
                    break

        return first_err



    def validate(self,
                 where_expr: str,
                 field_names: Optional[Iterable[str]] = None,
                 case_sensitive_fields: bool = False,
                 deep_sql_check: bool = True) -> ValidatorResult:
        """
        Run the full validation pipeline on a WHERE expression.

        Order of work:
          1) Tokenize; an expression that yields no tokens is accepted.
          2) Quote and parenthesis checks - the first failure is returned at once.
          3) Token-level validators. All of them run and append to one shared
             error list, in this order: unquoted right-hand strings,
             identifiers / barewords, operators, binary predicates, adjacent
             values and dangling AND/OR, unquoted dates. The reported error
             span is taken from the first validator that returns one.
          4) When step 3 reported nothing and ``deep_sql_check`` is true, the
             expression is handed to ``validate_where_with_sqlvalidator``
             unless ``_has_nonstandard_ops_for_deep`` returns true for the tokens.

        Args:
            where_expr: The WHERE expression text.
            field_names: Optional known field names for identifier validation.
            case_sensitive_fields: Whether field names are compared exactly.
            deep_sql_check: Enables step 4.

        Returns:
            A ``ValidatorResult``; ``ok`` is true only when no step reported
            an error.
        """
        tokens = self.tokenize(where_expr)
        if not tokens:
            return ValidatorResult(ok=True, errors=[], details={},
                                   error_index=None, error_length=None, error_kind=None)

        # Quote / parenthesis problems short-circuit everything else.
        quote_or_paren_failure = self._post_token_quote_paren_check(where_expr, tokens)
        if quote_or_paren_failure is not None:
            return quote_or_paren_failure

        errors: List[str] = []
        token_checks = (
            lambda: self._validate_rhs_string_literals(tokens, errors),
            lambda: self._validate_identifiers(tokens, field_names, case_sensitive_fields, errors),
            lambda: self._validate_operators(tokens, errors),
            lambda: self._validate_binary_predicates(tokens, errors),
            lambda: self._validate_adjacent_values_and_dangling_logic(tokens, errors),
            lambda: self._validate_literals(tokens, errors),
        )

        err_index: Optional[int] = None
        err_len: Optional[int] = None
        err_kind: Optional[str] = None
        for run_check in token_checks:
            span = run_check()
            # Every check contributes messages; only the first span is kept.
            if span and err_index is None:
                err_index, err_len, err_kind = span

        expr_length = len(where_expr)

        if errors:
            return ValidatorResult(
                ok=False,
                errors=errors,
                details={"length": expr_length, "error_index": err_index, "error_length": err_len},
                error_index=err_index,
                error_length=err_len,
                error_kind=err_kind,
            )

        # Deep validation carries no position information.
        spanless_details = {"length": expr_length, "error_index": None, "error_length": None}

        if deep_sql_check and not self._has_nonstandard_ops_for_deep(tokens):
            ok, deep_errors, _ = self.validate_where_with_sqlvalidator(where_expr)
            if not ok:
                return ValidatorResult(ok=False, errors=list(deep_errors), details=spanless_details,
                                       error_index=None, error_length=None, error_kind=None)

        return ValidatorResult(ok=True, errors=[], details=spanless_details,
                               error_index=None, error_length=None, error_kind=None)

    def validate_where_with_sqlvalidator(self, where_expr: str) -> Tuple[bool, List[str], Optional[str]]:
        """
        Validate ``where_expr`` with the optional ``sqlvalidator`` package.

        The expression is embedded in ``SELECT 1 FROM t WHERE ...`` and parsed.
        This method performs no checks of its own; every failure (missing
        package, parser exception, invalid SQL) is reported through the
        return value rather than raised.

        Returns:
            ``(ok, errors, error_slice)``. ``errors`` is empty when ``ok`` is
            true; ``error_slice`` is always ``None``.
        """
        # Imported lazily so the rest of the validator works without the package.
        try:
            import sqlvalidator
        except Exception as import_error:
            return False, [f"sqlvalidator is not available: {import_error}"], None

        statement = "SELECT 1 FROM t WHERE {}".format(where_expr)

        try:
            parsed = sqlvalidator.parse(statement)
        except StopIteration:
            return False, ["Tokenizer/parsing stopped (possible unterminated string literal)."], None
        except Exception as parse_error:
            return False, [f"Parser setup error: {parse_error}"], None

        try:
            is_valid = parsed.is_valid()
            if is_valid:
                problems = []
            else:
                problems = list(getattr(parsed, "errors", []))
        except StopIteration:
            return False, ["Parsing stopped: likely unterminated string or malformed token."], None
        except Exception as validation_error:
            return False, [f"Validation error: {validation_error}"], None
        else:
            return is_valid, problems, None

# -------------------

