# -*- coding: utf-8 -*-
"""
OGR Expression Builder.

v4.1.0: Migrated from before_migration/modules/backends/ogr_backend.py

This module contains the filter logic for OGR-based layers (Shapefiles, etc.).
Unlike PostgreSQL/Spatialite, OGR uses QGIS processing algorithms for filtering.

It implements the GeometricFilterPort interface for backward compatibility.

Features:
- QGIS processing selectbylocation algorithm
- Memory layer optimization for PostgreSQL
- Spatial index auto-creation
- Thread-safe reference management
- Cancellable feedback for interruption

Author: FilterMate Team
Date: January 2026
"""

import json
import logging
import threading
from typing import Dict, Optional, Any

logger = logging.getLogger('FilterMate.Backend.OGR.ExpressionBuilder')

# Import the port interface
try:
    from ....core.ports.geometric_filter_port import GeometricFilterPort
except ImportError:
    from core.ports.geometric_filter_port import GeometricFilterPort

# Import safe_set_subset_string from infrastructure
try:
    from ....infrastructure.database.sql_utils import safe_set_subset_string
except ImportError:
    def safe_set_subset_string(layer, expression):
        """
        Minimal stand-in used when the infrastructure helper cannot be imported.

        Args:
            layer: Object exposing ``setSubsetString``; ``None`` is tolerated.
            expression: Subset string handed to ``layer.setSubsetString``.

        Returns:
            Whatever ``layer.setSubsetString(expression)`` returns, or False
            when ``layer`` is None or the call raises.
        """
        if layer is not None:
            try:
                return layer.setSubsetString(expression)
            except Exception:
                # Best-effort fallback: any failure is reported as False below.
                pass
        return False

# Bookkeeping used by OGRExpressionBuilder.apply_filter() to detect calls that
# arrive from different threads. The lock only guards reads/writes of
# _last_operation_thread; it is released before any processing runs, so it
# does not serialize the OGR operations themselves.
_ogr_operations_lock = threading.Lock()
# Thread ident (threading.current_thread().ident) of the most recent
# apply_filter() caller, or None before the first call.
_last_operation_thread = None

# Import QgsProcessingFeedback for proper inheritance
try:
    from qgis.core import QgsProcessingFeedback
    _HAS_PROCESSING_FEEDBACK = True
except ImportError:
    _HAS_PROCESSING_FEEDBACK = False
    QgsProcessingFeedback = object  # Fallback so the class below still imports


class CancellableFeedback(QgsProcessingFeedback):
    """
    Feedback object that lets a processing run be cancelled.

    Inherits from QgsProcessingFeedback when QGIS is importable (plain
    ``object`` otherwise). Cancellation can come from two places: an explicit
    ``cancel()`` call, or an optional callback polled by ``isCanceled()``.
    Once either source reports cancellation the state is latched, i.e.
    ``isCanceled()`` keeps returning True.

    NOTE(review): code that reads the base-class cancel flag directly, without
    going through this Python ``isCanceled()`` override, only learns about a
    callback-driven cancellation after ``isCanceled()`` has been polled once
    from Python (which forwards it to ``cancel()``) - confirm this is
    sufficient for the algorithms used.
    """

    def __init__(self, is_cancelled_callback=None):
        """
        Initialize feedback.

        Args:
            is_cancelled_callback: Optional zero-argument callable; a truthy
                return value means the operation has been cancelled.
        """
        if _HAS_PROCESSING_FEEDBACK:
            super().__init__()
        self._cancelled = False
        self._is_cancelled_callback = is_cancelled_callback

    def isCanceled(self) -> bool:
        """
        Report whether the operation has been cancelled.

        Returns:
            True if ``cancel()`` was called or the callback returned a truthy
            value (now or on an earlier poll); False otherwise. Always a real
            ``bool``, regardless of what the callback returns.
        """
        if self._cancelled:
            return True
        callback = self._is_cancelled_callback
        if callback and callback():
            # Latch the state and forward it to the base feedback so both
            # views of "cancelled" agree from now on.
            self.cancel()
            return True
        return False

    def cancel(self):
        """Mark the operation as cancelled and notify the base feedback."""
        self._cancelled = True
        if _HAS_PROCESSING_FEEDBACK:
            try:
                super().cancel()
            except Exception:
                # Best-effort: the local flag above is already set, so a
                # failure in the base class must not break cancellation.
                pass

    def setProgress(self, progress: float):
        """
        Forward progress to the base feedback when QGIS is available.

        Args:
            progress: Progress value (0-100).
        """
        if _HAS_PROCESSING_FEEDBACK:
            try:
                super().setProgress(progress)
            except Exception:
                # Progress reporting is cosmetic; never let it abort the run.
                pass


class OGRExpressionBuilder(GeometricFilterPort):
    """
    OGR expression builder.

    Uses QGIS processing algorithms for spatial filtering since OGR
    providers don't support complex SQL expressions.

    Implements the legacy GeometricFilterPort interface.

    Workflow:
    - build_expression() stores the source layer on the instance and returns
      a JSON string of processing parameters (not SQL).
    - apply_filter() runs 'native:selectbylocation', converts the selected
      feature IDs into a subset string and applies it to the layer.
    - cancel() / cleanup() interrupt a run and drop held references.

    Example:
        builder = OGRExpressionBuilder(task_params)
        expr = builder.build_expression(
            layer_props={'layer_name': 'buildings'},
            predicates={'intersects': True},
            source_geom=source_layer
        )
        builder.apply_filter(layer, expr)
    """

    # Maps lower-case predicate names to the integer codes passed as the
    # 'PREDICATE' parameter of 'native:selectbylocation' in apply_filter().
    PREDICATE_CODES = {
        'intersects': 0,
        'contains': 1,
        'disjoint': 2,
        'equals': 3,
        'touches': 4,
        'overlaps': 5,
        'within': 6,
        'crosses': 7,
    }
    
    def __init__(self, task_params: Dict[str, Any]):
        """
        Create a builder bound to one task configuration.

        Args:
            task_params: Task configuration parameters, forwarded unchanged
                to the GeometricFilterPort initializer.
        """
        super().__init__(task_params)
        # Nothing is known about the source or a running operation until
        # build_expression() / apply_filter() have been called.
        self.source_geom = None
        self._feedback = None
        # Strong references held by this builder; emptied by cleanup().
        self._source_layer_keep_alive = []
        self._temp_layers_keep_alive = []
        self._logger = logger
    
    def get_backend_name(self) -> str:
        """Get backend name."""
        return "OGR"
    
    def supports_layer(self, layer: 'QgsVectorLayer') -> bool:
        """
        Check if this backend supports the given layer.
        
        OGR is the fallback backend - supports everything not handled
        by PostgreSQL or Spatialite.
        
        Args:
            layer: QGIS vector layer to check
            
        Returns:
            True for OGR-based layers (Shapefile, GeoJSON, etc.)
        """
        if layer is None:
            return False
        
        provider = layer.providerType()
        
        # Don't handle PostgreSQL or Spatialite
        if provider in ('postgres', 'spatialite'):
            return False
        
        # Handle OGR and memory providers
        return provider in ('ogr', 'memory')
    
    def build_expression(
        self,
        layer_props: Dict[str, Any],
        predicates: Dict[str, bool],
        source_geom: Optional[Any] = None,
        buffer_value: Optional[float] = None,
        buffer_expression: Optional[str] = None,
        source_filter: Optional[str] = None,
        use_centroids: bool = False,
        **kwargs
    ) -> str:
        """
        Build expression for OGR backend.
        
        OGR uses QGIS processing, so this returns JSON parameters
        rather than SQL. The actual filtering happens in apply_filter().
        
        Args:
            layer_props: Layer properties
            predicates: Spatial predicates to apply
            source_geom: Source layer reference
            buffer_value: Buffer distance
            buffer_expression: Dynamic buffer expression
            source_filter: Not used
            use_centroids: Already applied in source preparation
            **kwargs: Additional parameters
            
        Returns:
            JSON string with processing parameters
        """
        self.log_debug(f"Preparing OGR processing for {layer_props.get('layer_name', 'unknown')}")
        
        # Log buffer parameters
        self.log_info(f"📐 OGR buffer parameters:")
        self.log_info(f"  - buffer_value: {buffer_value}")
        self.log_info(f"  - buffer_expression: {buffer_expression}")
        
        if buffer_value is not None and buffer_value < 0:
            self.log_info(f"  ⚠️ NEGATIVE BUFFER (erosion) requested: {buffer_value}m")
        
        # Store source geometry for apply_filter
        self.source_geom = source_geom
        
        # Keep source layer alive
        if source_geom is not None:
            try:
                from qgis.core import QgsVectorLayer
                if isinstance(source_geom, QgsVectorLayer):
                    self._source_layer_keep_alive.append(source_geom)
            except ImportError:
                pass
        
        # Return JSON parameters
        params = {
            'predicates': list(predicates.keys()),
            'buffer_value': buffer_value,
            'buffer_expression': buffer_expression
        }
        return json.dumps(params)
    
    def apply_filter(
        self,
        layer: 'QgsVectorLayer',
        expression: str,
        old_subset: Optional[str] = None,
        combine_operator: Optional[str] = None
    ) -> bool:
        """
        Filter ``layer`` by its spatial relation to the stored source layer.

        Steps:
        1. Run 'native:selectbylocation' (new selection) on ``layer`` against
           the source layer stored by build_expression().
        2. Turn the selected feature IDs into a subset string
           (``"1 = 0"`` when nothing was selected) and clear the selection.
        3. Combine with ``old_subset`` when a ``combine_operator`` is given
           and the old subset is not itself geometric, then apply the result
           with safe_set_subset_string().

        The layer's current selection is replaced and then cleared as a side
        effect. If cancel() is called while processing runs, the layer filter
        is left untouched and False is returned.

        Thread Safety:
            A module-level lock guards only the "last calling thread"
            bookkeeping used to log a warning when calls arrive from different
            threads; the filtering itself is not serialized.

        Args:
            layer: Layer to filter.
            expression: JSON parameters from build_expression(). Only the
                ``predicates`` list is used here; the buffer entries are not
                applied by this method (presumably handled when the source
                layer is prepared - TODO confirm).
            old_subset: Existing subset string of the layer, if any.
            combine_operator: SQL operator used to join ``old_subset`` with
                the new filter.

        Returns:
            True if the subset string was applied successfully, False on any
            failure or cancellation.
        """
        global _last_operation_thread

        # Thread safety check (diagnostic only)
        current_thread = threading.current_thread().ident
        with _ogr_operations_lock:
            if _last_operation_thread is not None and _last_operation_thread != current_thread:
                self.log_warning(
                    f"⚠️ OGR apply_filter called from different thread! "
                    f"Previous: {_last_operation_thread}, Current: {current_thread}"
                )
            _last_operation_thread = current_thread

        try:
            from qgis import processing
            from qgis.core import QgsVectorLayer

            # Parse parameters; a missing or empty predicate list means
            # "intersects".
            params = json.loads(expression) if expression else {}
            predicates = params.get('predicates') or ['intersects']

            # Get source layer
            source_layer = self.source_geom

            if source_layer is None:
                self.log_error("No source layer available for OGR filter")
                return False

            if not isinstance(source_layer, QgsVectorLayer):
                self.log_error(f"Source is not a QgsVectorLayer: {type(source_layer)}")
                return False

            self.log_info(f"📍 Applying OGR filter to {layer.name()}")
            self.log_info(f"  - Source: {source_layer.name()} ({source_layer.featureCount()} features)")

            # Map predicate names to processing codes. Unknown names still
            # fall back to 'intersects', but no longer silently.
            predicate_codes = []
            for pred in predicates:
                pred_lower = pred.lower().replace('st_', '')
                code = self.PREDICATE_CODES.get(pred_lower)
                if code is None:
                    self.log_warning(
                        f"Unknown predicate '{pred}' - falling back to 'intersects'"
                    )
                    code = self.PREDICATE_CODES['intersects']
                if code not in predicate_codes:
                    predicate_codes.append(code)

            # Create feedback for cancellation (used by cancel())
            self._feedback = CancellableFeedback()

            # Run selectbylocation; the result is read back from the layer's
            # selection, so the returned dict is not needed.
            try:
                processing.run(
                    'native:selectbylocation',
                    {
                        'INPUT': layer,
                        'INTERSECT': source_layer,
                        'PREDICATE': predicate_codes,
                        'METHOD': 0  # New selection
                    },
                    feedback=self._feedback
                )
            except Exception as e:
                self.log_error(f"Processing failed: {e}")
                return False

            # A cancelled run may leave a partial or empty selection; never
            # turn that into a layer filter.
            if self._feedback.isCanceled():
                self.log_warning("OGR filter cancelled - layer filter left unchanged")
                layer.removeSelection()
                return False

            # Get selected feature IDs
            selected_ids = list(layer.selectedFeatureIds())
            self.log_info(f"  - Selected: {len(selected_ids)} features")

            if selected_ids:
                fid_filter = self._build_fid_filter(layer, selected_ids)
            else:
                # Match-nothing filter; it still goes through the combine
                # step below so operators such as OR keep the old subset.
                self.log_warning("No features selected - applying empty filter")
                fid_filter = "1 = 0"

            # Clear selection (filter applied via subset)
            layer.removeSelection()

            # Combine with existing filter if needed; a previous geometric
            # filter is replaced rather than combined.
            if old_subset and combine_operator and not self._is_geometric_filter(old_subset):
                final_filter = f"({old_subset}) {combine_operator} ({fid_filter})"
            else:
                final_filter = fid_filter

            # Apply filter (log a truncated form for very long FID lists)
            shown = f"{final_filter[:200]}..." if len(final_filter) > 200 else final_filter
            self.log_info(f"  - Applying filter: {shown}")
            success = safe_set_subset_string(layer, final_filter)

            if success:
                self.log_info(f"✓ OGR filter applied: {len(selected_ids)} features")
            else:
                self.log_error(f"✗ Failed to apply FID filter to {layer.name()}")
                self.log_error(f"  - Filter expression: {final_filter[:500]}...")
                self.log_error(f"  - Primary key field: {self._get_primary_key(layer)}")
                self.log_error(f"  - Number of FIDs: {len(selected_ids)}")
                # Try to get more diagnostic info
                try:
                    provider = layer.dataProvider()
                    self.log_error(f"  - Provider capabilities: {provider.capabilities()}")
                    self.log_error(f"  - Storage type: {provider.storageType()}")
                except Exception as diag_e:
                    self.log_error(f"  - Could not get diagnostics: {diag_e}")

            return success

        except Exception as e:
            # Top-level boundary: report and signal failure to the caller.
            self.log_error(f"Error in OGR apply_filter: {e}")
            return False
    
    def cancel(self):
        """Cancel ongoing operation."""
        if self._feedback:
            self._feedback.cancel()
    
    def cleanup(self):
        """Clean up temporary layers."""
        self._temp_layers_keep_alive.clear()
        self._source_layer_keep_alive.clear()
        self.source_geom = None
    
    # =========================================================================
    # Private Helper Methods
    # =========================================================================
    
    def _build_fid_filter(self, layer, fids: list) -> str:
        """
        Build FID-based filter expression for OGR layers (v4.0.7).
        
        Improved to handle various primary key types:
        - Numeric IDs: fid IN (1, 2, 3)
        - UUIDs: uuid IN ('abc-123', 'def-456')
        - GeoPackage: "fid" IN (1, 2, 3)
        - Shapefiles: fid IN (1, 2, 3)
        
        Args:
            layer: QGIS vector layer
            fids: List of feature IDs
            
        Returns:
            Filter expression string
        """
        if not fids:
            return "1 = 0"
        
        # Get storage type and primary key
        storage_type = ""
        try:
            storage_type = layer.dataProvider().storageType().lower()
        except Exception:
            pass
        
        pk_field = self._get_primary_key(layer)
        pk_field_lower = pk_field.lower()
        
        # Check if PK field is numeric or text (for quoting values)
        is_numeric_pk = True
        try:
            fields = layer.fields()
            pk_idx = fields.indexOf(pk_field)
            if pk_idx >= 0:
                from qgis.PyQt.QtCore import QVariant
                field_type = fields.at(pk_idx).type()
                is_numeric_pk = field_type in (QVariant.Int, QVariant.LongLong, QVariant.UInt, QVariant.ULongLong, QVariant.Double)
        except Exception:
            pass
        
        # Build value list based on PK type
        if is_numeric_pk:
            fid_list = ", ".join(str(fid) for fid in fids)
        else:
            # Quote string values (UUID, etc.)
            fid_list = ", ".join(f"'{fid}'" for fid in fids)
        
        # Shapefile special case: QGIS 3.x requires lowercase 'fid' for setSubsetString
        if 'shapefile' in storage_type or 'esri' in storage_type:
            self.log_info(f"  - Shapefile detected: using lowercase 'fid' for QGIS subset")
            return f'fid IN ({fid_list})'
        
        # GeoPackage and SQLite-based formats: use quoted field name
        if 'geopackage' in storage_type or 'gpkg' in storage_type or 'sqlite' in storage_type:
            self.log_info(f"  - GeoPackage/SQLite detected: using quoted '{pk_field}'")
            return f'"{pk_field}" IN ({fid_list})'
        
        # For other OGR formats with detected primary key
        if pk_field and pk_field_lower not in ['fid']:
            self.log_info(f"  - Using detected primary key: {pk_field}")
            return f'"{pk_field}" IN ({fid_list})'
        
        # Default: try lowercase fid (more compatible with QGIS setSubsetString)
        self.log_info(f"  - Unknown format ({storage_type}): using lowercase 'fid' syntax")
        return f'fid IN ({fid_list})'
    
    def _get_primary_key(self, layer) -> str:
        """
        Get primary key field name with improved detection (v4.0.7).
        
        Priority order:
        1. Provider-declared primary key
        2. Exact PK names: id, fid, pk, gid, ogc_fid, objectid, oid, rowid
        3. UUID fields (uuid, guid in name)
        4. Numeric fields with ID patterns (_id, id_, identifier, etc.)
        5. First numeric integer field
        6. Default to "fid"
        
        Args:
            layer: QGIS vector layer
            
        Returns:
            Primary key field name
        """
        # Common primary key field names (exact match, case-insensitive)
        PK_EXACT_NAMES = ['id', 'fid', 'pk', 'gid', 'ogc_fid', 'objectid', 'oid', 'rowid']
        # UUID field patterns (contains, case-insensitive)
        UUID_PATTERNS = ['uuid', 'guid']
        # ID field patterns (contains, case-insensitive)
        ID_PATTERNS = ['_id', 'id_', 'identifier', 'feature_id', 'object_id']
        
        try:
            from qgis.PyQt.QtCore import QVariant
            
            fields = layer.fields()
            if not fields:
                return "fid"
            
            # 1. Try provider-declared primary key
            try:
                pk_indexes = layer.dataProvider().pkAttributeIndexes()
                if pk_indexes:
                    pk_name = fields.at(pk_indexes[0]).name()
                    self.log_debug(f"Using provider PK: {pk_name}")
                    return pk_name
            except Exception:
                pass
            
            # 2. Look for exact match PK names
            for field in fields:
                if field.name().lower() in PK_EXACT_NAMES:
                    self.log_debug(f"Found exact PK name: {field.name()}")
                    return field.name()
            
            # 3. Look for UUID fields
            for field in fields:
                field_name_lower = field.name().lower()
                for pattern in UUID_PATTERNS:
                    if pattern in field_name_lower:
                        self.log_debug(f"Found UUID field: {field.name()}")
                        return field.name()
            
            # 4. Look for numeric fields with ID patterns
            numeric_types = (QVariant.Int, QVariant.LongLong, QVariant.UInt, QVariant.ULongLong)
            for field in fields:
                field_name_lower = field.name().lower()
                if field.type() in numeric_types:
                    for pattern in ID_PATTERNS:
                        if pattern in field_name_lower:
                            self.log_debug(f"Found numeric ID field: {field.name()}")
                            return field.name()
            
            # 5. First numeric integer field
            for field in fields:
                if field.type() in numeric_types:
                    self.log_debug(f"Using first numeric field: {field.name()}")
                    return field.name()
            
        except Exception as e:
            self.log_warning(f"Error detecting primary key: {e}")
        
        # 6. Default to fid
        return "fid"
    
    def _is_geometric_filter(self, subset: str) -> bool:
        """Check if subset contains geometric filter patterns."""
        subset_lower = subset.lower()
        
        # OGR filters are typically FID-based
        geometric_patterns = [
            'intersects',
            'contains',
            'within',
            'st_'
        ]
        
        return any(p in subset_lower for p in geometric_patterns)


# =============================================================================
# Exports
# =============================================================================

__all__ = [
    'OGRExpressionBuilder',
    'CancellableFeedback',
]
