# -*- coding: utf-8 -*-
"""
/***************************************************************************
 QText+ Utilities
 
 Shared utility functions for delimiter detection, UTM extraction, and
 common operations across the plugin.
 
 PURPOSE:
 - Centralize repeated code from dialogs and processors
 - Provide clean, testable utility functions
 - Reduce code duplication
 
 FILE: core/utils.py
                              -------------------
        begin                : 2026-01-02
        copyright            : (C) 2024 by Aziz TRAORE
        email                : aziz.explorer@gmail.com
 ***************************************************************************/
"""

import re
from typing import Optional, Tuple, Dict, Any


class DelimiterDetector:
    """Automatic delimiter detection from file content."""

    @staticmethod
    def detect_from_line(line: str) -> str:
        """Guess the field delimiter used in a single line of text.

        Counts commas, semicolons, tabs, pipes and spaces in ``line`` and
        returns the most frequent one. When several candidates tie, the one
        listed first (comma, semicolon, tab, pipe, space) wins.

        Args:
            line: One line of the file, typically the header row.

        Returns:
            The detected delimiter, or ``','`` when ``line`` is empty or
            contains none of the candidates.
        """
        if not line:
            return ','

        candidates = (',', ';', '\t', '|', ' ')
        counts = {delim: line.count(delim) for delim in candidates}

        # Spaces that make up more than 30% of the line are not counted as
        # a separator candidate.
        if counts[' '] > len(line) * 0.3:
            counts[' '] = 0

        # max() returns the first key holding the highest count, which keeps
        # the candidate order above as the tie-breaker.
        best = max(counts, key=counts.get)
        return best if counts[best] > 0 else ','

    @staticmethod
    def detect_from_content(line: str) -> str:
        """Alias for detect_from_line() for backward compatibility."""
        return DelimiterDetector.detect_from_line(line)

    @staticmethod
    def extract_from_settings(settings: Dict[str, Any]) -> str:
        """Extract delimiter from import settings dict.

        Reads ``settings['delimiter_type']`` (default ``'csv'``) and, for the
        ``'custom'`` and ``'regexp'`` types, ``settings['delimiter_value']``.

        Args:
            settings: Import settings dictionary.

        Returns:
            ``','`` for the ``'csv'`` type and for unknown types; the custom
            delimiter with a literal ``\\t`` sequence converted to a real tab
            for ``'custom'``; the regular expression for ``'regexp'``.
            A missing, ``None`` or empty ``delimiter_value`` falls back to
            ``','`` (custom) or ``r'\\s+'`` (regexp).
        """
        delim_type = settings.get('delimiter_type', 'csv')

        if delim_type == 'custom':
            # `or` also covers a key that is present but None/empty, which
            # dict.get()'s default alone would let through.
            delim_value = settings.get('delimiter_value') or ','
            # Convert escaped tab
            return delim_value.replace('\\t', '\t')

        if delim_type == 'regexp':
            return settings.get('delimiter_value') or r'\s+'

        # 'csv' and any unrecognised type
        return ','

    @staticmethod
    def normalize_delimiter_for_display(delimiter: str) -> str:
        """Convert delimiter to human-readable form for UI display.

        Known single-character delimiters are replaced by their name;
        anything else is returned unchanged.
        """
        mapping = {
            '\t': 'Tab',
            ',': 'Comma',
            ';': 'Semicolon',
            '|': 'Pipe',
            ' ': 'Space',
            ':': 'Colon'
        }
        return mapping.get(delimiter, delimiter)

class UTMHelper:
    """Helpers for converting between EPSG identifiers and UTM zones."""

    @staticmethod
    def extract_zone_from_epsg(epsg_code: str) -> Tuple[Optional[int], Optional[str]]:
        """Return ``(zone, hemisphere)`` for a WGS84 / UTM identifier.

        The numeric part is taken from the text after the first ``':'``.
        Codes 32601-32660 map to zones 1-60 North, codes 32701-32760 to
        zones 1-60 South.

        Returns:
            ``(zone, 'N' | 'S')`` for a UTM code, otherwise ``(None, None)``
            (empty input, no colon, non-numeric code, or a non-UTM code).
        """
        not_utm = (None, None)

        if not epsg_code or ':' not in epsg_code:
            return not_utm

        try:
            numeric = int(epsg_code.split(':')[1])
        except (ValueError, IndexError):
            return not_utm

        # WGS84 / UTM zone XXN (North)
        if 32601 <= numeric <= 32660:
            return numeric - 32600, 'N'

        # WGS84 / UTM zone XXS (South)
        if 32701 <= numeric <= 32760:
            return numeric - 32700, 'S'

        return not_utm

    @staticmethod
    def build_utm_epsg(zone: int, hemisphere: str) -> str:
        """Compose the ``'EPSG:<code>'`` string for a UTM zone.

        Args:
            zone: UTM zone number, 1-60.
            hemisphere: ``'N'`` or ``'S'`` (case-insensitive).

        Raises:
            ValueError: If the zone is outside 1-60 or the hemisphere is
                neither N nor S.
        """
        if not 1 <= zone <= 60:
            raise ValueError(f"Invalid UTM zone: {zone} (expected 1-60)")

        base_by_hemisphere = {'N': 32600, 'S': 32700}
        key = hemisphere.upper()
        if key not in base_by_hemisphere:
            raise ValueError(f"Invalid hemisphere: {hemisphere} (expected N or S)")

        return f'EPSG:{base_by_hemisphere[key] + zone}'

    @staticmethod
    def is_utm_crs(epsg_code: str) -> bool:
        """Tell whether ``epsg_code`` is recognised as a WGS84 / UTM code."""
        return UTMHelper.extract_zone_from_epsg(epsg_code)[0] is not None


class FieldMatcher:
    """Fuzzy field name matching for auto-detection and mapping."""

    # Lower-case name fragments for coordinate fields, in priority order.
    X_PATTERNS = ['x', 'lon', 'longitude', 'long', 'east', 'easting', 'x_coord']
    Y_PATTERNS = ['y', 'lat', 'latitude', 'north', 'northing', 'y_coord']
    WKT_PATTERNS = ['wkt', 'geom', 'geometry', 'shape', 'the_geom']

    @staticmethod
    def find_x_field(headers: list) -> Optional[str]:
        """Auto-detect X/Longitude field from headers."""
        return FieldMatcher._find_field(headers, FieldMatcher.X_PATTERNS)

    @staticmethod
    def find_y_field(headers: list) -> Optional[str]:
        """Auto-detect Y/Latitude field from headers."""
        return FieldMatcher._find_field(headers, FieldMatcher.Y_PATTERNS)

    @staticmethod
    def find_wkt_field(headers: list) -> Optional[str]:
        """Auto-detect WKT geometry field from headers."""
        return FieldMatcher._find_field(headers, FieldMatcher.WKT_PATTERNS)

    @staticmethod
    def _find_field(headers: list, patterns: list) -> Optional[str]:
        """Find the header that best matches any of ``patterns``.

        Matching is case-insensitive and runs in three tiers, each tier
        trying the patterns in their listed order:

        1. the header equals the pattern (ignoring surrounding whitespace);
        2. the pattern is one of the header's words, where words are
           separated by underscores or other non-alphanumeric characters
           (``'coord_x'`` contains the word ``'x'``);
        3. the pattern occurs anywhere inside the header.

        The stricter tiers run first so that one-letter patterns such as
        ``'x'`` or ``'y'`` do not pick ``'index'`` or ``'city'`` when a
        better candidate like ``'longitude'`` or ``'lat'`` is present.

        Args:
            headers: Column names; non-string entries are compared by their
                ``str()`` form.
            patterns: Lower-case patterns in priority order.

        Returns:
            The matching entry of ``headers`` (original object, original
            case), or ``None`` when nothing matches.
        """
        names = [str(header).strip().lower() for header in headers]
        words = [set(re.split(r'[\W_]+', name)) for name in names]

        tiers = (
            lambda pattern, name, tokens: pattern == name,
            lambda pattern, name, tokens: pattern in tokens,
            lambda pattern, name, tokens: pattern in name,
        )

        for is_match in tiers:
            for pattern in patterns:
                for header, name, tokens in zip(headers, names, words):
                    if is_match(pattern, name, tokens):
                        return header

        return None

    @staticmethod
    def fuzzy_match_fields(source_fields: list, target_fields: list,
                          threshold: float = 0.75) -> Dict[str, str]:
        """Create fuzzy mapping between two field lists.

        For every target field that is not literally present in
        ``source_fields``, the source field with the highest
        case-insensitive ``difflib.SequenceMatcher`` ratio is chosen,
        provided that ratio is strictly greater than ``threshold``.

        Args:
            source_fields: Field names that actually exist.
            target_fields: Field names that are expected.
            threshold: Minimum similarity (exclusive), between 0 and 1.

        Returns:
            ``{target_field: source_field}`` for the fuzzy matches only;
            targets with an exact match or with no match above the
            threshold are omitted.
        """
        from difflib import SequenceMatcher

        mapping = {}

        for target_field in target_fields:
            # Exact match first
            if target_field in source_fields:
                continue  # No mapping needed

            # Fuzzy match: keep the first source field with the best ratio
            best_match = None
            best_ratio = threshold

            for source_field in source_fields:
                ratio = SequenceMatcher(
                    None,
                    target_field.lower(),
                    source_field.lower()
                ).ratio()

                if ratio > best_ratio:
                    best_match = source_field
                    best_ratio = ratio

            if best_match:
                mapping[target_field] = best_match

        return mapping


class EncodingHelper:
    """Encoding detection and validation."""

    COMMON_ENCODINGS = [
        'UTF-8', 'UTF-16', 'UTF-32',
        'ISO-8859-1', 'ISO-8859-15',
        'Windows-1252', 'ASCII',
        'CP437', 'CP850', 'CP1252'
    ]

    @staticmethod
    def detect_encoding(filepath: str, sample_size: int = 10000) -> str:
        """Detect file encoding using chardet.

        Reads up to ``sample_size`` bytes from the start of the file and
        passes them to ``chardet.detect``. The detected name is upper-cased
        and a few common aliases are mapped to the spelling used in
        ``COMMON_ENCODINGS``.

        Args:
            filepath: Path of the file to inspect.
            sample_size: Maximum number of bytes to read.

        Returns:
            The encoding name. When chardet is not installed the result of
            ``_detect_encoding_simple`` is returned; when detection fails
            for any other reason (unreadable file, no result) ``'UTF-8'``
            is returned.
        """
        try:
            import chardet

            with open(filepath, 'rb') as f:
                raw_data = f.read(sample_size)
                result = chardet.detect(raw_data)
                encoding = result.get('encoding', 'UTF-8')

                # Normalize encoding name
                if encoding:
                    encoding = encoding.upper()

                    # Map common variations
                    if encoding in ['ISO8859-1', 'LATIN-1']:
                        encoding = 'ISO-8859-1'
                    elif encoding in ['WINDOWS-1252', 'CP1252']:
                        encoding = 'Windows-1252'

                return encoding or 'UTF-8'

        except ImportError:
            # chardet not available
            return EncodingHelper._detect_encoding_simple(filepath, sample_size)

        except Exception:
            # Detection is best-effort: any failure falls back to UTF-8
            return 'UTF-8'

    @staticmethod
    def _detect_encoding_simple(filepath: str, sample_size: int = 10000) -> str:
        """Standard-library fallback used when chardet is unavailable.

        Heuristic, in order:

        1. a byte-order mark selects ``'UTF-32'``, ``'UTF-8-SIG'`` or
           ``'UTF-16'``;
        2. a sample that decodes as UTF-8 yields ``'UTF-8'``;
        3. otherwise ``'Windows-1252'`` if the sample decodes with it,
           else ``'ISO-8859-1'``.

        Args:
            filepath: Path of the file to inspect.
            sample_size: Maximum number of bytes to read.

        Returns:
            The guessed encoding name; ``'UTF-8'`` if the file cannot be
            read. This method does not raise for unreadable files.
        """
        import codecs

        try:
            with open(filepath, 'rb') as f:
                raw_data = f.read(sample_size)
        except (OSError, TypeError, ValueError):
            return 'UTF-8'

        # UTF-32 must be tested before UTF-16: the UTF-32 LE mark starts
        # with the same two bytes as the UTF-16 LE mark.
        bom_table = (
            (codecs.BOM_UTF32_LE, 'UTF-32'),
            (codecs.BOM_UTF32_BE, 'UTF-32'),
            (codecs.BOM_UTF8, 'UTF-8-SIG'),
            (codecs.BOM_UTF16_LE, 'UTF-16'),
            (codecs.BOM_UTF16_BE, 'UTF-16'),
        )
        for bom, name in bom_table:
            if raw_data.startswith(bom):
                return name

        try:
            # final=False tolerates a multi-byte character cut in half at
            # the end of the sample.
            codecs.getincrementaldecoder('utf-8')().decode(raw_data, final=False)
            return 'UTF-8'
        except UnicodeDecodeError:
            pass

        try:
            raw_data.decode('cp1252')
            return 'Windows-1252'
        except UnicodeDecodeError:
            # ISO-8859-1 maps every byte value, so decoding cannot fail.
            return 'ISO-8859-1'

    @staticmethod
    def validate_encoding(encoding: str) -> bool:
        """Check if encoding is valid and supported.

        Returns:
            ``True`` if ``'test'`` can be encoded with ``encoding``;
            ``False`` if the name is unknown, is not a string, or the codec
            rejects the text.
        """
        try:
            'test'.encode(encoding)
            return True
        except (LookupError, TypeError, ValueError):
            return False


class SettingsValidator:
    """Validate settings dictionaries for consistency."""

    @staticmethod
    def validate_geometry_settings(geom_settings: Dict[str, Any],
                                   available_headers: list) -> Tuple[bool, list]:
        """Validate geometry configuration.

        Checks ``geom_settings['type']`` (default ``'none'``) and, depending
        on the type, that the ``x_field``/``y_field`` or ``wkt_field``
        entries are set and present in ``available_headers``.

        Args:
            geom_settings: Geometry settings dictionary.
            available_headers: Column names of the file; ``None`` is treated
                as an empty list.

        Returns:
            ``(is_valid, errors)`` where ``errors`` is a list of messages
            and ``is_valid`` is ``True`` exactly when that list is empty.
        """
        errors = []
        headers = available_headers if available_headers is not None else []

        geom_type = geom_settings.get('type', 'none')

        if geom_type not in ['point', 'wkt', 'none']:
            errors.append(f"Invalid geometry type: {geom_type}")
            return False, errors

        if geom_type == 'point':
            x_field = geom_settings.get('x_field')
            y_field = geom_settings.get('y_field')

            if not x_field:
                errors.append("X field is required for point geometry")
            elif x_field not in headers:
                errors.append(f"X field '{x_field}' not found in headers")

            if not y_field:
                errors.append("Y field is required for point geometry")
            elif y_field not in headers:
                errors.append(f"Y field '{y_field}' not found in headers")

        elif geom_type == 'wkt':
            wkt_field = geom_settings.get('wkt_field')

            if not wkt_field:
                errors.append("WKT field is required for WKT geometry")
            elif wkt_field not in headers:
                errors.append(f"WKT field '{wkt_field}' not found in headers")

        return len(errors) == 0, errors

    @staticmethod
    def validate_crs_settings(crs_settings: Dict[str, Any]) -> Tuple[bool, list]:
        """Validate CRS configuration.

        Requires ``source_authid`` to be a string containing ``':'``. When
        ``source_is_projected`` is truthy, an optional ``source_utm_zone``
        must lie in 1-60 and an optional ``source_utm_hemisphere`` must be
        ``'N'`` or ``'S'`` (case-insensitive). Values of the wrong type are
        reported as validation errors instead of raising.

        Args:
            crs_settings: CRS settings dictionary.

        Returns:
            ``(is_valid, errors)`` where ``errors`` is a list of messages
            and ``is_valid`` is ``True`` exactly when that list is empty.
        """
        errors = []

        source_authid = crs_settings.get('source_authid')

        if not source_authid:
            errors.append("Source CRS is required")
        elif not isinstance(source_authid, str) or ':' not in source_authid:
            errors.append(f"Invalid CRS format: {source_authid}")

        # Validate UTM settings if projected
        if crs_settings.get('source_is_projected'):
            zone = crs_settings.get('source_utm_zone')
            hemisphere = crs_settings.get('source_utm_hemisphere')

            if zone is not None:
                try:
                    zone_in_range = 1 <= zone <= 60
                except TypeError:
                    # e.g. a string taken straight from a text widget
                    zone_in_range = False
                if not zone_in_range:
                    errors.append(f"Invalid UTM zone: {zone} (expected 1-60)")

            if hemisphere is not None:
                hemisphere_ok = (
                    isinstance(hemisphere, str)
                    and hemisphere.upper() in ['N', 'S']
                )
                if not hemisphere_ok:
                    errors.append(f"Invalid hemisphere: {hemisphere} (expected N or S)")

        return len(errors) == 0, errors

class PathHelper:
    """Small helpers for working with file system paths."""

    @staticmethod
    def normalize_filepath(filepath: str) -> str:
        """Return ``filepath`` as an absolute path with ``.`` and ``..`` collapsed."""
        import os

        absolute = os.path.abspath(filepath)
        return os.path.normpath(absolute)

    @staticmethod
    def extract_layer_name(filepath: str) -> str:
        """Return the file name of ``filepath`` with its last extension removed."""
        import os

        return os.path.splitext(os.path.basename(filepath))[0]