import re
import numpy as np
from .jaro_winkler import jaro_winkler_similarity

class StringMatcher:
    """Placeholder class; it defines no attributes or methods of its own."""

    # NOTE(review): the comment that used to sit here ("Remove special
    # characters, numbers, file extension") appears to describe the
    # module-level remove_characters() function rather than this class.
    
    
def remove_characters(name):
    """Normalize a name by stripping its extension, digits and special characters.

    The trailing ``.<word>`` file extension is removed, then every character
    that is not an ASCII letter or a German umlaut/eszett is dropped, and the
    remainder is lower-cased.

    Args:
        name: The raw name. Falsy values (``None``, ``""``) are returned
            unchanged.

    Returns:
        The normalized, lower-cased name. If normalization would leave nothing
        (for example a purely numeric name), the original ``name`` is returned
        unchanged so callers never get an empty string for a non-empty input.
    """
    if not name:
        return name

    # Drop a trailing file extension such as ".shp" or ".geojson".
    without_extension = re.sub(r"\.\w+$", "", name)
    # Keep letters only; this also removes digits, so no separate digit pass
    # is needed.
    letters_only = re.sub(r"[^a-zA-ZäöüÄÖÜß]", "", without_extension)
    normalized_name = letters_only.lower()

    # Fall back to the untouched input: the stripped text is empty here, so
    # it cannot serve as the fallback value.
    return normalized_name if normalized_name else name


def find_best_matching_entity(layer_embedding, spatial_entity_embeddings):
    """Pick the entity whose embedding is most similar to ``layer_embedding``.

    Args:
        layer_embedding: Embedding to compare against every candidate.
        spatial_entity_embeddings: Mapping of entity key to embedding.

    Returns:
        A ``(key, similarity)`` tuple for the best candidate. The running best
        starts at 0, so only strictly positive similarities can win; when no
        candidate does, ``(None, 0)`` is returned.
    """
    top_key, top_score = None, 0

    for entity_key, entity_embedding in spatial_entity_embeddings.items():
        score = cosine_similarity(layer_embedding, entity_embedding)
        if not (score > top_score):
            continue

        top_key, top_score = entity_key, score
        if top_score >= 1:
            # A perfect score cannot be beaten, so stop scanning.
            break

    return top_key, top_score


def cosine_similarity(emb1, emb2):
    """Return the cosine similarity of two embeddings.

    Args:
        emb1: First embedding (anything ``np.dot`` / ``np.linalg.norm`` accept).
        emb2: Second embedding, compatible in shape with ``emb1``.

    Returns:
        ``dot(emb1, emb2) / (|emb1| * |emb2|)``, or ``0.0`` when either
        embedding has zero norm (the ratio is undefined there and would
        otherwise evaluate to NaN with a runtime warning).
    """
    dot_product = np.dot(emb1, emb2)
    norm_product = np.linalg.norm(emb1) * np.linalg.norm(emb2)
    if norm_product == 0:
        # A zero vector has no direction; treat it as "no similarity".
        return 0.0
    return dot_product / norm_product


def find_best_match_by_jarowinkler(layer_name: str, spatial_entities: dict, language: str = 'English') -> tuple[float, dict, list, list]:
    """
    Pure logic function to find the best matching entity using Jaro-Winkler similarity.

    The lower-cased layer name is split on non-word characters and underscores;
    each non-empty part, plus the whole lower-cased name, is scored against
    every entity via ``match_language``. The first entity reaching the highest
    score wins.

    Args:
        layer_name: Name of the layer to match.
        spatial_entities: Mapping whose ``'data'`` entry is an iterable of
            entities. The winning entity's ``'color'`` and ``'icons'`` entries
            are returned alongside it.
        language: Preferred language, one of ``'English'``, ``'German'`` or
            ``'French'``.

    Returns: (best_match_score, best_entity, colors, icons)
        When no candidate scores above 0 the result is ``(0, None, None, None)``.

    Raises:
        ValueError: If ``language`` is not one of the supported names.
    """
    language_codes = {'English': 'en', 'German': 'de', 'French': 'fr'}
    if language not in language_codes:
        # Fail early with a clear message instead of calling a None matcher.
        raise ValueError(
            f"Unsupported language {language!r}; expected one of {sorted(language_codes)}"
        )
    language_code = language_codes[language]

    lowered_name = layer_name.lower()
    # dict.fromkeys keeps first-seen order while dropping duplicates (the full
    # name equals its only part when there is no separator); empty parts come
    # from leading/trailing separators and carry nothing to match on.
    layer_name_parts = [
        part
        for part in dict.fromkeys([*re.split(r"[\W_]+", lowered_name), lowered_name])
        if part
    ]

    best_match_score = 0
    best_entity = None
    colors = None
    icons = None

    for layer_name_part in layer_name_parts:
        for entity in spatial_entities['data']:
            match = match_language(entity, layer_name_part, language_code)

            # Strict comparison: on ties the earliest candidate is kept.
            if match > best_match_score:
                best_match_score = match
                best_entity = entity
                colors = entity['color']
                icons = entity['icons']

    return best_match_score, best_entity, colors, icons


def match_language(entity, layer_name_part, language='en', cnt=0):
    """Score a layer-name part against an entity's names, with language fallback.

    The score for a language is the highest Jaro-Winkler similarity between
    the lower-cased ``layer_name_part`` and any lower-cased name in
    ``entity['name_<language>']``. While the score is below 0.5 and ``cnt`` is
    at most 2, the next language in the cycle en -> fr -> de -> en is tried
    with ``cnt`` incremented.

    Args:
        entity: Mapping with ``name_en`` / ``name_fr`` / ``name_de`` entries,
            each an iterable of name strings.
        layer_name_part: Text to compare against the entity names.
        language: Language code to start with (``'en'``, ``'fr'`` or ``'de'``).
        cnt: Number of fallback steps already taken.

    Returns:
        The first score of at least 0.5 found along the cycle. If none is
        found, the score of the last language visited, which is the starting
        language again when ``cnt`` starts at 0. A language with no names
        scores 0.0.

    Raises:
        KeyError: If ``entity`` lacks the name list for a visited language, or
            ``language`` is not part of the cycle when a fallback is needed.
    """
    # Fallback order when the current language scores poorly.
    next_language = {'en': 'fr', 'fr': 'de', 'de': 'en'}
    needle = layer_name_part.lower()
    # Per-call cache: the cycle can return to a language it already scored.
    scores = {}

    while True:
        if language not in scores:
            scores[language] = max(
                (
                    jaro_winkler_similarity(needle, name.lower())
                    for name in entity[f'name_{language}']
                ),
                default=0.0,  # an empty name list means "no similarity"
            )
        match = scores[language]

        # Stop on a good enough match or once the fallback budget is spent.
        if not (match < 0.5 and cnt <= 2):
            return match

        language = next_language[language]
        cnt += 1



