import json
import os
from pathlib import Path

from PyQt5.QtCore import pyqtSignal, QObject
from qgis.core import QgsApplication

from .jaro_winkler import jaro_winkler_similarity
from .path_utils import load_entity_embedding_json
from .embedder_utils import request_embedding

# Directory that contains this module file.
PATH = Path(os.path.split(os.path.abspath(__file__))[0])

# Embedding endpoint URL and model identifier used by this module.
END_POINT = "https://api.llmhub.infs.ch/embedding"
LLM_MODEL = "intfloat/multilingual-e5-large-instruct"

# Human-readable note naming the endpoint the embeddings come from.
DESCRIPTION = dict(description="embeddings generated by " + END_POINT)


class Embedder(QObject):
    """Request name embeddings for layers and keep them in a JSON cache file.

    ``step_signal`` is emitted once for every layer visited by
    :meth:`name_embedder`, whether or not a new embedding had to be requested.
    """

    step_signal = pyqtSignal()

    # Minimum Jaro-Winkler similarity between a stored name and a layer's
    # normalized name for the stored embedding to be reused for that layer.
    _CACHE_MATCH_THRESHOLD = 0.95

    def __init__(self, api_key: str, layers):
        """Set up the embedder.

        Args:
            api_key: Key forwarded to the embedding request helper.
            layers: Iterable of layer objects; each is read through its
                ``normalized_name`` attribute and written through its
                ``embedding`` attribute.
        """
        super().__init__()
        self.api_key = api_key
        # Name -> embedding; this is the mapping written to the cache file.
        self.new_and_cached_embedding = {}
        self.cache_path = Path(
            QgsApplication.qgisSettingsDirPath()) / "aiamas_cache/llmhub_embeddings_cache.json"
        self.layers = layers

        # Mapping searched when assigning embeddings to layers by name.
        # NOTE(review): presumably name -> embedding shipped with the plugin;
        # confirm against load_entity_embedding_json.
        self.cache_and_entity_embeddings = load_entity_embedding_json(END_POINT, LLM_MODEL)

    def request_embedding(self, text):
        """Request an embedding for ``text`` and return the helper's result.

        The value is whatever the module-level ``request_embedding`` helper
        returns; its format is not defined in this file.
        """
        # The result must be returned: name_embedder stores it on the layer
        # and in the cache.
        return request_embedding(text=text, api_key=self.api_key)

    def name_embedder(self):
        """Request embeddings for all layers that have none, then save the cache."""
        for layer in self.layers:
            if layer.embedding is None:
                embedding = self.request_embedding(layer.normalized_name)
                self.new_and_cached_embedding[layer.normalized_name] = embedding
                layer.embedding = embedding
            self.step_signal.emit()
            QgsApplication.processEvents()  # Ensure GUI updates

        self.create_embedding_cache_json()

    def load_embedding_cache(self):
        """Load the cache file and assign matching embeddings to the layers.

        When the loaded cache is empty, the layers are left untouched.
        """
        self.new_and_cached_embedding = self.load_embedding_cache_json()
        # NOTE(review): this replaces the mapping assigned in __init__ from
        # load_entity_embedding_json instead of merging with it - confirm
        # that dropping those entries is intended.
        self.cache_and_entity_embeddings = self.new_and_cached_embedding.copy()
        if self.cache_and_entity_embeddings:
            for layer in self.layers:
                self.set_embedding_from_cache_for_layer(layer)

    def set_embedding_from_cache_for_layer(self, layer):
        """Assign the embedding of the most similar stored name to ``layer``.

        ``layer.embedding`` is reset to ``None`` first and stays ``None`` when
        no stored name reaches the similarity threshold.
        """
        layer.embedding = None

        def similarity_to_current_layer(name):
            return jaro_winkler_similarity(name, layer.normalized_name)

        best_matching_name = max(
            self.cache_and_entity_embeddings.keys(),
            default=None,
            key=similarity_to_current_layer,
        )
        # Nothing stored: there is no candidate to compare against.
        if best_matching_name is None:
            return
        if similarity_to_current_layer(best_matching_name) >= self._CACHE_MATCH_THRESHOLD:
            layer.embedding = self.cache_and_entity_embeddings[best_matching_name]

    def create_embedding_cache_json(self):
        """Write the current embeddings, tagged with endpoint and model, to the cache file."""
        data_with_description = {
            "_llm_url": END_POINT,
            "_llm_model": LLM_MODEL,
            "entity_embeddings": self.new_and_cached_embedding
        }
        # open(..., 'w') does not create missing directories.
        Path(self.cache_path).parent.mkdir(parents=True, exist_ok=True)
        with open(self.cache_path, 'w', encoding='utf-8') as file:
            json.dump(data_with_description, file, ensure_ascii=False)

    def delete_embedding_cache_json(self):
        """Remove the cache file; a missing file is not an error."""
        try:
            os.remove(self.cache_path)
        except FileNotFoundError:
            pass

    def load_embedding_cache_json(self):
        """Return the cached name -> embedding mapping, or ``{}``.

        An empty dict is returned when the file is missing, unreadable, not
        valid JSON, lacks the expected keys, or was written for a different
        endpoint or model.
        """
        if not os.path.exists(self.cache_path):
            return {}
        try:
            with open(self.cache_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except (OSError, ValueError):
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors;
            # an unreadable or corrupt cache is treated as empty.
            return {}

        if not isinstance(data, dict):
            return {}
        if data.get("_llm_url") != END_POINT or data.get("_llm_model") != LLM_MODEL:
            return {}
        embeddings = data.get("entity_embeddings")
        # Callers copy and iterate the result, so only a dict is acceptable.
        return embeddings if isinstance(embeddings, dict) else {}


