# -*- coding: utf-8 -*-
"""
translation_tester.py — version corrigée (2025)
Inclut normalisation des newlines pour éliminer les faux positifs TS.
"""

import re
import sys
import difflib
import datetime
from pathlib import Path
import xml.etree.ElementTree as ET
from html.parser import HTMLParser

# ⚠️ Remplacer par le nom réel de ton module
from .translate_ts_html_gui_API_google import get_full_text

# -------------------------------------------------------------------
#  LOAD VENDOR PACKAGES (html5lib)
# -------------------------------------------------------------------
# Make the third-party packages bundled with the plugin importable by putting
# the "vendor" directory (and its html5lib / webencodings sub-folders) on
# sys.path.
_vendor = Path(__file__).parent / "vendor"
if _vendor.exists():
    vendor_paths = [
        str(_vendor),
        str(_vendor / "html5lib"),
        str(_vendor / "webencodings")
    ]
    # Each missing entry is inserted at position 0, so entries added here end
    # up in front of the existing sys.path, in reverse order of the list above.
    for p in vendor_paths:
        if p not in sys.path:
            sys.path.insert(0, p)
else:
    # No vendor directory next to this file: report it in the QGIS message log.
    from qgis.core import QgsMessageLog, Qgis
    QgsMessageLog.logMessage("Vendor folder not found !", "PluginTranslator", Qgis.Critical)

# Immediate import check (reported in the QGIS message log)
try:
    # *** PyCharm warning can be ignored: html5lib is shipped with the plugin ***
    from .vendor import html5lib
except Exception as e:
    # The failure is only logged, not re-raised, so loading of this module
    # continues even when html5lib cannot be imported.
    from qgis.core import QgsMessageLog, Qgis
    QgsMessageLog.logMessage(
        f"CRITICAL — html5lib FAILED to import: {e}",
        "PluginTranslator",
        Qgis.Critical
    )


# =====================================================================
# 🔧 UTILITAIRES
# =====================================================================

# Placeholders of the form "%<digits>" (e.g. %1, %12) and the literal "%n".
PLACEHOLDER_RE = re.compile(r"%\d+|%n")


def extract_placeholders(text: str):
    """Return the distinct placeholders found in *text*, sorted.

    Args:
        text: String to scan; a falsy value (``None`` or ``""``) is accepted.

    Returns:
        A sorted list of unique placeholder strings (empty when none found).
    """
    if not text:
        return []
    found = {match.group(0) for match in PLACEHOLDER_RE.finditer(text)}
    return sorted(found)


def logical_lines(txt: str):
    """Split *txt* into its *logical* lines.

    The text is normalised as follows:
    - CRLF and lone CR line endings are treated like LF;
    - trailing whitespace is removed from every line;
    - blank lines at the very beginning and very end are dropped
      (typically XML indentation), blank lines in the middle are kept.

    Args:
        txt: Text to split; a falsy value (``None`` or ``""``) gives ``[]``.

    Returns:
        The list of normalised lines.
    """
    if not txt:
        return []

    unified = txt.replace("\r\n", "\n").replace("\r", "\n")
    rows = [row.rstrip() for row in unified.split("\n")]

    # Locate the first and last non-blank rows, then slice once.
    first = 0
    last = len(rows)
    while first < last and not rows[first].strip():
        first += 1
    while last > first and not rows[last - 1].strip():
        last -= 1

    return rows[first:last]


# =====================================================================
# LOGGING
# =====================================================================

# Callbacks registered to be notified of log lines.
_LOG_LISTENERS = set()


def add_log_listener(cb):
    """Register *cb* as a log listener; falsy values (e.g. ``None``) are ignored."""
    if not cb:
        return
    _LOG_LISTENERS.add(cb)


def remove_log_listener(cb):
    """Unregister *cb*; does nothing if it was not registered."""
    if cb in _LOG_LISTENERS:
        _LOG_LISTENERS.remove(cb)


def _log_file():
    """Return the path of the log file, creating its directory if needed.

    The file is ``traduction_test.log`` inside the ``Documents`` folder of the
    current user's home directory.
    """
    target_dir = Path.home().joinpath("Documents")
    target_dir.mkdir(parents=True, exist_ok=True)
    return target_dir.joinpath("traduction_test.log")


def _emit_log(msg: str):
    """Emit one timestamped log line to the log file, stdout and listeners.

    Logging is best-effort: a failure of any single sink is ignored so that
    it can neither stop the other sinks nor abort the calling test. Only
    ordinary errors are suppressed; ``KeyboardInterrupt`` and ``SystemExit``
    propagate normally.

    Args:
        msg: Message text, without timestamp or trailing newline.
    """
    ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{ts}] {msg}\n"

    # Sink 1: append to the log file.
    try:
        with _log_file().open("a", encoding="utf-8") as f:
            f.write(line)
    except Exception:
        # Unwritable or unresolvable log location: keep going with other sinks.
        pass

    # Sink 2: standard output. Messages contain non-ASCII symbols, which a
    # stdout using a legacy encoding cannot always encode.
    try:
        print(line, end="")
    except (UnicodeError, OSError):
        pass

    # Sink 3: registered callbacks. Iterate over a copy so a callback may
    # add or remove listeners while being notified.
    for cb in list(_LOG_LISTENERS):
        try:
            cb(line.rstrip("\n"))
        except Exception:
            # A faulty listener must not prevent the others from being called.
            pass


# =====================================================================
# ✔️ TEST TS (VERSION CORRIGÉE)
# =====================================================================

def _ts_check_line_count(label: str, p_lines, t_lines) -> int:
    """Log a logical line-count mismatch; return 1 if found, else 0."""
    if len(p_lines) == len(t_lines):
        return 0
    _emit_log(f"{label} ❌ Nb de lignes différent.")
    _emit_log(f"    pivot : {p_lines!r}")
    _emit_log(f"    trad  : {t_lines!r}")
    return 1


def _ts_check_plurals(label: str, nums_p, nums_t) -> int:
    """Compare <numerusform> elements pairwise; return the number of problems."""
    errors = 0

    if len(nums_p) != len(nums_t):
        _emit_log(f"{label} ❌ Nombre de pluriels différent.")
        errors += 1

    # Only the forms present on both sides can be compared.
    for j, (form_p, form_t) in enumerate(zip(nums_p, nums_t), start=1):
        p_lines = logical_lines(get_full_text(form_p))
        t_lines = logical_lines(get_full_text(form_t))

        ph_p = extract_placeholders("\n".join(p_lines))
        ph_t = extract_placeholders("\n".join(t_lines))
        if ph_p != ph_t:
            _emit_log(f"{label}[PLUR {j}] ❌ Placeholders diff : {ph_p} vs {ph_t}")
            errors += 1

        errors += _ts_check_line_count(f"{label}[PLUR {j}]", p_lines, t_lines)

    return errors


def _ts_check_singular(label: str, source_text: str, translation_elem) -> int:
    """Compare a source string with its <translation>; return the number of problems."""
    # An Element without child elements is falsy, so "elem or fallback" would
    # discard every ordinary <translation>text</translation>. Test explicitly
    # for None; a missing <translation> is treated as an empty translation.
    if translation_elem is not None:
        raw_tr = get_full_text(translation_elem)
    else:
        raw_tr = ""

    p_lines = logical_lines(source_text)
    t_lines = logical_lines(raw_tr)

    ph_p = extract_placeholders("\n".join(p_lines))
    ph_t = extract_placeholders("\n".join(t_lines))

    errors = 0
    # Every placeholder of the source must survive in the translation.
    for ph in ph_p:
        if ph not in ph_t:
            _emit_log(f"{label} ❌ Placeholder manquant : {ph}")
            errors += 1

    # Wording and case are expected to differ; only the structure is checked.
    errors += _ts_check_line_count(label, p_lines, t_lines)
    return errors


def _ts_check_message(index: int, mp, mt) -> int:
    """Check one pivot/translated <message> pair; return the number of problems."""
    label = f"[TS][MSG {index}]"

    # Both files must carry the same <source> for the pair to be comparable.
    src_p = (mp.findtext("source") or "").strip()
    src_t = (mt.findtext("source") or "").strip()
    if src_p != src_t:
        _emit_log(f"{label} ❌ Source différente.")
        _emit_log(f"    pivot : {src_p}")
        _emit_log(f"    trad  : {src_t}")
        return 1

    tp = mp.find("translation")
    tt = mt.find("translation")
    nums_p = tp.findall("numerusform") if tp is not None else []
    nums_t = tt.findall("numerusform") if tt is not None else []

    if nums_p or nums_t:
        # Plural message: pivot numerus forms vs translated numerus forms.
        return _ts_check_plurals(label, nums_p, nums_t)

    # Singular message: the pivot <source> is the reference.
    return _ts_check_singular(label, src_p, tt)


def run_ts_test(pivot_ts: Path, translated_ts: Path, progress_cb=None):
    """Check a translated TS file against its pivot TS file and log the result.

    Messages are paired by position (n-th <message> of the pivot with n-th
    <message> of the translation, across all <context> elements). For each
    pair the function checks that:
    - both <source> texts are identical;
    - plural messages have the same number of <numerusform> entries, and each
      pair of forms has the same placeholders and the same number of logical
      lines;
    - singular messages keep every placeholder of the source and have the
      same number of logical lines as the source.

    All findings are reported through ``_emit_log``; nothing is returned.

    Args:
        pivot_ts: Path of the reference TS file.
        translated_ts: Path of the translated TS file.
        progress_cb: Optional callable receiving an integer percentage (0-100).
    """
    _emit_log("=" * 70)
    _emit_log(f"[TS] Pivot       : {pivot_ts}")
    _emit_log(f"[TS] Traduction  : {translated_ts}")

    if progress_cb:
        progress_cb(0)

    try:
        tree_pivot = ET.parse(pivot_ts)
        tree_trans = ET.parse(translated_ts)
    except Exception as e:
        _emit_log(f"[TS][ERREUR] Lecture XML : {e}")
        if progress_cb:
            progress_cb(100)
        return

    msgs_p = [m for c in tree_pivot.getroot().findall("context") for m in c.findall("message")]
    msgs_t = [m for c in tree_trans.getroot().findall("context") for m in c.findall("message")]

    if len(msgs_p) != len(msgs_t):
        _emit_log(f"[TS][WARNING] Nombre de messages différent pivot={len(msgs_p)}, trad={len(msgs_t)}")

    # Only the messages present in both files are compared.
    n = min(len(msgs_p), len(msgs_t))
    errors = 0

    for i, (mp, mt) in enumerate(zip(msgs_p, msgs_t), start=1):
        errors += _ts_check_message(i, mp, mt)
        # Progress is reported for every message, including mismatching ones.
        if progress_cb:
            progress_cb(int(100 * i / n))

    if errors == 0:
        _emit_log("[TS] ✓ Aucun problème détecté.")
    else:
        _emit_log(f"[TS] ❌ {errors} problème(s) détecté(s).")

    if progress_cb:
        progress_cb(100)


# =====================================================================
# HTML TEST (inchangé)
# =====================================================================

class SimpleHTMLFingerprint(HTMLParser):
    """HTML parser that records a structural fingerprint of a document.

    Attributes are plain lists filled while parsing:
    - ``events``: one ``(kind, tag, attr_names)`` tuple per start/end tag,
      where ``kind`` is ``"start"`` or ``"end"``, ``tag`` is lower-cased and
      ``attr_names`` is the sorted tuple of attribute names (always empty for
      end tags);
    - ``texts``: the non-blank text chunks, stripped of surrounding whitespace.
    """

    def __init__(self):
        super().__init__(convert_charrefs=True)
        self.events = []
        self.texts = []

    def handle_starttag(self, tag, attrs):
        # Attribute values are ignored; only the attribute names are kept.
        attr_names = sorted(name for name, _value in attrs)
        self.events.append(("start", tag.lower(), tuple(attr_names)))

    def handle_endtag(self, tag):
        self.events.append(("end", tag.lower(), ()))

    def handle_data(self, data):
        stripped = data.strip()
        if stripped:
            self.texts.append(stripped)


def html_fingerprint(text: str):
    """Return the structural fingerprint of an HTML string.

    Parsing is best-effort: if the parser fails, whatever was collected up to
    that point is returned.

    Args:
        text: HTML markup to analyse.

    Returns:
        A ``(events, texts)`` tuple as collected by ``SimpleHTMLFingerprint``.
    """
    parser = SimpleHTMLFingerprint()
    try:
        parser.feed(text)
        # Flush data the parser may still be buffering (e.g. a trailing,
        # unfinished character reference) as if end-of-file was reached.
        parser.close()
    except Exception:
        # Keep the partial fingerprint rather than failing the whole test.
        pass
    return parser.events, parser.texts


def run_html_test(src: Path, translated: Path, progress_cb=None):
    """Run sanity checks on a translated HTML file and log the result.

    Steps:
    1. extract the visible text chunks of both files (BeautifulSoup, ignoring
       script/style/meta/link/noscript elements and comments);
    2. flag the translation when it has fewer than 90 % of the source's
       number of text chunks;
    3. flag the translation when it contains no ``<body`` tag;
    4. write a side-by-side HTML diff of both files into the user's
       ``Documents`` folder for manual inspection.

    All findings are reported through ``_emit_log``; nothing is returned.

    Args:
        src: Path of the source HTML file.
        translated: Path of the translated HTML file.
        progress_cb: Optional callable receiving an integer percentage (0-100).
    """

    def report(percent):
        # Forward progress only when a callback was supplied.
        if progress_cb:
            progress_cb(percent)

    _emit_log("=" * 70)
    _emit_log(f"[HTML] Source    : {src}")
    _emit_log(f"[HTML] Traduit   : {translated}")
    report(0)

    # Read both files; undecodable bytes are dropped.
    try:
        txt_src, txt_tr = (
            path.read_text(encoding="utf-8", errors="ignore")
            for path in (src, translated)
        )
    except Exception as exc:
        _emit_log(f"[HTML][ERREUR] Lecture : {exc}")
        report(100)
        return

    # --------------------------------------------
    # 1. Extract the visible text of both files
    # --------------------------------------------
    try:
        from bs4 import BeautifulSoup, Comment

        def visible_texts(html):
            soup = BeautifulSoup(html, "html.parser")
            for node in soup(["script","style","meta","link","noscript"]):
                node.decompose()
            for comment in soup.find_all(string=lambda s: isinstance(s, Comment)):
                comment.extract()
            chunks = []
            for piece in soup.stripped_strings:
                cleaned = piece.strip()
                if cleaned:
                    chunks.append(cleaned)
            return chunks

        src_texts = visible_texts(txt_src)
        tr_texts = visible_texts(txt_tr)

    except Exception as exc:
        _emit_log(f"[HTML][ERREUR] Analyse du texte : {exc}")
        report(100)
        return

    # --------------------------------------------
    # 2. Check: no major loss of text
    # --------------------------------------------
    src_count = len(src_texts)
    tr_count = len(tr_texts)
    if tr_count >= src_count * 0.9:
        _emit_log(f"[HTML] ✓ Quantité de texte OK : {tr_count} / {src_count}")
    else:
        _emit_log(f"[HTML] ❌ Texte visible perdu : {tr_count} / {src_count}")

    # --------------------------------------------
    # 3. Check: translated file still has a <body>
    # --------------------------------------------
    if "<body" in txt_tr.lower():
        _emit_log("[HTML] ✓ Structure minimale OK")
    else:
        _emit_log("[HTML] ❌ Le HTML traduit semble corrompu (pas de <body>)")

    # --------------------------------------------
    # 4. Visual HTML diff (for human inspection)
    # --------------------------------------------
    try:
        out_dir = Path.home() / "Documents"
        out_dir.mkdir(exist_ok=True)
        out_file = out_dir / f"html_diff_{src.stem}_{translated.stem}.html"

        diff_page = difflib.HtmlDiff().make_file(
            txt_src.splitlines(),
            txt_tr.splitlines(),
            str(src),
            str(translated)
        )
        out_file.write_text(diff_page, encoding="utf-8")
        _emit_log(f"[HTML] 💡 Diff HTML visuel créé : {out_file}")

    except Exception as exc:
        _emit_log(f"[HTML][ERREUR] Diff HTML visuel : {exc}")

    report(100)

