# -*- coding: utf-8 -*-
from qgis.PyQt.QtCore import QCoreApplication, QVariant
from qgis.core import (
    QgsProcessing, QgsProcessingAlgorithm,
    QgsProcessingParameterFile, QgsProcessingParameterEnum,
    QgsProcessingParameterBoolean, QgsProcessingParameterString,
    QgsProcessingParameterVectorLayer,
    QgsVectorLayer, QgsProject, QgsLayerTreeLayer,
    QgsFeature, QgsField, QgsFields, QgsGeometry, QgsPointXY
)
from qgis import processing
import os, zipfile, tempfile, shutil, glob, csv, io, unicodedata

# User-facing help text (Japanese) for the algorithm; AutoGridAlg.shortHelpString()
# passes it through tr() and returns it.  The literal is runtime text, so it
# must be kept exactly as written.
HELP = """
このツールは、e-Statからダウンロードした地域メッシュ統計（ZIP形式）をまとめてインポートし、
秘匿処理された値を自動的に合算して1つのメッシュデータに統合するための処理アルゴリズムです。

【主な機能】
- 複数のZIPファイルをフォルダごと指定して一括インポート（解凍不要）
- メッシュコードと秘匿先コードを自動判定し、秘匿合算を反映
- メッシュポリゴンを自動生成（1km / 500m / 250m に対応）
- 統合メッシュに、各グループの元行数（SRC_COUNT）を付与
- 【オプション】行政区域ポリゴンと交差するメッシュのみを抽出可能

【使い方】
1. e-Statから対象となる地域メッシュ統計をZIPのままダウンロードし、1つのフォルダにまとめます。
2. 本アルゴリズムを開き、「ZIPを含むフォルダ」を指定します。
3. （必要に応じて）「行政区域で抽出する」にチェックを入れ、行政区域レイヤを選択します。
   - レイヤでポリゴンを選択していれば、その選択範囲のみ抽出できます。
4. 実行すると、統合済みメッシュレイヤが生成され、属性に秘匿合算済みの値が反映されます。
"""

def mesh_bbox(code: str):
    """Return the lon/lat bounding box described by a grid-square (mesh) code.

    Non-digit characters in ``code`` are ignored.  The first eight digits
    locate a base cell of 0.5' latitude by 0.75' longitude.  An optional 9th
    and 10th digit each pick one quadrant of the current cell
    (1 = south-west, 2 = south-east, 3 = north-west, 4 = north-east) and
    halve the cell size.  Digits after the 10th are ignored.

    Args:
        code: Mesh code; any value accepted by ``str()``.

    Returns:
        Tuple ``(lon_west, lat_south, lon_east, lat_north)`` in degrees.

    Raises:
        ValueError: If fewer than eight digits are present, or if a
            subdivision digit (9th/10th) is not in the range 1-4.
    """
    c = ''.join(ch for ch in str(code) if ch.isdigit())
    if len(c) < 8:
        raise ValueError('mesh code length must be >= 8')

    p = int(c[0:2])
    q = int(c[2:4])
    a, b, x, y = (int(d) for d in c[4:8])

    # South-west corner, accumulated level by level.
    lat0 = p / 1.5
    lon0 = q + 100.0
    lat0 += a * (5.0 / 60.0)
    lon0 += b * (7.5 / 60.0)
    lat0 += x * (0.5 / 60.0)
    lon0 += y * (0.75 / 60.0)

    # Size of the 8-digit cell.
    dlat = 0.5 / 60.0
    dlon = 0.75 / 60.0

    # Each subdivision digit moves the corner into one quadrant and halves
    # the cell.  Anything outside 1-4 is rejected instead of being silently
    # treated as the south-west quadrant.
    for digit in c[8:10]:
        quadrant = int(digit)
        if not 1 <= quadrant <= 4:
            raise ValueError('mesh subdivision digit must be in 1-4')
        if quadrant in (2, 4):
            lon0 += dlon / 2.0
        if quadrant in (3, 4):
            lat0 += dlat / 2.0
        dlat /= 2.0
        dlon /= 2.0

    return (lon0, lat0, lon0 + dlon, lat0 + dlat)

def rect_geom(lon_w, lat_s, lon_e, lat_n):
    """Build a rectangular polygon geometry from west/south/east/north bounds.

    The ring starts at the south-west corner, visits south-east, north-east
    and north-west, and is closed by repeating the south-west corner.
    """
    corners = (
        (lon_w, lat_s),
        (lon_e, lat_s),
        (lon_e, lat_n),
        (lon_w, lat_n),
    )
    ring = [QgsPointXY(lon, lat) for lon, lat in corners]
    ring.append(QgsPointXY(lon_w, lat_s))
    return QgsGeometry.fromPolygonXY([ring])

def clean_and_names(path, encoding, delimiter):
    """Split a two-header-row delimited file into a clean table and a name map.

    Row 1 is taken as the column codes and row 2 as their labels.  A copy of
    the file without row 2 is written next to the input as
    ``<path>.clean.csv`` using the same encoding and delimiter.

    Args:
        path: Path of the source csv/txt file.
        encoding: Text encoding used for both reading and writing.
        delimiter: Single-character field delimiter.

    Returns:
        ``(cleaned_path, name_map)`` where ``name_map`` maps each row-1 code
        to ``"<code>_<label>"`` (or to the bare code when its label is blank
        or missing).  For a file with no rows, ``(path, {})`` is returned and
        nothing is written.
    """
    with open(path, "r", encoding=encoding, newline="") as src:
        text = src.read()

    # A byte-order mark decodes to a leading U+FEFF, which would otherwise
    # become part of the first column code (and of its name_map key).
    if text.startswith("\ufeff"):
        text = text[1:]

    rows = list(csv.reader(io.StringIO(text), delimiter=delimiter))
    name_map = {}
    if not rows:
        return path, name_map

    header = rows[0]
    labels = rows[1] if len(rows) > 1 else []
    for i, code in enumerate(header):
        label = labels[i].strip() if i < len(labels) else ""
        name_map[code] = f"{code}_{label}" if label else code

    cleaned = path + ".clean.csv"
    with open(cleaned, "w", encoding=encoding, newline="") as out:
        writer = csv.writer(out, delimiter=delimiter)
        writer.writerow(header)
        writer.writerows(rows[2:])  # row 2 (labels) is dropped
    return cleaned, name_map

def ascii_digits(s):
    """Return the digit characters of ``str(s)`` rewritten as ASCII ``0``-``9``.

    Characters for which ``str.isdigit()`` is false are dropped.  Each
    remaining character is replaced by its ``unicodedata.digit`` value; if
    no such value exists, the character is kept only when it already is an
    ASCII digit.
    """
    pieces = []
    for ch in str(s):
        if not ch.isdigit():
            continue
        value = unicodedata.digit(ch, None)
        if value is not None:
            pieces.append(str(value))
        elif '0' <= ch <= '9':
            pieces.append(ch)
    return ''.join(pieces)

class AutoGridAlg(QgsProcessingAlgorithm):
    """Bulk-import zipped mesh statistics tables and merge them into one layer.

    For every ZIP found in a folder the first ``.csv``/``.txt`` member is
    loaded.  Rows are grouped by their ``HTKSAKI`` value (falling back to the
    row's own mesh code), every non-key column is summed per group, a
    rectangle is built for each mesh code, rectangles are dissolved per group
    and the sums are joined onto the dissolved polygons.  The result can
    optionally be limited to features intersecting a polygon layer, and is
    added to the current project inside a layer-tree group.
    """

    PARAM_ZIP_FOLDER = 'ZIP_FOLDER'
    PARAM_RECURSIVE = 'RECURSIVE'
    PARAM_ENCODING = 'ENCODING'
    PARAM_DELIMITER = 'DELIMITER'
    PARAM_GROUP_NAME = 'GROUP_NAME'
    PARAM_KEY_FIELD = 'KEY_FIELD'
    # optional filter
    PARAM_FILTER_ENABLE = 'FILTER_ENABLE'
    PARAM_POLY_LAYER = 'POLY_LAYER'
    PARAM_USE_SELECTED = 'USE_SELECTED'

    # Columns that must exist in the input; they are never renamed or summed.
    CONTROL_FIELDS = ('HTKSYORI', 'HTKSAKI', 'GASSAN')

    def tr(self, text):
        """Translate ``text`` in the 'AutoGridAlg' context."""
        return QCoreApplication.translate('AutoGridAlg', text)

    def createInstance(self):
        """Return a fresh instance of this algorithm."""
        return AutoGridAlg()

    def flags(self):
        """Return the default flags plus ``FlagNoThreading``.

        processAlgorithm() adds the result layer to QgsProject and to the
        layer tree, so the algorithm must not run in a background thread.
        """
        return super().flags() | QgsProcessingAlgorithm.FlagNoThreading

    def name(self):
        """Return the algorithm id."""
        return 'autogrid_zip_secret_merge'

    def displayName(self):
        """Return the translated display name."""
        # Display name as requested
        return self.tr('e-Statメッシュ一括インポート（秘匿合算＋行政区域抽出）')

    def group(self):
        """Return the translated group name."""
        return self.tr('メッシュ統合インポート')

    def groupId(self):
        """Return the group id."""
        return 'mesh_union_import'

    def shortHelpString(self):
        """Return the translated module-level HELP text."""
        return self.tr(HELP)

    def initAlgorithm(self, config=None):
        """Declare the algorithm's input parameters."""
        self.addParameter(QgsProcessingParameterFile(self.PARAM_ZIP_FOLDER, self.tr('ZIPを含むフォルダ'), behavior=QgsProcessingParameterFile.Folder))
        self.addParameter(QgsProcessingParameterBoolean(self.PARAM_RECURSIVE, self.tr('サブフォルダも含める'), defaultValue=True))
        self.addParameter(QgsProcessingParameterEnum(self.PARAM_ENCODING, self.tr('文字コード'), options=['CP932', 'UTF-8'], defaultValue=0))
        self.addParameter(QgsProcessingParameterEnum(self.PARAM_DELIMITER, self.tr('区切り文字'), options=[', (カンマ)', '\t (タブ)'], defaultValue=0))
        self.addParameter(QgsProcessingParameterString(self.PARAM_KEY_FIELD, self.tr('メッシュコードのフィールド名（既定: KEY_CODE）'), defaultValue='KEY_CODE'))
        self.addParameter(QgsProcessingParameterString(self.PARAM_GROUP_NAME, self.tr('レイヤグループ名'), defaultValue='メッシュ統合インポート'))
        # filter options
        self.addParameter(QgsProcessingParameterBoolean(self.PARAM_FILTER_ENABLE, self.tr('【オプション】行政区域で抽出する'), defaultValue=False))
        self.addParameter(QgsProcessingParameterVectorLayer(self.PARAM_POLY_LAYER, self.tr('行政区域ポリゴンレイヤ（任意）'), types=[QgsProcessing.TypeVectorPolygon], optional=True))
        self.addParameter(QgsProcessingParameterBoolean(self.PARAM_USE_SELECTED, self.tr('選択フィーチャのみ使用（選択があれば）'), defaultValue=True))

    def _collect_zips(self, folder, recursive):
        """Return the sorted list of ``*.zip`` paths under ``folder``.

        The folder is glob-escaped so that characters such as ``[`` or ``*``
        in its path are matched literally.  Sorting makes the processing
        order (and therefore the output column order) reproducible.
        """
        pat = '**/*.zip' if recursive else '*.zip'
        base = glob.escape(folder)
        return sorted(glob.glob(os.path.join(base, pat), recursive=recursive))

    def _extract_first_text(self, zip_path, tmpdir):
        """Extract the first ``.csv``/``.txt`` member of ``zip_path``.

        The member is written into a new private sub-directory of ``tmpdir``
        so that archives sharing an inner file name cannot overwrite each
        other.  Only the member's base name is used for the output path.

        Returns:
            Path of the extracted file, or ``None`` if the archive has no
            matching member.
        """
        with zipfile.ZipFile(zip_path, 'r') as z:
            for name in z.namelist():
                if not name.lower().endswith(('.csv', '.txt')):
                    continue
                dest_dir = tempfile.mkdtemp(dir=tmpdir)
                out_path = os.path.join(dest_dir, os.path.basename(name))
                with z.open(name) as src, open(out_path, 'wb') as dst:
                    shutil.copyfileobj(src, dst)
                return out_path
        return None

    def _apply_names_as_fields(self, src_layer, name_map, key_field_names):
        """Copy ``src_layer`` into a memory table with renamed fields.

        Fields listed in ``key_field_names`` keep their name; every other
        field is renamed through ``name_map`` (unchanged when absent).
        Field types and attribute values are copied as they are.
        """
        fields = QgsFields()
        for f in src_layer.fields():
            nm = f.name()
            newn = nm if nm in key_field_names else name_map.get(nm, nm)
            fields.append(QgsField(newn, f.type()))

        out = QgsVectorLayer('None', 'renamed', 'memory')
        out_dp = out.dataProvider()
        out_dp.addAttributes(fields)
        out.updateFields()

        feats = []
        for ft in src_layer.getFeatures():
            newf = QgsFeature(out.fields())
            # Attribute order is unchanged; only the field names differ.
            newf.setAttributes(ft.attributes())
            feats.append(newf)
        out_dp.addFeatures(feats)
        return out

    def _append_features(self, assembled, src_layer):
        """Append the rows of ``src_layer`` to ``assembled``, aligned by field name.

        When both tables have the same field names in the same order the
        features are added directly.  Otherwise fields missing from
        ``assembled`` are added first and each row is re-ordered to match,
        leaving columns the source does not have empty.
        """
        provider = assembled.dataProvider()
        target_names = [f.name() for f in assembled.fields()]
        src_names = [f.name() for f in src_layer.fields()]
        if src_names == target_names:
            provider.addFeatures(list(src_layer.getFeatures()))
            return

        known = set(target_names)
        extra = [QgsField(f.name(), f.type()) for f in src_layer.fields() if f.name() not in known]
        if extra:
            provider.addAttributes(extra)
            assembled.updateFields()

        position = {f.name(): idx for idx, f in enumerate(assembled.fields())}
        width = len(assembled.fields())
        aligned = []
        for ft in src_layer.getFeatures():
            values = [None] * width
            for nm, val in zip(src_names, ft.attributes()):
                values[position[nm]] = val
            newf = QgsFeature(assembled.fields())
            newf.setAttributes(values)
            aligned.append(newf)
        provider.addFeatures(aligned)

    def processAlgorithm(self, parameters, context, feedback):
        """Run the import, merge, optional filter, and project registration.

        Returns:
            ``{'OUTPUT': <layer id>}`` on success, or ``{}`` after reporting
            an error through ``feedback`` (or when the run is cancelled).
        """
        folder = self.parameterAsFile(parameters, self.PARAM_ZIP_FOLDER, context)
        recursive = self.parameterAsBoolean(parameters, self.PARAM_RECURSIVE, context)
        enc = ['CP932', 'UTF-8'][self.parameterAsEnum(parameters, self.PARAM_ENCODING, context)]
        delim = ',' if self.parameterAsEnum(parameters, self.PARAM_DELIMITER, context) == 0 else '\t'
        key_field = self.parameterAsString(parameters, self.PARAM_KEY_FIELD, context)
        group_name = self.parameterAsString(parameters, self.PARAM_GROUP_NAME, context)
        filter_enable = self.parameterAsBoolean(parameters, self.PARAM_FILTER_ENABLE, context)
        poly_layer = self.parameterAsVectorLayer(parameters, self.PARAM_POLY_LAYER, context)
        use_selected = self.parameterAsBoolean(parameters, self.PARAM_USE_SELECTED, context)

        zips = self._collect_zips(folder, recursive)
        if not zips:
            feedback.reportError(self.tr('ZIPが見つかりません。フォルダと再帰設定を確認してください。'))
            return {}

        key_names = {key_field, *self.CONTROL_FIELDS}
        tmpdir = tempfile.mkdtemp(prefix='estat_zip_')
        assembled = None

        try:
            for i, zp in enumerate(zips, start=1):
                if feedback.isCanceled():
                    return {}
                zip_name = os.path.basename(zp)
                # Translate the template first, then fill in the values.
                feedback.pushInfo(self.tr('[%d/%d] %s を展開中…') % (i, len(zips), zip_name))

                # A broken archive only skips that archive, like the other
                # per-file failures below.
                try:
                    raw = self._extract_first_text(zp, tmpdir)
                except (zipfile.BadZipFile, OSError) as err:
                    feedback.reportError(self.tr('ZIPを展開できません: ') + '%s (%s)' % (zip_name, err))
                    continue
                if not raw:
                    feedback.reportError(self.tr('テキストが見つかりません: ') + zip_name)
                    continue

                try:
                    cleaned, name_map = clean_and_names(raw, enc, delim)
                except (UnicodeError, csv.Error) as err:
                    feedback.reportError(self.tr('テキストを読み込めません（文字コード・区切り文字を確認してください）: ') + '%s (%s)' % (zip_name, err))
                    continue

                uri = 'file:///%s?encoding=%s&delimiter=%s&detectTypes=yes' % (cleaned.replace('\\', '/'), enc, delim)
                tlyr = QgsVectorLayer(uri, os.path.splitext(os.path.basename(cleaned))[0], 'delimitedtext')
                if not tlyr.isValid():
                    feedback.reportError(self.tr('読み込み失敗: ') + cleaned)
                    continue

                # Convert to memory with new field names
                tlyr_named = self._apply_names_as_fields(tlyr, name_map, key_names)

                if assembled is None:
                    assembled = QgsVectorLayer('None', '統合テーブル', 'memory')
                    pr = assembled.dataProvider()
                    pr.addAttributes(tlyr_named.fields())
                    assembled.updateFields()

                self._append_features(assembled, tlyr_named)
                feedback.setProgress(int(100 * i / len(zips)))

            if assembled is None:
                feedback.reportError(self.tr('有効なテーブルが作成できませんでした。'))
                return {}

            names = [f.name() for f in assembled.fields()]
            for must in self.CONTROL_FIELDS:
                if must not in names:
                    feedback.reportError(self.tr('必須列が見つかりません: ') + must)
                    return {}
            if key_field not in names:
                feedback.reportError(self.tr('メッシュコード列が見つかりません: ') + key_field)
                return {}

            # Every column that is not a key/control column is summed.
            num_cols = [nm for nm in names if nm not in key_names]

            # Group rows by HTKSAKI (or by their own code when it is empty),
            # comparing codes after digit normalization.
            group_sums = {}
            group_counts = {}
            key_to_group = {}
            for f in assembled.getFeatures():
                code = ascii_digits(f[key_field])
                gto = ascii_digits(f['HTKSAKI'])
                gid = gto if gto else code
                key_to_group[code] = gid
                bucket = group_sums.setdefault(gid, {c: 0.0 for c in num_cols})
                for c in num_cols:
                    v = f[c]
                    try:
                        bucket[c] += float(v) if v not in (None, '') else 0.0
                    except (TypeError, ValueError):
                        # Non-numeric cells contribute nothing to the sum.
                        pass
                group_counts[gid] = group_counts.get(gid, 0) + 1

            # Polygons
            poly = QgsVectorLayer('Polygon?crs=EPSG:4326', 'mesh_polys', 'memory')
            pr = poly.dataProvider()
            fields = QgsFields()
            fields.append(QgsField('KEY_CODE', QVariant.String))
            fields.append(QgsField('GROUP_ID', QVariant.String))
            pr.addAttributes(fields)
            poly.updateFields()

            feats = []
            skipped = 0
            for code, gid in key_to_group.items():
                try:
                    w, s, e, n = mesh_bbox(code)
                except ValueError:
                    skipped += 1
                    continue
                ft = QgsFeature(fields)
                ft.setGeometry(rect_geom(w, s, e, n))
                ft.setAttributes([code, gid])
                feats.append(ft)
            if skipped:
                feedback.pushInfo(self.tr('無効なメッシュコードをスキップしました（件数）: ') + str(skipped))
            if not feats:
                feedback.reportError(self.tr('有効なメッシュコードがありません。'))
                return {}
            pr.addFeatures(feats)
            poly.updateExtents()

            dis = processing.run('native:dissolve', {'INPUT': poly, 'FIELD': ['GROUP_ID'], 'SEPARATE_DISJOINT': False, 'OUTPUT': 'memory:'})['OUTPUT']
            dis.setName(self.tr('統合メッシュ（秘匿合算）'))

            # Sum table with SRC_COUNT and Japanese column names
            sum_layer = QgsVectorLayer('None', 'group_sums', 'memory')
            pr2 = sum_layer.dataProvider()
            sfields = QgsFields()
            sfields.append(QgsField('GROUP_ID', QVariant.String))
            sfields.append(QgsField('SRC_COUNT', QVariant.Int))
            for c in num_cols:
                sfields.append(QgsField(c, QVariant.Double))
            pr2.addAttributes(sfields)
            sum_layer.updateFields()

            rows = []
            for gid, d in group_sums.items():
                cnt = group_counts.get(gid, 0)
                vals = [d.get(c, 0.0) for c in num_cols]
                ft = QgsFeature(sfields)
                ft.setAttributes([gid, cnt] + vals)
                rows.append(ft)
            pr2.addFeatures(rows)
            sum_layer.updateExtents()

            out = processing.run('native:joinattributestable',
                                 {'INPUT': dis, 'FIELD': 'GROUP_ID', 'INPUT_2': sum_layer, 'FIELD_2': 'GROUP_ID',
                                  'FIELDS_TO_COPY': [], 'METHOD': 1, 'DISCARD_NONMATCHING': False, 'PREFIX': '', 'OUTPUT': 'memory:'})['OUTPUT']

            # remove GROUP_ID_2 if present
            names_out = [f.name() for f in out.fields()]
            if 'GROUP_ID_2' in names_out:
                out = processing.run('native:deletecolumn', {'INPUT': out, 'COLUMN': ['GROUP_ID_2'], 'OUTPUT': 'memory:'})['OUTPUT']

            # Optional polygon filter
            if filter_enable and poly_layer is not None:
                src = poly_layer
                if use_selected and src.selectedFeatureCount() > 0:
                    src = processing.run('native:saveselectedfeatures', {'INPUT': src, 'OUTPUT': 'memory:'})['OUTPUT']
                filtered = processing.run('native:extractbylocation', {
                    'INPUT': out,
                    'PREDICATE': [0],  # 0=intersects
                    'INTERSECT': src,
                    'OUTPUT': 'memory:'
                })['OUTPUT']
                filtered.setName(self.tr('統合メッシュ（秘匿合算・溶解｜行政区域で抽出）'))
                out = filtered
            else:
                if filter_enable:
                    # Filtering was requested but no polygon layer was given.
                    feedback.pushInfo(self.tr('行政区域レイヤが指定されていないため、抽出をスキップします。'))
                out.setName(self.tr('統合メッシュ（秘匿合算・溶解）'))

            # Register layers
            root = QgsProject.instance().layerTreeRoot()
            group = root.findGroup(group_name) or root.addGroup(group_name)
            QgsProject.instance().addMapLayer(out, False)
            group.insertChildNode(0, QgsLayerTreeLayer(out))

            return {'OUTPUT': out.id()}

        finally:
            # Best-effort cleanup of the extraction directory.
            shutil.rmtree(tmpdir, ignore_errors=True)
