# -*- coding: utf-8 -*-

"""
/***************************************************************************
 AMERTA
                                 A QGIS plugin
 Analisis Multi-kriteria Embung dan Rencana Tata Air
 Generated by Plugin Builder: http://g-sherman.github.io/Qgis-Plugin-Builder/
                              -------------------
        begin                : 2025-09-18
        copyright            : (C) 2025 by Badan Riset dan Inovasi Nasional
        email                : sitaranisafitri@gmail.com
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/
"""

__author__ = 'Sitarani Safitri, Orbita Roswintiarti, Okta Fajar Saputra, Galdita Aruba Chulafak, Gatot Nugroho, Wismu Sunarmodo, Kusumaning Ayu Dyah Sukowati, Hana Listi Fitriana'
__date__ = '2025-09-18'
__copyright__ = '(C) 2025 by Badan Riset dan Inovasi Nasional'

# This will get replaced with a git SHA1 when you do a git archive

__revision__ = '$Format:%H$'

import os, re, glob
import numpy as np
from osgeo import gdal

from qgis.PyQt.QtCore import QCoreApplication
from qgis.core import (
    QgsProcessing, QgsProcessingException, QgsFeatureSource,
    QgsProcessingAlgorithm, QgsProcessingParameterFile,
    QgsProcessingParameterString, QgsProcessingParameterNumber,
    QgsProcessingParameterBoolean, QgsProcessingParameterFeatureSource,
    QgsProcessingParameterRasterDestination, QgsProcessingParameterFileDestination,
    QgsProcessingParameterDefinition
)
import processing
import os
from qgis.PyQt.QtGui import QIcon
from qgis.PyQt.QtCore import QUrl

class ChirpsPCAAlgorithm(QgsProcessingAlgorithm):
    """Processing algorithm that runs a PCA over a stack of CHIRPS rasters.

    The algorithm reads GeoTIFFs from a folder, builds an
    observations-by-pixels matrix, standardises each pixel, performs an
    SVD-based PCA and writes two outputs: a raster holding the PC1 loadings
    (eigenvector) per pixel and a CSV holding the PC scores per observation.
    """

    # Parameter keys (used both to declare parameters and to read their values)
    INPUT_DIR     = 'INPUT_DIR'      # folder scanned for input GeoTIFFs
    AUTO_YEARS    = 'AUTO_YEARS'     # derive the year range from the matched filenames
    START_YEAR    = 'START_YEAR'     # first year of the analysis period
    END_YEAR      = 'END_YEAR'       # last year of the analysis period
    FILE_PREFIX   = 'FILE_PREFIX'    # filename part before the date token
    FILE_SUFFIX   = 'FILE_SUFFIX'    # filename part after the date token
    FLIP_PC1_POS  = 'FLIP_PC1_POS'   # hidden in UI (default True)
    CLIP          = 'CLIP'           # whether to clip the output raster with the AOI
    AOI           = 'AOI'            # optional polygon layer used as the clip mask
    OUTPUT_PC1    = 'OUTPUT_PC1'     # destination raster (PC1 loadings)
    OUTPUT_CSV    = 'OUTPUT_CSV'     # destination CSV (PC scores)

    # Boilerplate
    def tr(self, s):
        """Return *s* translated in the 'Processing' translation context."""
        translated = QCoreApplication.translate('Processing', s)
        return translated
    def name(self): return 'c_chirps_pca_variabilitas'
    def displayName(self):
        """Return the translated, human-readable algorithm title."""
        title = self.tr('CHIRPS – PCA Variability')
        return title
    def groupId(self): return 'A. RainPCA'
    def group(self):
        """Return the translated group name, derived from ``groupId()``."""
        group_identifier = self.groupId()
        return self.tr(group_identifier)
    def createInstance(self):
        """Return a fresh ``ChirpsPCAAlgorithm`` instance."""
        new_instance = ChirpsPCAAlgorithm()
        return new_instance
    def icon(self):
        """Return the algorithm icon loaded from ``rainpca.png`` next to this module."""
        module_dir = os.path.dirname(__file__)
        icon_path = os.path.join(module_dir, 'rainpca.png')
        return QIcon(icon_path)

    def shortHelpString(self):
        """Return the bilingual (Indonesian / English) help text for this algorithm.

        The text is passed through ``self.tr`` as one literal; it is user-facing
        runtime text, so it is left untouched here.
        """
        # NOTE(review): the help text and the code disagree in two places —
        # confirm which side is intended before changing either:
        #  * The filename example (`chirps-v2.0-2010_jawa.tif`) has only a year
        #    token, but `_list_chirps` only matches `<prefix>YYYY.MM<suffix>`.
        #  * The text says Start/End year act as optional filters when
        #    auto-detection is checked, but `processAlgorithm` replaces them with
        #    the min/max detected year in that case.
        return self.tr("""\
    🇮🇩 ID Modul ini menghitung indeks variabilitas curah hujan berbasis PCA (Principal Component Analysis) dari deret waktu CHIRPS.
    Algoritma menumpuk raster tahunan untuk tahun A–B, lalu menghitung PCA:
    • Keluaran raster = Eigenvector PC1 (pola spasial variabilitas utama).
    • Keluaran CSV   = Skor PC (time series), mis. nilai PC1 untuk setiap tahun.

    Alur pakai:
    1) Folder containing CHIRPS TIFFs: pilih folder yang berisi GeoTIFF CHIRPS tahunan (atau bulanan yang sudah diakumulasikan per tahun) untuk rentang A–B.
    2) Centang Detect years automatically from filenames bila tahun dapat dibaca dari nama berkas; jika dicentang, tahun dalam nama berkas dipakai dan Start/End year bertindak sebagai penyaring (opsional).
    3) Start year (A) & End year (B): tentukan periode analisis bila tidak otomatis.
    4) Filename prefix / suffix: bagian nama berkas sebelum & sesudah token tahun untuk mempermudah pencocokan. Contoh: `chirps-v2.0-2010_jawa.tif` → prefix=`chirps-v2.0-`, suffix=`_jawa.tif`.
    5) (Opsional) Clip output with AOI (polygon) → pilih poligon AOI untuk memotong hasil.
    6) Klik Run → hasil raster Eigenvector PCA PC1 dan CSV skor PC disimpan ke keluaran.

    ──────────────
            
    🌍 EN This module computes a rainfall variability index using PCA on CHIRPS time-series.
    Annual rasters from years A–B are stacked, then PCA is performed:
    • Raster output = PCA PC1 eigenvector (spatial pattern of dominant variability).
    • CSV output    = PC scores (time series), e.g., PC1 value for each year.

    Usage:
    1) Folder containing CHIRPS TIFFs: point to the directory containing annual CHIRPS GeoTIFFs (or monthly already aggregated to annual) for A–B.
    2) Check Detect years automatically from filenames if years can be parsed from filenames; when checked, parsed years are used and Start/End year act as optional filters.
    3) Set Start year (A) & End year (B) when not using auto-detection.
    4) Filename prefix / suffix: parts before/after the year token to help matching. Example: `chirps-v2.0-2010_jawa.tif` → prefix=`chirps-v2.0-`, suffix=`_jawa.tif`.
    5) (Optional) Clip output with AOI (polygon) to crop the results.
    6) Click Run → produces the PC1 eigenvector raster and a CSV of PC scores.""")

    # Parameters
    def initAlgorithm(self, config=None):
        """Register the algorithm's parameters and outputs, in UI order.

        Inputs: source folder, automatic-year switch, start/end year, filename
        prefix/suffix, a hidden PC1 sign switch, the clip switch and its
        optional AOI polygon layer. Outputs: the PC1 raster and the scores CSV.

        Args:
            config: Unused; accepted for interface compatibility.
        """
        add = self.addParameter
        tr = self.tr

        add(QgsProcessingParameterFile(
            self.INPUT_DIR,
            tr('Folder containing CHIRPS TIFFs'),
            behavior=QgsProcessingParameterFile.Folder,
        ))
        add(QgsProcessingParameterBoolean(
            self.AUTO_YEARS,
            tr('Detect years automatically from filenames'),
            defaultValue=True,
        ))

        # Analysis period (integer years).
        add(QgsProcessingParameterNumber(
            self.START_YEAR,
            tr('Start year (A)'),
            type=QgsProcessingParameterNumber.Integer,
            defaultValue=1981,
        ))
        add(QgsProcessingParameterNumber(
            self.END_YEAR,
            tr('End year (B)'),
            type=QgsProcessingParameterNumber.Integer,
            defaultValue=2024,
        ))

        # Filename parts surrounding the date token.
        add(QgsProcessingParameterString(
            self.FILE_PREFIX,
            tr('Filename prefix'),
            defaultValue='chirps-v2.0.',
        ))
        add(QgsProcessingParameterString(
            self.FILE_SUFFIX,
            tr('Filename suffix (e.g., _jawa.tif)'),
            defaultValue='_jawa.tif',
        ))

        # Hidden parameter: PC1 must be positive (default True).
        flip_param = QgsProcessingParameterBoolean(
            self.FLIP_PC1_POS, tr('PC1 harus positif'), defaultValue=True
        )
        hidden_flags = flip_param.flags() | QgsProcessingParameterDefinition.FlagHidden
        flip_param.setFlags(hidden_flags)
        add(flip_param)

        # Optional clipping: a switch plus an optional polygon layer.
        add(QgsProcessingParameterBoolean(
            self.CLIP, tr('Clip output with AOI (polygon)'), defaultValue=False
        ))
        aoi_param = QgsProcessingParameterFeatureSource(
            self.AOI,
            tr('AOI (polygon) for clipping'),
            [QgsProcessing.TypeVectorPolygon],
        )
        optional_flags = aoi_param.flags() | QgsProcessingParameterDefinition.FlagOptional
        aoi_param.setFlags(optional_flags)
        add(aoi_param)

        # Outputs.
        add(QgsProcessingParameterRasterDestination(
            self.OUTPUT_PC1, tr('Eigenvector PCA PC1')
        ))
        add(QgsProcessingParameterFileDestination(
            self.OUTPUT_CSV, tr('Output CSV of PC scores'), 'CSV files (*.csv)'
        ))

    # Helpers
    def _list_chirps(self, folder, prefix, suffix):
        pat = re.compile(r'^' + re.escape(prefix) + r'(\d{4})\.(\d{2})' + re.escape(suffix) + r'$')
        out, years = [], set()
        for path in sorted(glob.glob(os.path.join(folder, '*.tif'))):
            name = os.path.basename(path)
            m = pat.match(name)
            if not m: continue
            yy = int(m.group(1)); mm = int(m.group(2))
            years.add(yy)
            out.append((yy, mm, path))
        return sorted(out), sorted(years)

    def _read_array(self, tif_path):
        """Read band 1 of *tif_path* as float32 with negative values set to NaN.

        Args:
            tif_path: Path of the raster to open read-only with GDAL.

        Returns:
            A tuple ``(array, geotransform, projection)``.

        Raises:
            QgsProcessingException: If GDAL cannot open the file.
        """
        dataset = gdal.Open(tif_path, gdal.GA_ReadOnly)
        if dataset is None:
            raise QgsProcessingException(self.tr(f'Failed to open: {tif_path}'))

        geotransform = dataset.GetGeoTransform()
        projection = dataset.GetProjectionRef()

        band = dataset.GetRasterBand(1)
        data = band.ReadAsArray().astype(np.float32)
        # Treat values below zero as NoData (CHIRPS).
        negative = data < 0
        data[negative] = np.nan

        # Drop the GDAL handles to close the file.
        band = None
        dataset = None
        return data, geotransform, projection

    def _save_geotiff(self, out_path, array_f32, ref_path):
        """Write a 2-D array as a single-band Float32 GeoTIFF.

        The geotransform and projection are copied from *ref_path*. The band's
        NoData value is -9999.0, and NaN cells are written as that value so the
        masked pixels really are flagged as NoData (previously NaN was written
        while -9999.0 was declared, so the two never matched).

        Args:
            out_path: Destination GeoTIFF path.
            array_f32: 2-D array (rows, cols) to write; NaN marks missing cells.
            ref_path: Raster whose georeferencing is copied to the output.

        Raises:
            QgsProcessingException: If the reference raster cannot be opened or
                the output raster cannot be created.
        """
        nodata = -9999.0
        ref = gdal.Open(ref_path, gdal.GA_ReadOnly)
        if ref is None:
            raise QgsProcessingException(self.tr('Failed to open reference raster when writing output.'))

        dst = None
        band = None
        try:
            data = np.asarray(array_f32, dtype=np.float32)
            h, w = data.shape
            driver = gdal.GetDriverByName('GTiff')
            dst = driver.Create(out_path, w, h, 1, gdal.GDT_Float32,
                                options=['COMPRESS=LZW', 'TILED=YES', 'BIGTIFF=IF_SAFER'])
            if dst is None:
                # e.g. unwritable folder or invalid path
                raise QgsProcessingException(self.tr(f'Failed to create output raster: {out_path}'))
            dst.SetGeoTransform(ref.GetGeoTransform())
            dst.SetProjection(ref.GetProjectionRef())
            band = dst.GetRasterBand(1)
            band.SetNoDataValue(nodata)
            # Replace NaN with the declared NoData value before writing.
            band.WriteArray(np.where(np.isnan(data), np.float32(nodata), data))
            band.FlushCache()
        finally:
            # Release handles (band before dataset) so the file is flushed and
            # closed even when an error interrupts the write.
            band = None
            dst = None
            ref = None

    def _is_processing_temp(self, path: str) -> bool:
        if not isinstance(path, str): return False
        p = path.replace('\\', '/').lower()
        return '/appdata/local/temp/processing_' in p or '/tmp/processing_' in p

    # Core
    def processAlgorithm(self, parameters, context, feedback):
        """Run the PCA and write the PC1 raster and the PC-score CSV.

        Two modes are used depending on the selected files:

        * several years: the rasters of each year are averaged (NaN-aware)
          and each year is one observation;
        * a single year: each monthly raster is one observation.

        Pixels with NaN in any observation are excluded. Each remaining pixel
        is z-scored over the observations, an SVD gives the PC scores
        (``U * S``) and the PC1 loadings (first row of ``Vt``), and the PC1
        sign is optionally flipped so its mean loading is positive.

        Args:
            parameters: Raw Processing parameter dictionary.
            context: Processing context.
            feedback: Processing feedback used for messages, progress and
                cancellation.

        Returns:
            ``{OUTPUT_PC1: raster path, OUTPUT_CSV: csv path}``, or an empty
            dict if the user cancelled while rasters were being read.

        Raises:
            QgsProcessingException: On an invalid folder, no matching files,
                an invalid year range, fewer than two observations, rasters of
                different sizes, no valid pixels, or clipping without an AOI.
        """
        # Local import: only needed for the intermediate temp file below.
        from qgis.core import QgsProcessingUtils

        folder     = self.parameterAsFile(parameters, self.INPUT_DIR, context)
        auto_years = self.parameterAsBool(parameters, self.AUTO_YEARS, context)
        y0         = int(self.parameterAsInt(parameters, self.START_YEAR, context))
        y1         = int(self.parameterAsInt(parameters, self.END_YEAR, context))
        prefix     = self.parameterAsString(parameters, self.FILE_PREFIX, context) or ''
        suffix     = self.parameterAsString(parameters, self.FILE_SUFFIX, context) or '.tif'
        flip_pc1   = self.parameterAsBool(parameters, self.FLIP_PC1_POS, context)  # hidden, default True
        do_clip    = self.parameterAsBool(parameters, self.CLIP, context)
        out_pc1    = self.parameterAsOutputLayer(parameters, self.OUTPUT_PC1, context)
        out_csv    = self.parameterAsFileOutput(parameters, self.OUTPUT_CSV, context)

        if not os.path.isdir(folder):
            raise QgsProcessingException(self.tr('Invalid folder.'))

        # Fail fast: check the AOI before the expensive PCA, not after it.
        if do_clip and self.parameterAsSource(parameters, self.AOI, context) is None:
            raise QgsProcessingException(self.tr('AOI is required when clipping is enabled.'))

        # 1) Enumerate the input files.
        indexed, years_all = self._list_chirps(folder, prefix, suffix)
        if not indexed:
            raise QgsProcessingException(self.tr('No files matched the given prefix/suffix.'))
        if auto_years:
            y0, y1 = min(years_all), max(years_all)
        if y1 < y0:
            raise QgsProcessingException(self.tr('End year must be ≥ start year.'))
        sel = [(yy, mm, p) for (yy, mm, p) in indexed if y0 <= yy <= y1]
        if not sel:
            raise QgsProcessingException(self.tr('No files in the selected year range.'))

        # Mode: across years, or across months when only one year is selected.
        years_sorted = sorted({yy for (yy, _, _) in sel})
        single_year_mode = (len(years_sorted) == 1)

        # With one observation the sample standard deviation (ddof=1) is
        # undefined and the PCA would run on NaN values.
        n_obs = len(sel) if single_year_mode else len(years_sorted)
        if n_obs < 2:
            raise QgsProcessingException(
                self.tr('PCA needs at least two observations, but only one raster was selected.')
            )

        # The first selected raster defines the grid and the georeferencing.
        ref_path = sel[0][2]
        ref_arr, _, _ = self._read_array(ref_path)
        H, W = ref_arr.shape
        del ref_arr

        total = len(sel)
        done = 0
        observations = []   # one flattened [H*W] array per observation
        labels = []         # CSV row label per observation

        if not single_year_mode:
            feedback.pushInfo(self.tr(f'File selected: {len(sel)} (years {y0}–{y1}; {len(years_sorted)} years)'))
            label_header = 'year'
            no_valid_msg = self.tr('No valid pixels across all years.')

            paths_by_year = {yy: [] for yy in years_sorted}
            for (yy, _, p) in sel:
                paths_by_year[yy].append(p)

            # Reduce year by year so only one year's rasters are held in
            # memory at a time; the per-year mean is unchanged.
            for yy in years_sorted:
                monthly = []
                for p in paths_by_year[yy]:
                    if feedback.isCanceled():
                        return {}
                    monthly.append(self._read_grid(p, (H, W)))
                    done += 1
                    if done % 50 == 0 or done == total:
                        feedback.setProgress(int(100 * done / total))
                mean_yy = np.nanmean(np.stack(monthly, axis=0), axis=0)  # [H, W]
                observations.append(mean_yy.reshape(-1))
                labels.append(str(yy))
        else:
            year = years_sorted[0]
            sel_year = sorted((mm, p) for (yy, mm, p) in sel if yy == year)
            feedback.pushInfo(self.tr(f'Selected year {year}: {len(sel_year)} monthly rasters.'))
            label_header = 'month'
            no_valid_msg = self.tr('No valid pixels across all months.')

            for (mm, p) in sel_year:
                if feedback.isCanceled():
                    return {}
                observations.append(self._read_grid(p, (H, W)).reshape(-1))
                labels.append(f'{year}-{mm:02d}')
                done += 1
                feedback.setProgress(int(100 * done / total))

        # 2) PCA on the pixels that are valid in every observation.
        X = np.stack(observations, axis=0)            # [T, H*W]
        del observations
        valid_mask = ~np.isnan(X).any(axis=0)
        if not np.any(valid_mask):
            raise QgsProcessingException(no_valid_msg)
        scores, pc1_loadings, flipped = self._pca_pc1(
            X[:, valid_mask].astype(np.float64), flip_pc1
        )
        if flipped:
            feedback.pushInfo(self.tr('Flip PC1 sign → positive.'))

        # Put the loadings back on the full grid; excluded pixels stay NaN.
        pc1_grid = np.full((H * W,), np.nan, dtype=np.float32)
        pc1_grid[valid_mask] = pc1_loadings.astype(np.float32)
        pc1_grid = pc1_grid.reshape((H, W))

        # CSV: one row per observation, one column per component.
        n_pc = scores.shape[1]
        lines = [','.join([label_header] + [f'PC{i+1}' for i in range(n_pc)])]
        for label, row in zip(labels, scores):
            lines.append(','.join([label] + [f'{value:.6f}' for value in row]))

        # 3) Write the raster (optional clip, then copy to OUTPUT if needed).
        out_is_temp = self._is_processing_temp(out_pc1)
        out_dir = os.path.dirname(out_pc1)
        if out_is_temp:
            pc1_unclipped = os.path.join(out_dir, 'pc1_unclipped.tif')
        elif do_clip:
            # The clip writes to out_pc1, so the unclipped raster must be a
            # different file: GDAL cannot clip a raster onto itself.
            pc1_unclipped = QgsProcessingUtils.generateTempFilename('pc1_unclipped.tif')
        else:
            pc1_unclipped = out_pc1
        self._save_geotiff(pc1_unclipped, pc1_grid, ref_path)

        pc1_final_src = pc1_unclipped
        if do_clip:
            pc1_clip = os.path.join(out_dir, 'pc1_clip.tif') if out_is_temp else out_pc1

            # Pass the raw parameter value as the mask, not a feature source.
            processing.run(
                'gdal:cliprasterbymasklayer',
                {
                    'INPUT': pc1_unclipped,
                    'MASK': parameters[self.AOI],
                    'SOURCE_CRS': None,
                    'TARGET_CRS': None,
                    'NODATA': -9999.0,
                    'ALPHA_BAND': False,
                    'CROP_TO_CUTLINE': True,
                    'KEEP_RESOLUTION': True,
                    'SET_RESOLUTION': False,
                    'X_RESOLUTION': None,
                    'Y_RESOLUTION': None,
                    'MULTITHREADING': True,
                    'OPTIONS': 'COMPRESS=LZW',
                    'DATA_TYPE': 0,
                    'EXTRA': '',
                    'OUTPUT': pc1_clip
                },
                context=context,
                feedback=feedback,
                is_child_algorithm=True
            )
            pc1_final_src = pc1_clip

        if pc1_final_src != out_pc1:
            try:
                processing.run(
                    'gdal:translate',
                    {'INPUT': pc1_final_src, 'TARGET_CRS': None, 'NODATA': None, 'COPY_SUBDATASETS': False,
                     'OPTIONS': 'COMPRESS=LZW', 'EXTRA': '', 'OUTPUT': out_pc1},
                    context=context, feedback=feedback, is_child_algorithm=True
                )
            except Exception as exc:
                # Fallback: copy with GDAL directly. Re-reading through
                # _read_array would turn negative loadings into NaN, and
                # re-writing with ref_path would give a clipped raster the
                # unclipped georeferencing.
                feedback.reportError(
                    self.tr(f'gdal:translate failed ({exc}); copying the raster with GDAL instead.')
                )
                copied = gdal.Translate(out_pc1, pc1_final_src, creationOptions=['COMPRESS=LZW'])
                if copied is None:
                    raise QgsProcessingException(
                        self.tr(f'Failed to write output raster: {out_pc1}')
                    ) from exc
                copied = None  # close the dataset

        # 4) Write the CSV.
        with open(out_csv, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines))

        feedback.pushInfo(self.tr('Finished PCA (PC1 raster + PC scores CSV).'))
        return {self.OUTPUT_PC1: out_pc1, self.OUTPUT_CSV: out_csv}

    def _read_grid(self, tif_path, shape):
        """Read a raster with ``_read_array`` and require it to have *shape* (rows, cols)."""
        arr, _, _ = self._read_array(tif_path)
        if arr.shape != shape:
            raise QgsProcessingException(self.tr(
                f'Raster size mismatch: {tif_path} is {arr.shape[1]}x{arr.shape[0]}, '
                f'expected {shape[1]}x{shape[0]}.'
            ))
        return arr

    def _pca_pc1(self, Xv, flip_positive):
        """Z-score the columns of *Xv* [T, N], run an SVD and return ``(scores, pc1_loadings, flipped)``."""
        mean_cols = np.nanmean(Xv, axis=0, keepdims=True)
        std_cols = np.nanstd(Xv, axis=0, ddof=1, keepdims=True)
        std_cols[std_cols == 0] = 1.0   # constant pixels: avoid division by zero
        Z = (Xv - mean_cols) / std_cols

        U, S, Vt = np.linalg.svd(Z, full_matrices=False)
        scores = U * S
        pc1_loadings = Vt[0, :]

        # Flip loadings and scores together so their product is unchanged.
        flipped = bool(flip_positive and np.nanmean(pc1_loadings) < 0)
        if flipped:
            pc1_loadings = -pc1_loadings
            scores[:, 0] = -scores[:, 0]
        return scores, pc1_loadings, flipped
