# -*- coding: utf-8 -*-
"""
MobiVista — Facebook Mobility Analyzer (with optional Synthetic OD)
QGIS Processing: Daily CSV Folder → Daily SHP or Monthly GPKG + Analysis & Visuals + (optional) OD

Features:
- Input a folder of daily CSVs (Meta Mobility).
- Pivot 4 categories (p0, p0_10, p10_100, p100p), optional normalization.
- Join to GADM via a chosen field (default GID_2); summaries & visuals ONLY for regions present in GADM.
- Outputs:
  * One SHP per day (to a folder), OR
  * A single monthly GeoPackage (one layer per day).
  * All generated layers are auto-added to the Layers panel.
- Visuals & insights:
  * monthly_line.png, monthly_stacked_area.png
  * monthly_heatmap.png (RdYlGn)
  * monthly_boxplot.png
  * weekend_weekday.png (distinct colors + value labels)
  * stability_topstd.png (+ stability_per_region.csv)
  * mobility_corr.png (category correlations)
  * hotspot_scatter.png (centroids; sensible aspect; value-based colormap)
  * top100_p100p.png (+ top_p100p_by_day.csv)
- Optional Synthetic OD:
  * Doubly-constrained gravity with distance-decay calibration to match Meta distance-bin shares per origin.
  * Outputs an OD CSV and a weighted OD lines layer (GPKG or SHP depending on output mode).
- Log prints clickable links (file:///...) to the output folder and, if GPKG mode, to the GPKG file.

Minimum CSV columns:
- gadm_id
- home_to_ping_distance_category ∈ {0, (0, 10), [10, 100), 100+}
- distance_category_ping_fraction
- ds (YYYY-MM-DD)
"""

from qgis.PyQt.QtCore import QCoreApplication, QVariant, QUrl
from qgis.core import (
    QgsProcessing,
    QgsProcessingAlgorithm,
    QgsProcessingParameterFile,
    QgsProcessingParameterVectorLayer,
    QgsProcessingParameterString,
    QgsProcessingParameterBoolean,
    QgsProcessingParameterEnum,
    QgsProcessingParameterFolderDestination,
    QgsProcessingParameterFileDestination,
    QgsProcessingException,
    QgsFields,
    QgsGeometry,
    QgsField,
    QgsFeature,
    QgsVectorLayer,
    QgsVectorFileWriter,
    QgsWkbTypes,
    QgsCoordinateReferenceSystem,
    QgsCoordinateTransform,
    QgsProcessingContext,
    QgsProject,
    QgsPointXY,  # needed for OD lines
)
import os

# matplotlib only (no seaborn), one chart per figure
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


# Module-level flag used by processAlgorithm's per-day GeoPackage writer.
# While True, the next daily layer is written with CreateOrOverwriteFile (so the
# output .gpkg is started from scratch); the writer sets it to False after the
# first successful write so that later days use CreateOrOverwriteLayer instead.
# processAlgorithm resets it to True at the start of every run.
first_write_gpkg = True  # global flag for first-day GPKG writing


class MobiVistaFacebookMobility2(QgsProcessingAlgorithm):
    # Parameter keys. Each constant is the identifier under which the matching
    # parameter is registered in initAlgorithm.
    INPUT_FOLDER = 'INPUT_FOLDER'              # folder containing the daily CSV files
    INPUT_VECTOR = 'INPUT_VECTOR'              # GADM administrative boundary (polygon) layer
    JOIN_FIELD_VECTOR = 'JOIN_FIELD_VECTOR'    # GADM field matched against the CSV gadm_id; default GID_2
    NAME_FIELD_VECTOR = 'NAME_FIELD_VECTOR'    # GADM field used for region labels; default NAME_2
    NORMALIZE = 'NORMALIZE'                    # normalize category proportions per region per day
    OUTPUT_MODE = 'OUTPUT_MODE'                # 0 SHP per day, 1 GPKG per month
    OUTPUT_FOLDER = 'OUTPUT_FOLDER'            # destination folder for the daily SHP files
    OUTPUT_GPKG = 'OUTPUT_GPKG'                # destination path of the monthly GeoPackage
    VIS_ENABLE = 'VIS_ENABLE'                  # generate the monthly PNG visuals
    TOPK = 'TOPK'                              # top-K regions by p100p per day (string parameter, default '10')
    RING_ENABLE = 'RING_ENABLE'                # add the "Distance Rings" layer
    RING_RADII_KM = 'RING_RADII_KM'  # comma-separated radii in km, e.g. 1,10,100,200
    RING_MODE = 'RING_MODE'  # 0=Extent center, 1=GADM centroids, 2=Point layer
    INPUT_POINTS = 'INPUT_POINTS'              # optional point layer supplying ring centers (RING_MODE 2)
    # New: Synthetic OD
    MAKE_OD = 'MAKE_OD'                        # generate the synthetic OD matrix
    OD_OUTPUT_CSV = 'OD_OUTPUT_CSV'            # destination path of the OD matrix CSV
    
    def initAlgorithm(self, config=None):
        """Register every input and output parameter of the algorithm.

        Parameters are added in the order listed below; ``config`` is accepted
        for API compatibility and is not used.
        """
        tr = self.tr
        add = self.addParameter

        # --- Source data -------------------------------------------------
        add(QgsProcessingParameterFile(
            self.INPUT_FOLDER,
            tr('Daily Facebook Mobility CSV folder'),
            behavior=QgsProcessingParameterFile.Folder,
        ))
        add(QgsProcessingParameterVectorLayer(
            self.INPUT_VECTOR,
            tr('GADM Administrative Boundary Layer'),
            types=[QgsProcessing.TypeVectorPolygon],
        ))
        add(QgsProcessingParameterString(
            self.JOIN_FIELD_VECTOR,
            tr('GADM join field name (default: GID_2)'),
            defaultValue='GID_2',
        ))
        add(QgsProcessingParameterString(
            self.NAME_FIELD_VECTOR,
            tr('GADM region label field (default: NAME_2)'),
            defaultValue='NAME_2',
        ))
        add(QgsProcessingParameterBoolean(
            self.NORMALIZE,
            tr('Normalize category proportions per region per day (sum ≈ 1)'),
            defaultValue=True,
        ))

        # --- Output destinations ----------------------------------------
        output_modes = ['Daily SHP files', 'Monthly GeoPackage']
        add(QgsProcessingParameterEnum(
            self.OUTPUT_MODE,
            tr('Output mode'),
            options=output_modes,
            defaultValue=1,
        ))
        add(QgsProcessingParameterFolderDestination(
            self.OUTPUT_FOLDER,
            tr('Output folder for daily SHP files'),
        ))
        add(QgsProcessingParameterFileDestination(
            self.OUTPUT_GPKG,
            tr('Monthly GeoPackage output'),
            tr('GeoPackage (*.gpkg)'),
        ))

        # --- Visuals and rankings ---------------------------------------
        add(QgsProcessingParameterBoolean(
            self.VIS_ENABLE,
            tr('Generate monthly PNG visuals'),
            defaultValue=True,
        ))
        add(QgsProcessingParameterString(
            self.TOPK,
            tr('Top-K regions by p100p per day to save in CSV'),
            defaultValue='10',
        ))

        # --- Distance rings ---------------------------------------------
        add(QgsProcessingParameterBoolean(
            self.RING_ENABLE,
            tr('Add “Distance Rings” SHP/GPKG as a spatial legend'),
            defaultValue=True,
        ))
        add(QgsProcessingParameterString(
            self.RING_RADII_KM,
            tr('Ring radii (km), ascending order'),
            defaultValue='1,10,100,200',
        ))
        ring_center_sources = [
            'Extent center (single center)',
            'Centroid of each GADM region',
            'From point layer',
        ]
        add(QgsProcessingParameterEnum(
            self.RING_MODE,
            tr('Ring center source'),
            options=ring_center_sources,
            defaultValue=1,
        ))
        add(QgsProcessingParameterVectorLayer(
            self.INPUT_POINTS,
            tr('Point layer for ring centers (optional; used if “From point layer” is selected)'),
            types=[QgsProcessing.TypeVectorPoint],
            optional=True,
        ))

        # --- Synthetic OD -----------------------------------------------
        add(QgsProcessingParameterBoolean(
            self.MAKE_OD,
            tr('Generate Synthetic OD from Meta distance shares'),
            defaultValue=False,
        ))
        add(QgsProcessingParameterFileDestination(
            self.OD_OUTPUT_CSV,
            tr('OD matrix CSV output'),
            tr('Comma Separated Values (*.csv)'),
        ))

    # ------------------- Synthetic OD helpers -------------------
    @staticmethod
    def _haversine_km(lon1, lat1, lon2, lat2):
        import numpy as np
        R = 6371.0088
        lon1, lat1, lon2, lat2 = map(np.radians, [lon1,lat1,lon2,lat2])
        dlon, dlat = lon2 - lon1, lat2 - lat1
        a = np.sin(dlat/2.0)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(dlon/2.0)**2
        return 2*R*np.arcsin(np.sqrt(a))

    def _build_distance_matrix(self, centroids_df):
        import numpy as np
        X = centroids_df[['lon','lat']].to_numpy()
        n = len(X)
        D = np.zeros((n,n), dtype=float)
        for i in range(n):
            D[i,:] = self._haversine_km(X[i,0], X[i,1], X[:,0], X[:,1])
        np.fill_diagonal(D, 0.0)
        return D

    def _gravity_od_with_bin_targets(
        self,
        regions_df,            # gadm_id, pop, lon, lat
        prod_vec=None,         # optional; defaults to pop
        attr_vec=None,         # optional; defaults to pop
        bin_targets=None,      # dict {gadm_id: {'p0', 'p0_10', 'p10_100', 'p100p'}}
        distance_bins=((0,1), (0,10), (10,100), (100,1e9)),  # km
        beta_init=0.05,
        max_iter=200,
        tol=1e-5
    ):
        import numpy as np
        import pandas as pd

        df = regions_df[['gadm_id','pop','lon','lat']].copy()
        n = len(df)
        if prod_vec is None:
            prod = df['pop'].to_numpy(dtype=float)
        else:
            prod = np.asarray(prod_vec, dtype=float)
        if attr_vec is None:
            attr = df['pop'].to_numpy(dtype=float)
        else:
            attr = np.asarray(attr_vec, dtype=float)

        Ptot = prod.sum()
        Atot = attr.sum()
        if Ptot <= 0 or Atot <= 0:
            raise ValueError("Non-positive totals for production/attraction.")
        attr *= (Ptot / Atot)

        D = self._build_distance_matrix(df)
        beta = beta_init

        def impedance(beta_val):
            return np.exp(-beta_val * np.maximum(D, 1e-6))

        bins = {
            'p0':      (0.0, distance_bins[0][1]),
            'p0_10':   (distance_bins[1][0], distance_bins[1][1]),
            'p10_100': (distance_bins[2][0], distance_bins[2][1]),
            'p100p':   (distance_bins[3][0], distance_bins[3][1]),
        }
        masks = {k: ((D>=lo) & (D<hi)).astype(float) for k,(lo,hi) in bins.items()}

        if bin_targets is None:
            bt = np.tile(np.array([0.25,0.25,0.25,0.25]), (n,1))
        else:
            bt = np.zeros((n,4), dtype=float)
            order = ['p0','p0_10','p10_100','p100p']
            for i, gid in enumerate(df['gadm_id']):
                b = bin_targets.get(gid, None)
                if b is None:
                    bt[i,:] = 0.25
                else:
                    arr = np.array([b.get(k,0.0) for k in order], dtype=float)
                    s = arr.sum()
                    bt[i,:] = arr/s if s>0 else 0.25

        F = np.outer(prod, attr) / attr.sum()
        A = np.ones(n)
        B = np.ones(n)

        for _ in range(max_iter):
            C = impedance(beta)
            F_old = F.copy()
            # column scaling
            A = attr / np.maximum((C.T @ (B * prod)), 1e-12)
            # row scaling
            B = prod / np.maximum(C @ (A * np.ones(n)), 1e-12)
            # flows
            F = (B[:,None] * prod[:,None]) * C * (A[None,:])
            F *= (Ptot / np.maximum(F.sum(), 1e-12))

            # distance-bin adjustment (match per-origin shares)
            row_sum = np.maximum(F.sum(axis=1, keepdims=True), 1e-12)
            shares = np.stack([
                (F * masks['p0']).sum(axis=1),
                (F * masks['p0_10']).sum(axis=1),
                (F * masks['p10_100']).sum(axis=1),
                (F * masks['p100p']).sum(axis=1),
            ], axis=1) / row_sum

            dev = shares - bt
            step = 0.3
            far_excess = dev[:,3].mean() + 0.5*dev[:,2].mean() - (dev[:,0].mean()+dev[:,1].mean())*0.5
            beta = max(1e-5, beta + step * far_excess)

            diff = np.linalg.norm(F - F_old) / np.maximum(np.linalg.norm(F_old), 1e-12)
            if diff < tol:
                break

        gids = df['gadm_id'].tolist()
        od = []
        for i, oi in enumerate(gids):
            for j, dj in enumerate(gids):
                v = float(F[i,j])
                if v > 0:
                    od.append((oi, dj, v, float(D[i,j])))
        od_df = pd.DataFrame(od, columns=['origin','destination','flow','distance_km'])
        return od_df, beta

    # ------------------------------------------------------------

    def _write_distance_rings(self, vector_layer, base_out, out_gpkg, output_mode,
                              radii_km, centers, context: QgsProcessingContext):
        """Write concentric distance rings around each center and queue the layer for loading.

        For every center, one polygon is produced per radius: the innermost is
        a full disc, each further one is the annulus between its radius and
        the previous one. Buffers are built in EPSG:3857 and reprojected back
        to the CRS of ``vector_layer``.

        Args:
            vector_layer: Layer whose source CRS is used for the centers and
                for the output.
            base_out: Folder receiving ``distance_rings.shp`` when
                ``output_mode == 0``.
            out_gpkg: GeoPackage path receiving the ``distance_rings`` layer
                for any other ``output_mode``.
            output_mode: 0 writes a shapefile, anything else writes to the
                GeoPackage.
            radii_km: Iterable of ring radii in km; ``None`` entries are
                skipped and the rest are sorted ascending.
            centers: List of dicts with keys ``x``, ``y`` (in the CRS of
                ``vector_layer``), ``id`` and ``name``.
            context: Processing context used for the transform context and
                for registering the result layer.

        Raises:
            QgsProcessingException: If the shapefile or GeoPackage layer
                cannot be written.
        """
        from qgis.core import (
            QgsVectorLayer, QgsFields, QgsField, QgsFeature, QgsGeometry,
            QgsWkbTypes, QgsCoordinateTransform, QgsCoordinateReferenceSystem,
            QgsVectorFileWriter, QgsPointXY
        )
        from qgis.PyQt.QtCore import QVariant

        src_crs = vector_layer.sourceCrs()
        # NOTE(review): radii are applied in EPSG:3857 units, which are not true
        # ground metres away from the equator — rings are approximate.
        dst_crs = QgsCoordinateReferenceSystem('EPSG:3857')
        to_3857 = QgsCoordinateTransform(src_crs, dst_crs, QgsProject.instance())
        to_src  = QgsCoordinateTransform(dst_crs, src_crs, QgsProject.instance())

        mem = QgsVectorLayer(f"{QgsWkbTypes.displayString(QgsWkbTypes.Polygon)}?crs={src_crs.authid()}",
                             "distance_rings_tmp", "memory")
        prov = mem.dataProvider()

        fields = QgsFields()
        fields.append(QgsField('gid', QVariant.String))
        fields.append(QgsField('name', QVariant.String))
        fields.append(QgsField('rank', QVariant.Int))
        fields.append(QgsField('radius_km', QVariant.Double))
        fields.append(QgsField('label', QVariant.String))
        prov.addAttributes(list(fields))
        mem.updateFields()

        radii_km_sorted = sorted(float(x) for x in radii_km if x is not None)

        feats = []
        for center in centers:
            center_src = QgsPointXY(center['x'], center['y'])
            center_3857 = to_3857.transform(center_src)

            prev_buf_3857 = None
            for idx, r_km in enumerate(radii_km_sorted, start=1):
                r_m = r_km * 1000.0
                buf_3857 = QgsGeometry.fromPointXY(center_3857).buffer(r_m, 48)
                ring_3857 = buf_3857 if prev_buf_3857 is None else buf_3857.difference(prev_buf_3857)
                prev_buf_3857 = buf_3857
                # Reproject a COPY. transform() modifies the geometry in place,
                # and for the innermost ring ring_3857 is the very same object
                # as prev_buf_3857; transforming it directly would leave the
                # next ring subtracting a geometry that is no longer in 3857.
                ring_src = QgsGeometry(ring_3857)
                ring_src.transform(to_src)

                # Label by the Meta distance category the radius falls into.
                if r_km <= 1:
                    lab = 'home (~1 km)'
                elif r_km <= 10:
                    lab = '(0–10 km)'
                elif r_km <= 100:
                    lab = '[10–100) km'
                else:
                    lab = '100+ km'

                ft = QgsFeature(fields)
                ft.setGeometry(ring_src)
                ft.setAttributes([center['id'], center['name'], idx, float(r_km), lab])
                feats.append(ft)

        prov.addFeatures(feats)

        if output_mode == 0:
            out_path = os.path.join(base_out, 'distance_rings.shp')
            res = QgsVectorFileWriter.writeAsVectorFormat(mem, out_path, 'UTF-8', src_crs, 'ESRI Shapefile')
            if res[0] != QgsVectorFileWriter.NoError:
                raise QgsProcessingException(self.tr(f'Failed to write distance_rings.shp: {res[1]}'))
            vl = QgsVectorLayer(out_path, "distance_rings", "ogr")
        else:
            opts = QgsVectorFileWriter.SaveVectorOptions()
            opts.driverName = "GPKG"
            opts.layerName = "distance_rings"
            opts.layerOptions = ["GEOMETRY_NAME=geom"]
            # Adds/replaces a single layer; presumably the GeoPackage file has
            # already been created by the daily writer — TODO confirm in caller.
            opts.actionOnExistingFile = QgsVectorFileWriter.CreateOrOverwriteLayer
            res, err = QgsVectorFileWriter.writeAsVectorFormatV2(mem, out_gpkg, context.transformContext(), opts)
            if res != QgsVectorFileWriter.NoError:
                raise QgsProcessingException(self.tr(f'Failed to write distance_rings layer to GPKG: {err}'))
            gpkg_uri = f"{out_gpkg}|layername=distance_rings"
            vl = QgsVectorLayer(gpkg_uri, "distance_rings", "ogr")

        # Hand the written layer to the context so it is loaded on completion.
        if vl.isValid():
            context.temporaryLayerStore().addMapLayer(vl)
            context.addLayerToLoadOnCompletion(vl.id(), QgsProcessingContext.LayerDetails(vl.name(), context.project(), None))
      
    def _collect_ring_centers(self, ring_mode, vector_layer, point_layer, join_field, name_field):
        """
        Return list of ring centers:
        [{'x': lon/epsg, 'y': lat/epsg, 'id': '...', 'name': '...'}, ...]
        CRS follows vector_layer for consistency with outputs.
        """
        centers = []
        if ring_mode == 0:
            c = vector_layer.extent().center()
            centers.append({'x': c.x(), 'y': c.y(), 'id': 'CENTER', 'name': 'CENTER'})
        elif ring_mode == 1:
            for f in vector_layer.getFeatures():
                geom = f.geometry()
                if geom is None or geom.isEmpty():
                    continue
                c = geom.centroid().asPoint()
                cid = str(f.attribute(join_field)) if join_field in vector_layer.fields().names() else str(f.id())
                nm  = f.attribute(name_field) if name_field in vector_layer.fields().names() else cid
                centers.append({'x': c.x(), 'y': c.y(), 'id': cid, 'name': str(nm) if nm is not None else cid})
        else:
            if point_layer is None or not point_layer.isValid():
                return centers
            src = point_layer.sourceCrs()
            dst = vector_layer.sourceCrs()
            if src != dst:
                xform = QgsCoordinateTransform(src, dst, QgsProject.instance())
            else:
                xform = None
            for f in point_layer.getFeatures():
                g = f.geometry()
                if g is None or g.isEmpty():
                    continue
                pt = g.asPoint()
                if xform is not None:
                    pt = xform.transform(pt)
                centers.append({'x': pt.x(), 'y': pt.y(), 'id': str(f.id()), 'name': str(f.id())})
        return centers

    def processAlgorithm(self, parameters, context: QgsProcessingContext, feedback):
        import pandas as pd
        import numpy as np

        global first_write_gpkg
        first_write_gpkg = True

        in_folder = self.parameterAsFile(parameters, self.INPUT_FOLDER, context)
        vector_layer = self.parameterAsVectorLayer(parameters, self.INPUT_VECTOR, context)
        join_field = self.parameterAsString(parameters, self.JOIN_FIELD_VECTOR, context) or 'GID_2'
        name_field = self.parameterAsString(parameters, self.NAME_FIELD_VECTOR, context) or 'NAME_2'
        do_norm = self.parameterAsBoolean(parameters, self.NORMALIZE, context)
        output_mode = self.parameterAsEnum(parameters, self.OUTPUT_MODE, context)
        out_folder = self.parameterAsString(parameters, self.OUTPUT_FOLDER, context)
        out_gpkg = self.parameterAsFileOutput(parameters, self.OUTPUT_GPKG, context)
        vis_enable = self.parameterAsBoolean(parameters, self.VIS_ENABLE, context)
        make_od = self.parameterAsBoolean(parameters, self.MAKE_OD, context)
        od_csv = self.parameterAsFileOutput(parameters, self.OD_OUTPUT_CSV, context)

        try:
            topk = int(self.parameterAsString(parameters, self.TOPK, context))
            if topk <= 0: topk = 10
        except Exception:
            topk = 10

        if not os.path.isdir(in_folder):
            raise QgsProcessingException(self.tr('Invalid input folder'))
        if vector_layer is None or not vector_layer.isValid():
            raise QgsProcessingException(self.tr('Invalid GADM layer'))
        field_names = vector_layer.fields().names()
        if join_field not in field_names:
            raise QgsProcessingException(self.tr(f'Join field {join_field} not found in GADM layer'))
        if name_field not in field_names:
            feedback.pushInfo(self.tr(f'Warning: name field {name_field} not found. Falling back to join field for labels'))
            name_field = join_field

        if output_mode == 0:
            if not out_folder: raise QgsProcessingException(self.tr('Output folder for SHP is empty'))
            os.makedirs(out_folder, exist_ok=True)
        else:
            if not out_gpkg: raise QgsProcessingException(self.tr('Monthly GPKG output path is empty'))

        # Collect CSVs
        csv_files = [os.path.join(in_folder, f) for f in os.listdir(in_folder)
                     if f.lower().endswith('.csv') and os.path.isfile(os.path.join(in_folder, f))]
        if not csv_files:
            raise QgsProcessingException(self.tr('No CSV files found in the input folder'))
        feedback.pushInfo(self.tr(f'Found {len(csv_files)} CSV files'))

        # Read CSVs (required columns)
        required = {'gadm_id', 'home_to_ping_distance_category', 'distance_category_ping_fraction', 'ds'}
        frames = []
        for pth in csv_files:
            try:
                df = pd.read_csv(pth, dtype=str, encoding='utf-8')
            except UnicodeDecodeError:
                df = pd.read_csv(pth, dtype=str, encoding='latin-1')
            miss = [c for c in required if c not in df.columns]
            if miss:
                feedback.pushInfo(self.tr(f'Skip {os.path.basename(pth)} missing columns {miss}'))
                continue
            df = df[list(required)].copy()
            df['gadm_id'] = df['gadm_id'].astype(str).str.strip()
            df['home_to_ping_distance_category'] = df['home_to_ping_distance_category'].astype(str).str.strip()
            df['distance_category_ping_fraction'] = df['distance_category_ping_fraction'].astype(str).str.strip()
            df['ds'] = df['ds'].astype(str).str.strip()
            def f2(x):
                try: return float(str(x).strip())
                except Exception: return None
            df['ping_frac'] = df['distance_category_ping_fraction'].apply(f2)
            df = df[(df['gadm_id']!='') & df['ping_frac'].notna() & (df['ds']!='')]
            if not df.empty:
                frames.append(df[['gadm_id','home_to_ping_distance_category','ping_frac','ds']])

        if not frames:
            raise QgsProcessingException(self.tr('No valid CSVs after validation'))
        big = pd.concat(frames, ignore_index=True)

        # Keys & names from GADM
        gadm_keys, name_lookup = set(), {}
        for feat in vector_layer.getFeatures():
            key = str(feat.attribute(join_field)).strip()
            nm = feat.attribute(name_field)
            nm = str(nm) if nm is not None else key
            gadm_keys.add(key)
            name_lookup[key] = nm

        # Filter to GADM
        big = big[big['gadm_id'].isin(gadm_keys)]
        if big.empty:
            raise QgsProcessingException(self.tr('After filtering by GADM, no data remains'))

        # Aggregate by mean if duplicated
        grp = big.groupby(['gadm_id','ds','home_to_ping_distance_category'], as_index=False)['ping_frac'].mean()

        # Optional normalization
        if do_norm:
            sums = grp.groupby(['gadm_id','ds'], as_index=False)['ping_frac'].sum().rename(columns={'ping_frac':'psum'})
            grp = grp.merge(sums, on=['gadm_id','ds'], how='left')
            grp['ping_frac'] = grp.apply(lambda r: (r['ping_frac']/r['psum']) if r['psum'] and r['psum']>0 else r['ping_frac'], axis=1)
            grp.drop(columns=['psum'], inplace=True)

        # Category mapping
        cat_map = {'0':'p0','(0, 10)':'p0_10','[10, 100)':'p10_100','100+':'p100p'}
        grp['cat_col'] = grp['home_to_ping_distance_category'].map(cat_map)
        grp = grp[grp['cat_col'].notna()].copy()

        days = sorted(grp['ds'].unique())

        # Cache GADM features
        gadm_feats = []
        join_idx = field_names.index(join_field)
        for feat in vector_layer.getFeatures():
            keyval = str(feat[join_idx]).strip()
            gadm_feats.append((feat.attributes(), feat.geometry(), keyval))

        extra_fields = [
            QgsField('p0', QVariant.Double),
            QgsField('p0_10', QVariant.Double),
            QgsField('p10_100', QVariant.Double),
            QgsField('p100p', QVariant.Double),
            QgsField('psum', QVariant.Double),
            QgsField('ds', QVariant.String)
        ]

        daily_summary, ranking_rows = [], []

        def write_shp_for_day(day, wide_df):
            new_fields = vector_layer.fields()
            for f in extra_fields:
                if f.name() not in new_fields.names():
                    new_fields.append(f)
            out_name = f"Mobility_{day.replace('-','')}.shp"
            out_path = os.path.join(out_folder, out_name)
            writer = QgsVectorFileWriter.create(
                out_path, new_fields, vector_layer.wkbType(), vector_layer.sourceCrs(),
                transformContext=context.transformContext()
            )
            if writer.hasError() != QgsVectorFileWriter.NoError:
                del writer
                raise QgsProcessingException(self.tr(f'Failed to create SHP {out_path}'))
            wmap = {str(r['gadm_id']).strip(): r for _, r in wide_df.iterrows()}
            for attrs, geom, keyval in gadm_feats:
                ofeat = QgsFeature(new_fields)
                ofeat.setGeometry(geom)
                out_attrs = list(attrs)
                row = wmap.get(keyval)
                if row is not None:
                    out_attrs += [float(row['p0']), float(row['p0_10']), float(row['p10_100']),
                                  float(row['p100p']), float(row['psum']), str(row['ds'])]
                else:
                    out_attrs += [0.0,0.0,0.0,0.0,0.0,str(day)]
                ofeat.setAttributes(out_attrs)
                writer.addFeature(ofeat)
            del writer
            # auto load
            vl = QgsVectorLayer(out_path, os.path.splitext(out_name)[0], "ogr")
            if vl.isValid():
                context.temporaryLayerStore().addMapLayer(vl)
                context.addLayerToLoadOnCompletion(vl.id(), QgsProcessingContext.LayerDetails(vl.name(), context.project(), None))

        def write_gpkg_layer_for_day(day, wide_df):
            from qgis.core import QgsVectorFileWriter, QgsVectorLayer, QgsFields, QgsField, QgsFeature, QgsWkbTypes
            global first_write_gpkg
            crs = vector_layer.sourceCrs()
            wkb_str = QgsWkbTypes.displayString(vector_layer.wkbType())
            mem = QgsVectorLayer(f"{wkb_str}?crs={crs.authid()}", "tmp", "memory")
            prov = mem.dataProvider()
            out_fields = QgsFields()
            for f in vector_layer.fields(): out_fields.append(f)
            for f in extra_fields:
                if f.name() not in out_fields.names(): out_fields.append(f)
            prov.addAttributes(list(out_fields)); mem.updateFields()
            wmap = {str(r['gadm_id']).strip(): r for _, r in wide_df.iterrows()}

            feats = []
            for attrs, geom, keyval in gadm_feats:
                of = QgsFeature(out_fields)
                of.setGeometry(geom)
                out_attrs = list(attrs)
                row = wmap.get(keyval)
                if row is not None:
                    out_attrs += [float(row['p0']), float(row['p0_10']), float(row['p10_100']),
                                  float(row['p100p']), float(row['psum']), str(row['ds'])]
                else:
                    out_attrs += [0.0,0.0,0.0,0.0,0.0,str(day)]
                of.setAttributes(out_attrs)
                feats.append(of)
            prov.addFeatures(feats)

            opts = QgsVectorFileWriter.SaveVectorOptions()
            opts.driverName = "GPKG"
            layer_name = f"day_{day.replace('-','')}"
            opts.layerName = layer_name
            opts.layerOptions = ["GEOMETRY_NAME=geom"]
            opts.actionOnExistingFile = (QgsVectorFileWriter.CreateOrOverwriteFile if first_write_gpkg
                                         else QgsVectorFileWriter.CreateOrOverwriteLayer)
            res, err = QgsVectorFileWriter.writeAsVectorFormatV2(mem, out_gpkg, context.transformContext(), opts)
            if res != QgsVectorFileWriter.NoError:
                if not first_write_gpkg:
                    opts.actionOnExistingFile = QgsVectorFileWriter.CreateOrOverwriteFile
                    res2, err2 = QgsVectorFileWriter.writeAsVectorFormatV2(mem, out_gpkg, context.transformContext(), opts)
                    if res2 != QgsVectorFileWriter.NoError:
                        raise QgsProcessingException(self.tr(f'Failed to write layer {layer_name} to {out_gpkg}: {err2}'))
                else:
                    raise QgsProcessingException(self.tr(f'Failed to write layer {layer_name} to {out_gpkg}: {err}'))
            first_write_gpkg = False
            # auto load
            gpkg_uri = f"{out_gpkg}|layername={layer_name}"
            vl = QgsVectorLayer(gpkg_uri, layer_name, "ogr")
            if vl.isValid():
                context.temporaryLayerStore().addMapLayer(vl)
                context.addLayerToLoadOnCompletion(vl.id(), QgsProcessingContext.LayerDetails(vl.name(), context.project(), None))

        # Per-day loop
        for i, day in enumerate(days):
            feedback.pushInfo(self.tr(f'Processing day {day} {i+1}/{len(days)}'))
            sub = grp[grp['ds']==day].copy()
            wide = sub.pivot_table(index='gadm_id', columns='cat_col', values='ping_frac', aggfunc='mean').reset_index()
            for c in ['p0','p0_10','p10_100','p100p']:
                if c not in wide.columns: wide[c] = 0.0
            wide['psum'] = wide[['p0','p0_10','p10_100','p100p']].sum(axis=1)
            wide['ds'] = day
            # summary & ranking
            if not wide.empty:
                daily_summary.append({
                    'ds': day,
                    'p0': float(wide['p0'].mean()),
                    'p0_10': float(wide['p0_10'].mean()),
                    'p10_100': float(wide['p10_100'].mean()),
                    'p100p': float(wide['p100p'].mean()),
                    'psum_mean': float(wide['psum'].mean())
                })
                tmp = wide[['gadm_id','p100p']].copy().sort_values('p100p', ascending=False).head(topk)
                tmp['ds'] = day
                ranking_rows.extend(tmp[['ds','gadm_id','p100p']].to_dict('records'))
            else:
                daily_summary.append({'ds': day,'p0':0.0,'p0_10':0.0,'p10_100':0.0,'p100p':0.0,'psum_mean':0.0})
            # write
            if output_mode == 0: write_shp_for_day(day, wide)
            else: write_gpkg_layer_for_day(day, wide)
        
        # =============== MONTHLY AVERAGE OUTPUT ===============
        all_wide_month = []
        for day in days:
            sub = grp[grp['ds']==day].copy()
            wide = sub.pivot_table(index='gadm_id', columns='cat_col', values='ping_frac', aggfunc='mean').reset_index()
            for c in ['p0','p0_10','p10_100','p100p']:
                if c not in wide.columns: wide[c] = 0.0
            wide['ds'] = day
            all_wide_month.append(wide)
        month_avg = None
        if all_wide_month:
            bw = pd.concat(all_wide_month, ignore_index=True)
            month_avg = bw.groupby('gadm_id', as_index=False)[['p0','p0_10','p10_100','p100p']].mean()
            month_avg['psum'] = month_avg[['p0','p0_10','p10_100','p100p']].sum(axis=1)
            month_avg['ds'] = 'monthly_avg'

            if output_mode == 0:
                # SHP
                new_fields = vector_layer.fields()
                for f in extra_fields:
                    if f.name() not in new_fields.names():
                        new_fields.append(f)
                out_path = os.path.join(out_folder, "Mobility_monthly_avg.shp")
                writer = QgsVectorFileWriter.create(
                    out_path, new_fields, vector_layer.wkbType(), vector_layer.sourceCrs(),
                    transformContext=context.transformContext()
                )
                wmap = {str(r['gadm_id']).strip(): r for _, r in month_avg.iterrows()}
                for attrs, geom, keyval in gadm_feats:
                    of = QgsFeature(new_fields)
                    of.setGeometry(geom)
                    out_attrs = list(attrs)
                    row = wmap.get(keyval)
                    if row is not None:
                        out_attrs += [float(row['p0']), float(row['p0_10']), float(row['p10_100']),
                                      float(row['p100p']), float(row['psum']), str(row['ds'])]
                    else:
                        out_attrs += [0.0,0.0,0.0,0.0,0.0,'monthly_avg']
                    of.setAttributes(out_attrs)
                    writer.addFeature(of)
                del writer
                vl = QgsVectorLayer(out_path, "Mobility_monthly_avg", "ogr")
                if vl.isValid():
                    context.temporaryLayerStore().addMapLayer(vl)
                    context.addLayerToLoadOnCompletion(vl.id(), QgsProcessingContext.LayerDetails(vl.name(), context.project(), None))

            else:
                # GPKG
                crs = vector_layer.sourceCrs()
                wkb_str = QgsWkbTypes.displayString(vector_layer.wkbType())
                mem = QgsVectorLayer(f"{wkb_str}?crs={crs.authid()}", "tmp", "memory")
                prov = mem.dataProvider()
                out_fields = QgsFields()
                for f in vector_layer.fields(): out_fields.append(f)
                for f in extra_fields:
                    if f.name() not in out_fields.names(): out_fields.append(f)
                prov.addAttributes(list(out_fields)); mem.updateFields()
                wmap = {str(r['gadm_id']).strip(): r for _, r in month_avg.iterrows()}
                feats = []
                for attrs, geom, keyval in gadm_feats:
                    of = QgsFeature(out_fields)
                    of.setGeometry(geom)
                    out_attrs = list(attrs)
                    row = wmap.get(keyval)
                    if row is not None:
                        out_attrs += [float(row['p0']), float(row['p0_10']), float(row['p10_100']),
                                      float(row['p100p']), float(row['psum']), str(row['ds'])]
                    else:
                        out_attrs += [0.0,0.0,0.0,0.0,0.0,'monthly_avg']
                    of.setAttributes(out_attrs)
                    feats.append(of)
                prov.addFeatures(feats)
                opts = QgsVectorFileWriter.SaveVectorOptions()
                opts.driverName = "GPKG"
                opts.layerName = "monthly_avg"
                opts.layerOptions = ["GEOMETRY_NAME=geom"]
                opts.actionOnExistingFile = QgsVectorFileWriter.CreateOrOverwriteLayer
                QgsVectorFileWriter.writeAsVectorFormatV2(mem, out_gpkg, context.transformContext(), opts)
                gpkg_uri = f"{out_gpkg}|layername=monthly_avg"
                vl = QgsVectorLayer(gpkg_uri, "monthly_avg", "ogr")
                if vl.isValid():
                    context.temporaryLayerStore().addMapLayer(vl)
                    context.addLayerToLoadOnCompletion(vl.id(), QgsProcessingContext.LayerDetails(vl.name(), context.project(), None))
        
        # save summary & ranking
        base_out = out_folder if output_mode == 0 else (os.path.dirname(out_gpkg) if os.path.dirname(out_gpkg) else '.')
        sum_df = pd.DataFrame(daily_summary).sort_values('ds')
        sum_df.to_csv(os.path.join(base_out, 'mobility_daily_summary.csv'), index=False)
        if ranking_rows:
            pd.DataFrame(ranking_rows)[['ds','gadm_id','p100p']].to_csv(os.path.join(base_out, 'top_p100p_by_day.csv'), index=False)
        
        ring_enable = self.parameterAsBoolean(parameters, self.RING_ENABLE, context)
        ring_mode   = self.parameterAsEnum(parameters, self.RING_MODE, context)
        point_layer = self.parameterAsVectorLayer(parameters, self.INPUT_POINTS, context)

        radii_str = self.parameterAsString(parameters, self.RING_RADII_KM, context) or '1,10,100,200'
        try:
            radii_km = [float(x.strip()) for x in radii_str.split(',') if x.strip()!='']
        except Exception:
            radii_km = [1.0, 10.0, 100.0, 200.0]

        if ring_enable:
            centers = self._collect_ring_centers(ring_mode, vector_layer, point_layer, join_field, name_field)
            if centers:
                self._write_distance_rings(vector_layer=vector_layer,
                                           base_out=base_out,
                                           out_gpkg=out_gpkg,
                                           output_mode=output_mode,
                                           radii_km=radii_km,
                                           centers=centers,
                                           context=context)

        # =============== PER-REGION MONTHLY PROPORTION BAR CHART ===============
        if vis_enable and month_avg is not None and not month_avg.empty:
            month_avg['name'] = month_avg['gadm_id'].map(lambda k: name_lookup.get(k, k))
            cats = ['p0','p0_10','p10_100','p100p']
            figb, axb = plt.subplots(figsize=(12, max(6, len(month_avg)*0.25)))
            bottom = np.zeros(len(month_avg))
            colors = ['#66c2a5','#fc8d62','#8da0cb','#e78ac3']
            for i, c in enumerate(cats):
                vals = month_avg[c].fillna(0.0).values
                axb.barh(month_avg['name'], vals, left=bottom, label=c, color=colors[i])
                bottom += vals
            
            axb.set_title('Monthly average mobility proportion per region')
            axb.set_xlabel('Proportion')
            axb.set_xlim(0,1.0)
            axb.legend(loc='upper right', bbox_to_anchor=(1.2, 1.0))
            figb.tight_layout()
            figb.savefig(os.path.join(base_out, 'mobility_per_region.png'), dpi=150)
            plt.close(figb)

        # ===================== MONTHLY VISUALS =====================
        if vis_enable and not sum_df.empty:
            x = list(range(len(sum_df)))
            # Line
            fig1, ax1 = plt.subplots(figsize=(10,5))
            ax1.plot(x, sum_df['p0'], label='0')
            ax1.plot(x, sum_df['p0_10'], label='(0,10)')
            ax1.plot(x, sum_df['p10_100'], label='[10,100)')
            ax1.plot(x, sum_df['p100p'], label='100+')
            ax1.set_title('Daily category averages'); ax1.set_xlabel('Day index'); ax1.set_ylabel('Proportion'); ax1.legend()
            fig1.tight_layout(); fig1.savefig(os.path.join(base_out, 'monthly_line.png'), dpi=150); plt.close(fig1)

            # Stacked area
            fig2, ax2 = plt.subplots(figsize=(10,5))
            ax2.stackplot(
                x,
                sum_df['p0'], sum_df['p0_10'], sum_df['p10_100'], sum_df['p100p'],
                labels=['0','(0,10)','[10,100)','100+']
            )
            ax2.set_title('Daily category composition')
            ax2.set_xlabel('Day index')
            ax2.set_ylabel('Proportion')
            fig2.subplots_adjust(right=0.8)
            ax2.legend(loc='center left', bbox_to_anchor=(1.02, 0.5), borderaxespad=0., frameon=True)
            fig2.savefig(os.path.join(base_out, 'monthly_stacked_area.png'), dpi=150, bbox_inches='tight')
            plt.close(fig2)

            # Heatmap (RdYlGn)
            cats = ['p0','p0_10','p10_100','p100p']
            M = np.vstack([sum_df[c].values for c in cats])
            fig3, ax3 = plt.subplots(figsize=(10,3))
            im = ax3.imshow(M, aspect='auto', cmap='RdYlGn', vmin=0.0, vmax=1.0)
            ax3.set_yticks(range(len(cats))); ax3.set_yticklabels(cats)
            ax3.set_xlabel('Day index'); ax3.set_title('Heatmap Day × Category')
            cbar = fig3.colorbar(im, ax=ax3, fraction=0.046, pad=0.04); cbar.set_label('Proportion')
            fig3.tight_layout(); fig3.savefig(os.path.join(base_out, 'monthly_heatmap.png'), dpi=150); plt.close(fig3)

            # Boxplot
            fig4, ax4 = plt.subplots(figsize=(8,5))
            data_box = [sum_df[c].values for c in cats]
            ax4.boxplot(data_box, labels=cats, showmeans=True)
            ax4.set_title('Daily variation per category (boxplot)'); ax4.set_ylabel('Proportion')
            fig4.tight_layout(); fig4.savefig(os.path.join(base_out, 'monthly_boxplot.png'), dpi=150); plt.close(fig4)

        # =============== ADDITIONAL ANALYSES & CENTROIDS (needed for OD) ===============
        centroids_src_rows = []  # centroids in source CRS (for OD lines)
        cent_rows = []          # centroids in WGS84 (for distances)
        if vis_enable or make_od:
            # Build full wide per day
            all_wide = []
            for day in days:
                sub = grp[grp['ds']==day].copy()
                wide = sub.pivot_table(index='gadm_id', columns='cat_col', values='ping_frac', aggfunc='mean').reset_index()
                for c in ['p0','p0_10','p10_100','p100p']:
                    if c not in wide.columns: wide[c] = 0.0
                wide['ds'] = day
                all_wide.append(wide)

            # Compute centroids both in EPSG:4326 (for distance) and in source CRS (for lines)
            src_crs = vector_layer.sourceCrs()
            dst_crs = QgsCoordinateReferenceSystem('EPSG:4326')
            to4326 = QgsCoordinateTransform(src_crs, dst_crs, QgsProject.instance())

            for attrs, geom, keyval in gadm_feats:
                if keyval in gadm_keys:
                    c = geom.centroid()
                    p_src = c.asPoint()
                    centroids_src_rows.append({'gadm_id': keyval, 'x': p_src.x(), 'y': p_src.y()})
                    try:
                        p_ll = to4326.transform(p_src)
                        lon, lat = p_ll.x(), p_ll.y()
                    except Exception:
                        lon, lat = p_src.x(), p_src.y()
                    cent_rows.append({'gadm_id': keyval, 'lon': lon, 'lat': lat, 'name': name_lookup.get(keyval, keyval)})

            # Extra visuals (weekend/weekday, stability, corr, hotspots, top100)
            if vis_enable and all_wide:
                bw = pd.concat(all_wide, ignore_index=True)
                # Weekend vs Weekday chart
                bw['ds_dt'] = pd.to_datetime(bw['ds'], errors='coerce')
                bw['dow'] = bw['ds_dt'].dt.dayofweek
                bw['is_weekend'] = bw['dow'] >= 5
                ww = bw.groupby('is_weekend', as_index=False)[['p0','p0_10','p10_100','p100p']].mean()
                figw, axw = plt.subplots(figsize=(7,5))
                bars = axw.bar([0,1], ww['p0_10'], tick_label=['Weekday','Weekend'], color=['#1f77b4','#ff7f0e'])
                axw.set_title('Near-distance mobility (0–10 km): Weekday vs Weekend'); axw.set_ylabel('Average proportion')
                axw.set_ylim(0, max(ww['p0_10'].max()*1.15, 0.01))
                for b in bars:
                    h = b.get_height(); x = b.get_x() + b.get_width()/2
                    axw.text(x, h/2, f'{h:.3f}', ha='center', va='center', fontsize=11, color='white', fontweight='bold')
                figw.tight_layout(); figw.savefig(os.path.join(base_out, 'weekend_weekday.png'), dpi=150); plt.close(figw)

                # Stability (std dev p100p)
                std_df = bw.groupby('gadm_id', as_index=False)['p100p'].std().rename(columns={'p100p':'std_p100p'})
                std_df['name'] = std_df['gadm_id'].map(lambda k: name_lookup.get(k, k))
                std_df = std_df.sort_values('std_p100p', ascending=False)
                std_df.to_csv(os.path.join(base_out, 'stability_per_region.csv'), index=False)
                topn = std_df.head(20)
                figstd, axstd = plt.subplots(figsize=(8,7))
                axstd.barh(topn['name'][::-1], topn['std_p100p'][::-1])
                axstd.set_title('Top regions with highest p100p fluctuation'); axstd.set_xlabel('Std dev of p100p')
                figstd.tight_layout(); figstd.savefig(os.path.join(base_out, 'stability_topstd.png'), dpi=150); plt.close(figstd)

                # Correlation among categories (greyscale)
                corr = bw[['p0','p0_10','p10_100','p100p']].corr()
                figc, axc = plt.subplots(figsize=(5,4))
                cax = axc.imshow(corr.values, vmin=-1, vmax=1, cmap='Greys')
                axc.set_xticks(range(4)); axc.set_xticklabels(['p0','p0_10','p10_100','p100p'])
                axc.set_yticks(range(4)); axc.set_yticklabels(['p0','p0_10','p10_100','p100p'])
                axc.set_title('Category correlation')
                figc.colorbar(cax, ax=axc, fraction=0.046, pad=0.04)
                figc.tight_layout(); figc.savefig(os.path.join(base_out, 'mobility_corr.png'), dpi=150); plt.close(figc)

                # Hotspot centroid scatter
                cent_df_ll = pd.DataFrame(cent_rows)
                avg_p100p = bw.groupby('gadm_id', as_index=False)['p100p'].mean().rename(columns={'p100p':'p100p_mean'})
                hotspot = cent_df_ll.merge(avg_p100p, on='gadm_id', how='left')
                hotspot.to_csv(os.path.join(base_out, 'hotspot_centroids.csv'), index=False)

                vals = hotspot['p100p_mean'].fillna(0.0).clip(lower=0.0)
                size = (vals.rank(pct=True).values * 300) + 30

                lon_min, lon_max = hotspot['lon'].min(), hotspot['lon'].max()
                lat_min, lat_max = hotspot['lat'].min(), hotspot['lat'].max()
                mean_lat_rad = np.deg2rad(hotspot['lat'].median() if not hotspot['lat'].empty else 0.0)
                x_span = (lon_max - lon_min) * max(np.cos(mean_lat_rad), 0.2)
                y_span = (lat_max - lat_min)
                aspect = x_span / max(y_span, 1e-6)
                fig_w = 10
                fig_h = max(5, fig_w / max(aspect, 1e-3))

                fhs, ahs = plt.subplots(figsize=(fig_w, fig_h))
                # boundary background
                for f in vector_layer.getFeatures():
                    g = f.geometry()
                    if g is None or g.isEmpty():
                        continue
                    g_geo = QgsGeometry(g)
                    try:
                        g_geo.transform(QgsCoordinateTransform(vector_layer.sourceCrs(), QgsCoordinateReferenceSystem('EPSG:4326'), QgsProject.instance()))
                    except Exception:
                        pass
                    if g_geo.isMultipart():
                        mpoly = g_geo.asMultiPolygon()
                        for poly in mpoly:
                            if not poly: continue
                            ext = poly[0]
                            xs = [p.x() for p in ext]; ys = [p.y() for p in ext]
                            ahs.plot(xs, ys, linewidth=0.8, color='lightgrey', alpha=0.9, zorder=1)
                    else:
                        poly = g_geo.asPolygon()
                        if poly:
                            ext = poly[0]
                            xs = [p.x() for p in ext]; ys = [p.y() for p in ext]
                            ahs.plot(xs, ys, linewidth=0.8, color='lightgrey', alpha=0.9, zorder=1)

                sc = ahs.scatter(hotspot['lon'], hotspot['lat'], s=size, c=vals, cmap='viridis', alpha=0.85, edgecolors='none', zorder=3)
                ahs.set_title('Mean p100p hotspots by centroid')
                ahs.set_xlabel('Longitude'); ahs.set_ylabel('Latitude')
                pad_x = (lon_max - lon_min) * 0.02; pad_y = (lat_max - lat_min) * 0.02
                ahs.set_xlim(lon_min - pad_x, lon_max + pad_x); ahs.set_ylim(lat_min - pad_y, lat_max + pad_y)
                ahs.set_aspect('equal', adjustable='box')
                cb = plt.colorbar(sc, ax=ahs, fraction=0.046, pad=0.04); cb.set_label('p100p mean')
                fhs.tight_layout(); fhs.savefig(os.path.join(base_out, 'hotspot_scatter.png'), dpi=150, bbox_inches='tight'); plt.close(fhs)

                # Top 100 with labels
                top100 = hotspot.sort_values('p100p_mean', ascending=False).head(100)
                ftop, atop = plt.subplots(figsize=(8,18))
                atop.barh(top100['name'][::-1], top100['p100p_mean'][::-1])
                atop.set_title('Top 100 regions by mean p100p'); atop.set_xlabel('p100p mean')
                ftop.tight_layout(); ftop.savefig(os.path.join(base_out, 'top100_p100p.png'), dpi=200); plt.close(ftop)

        # ===================== SYNTHETIC OD (optional) =====================
        od_csv_written = ''
        od_lines_layername = ''
        if make_od:
            if month_avg is None or month_avg.empty:
                raise QgsProcessingException(self.tr('Cannot generate OD: monthly averages are not available'))
            if not od_csv:
                raise QgsProcessingException(self.tr('OD CSV output path is empty'))

            # Build bin targets from monthly averages
            bin_targets = {}
            for _, r in month_avg.iterrows():
                gid = str(r['gadm_id']).strip()
                bin_targets[gid] = {
                    'p0': float(r.get('p0', 0.0)),
                    'p0_10': float(r.get('p0_10', 0.0)),
                    'p10_100': float(r.get('p10_100', 0.0)),
                    'p100p': float(r.get('p100p', 0.0)),
                }

            # Regions DF with lon/lat from centroids; use uniform pop=1.0 by default
            cent_df_ll = pd.DataFrame(cent_rows)
            regions_df = month_avg[['gadm_id']].merge(cent_df_ll[['gadm_id','lon','lat']], on='gadm_id', how='left')
            if regions_df[['lon','lat']].isna().any().any():
                raise QgsProcessingException(self.tr('Missing centroids for some regions — OD cannot be computed'))
            regions_df['pop'] = 1.0  # replace with real mass if available

            od_df, beta_final = self._gravity_od_with_bin_targets(
                regions_df=regions_df[['gadm_id','pop','lon','lat']].copy(),
                bin_targets=bin_targets,
                beta_init=0.05,
                max_iter=300,
                tol=1e-6
            )
            # Save CSV
            od_df.to_csv(od_csv, index=False)
            od_csv_written = od_csv
            feedback.pushInfo(self.tr(f'OD CSV written: {QUrl.fromLocalFile(os.path.normpath(od_csv)).toString()}'))
            feedback.pushInfo(self.tr(f'Calibrated beta: {beta_final:.6f}'))

            # Build OD lines layer (straight lines between source-CRS centroids)
            src_pt_map = {row['gadm_id']: (row['x'], row['y']) for row in centroids_src_rows}
            crs = vector_layer.sourceCrs()
            wkb_str = QgsWkbTypes.displayString(QgsWkbTypes.LineString)
            mem = QgsVectorLayer(f"{wkb_str}?crs={crs.authid()}", "od_lines_tmp", "memory")
            prov = mem.dataProvider()
            od_fields = QgsFields()
            od_fields.append(QgsField('origin', QVariant.String))
            od_fields.append(QgsField('destination', QVariant.String))
            od_fields.append(QgsField('flow', QVariant.Double))
            od_fields.append(QgsField('dist_km', QVariant.Double))
            prov.addAttributes(list(od_fields)); mem.updateFields()

            feats = []
            for _, r in od_df.iterrows():
                oi = str(r['origin'])
                dj = str(r['destination'])
                p1 = src_pt_map.get(oi, None)
                p2 = src_pt_map.get(dj, None)
                if p1 is None or p2 is None:
                    continue
                line = QgsGeometry.fromPolylineXY([QgsPointXY(p1[0], p1[1]), QgsPointXY(p2[0], p2[1])])
                ft = QgsFeature()
                ft.setFields(od_fields)
                ft.setGeometry(line)
                ft.setAttributes([oi, dj, float(r['flow']), float(r['distance_km'])])
                feats.append(ft)
            prov.addFeatures(feats)

            if output_mode == 0:
                out_path = os.path.join(out_folder, "od_lines.shp")
                res = QgsVectorFileWriter.writeAsVectorFormat(mem, out_path, 'UTF-8', crs, 'ESRI Shapefile')
                if res[0] != QgsVectorFileWriter.NoError:
                    raise QgsProcessingException(self.tr(f'Failed to write od_lines.shp: {res[1]}'))
                vl_od = QgsVectorLayer(out_path, "od_lines", "ogr")
                od_lines_layername = "od_lines"
            else:
                opts = QgsVectorFileWriter.SaveVectorOptions()
                opts.driverName = "GPKG"
                opts.layerName = "od_lines"
                opts.layerOptions = ["GEOMETRY_NAME=geom"]
                opts.actionOnExistingFile = QgsVectorFileWriter.CreateOrOverwriteLayer
                res, err = QgsVectorFileWriter.writeAsVectorFormatV2(mem, out_gpkg, context.transformContext(), opts)
                if res != QgsVectorFileWriter.NoError:
                    raise QgsProcessingException(self.tr(f'Failed to write od_lines to GPKG: {err}'))
                gpkg_uri = f"{out_gpkg}|layername=od_lines"
                vl_od = QgsVectorLayer(gpkg_uri, "od_lines", "ogr")
                od_lines_layername = "od_lines"

            if vl_od.isValid():
                context.temporaryLayerStore().addMapLayer(vl_od)
                context.addLayerToLoadOnCompletion(vl_od.id(), QgsProcessingContext.LayerDetails(vl_od.name(), context.project(), None))

        # Log clickable links (folder and GPKG if any)
        out_click = out_folder if output_mode == 0 else os.path.dirname(out_gpkg)
        out_url = QUrl.fromLocalFile(os.path.normpath(out_click)).toString()
        feedback.pushInfo(self.tr(f'Outputs saved at: {out_url}'))
        if output_mode == 1 and out_gpkg:
            gpkg_url = QUrl.fromLocalFile(os.path.normpath(out_gpkg)).toString()
            feedback.pushInfo(self.tr(f'GPKG: {gpkg_url}'))
        
        result = {
            'OUTPUT_MODE': 'Monthly GeoPackage' if output_mode==1 else 'Daily SHP files',
            'OUTPUT_DIR': out_folder if output_mode==0 else os.path.dirname(out_gpkg),
            'OUTPUT_GPKG': out_gpkg if output_mode==1 else '',
        }
        if make_od:
            result['OD_OUTPUT_CSV'] = od_csv_written
            result['OD_LINES_LAYER'] = od_lines_layername
        return result

    def createInstance(self):
        """Construct and return a new ``MobiVistaFacebookMobility2`` object."""
        fresh_algorithm = MobiVistaFacebookMobility2()
        return fresh_algorithm

    def name(self):
        """Return the fixed identifier string of this algorithm."""
        algorithm_id = 'mobivista_facebook_mobility2'
        return algorithm_id

    def displayName(self):
        """Return the human-readable title, passed through ``self.tr``."""
        title = 'Meta Mobility Analyzer'
        return self.tr(title)

    def shortHelpString(self):
        """Return the HTML help text, passed through ``self.tr`` as one string.

        The text is assembled from six consecutive HTML fragments (credits and
        overview, "What it does", "Inputs", "Key Parameters", "Outputs" and
        "Notes") that are concatenated in that order without any separator.
        """
        # Credits line followed by the one-paragraph summary.
        overview = (
            "<p><i>Created by</i> <b>FIRMAN AFRIANTO</b>, <b>MAYA SAFIRA</b>, 2025 </p>"
            "<p>Analyzes <b>Meta Data for Good (Mobility)</b> from a folder of daily CSVs, "
            "joins them with a GADM boundary layer, and produces <b>spatial outputs + rich visuals</b> "
            "to understand monthly mobility patterns. Optionally, synthesizes an OD matrix consistent with "
            "Meta distance-bin shares using a <b>doubly-constrained gravity model</b>.</p>"
        )

        what_it_does = (
            "<p><b>What it does</b></p>"
            "<ul>"
            "<li>Imports all daily Meta Mobility CSV files from a folder.</li>"
            "<li>Joins to the GADM layer using the <b>gadm_id</b> key.</li>"
            "<li>Optional normalization so regional daily category sums ≈ 1.</li>"
            "<li>Outputs either <b>daily SHP files</b> or a <b>monthly GPKG</b> (one layer per day).</li>"
            "<li>Automatically adds outputs to the QGIS Layers panel.</li>"
            "<li>Generates a daily summary CSV and Top-K ranking by p100p.</li>"
            "<li>Creates multiple monthly mobility visuals.</li>"
            "<li><i>Optional</i>: Generates a synthetic OD matrix and OD lines.</li>"
            "</ul>"
        )

        inputs_section = (
            "<p><b>Inputs</b></p>"
            "<ul>"
            "<li><b>CSV Folder</b>: daily Meta Mobility files (required columns: gadm_id, distance category, ping fraction, ds).</li>"
            "<li><b>GADM Layer</b>: polygon administrative boundaries with <b>GID_2</b> or your chosen join field.</li>"
            "</ul>"
        )

        key_parameters = (
            "<p><b>Key Parameters</b></p>"
            "<ul>"
            "<li><b>Normalize</b>: scale category proportions so per-region per-day totals equal 1.</li>"
            "<li><b>Output mode</b>: daily SHP or monthly GPKG.</li>"
            "<li><b>Top-K</b>: number of highest p100p regions to store in a CSV per day.</li>"
            "<li><b>Visualization</b>: toggle to render monthly PNG charts.</li>"
            "<li><b>Generate Synthetic OD</b>: builds OD consistent with Meta distance-bin shares; writes CSV and OD lines.</li>"
            "</ul>"
        )

        outputs_section = (
            "<p><b>Outputs</b></p>"
            "<ul>"
            "<li><b>SHP/GPKG</b>: daily spatial layers auto-loaded into QGIS.</li>"
            "<li><b>mobility_daily_summary.csv</b>: per-day category averages.</li>"
            "<li><b>top_p100p_by_day.csv</b>: per-day top regions by p100p.</li>"
            "<li><b>PNG visuals</b>: monthly_line, stacked_area, RdYlGn heatmap, boxplot, "
            "weekday vs weekend, stability, category correlation, centroid hotspots, and Top 100 regions.</li>"
            "<li><i>Optional</i> <b>OD</b>: od_matrix.csv and an <b>od_lines</b> layer (SHP or in GPKG).</li>"
            "</ul>"
        )

        notes_section = (
            "<p><b>Notes</b></p>"
            "<ul>"
            "<li><b>Data source</b>: Meta Data for Good (Movement Range Maps).</li>"
            "<li><b>CRS</b>: outputs follow the CRS of the input GADM layer.</li>"
            "<li><b>Dependencies</b>: pandas, numpy, matplotlib.</li>"
            "<li><b>OD is synthetic</b>: calibrated to match distance-bin shares; not observed flows.</li>"
            "</ul>"
        )

        # Join with an empty separator so the result equals one contiguous literal.
        help_html = "".join([
            overview,
            what_it_does,
            inputs_section,
            key_parameters,
            outputs_section,
            notes_section,
        ])
        return self.tr(help_html)

    def tr(self, s):
        """Return ``s`` as translated by Qt under the 'Processing' context."""
        translated = QCoreApplication.translate('Processing', s)
        return translated
