# -*- coding: utf-8 -*-

"""
/***************************************************************************
 RiskAssessment
                                 A QGIS plugin
 Risk assessment calculation for forecast based financing
 Generated by Plugin Builder: http://g-sherman.github.io/Qgis-Plugin-Builder/
                              -------------------
        begin                : 2023-10-11
        copyright            : (C) 2023 by HeiGIT gGmbH
        email                : info@heigit.org
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/
"""

__author__ = "HeiGIT gGmbH"
__date__ = "2023-10-11"
__copyright__ = "(C) 2023 by HeiGIT gGmbH"

# This will get replaced with a git SHA1 when you do a git archive

__revision__ = "$Format:%H$"


import pandas as pd
from numpy import isnan
from qgis._core import Qgis, QgsField, QgsFields
from qgis.core import (
    QgsFeatureSink,
    QgsMessageLog,
    QgsProcessing,
    QgsProcessingAlgorithm,
    QgsProcessingParameterFeatureSink,
    QgsProcessingParameterFeatureSource,
    QgsProcessingParameterFile,
)
from qgis.PyQt.QtCore import QCoreApplication, QVariant

from risk_assessment.utils import *


class RiskAssessmentAlgorithm(QgsProcessingAlgorithm):
    """
    Processing algorithm that derives a risk score per administrative unit.

    Inputs are a polygon layer of administrative boundaries (which must
    carry ``ADM_PCODE`` and ``ADM_EN`` fields), CSV files with exposure,
    vulnerability and (optionally) coping capacity indicators indexed by
    ``ADM_PCODE``, and a CSV file with one weight/direction row per
    indicator.

    The output is a vector layer with the input geometries, the
    ``ADM_EN`` / ``ADM_PCODE`` attributes and one numeric field per score
    column produced by the helpers in ``risk_assessment.utils``.

    All Processing algorithms should extend the QgsProcessingAlgorithm
    class.
    """

    # Constants used to refer to parameters and outputs. They will be
    # used when calling the algorithm from another algorithm, or when
    # calling from the QGIS console.

    OUTPUT = "OUTPUT"
    DISTRICTS = "DISTRICTS"
    EXPOSURE = "EXPOSURE"
    COPING = "COPING"
    VUL = "VULNERABILITY"
    WEIGHTS = "WEIGHTS"
    INPUT = "INPUT"

    # Columns of the weights file that processAlgorithm indexes directly.
    _WEIGHT_COLUMNS = ("variable_name", "category", "weight", "direction")

    def initAlgorithm(self, config):
        """
        Here we define the inputs and output of the algorithm, along
        with some other properties.
        """

        # Polygon layer with the administrative boundaries.
        self.addParameter(
            QgsProcessingParameterFeatureSource(
                name=self.DISTRICTS,
                description=self.tr("<b>Inputs</b><br>Administrative Boundaries"),
                types=[QgsProcessing.SourceType.TypeVectorPolygon],
            )
        )

        self.addParameter(
            QgsProcessingParameterFile(
                name=self.EXPOSURE,
                description=self.tr("Exposure indicators (.csv)"),
                extension="csv",
            )
        )

        self.addParameter(
            QgsProcessingParameterFile(
                name=self.VUL,
                description=self.tr("Vulnerability indicators (.csv)"),
                extension="csv",
            )
        )

        # Coping capacity is the only optional indicator file.
        self.addParameter(
            QgsProcessingParameterFile(
                name=self.COPING,
                description=self.tr("Coping Capacity indicators (.csv)"),
                extension="csv",
                optional=True,
            )
        )

        self.addParameter(
            QgsProcessingParameterFile(
                name=self.WEIGHTS,
                description=self.tr("Weights (.csv)"),
                extension="csv",
            )
        )

        self.addParameter(
            QgsProcessingParameterFeatureSink(
                name=self.OUTPUT,
                description=self.tr("<b>Outputs</b><br>Risk Assessment Vector Layer"),
            )
        )

    @staticmethod
    def _read_indicators(path, label, feedback):
        """
        Read an indicator CSV indexed by ``ADM_PCODE``.

        The delimiter is sniffed by pandas (``sep=None`` with the python
        engine). Returns the DataFrame, or ``None`` after reporting an
        error on ``feedback`` when pandas raises ``ValueError`` (which is
        what happens when the ``ADM_PCODE`` column is absent).
        """
        try:
            return pd.read_csv(path, index_col="ADM_PCODE", sep=None, engine="python")
        except ValueError:
            feedback.reportError(
                f"Error reading {label} file: 'ADM_PCODE' column not found.\n\n"
            )
            return None

    @staticmethod
    def _unweighted_indicators(frame, weighted_names):
        """Return the non-``ADM*`` columns of ``frame`` that have no weight row."""
        return [
            col
            for col in frame
            if not col.startswith("ADM") and col not in weighted_names
        ]

    @staticmethod
    def _prefix_indicators(frame, prefix, id_col):
        """
        Prefix every indicator column with ``<prefix>_`` and drop the
        remaining ``ADM*`` columns (except ``id_col``).
        """
        renamed = frame.rename(
            columns={
                col: f"{prefix}_{col}"
                for col in frame.columns
                if not col.startswith("ADM")
            }
        )
        surplus_adm = [
            col for col in renamed.columns if col.startswith("ADM") and col != id_col
        ]
        return renamed.drop(surplus_adm, axis=1)

    def processAlgorithm(self, parameters, context, feedback):
        """
        Validate the inputs, compute the scores and write the output layer.

        Steps: check the boundary fields, read the CSV inputs, validate
        sizes / weights / categories, prefix and join the indicator
        tables, delegate the score computation to the helpers imported
        from ``risk_assessment.utils`` and finally copy every boundary
        feature to the sink with its score attributes.

        Returns ``{self.OUTPUT: dest_id}`` on success. On a validation
        failure the problem is reported through ``feedback`` and the
        method returns ``None``.
        NOTE(review): QGIS generally expects a ``QgsProcessingException``
        for fatal failures - confirm whether the bare returns are intended.
        """
        districts = self.parameterAsSource(parameters, self.DISTRICTS, context)

        # Field names are compared case-insensitively.
        field_names = [field.name().upper() for field in districts.fields()]
        if "ADM_PCODE" not in field_names or "ADM_EN" not in field_names:
            feedback.reportError(
                "Error: 'ADM_PCODE' and/or 'ADM_EN' field not found in the 'Administrative Boundaries' file.\n\n"
            )
            return

        #############
        # Read Data #
        #############
        exposure_file = self.parameterAsFile(parameters, self.EXPOSURE, context)
        coping_file = self.parameterAsFile(parameters, self.COPING, context)
        vul_file = self.parameterAsFile(parameters, self.VUL, context)
        weights_file = self.parameterAsFile(parameters, self.WEIGHTS, context)

        coping = None
        if coping_file:
            coping = self._read_indicators(coping_file, "coping", feedback)
            if coping is None:
                return

        if coping is None:
            feedback.reportError(
                "Warning: No coping capacity data was provided.\n"
                "For a more comprehensive and realistic risk assessment, it is strongly recommended to include coping capacity indicators.\n\n"
            )

        exposure = self._read_indicators(exposure_file, "exposure", feedback)
        if exposure is None:
            return

        vulnerability = self._read_indicators(vul_file, "vulnerability", feedback)
        if vulnerability is None:
            return

        weights = pd.read_csv(weights_file, sep=None, engine="python")

        # Fail with a readable message instead of a KeyError further down.
        absent_columns = [
            col for col in self._WEIGHT_COLUMNS if col not in weights.columns
        ]
        if absent_columns:
            feedback.reportError(
                f"Error reading weights file: column(s) not found: {absent_columns}\n\n",
                fatalError=True,
            )
            return

        id_col = "ADM_PCODE"

        # Check if datasets have different lengths
        sizes_differ = not (
            len(districts) == len(exposure) == len(vulnerability)
        ) or (coping is not None and len(districts) != len(coping))
        if sizes_differ:
            # Build the message step by step: a conditional expression
            # applied to implicitly concatenated literals would swallow
            # the whole message, not just the coping line.
            size_report = (
                "Error: Datasets do not have the same size.\n"
                f"\nAdministrative Boundaries: {len(districts)} rows"
                f"\nExposure indicators: {len(exposure)} rows"
                f"\nVulnerability indicators: {len(vulnerability)} rows"
            )
            if coping is not None:
                size_report += f"\nCoping Capacity indicators: {len(coping)} rows"
            size_report += "\n\nPlease check the length of the input files.\n\n"

            feedback.reportError(size_report, fatalError=True)
            QgsMessageLog.logMessage(
                "Error: Datasets do not have the same size.", level=Qgis.Critical
            )
            return

        # Check for invalid values in weight-file
        if (
            weights["weight"].isnull().any()
            or weights["weight"].apply(lambda x: isinstance(x, str)).any()
        ):
            feedback.reportError(
                "Invalid weight-value in the weight-file: NULL or string detected among weights",
                fatalError=True,
            )
            return
        if (
            weights["direction"].isnull().any()
            or weights["direction"].apply(lambda x: isinstance(x, str)).any()
        ):
            feedback.reportError(
                "Invalid direction-value in the weight-file: NULL or string detected among directions",
                fatalError=True,
            )
            return

        # Check for indicators with no related weight
        weighted_names = set(weights["variable_name"])
        missing_vul_indicators = self._unweighted_indicators(
            vulnerability, weighted_names
        )
        missing_exp_indicators = self._unweighted_indicators(exposure, weighted_names)
        missing_cop_indicators = []
        if coping is not None:
            missing_cop_indicators = self._unweighted_indicators(
                coping, weighted_names
            )

        if missing_vul_indicators or missing_exp_indicators or missing_cop_indicators:
            # Same reasoning as for the size report: append the optional
            # coping line explicitly so header and hint are always shown.
            missing_report = (
                "Error: Indicators with no related weight were detected.\n"
                f"\nExposure indicators not found in weights file: {missing_exp_indicators}"
                f"\nVulnerability indicators not found in weights file: {missing_vul_indicators}"
            )
            if coping is not None:
                missing_report += f"\nCoping Capacity indicators not found in weights file: {missing_cop_indicators}"
            missing_report += (
                "\n\nPlease check the corresponding indicators of the input files.\n\n"
            )

            feedback.reportError(missing_report, fatalError=True)
            QgsMessageLog.logMessage(
                "Error: Indicators with no related weight were detected.",
                level=Qgis.Critical,
            )
            return

        # Check if weights are normalized
        category_sums = weights.groupby("category")["weight"].sum()
        # AHP often leaves a small margin, which is acceptable.
        valid_weights = category_sums.between(0.95, 1.05).all()

        # In case of non-normalized weights, re-normalize and report to user
        if not valid_weights:
            # Normalize weights per category so that each category sums to 1
            weights["weight"] = weights.groupby("category")["weight"].transform(
                lambda x: x / x.sum()
            )
            weights_subset = weights[["variable_name", "category", "weight"]]

            feedback.reportError(
                "Warning: The weight sum per category is not equal to 1. The weights were normalized to the new values:\n"
                f"\n{weights_subset}\n\n",
                fatalError=False,
            )

        ####################
        # Pre-Process Data #
        ####################

        # The resulting prefix length of 4 chars is significant later
        pre_coping = "cop"
        pre_exposure = "exp"
        pre_vulnerability = "vul"

        # Check for invalid prefixes; "cop" is only valid when coping
        # capacity data was actually provided.
        valid_categories = {pre_exposure, pre_vulnerability}
        if coping is not None:
            valid_categories.add(pre_coping)

        invalid_rows = weights[~weights["category"].isin(valid_categories)]
        if not invalid_rows.empty:
            feedback.reportError(
                f"Error: Invalid prefixes found in the weighting file: {invalid_rows['category'].tolist()}\n\n",
                fatalError=True,
            )
            QgsMessageLog.logMessage(
                "Error: Invalid prefixes found in the weighting file",
                level=Qgis.Critical,
            )
            return

        # Add prefixes to variable names
        if coping is not None:
            coping = self._prefix_indicators(coping, pre_coping, id_col)
        exposure = self._prefix_indicators(exposure, pre_exposure, id_col)
        vulnerability = self._prefix_indicators(
            vulnerability, pre_vulnerability, id_col
        )

        # Weight rows get the same "<category>_<name>" form as the columns
        weights["variable_name"] = weights["category"] + "_" + weights["variable_name"]

        # Join indicators (index-based join on ADM_PCODE)
        indicators = exposure.join(vulnerability)
        if coping is not None:
            indicators = indicators.join(coping)

        # Check if all dataframes were merged successfully
        if len(districts) != len(indicators):
            QgsMessageLog.logMessage(
                "Datasets do not have the same size after merging.", level=Qgis.Critical
            )
            feedback.reportError(
                "Error: Datasets do not have the same size after merging.\n\n",
                fatalError=True,
            )
            return

        ####################
        # Risk Calculation #
        ####################

        # The helpers below come from risk_assessment.utils (star import).
        normalized_indicators = normalize_indicators(indicators)

        full_indicators = guess_missing_indicators(normalized_indicators)

        scores, error = calculate_scores(full_indicators, weights)
        if error != "":
            feedback.reportError(error, fatalError=False)

        # Calculate final scores
        if coping is not None:
            scores["sus"] = calculate_geometric_mean(scores["vul"], scores["cop"])
            scores["risk"] = calculate_geometric_mean(scores["exp"], scores["sus"])
        else:
            scores["risk"] = calculate_geometric_mean(scores["exp"], scores["vul"])

        # Rank the risk scores (highest risk gets rank 1)
        scores["ranking"] = scores["risk"].rank(ascending=False)

        # define fields in output
        district_fields = districts.fields()
        name = district_fields.field(district_fields.lookupField("ADM_EN"))
        pcode = district_fields.field(district_fields.lookupField("ADM_PCODE"))

        output_fields = QgsFields()
        output_fields.append(name)
        output_fields.append(pcode)

        for field_name in scores.columns:
            # TODO: get appropriate data types
            output_fields.append(QgsField(field_name, QVariant.Double))

        (output, dest_id) = self.parameterAsSink(
            parameters,
            self.OUTPUT,
            context,
            output_fields,
            districts.wkbType(),
            districts.sourceCrs(),
        )

        # Compute the number of steps to display within the progress bar and
        # get features from source
        total = 100.0 / districts.featureCount() if districts.featureCount() else 0
        features = districts.getFeatures()

        for current, feature in enumerate(features):
            # Stop the algorithm if cancel button has been clicked
            if feedback.isCanceled():
                break

            feature_name = feature["ADM_EN"]
            feature_pcode = feature["ADM_PCODE"]

            # setting fields will reset all attributes.
            feature.setFields(output_fields)

            feature["ADM_EN"] = feature_name
            feature["ADM_PCODE"] = feature_pcode

            data = scores.loc[feature_pcode]

            for label in data.index:
                # Missing scores are written as the sentinel -999
                if isnan(data[label]):
                    feature[label] = -999
                    continue
                feature[label] = float(data[label])

            # Add a feature in the sink
            output.addFeature(feature, QgsFeatureSink.FastInsert)

            # Update the progress bar
            feedback.setProgress(int(current * total))

        # Rename output layer
        if context.willLoadLayerOnCompletion(dest_id):
            QgsMessageLog.logMessage("Renaming.")
            layer_details = context.layerToLoadOnCompletionDetails(dest_id)
            layer_details.name = "Risk Assessment Output"
        else:
            QgsMessageLog.logMessage("Cannt rename - Layer not loaded?")

        # The only result is the feature sink holding the processed
        # features; its key must match the output parameter name.
        return {self.OUTPUT: dest_id}

    def shortHelpString(self):
        """Return the HTML help text shown next to the algorithm dialog."""
        return """
        This algorithm calculates a risk score based on the three indicators exposure, susceptibility and coping capacity provided by the user and is applicable across different countries and disaster contexts.
        The user can assign different weights and directions to the indicators via the weights inputfile.
        By processing the inserted information, it will become clear which areas are predicted to be most severely impacted.

        The required input files must follow a given structure which can be found <a href="https://giscience.github.io/gis-training-resource-center/content/GIS_AA/en_qgis_risk_assessment_plugin.html">in the documentation.</a>
        """

    def helpUrl(self):
        """Return the URL of the online documentation for this algorithm."""
        return "https://giscience.github.io/gis-training-resource-center/content/GIS_AA/en_qgis_risk_assessment_plugin.html"

    def name(self):
        """
        Returns the algorithm name, used for identifying the algorithm. This
        string should be fixed for the algorithm, and must not be localised.
        The name should be unique within each provider. Names should contain
        lowercase alphanumeric characters only and no spaces or other
        formatting characters.
        """
        # NOTE(review): the value contains spaces and capitals, contrary to
        # the guidance above; left unchanged because it is the public
        # algorithm id that existing models/scripts may reference.
        return "Calculate Risk Assessment"

    def displayName(self):
        """
        Returns the translated algorithm name, which should be used for any
        user-visible display of the algorithm name.
        """
        return self.tr(self.name())

    def group(self):
        """
        Returns the name of the group this algorithm belongs to. This string
        should be localised.
        """
        return self.tr(self.groupId())

    def groupId(self):
        """
        Returns the unique ID of the group this algorithm belongs to. This
        string should be fixed for the algorithm, and must not be localised.
        The group id should be unique within each provider. Group id should
        contain lowercase alphanumeric characters only and no spaces or other
        formatting characters.
        """
        # NOTE(review): same caveat as name() - kept for compatibility.
        return "Risk Assessment"

    def tr(self, string):
        """Translate ``string`` in the "Processing" translation context."""
        return QCoreApplication.translate("Processing", string)

    def createInstance(self):
        """Return a fresh instance of this algorithm."""
        return RiskAssessmentAlgorithm()
