# -*- coding: utf-8 -*-

"""
/***************************************************************************
 Geo_detector
                                 A QGIS plugin
 This plugin adds an algorithm to measure the spatial stratified heterogeneity
 Generated by Plugin Builder: http://g-sherman.github.io/Qgis-Plugin-Builder/
                              -------------------
        begin                : 2021-12-21
        copyright            : (C) 2021 by Guojg
        email                : guojg@lreis.ac.cn
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/
"""

__author__ = 'Guojg'
__date__ = '2021-12-21'
__copyright__ = '(C) 2021 by Guojg'

# This will get replaced with a git SHA1 when you do a git archive

__revision__ = '$Format:%H$'

# import os
import numpy as np
import pandas as pd
# import pydevd_pycharm
from qgis.PyQt.QtCore import QCoreApplication
# from qgis.core import QgsProcessingException
from qgis.core import (QgsProcessing,
                       QgsProcessingAlgorithm,
                       QgsProcessingParameterFeatureSource,
                       QgsProcessingParameterFileDestination,
                       QgsProcessingParameterField,
                       QgsProcessingMultiStepFeedback,
                       QgsProcessingParameterNumber,
                       QgsProcessingParameterDefinition,
                       QgsProcessingException)

from gd_core import geodetector
from gd_core import optimal_stratification as op_st


class Geo_detectorAlgorithm(QgsProcessingAlgorithm):
    """
    The geo-detector models

    Processing algorithm that reads a study variable and one or more
    explanatory fields from a vector layer, stratifies numeric explanatory
    fields with ``gd_core.optimal_stratification`` and hands the resulting
    table to ``gd_core.geodetector.GeoDetector``, whose result is written to
    an ``.xls`` or ``.txt`` file.

    All Processing algorithms should extend the QgsProcessingAlgorithm
    class.
    """

    # Parameter identifiers. These strings are part of the algorithm's
    # external interface (models, scripts, saved settings), so they must not
    # be renamed even where the spelling differs from the constant name.
    OUTPUT = 'OUTPUT'
    INPUT = 'INPUT'
    VALUES_FIELD_NAME = 'VALUES_FIELD_NAME'
    CATEGORICAL_FIELD_NAME = 'CATEGORIES_FIELD_NAME'
    NUMERICAL_FIELD_NAME = 'NUMERICAL_FIELD_NAME'
    MIN_GROUP = 'MIN_GROUP'
    MAX_GROUP = 'MAX_GROUP'
    MIN_SAMPLES_GROUP = 'MIN_SAMPLES_GROUP'
    MIN_SAMPLES_SPLIT = 'MIN_SAMPLES_SPLIT'
    EQUALITY_FIELD_NAME = "EQUALITY_FIELD_NAME"
    MINIMUM_RATIO = "MINIMUM_RATIO"
    CV_SIZE = 'CV_SIZE'
    # NOTE: the identifier really is 'SV_SEED'; kept as-is for backward
    # compatibility with anything that already references it.
    CV_SEED = 'SV_SEED'
    CV_TIMES = 'CV_TIMES'
    INTERPOLATION_DATA = 'INTERPOLATION_DATA'
    IMPROVING_Q = 'IMPROVING_Q'

    def __init__(self):
        super().__init__()
        # Option labels; they are not referenced by any other method of this
        # class but are kept in case external code reads them.
        self.criterion = ["Sum squared error", "Significance test"]
        self.strata_num = ['Fixed number', 'Optimal']

    def _add_advanced_parameter(self, parameter):
        """
        Mark ``parameter`` as advanced and register it with the algorithm.

        Using one helper for every advanced parameter guarantees that the
        flags are always derived from the parameter being registered.
        """
        parameter.setFlags(parameter.flags() | QgsProcessingParameterDefinition.FlagAdvanced)
        self.addParameter(parameter)

    def initAlgorithm(self, config=None):
        """
        Here we define the inputs and output of the algorithm, along
        with some other properties.

        Parameters are registered in this order: input layer, study
        variable, categorical fields, numeric fields, the advanced tuning
        parameters and finally the output file.
        """
        self.addParameter(
            QgsProcessingParameterFeatureSource(
                self.INPUT,
                self.tr('Input layer'),
                [QgsProcessing.TypeVectorAnyGeometry]
            )
        )

        # study variable (Y)
        self.addParameter(
            QgsProcessingParameterField(
                self.VALUES_FIELD_NAME,
                self.tr('Study variable'),
                parentLayerParameterName=self.INPUT
            )
        )

        # explanatory field(s) that are already categorical
        self.addParameter(
            QgsProcessingParameterField(
                self.CATEGORICAL_FIELD_NAME,
                self.tr('Field(s) with categories'),
                parentLayerParameterName=self.INPUT,
                type=QgsProcessingParameterField.Any,
                allowMultiple=True,
                optional=True
            )
        )

        # explanatory field(s) with numerical data (stratified before use)
        self.addParameter(
            QgsProcessingParameterField(
                self.NUMERICAL_FIELD_NAME,
                self.tr('Field(s) with numeric'),
                parentLayerParameterName=self.INPUT,
                type=QgsProcessingParameterField.Any,
                allowMultiple=True,
                optional=True
            )
        )

        # ---- advanced parameters ----
        self._add_advanced_parameter(
            QgsProcessingParameterNumber(
                self.MAX_GROUP,
                self.tr('Maximum number of groups'),
                minValue=2, defaultValue=20, optional=True
            )
        )

        self._add_advanced_parameter(
            QgsProcessingParameterNumber(
                self.MIN_GROUP,
                self.tr('Minimum number of groups'),
                minValue=1, defaultValue=2, optional=True
            )
        )

        self._add_advanced_parameter(
            QgsProcessingParameterNumber(
                self.MIN_SAMPLES_GROUP,
                self.tr('Minimum number of samples in a group'),
                minValue=2, defaultValue=5, optional=True
            )
        )

        # field used for the equality constraint
        self._add_advanced_parameter(
            QgsProcessingParameterField(
                self.EQUALITY_FIELD_NAME,
                self.tr('Field for equality constraint'),
                parentLayerParameterName=self.INPUT,
                type=QgsProcessingParameterField.Any,
                optional=True
            )
        )

        self._add_advanced_parameter(
            QgsProcessingParameterNumber(
                self.MINIMUM_RATIO,
                self.tr('Minimum ratio for equality measure'),
                minValue=0, maxValue=1,
                type=QgsProcessingParameterNumber.Double,
                defaultValue=0, optional=True
            )
        )

        self._add_advanced_parameter(
            QgsProcessingParameterNumber(
                self.IMPROVING_Q,
                self.tr('Minimum threshold for q-value increase'),
                minValue=0, maxValue=1,
                type=QgsProcessingParameterNumber.Double,
                defaultValue=0, optional=True
            )
        )

        self._add_advanced_parameter(
            QgsProcessingParameterNumber(
                self.CV_SIZE,
                self.tr('Cross-validation number'),
                minValue=2, defaultValue=10
            )
        )

        self._add_advanced_parameter(
            QgsProcessingParameterNumber(
                self.CV_SEED,
                self.tr('Cross-validation random state'),
                minValue=2, defaultValue=None, optional=True
            )
        )

        self._add_advanced_parameter(
            QgsProcessingParameterNumber(
                self.CV_TIMES,
                self.tr('Times of repeating cross-validation'),
                minValue=1, defaultValue=1, optional=True
            )
        )

        # output file: the extension chosen here selects the writer used in
        # processAlgorithm (xls or txt)
        self.addParameter(
            QgsProcessingParameterFileDestination(
                self.OUTPUT,
                self.tr('Output File'),
                'MS Excel format (*.xls);;TXT files (*.txt)',
            )
        )

    # add help/introduction windows
    def shortDescription(self):  # pylint: disable=missing-docstring
        desc_file = (
            "Q-GD “QGIS-geographical detector” is a statistical tool to measure Spatial Stratified "
            "Heterogeneity(SSH) and test the coupling between two variables Y (Study variable) and X "
            "(Explanatory variable), according to their SSHs, without assumption of linearity of the "
            "association. In "
            "GeoDetector model, the Study variable is numerical variable and the explanatory variable must be "
            "categorical. "
            "If an explanatory variable is numerical it should be transformed to be categorical."
        )
        return self.tr(desc_file)

    def processAlgorithm(self, parameters, context, feedback):
        """
        Here is where the processing itself takes place.

        Steps:
          1. read the parameters and validate them;
          2. load the selected fields of every feature into a DataFrame;
          3. stratify each numeric explanatory field with
             ``op_st.optimal_geo_detector`` and store the ``group_interval``
             of the result as a new ``Cat_<field>`` column;
          4. run ``geodetector.GeoDetector`` on all categorical columns and
             write the result to the output file.

        :param parameters: parameter values supplied by the framework
        :param context: processing context
        :param feedback: feedback object used for messages and progress
        :return: dict mapping ``self.OUTPUT`` to the output file path
        :raises QgsProcessingException: invalid input source or unsupported
            output file extension
        :raises ValueError: neither categorical nor numeric fields were given
        :raises TypeError: a field that must be numeric is not, or a
            categorical field has too many groups
        """
        source = self.parameterAsSource(parameters, self.INPUT, context)
        if source is None:
            raise QgsProcessingException(self.invalidSourceError(parameters, self.INPUT))

        value_field_name = self.parameterAsString(parameters, self.VALUES_FIELD_NAME, context)
        category_field_names = self.parameterAsFields(parameters, self.CATEGORICAL_FIELD_NAME, context)
        numerical_field_names = self.parameterAsFields(parameters, self.NUMERICAL_FIELD_NAME, context)
        output_res = self.parameterAsFileOutput(parameters, self.OUTPUT, context)

        max_group = self.parameterAsInt(parameters, self.MAX_GROUP, context)
        min_group = self.parameterAsInt(parameters, self.MIN_GROUP, context)
        min_sample = self.parameterAsInt(parameters, self.MIN_SAMPLES_GROUP, context)
        pop_field = self.parameterAsString(parameters, self.EQUALITY_FIELD_NAME, context)
        pop_threshold = self.parameterAsDouble(parameters, self.MINIMUM_RATIO, context)
        inc_q = self.parameterAsDouble(parameters, self.IMPROVING_Q, context)
        cv_fold = self.parameterAsInt(parameters, self.CV_SIZE, context)
        cv_random_seed = self.parameterAsInt(parameters, self.CV_SEED, context)
        cv_rep = self.parameterAsInt(parameters, self.CV_TIMES, context)

        # A value of 0 is treated as "not set" for the optional advanced
        # numbers; fall back to the documented defaults (or None).
        if max_group == 0:
            max_group = None
        if min_group == 0:
            min_group = 2
        if min_sample == 0:
            min_sample = 5
        if cv_random_seed == 0:
            cv_random_seed = None

        if not category_field_names and not numerical_field_names:
            raise ValueError("'Field(s) with categories' and 'Field(s) with numeric' cannot both be empty")

        # Fail before doing any heavy work if the result could not be saved:
        # only these two writers exist.
        output_lower = output_res.lower()
        write_xls = output_lower.endswith('.xls')
        write_txt = output_lower.endswith('.txt')
        if not (write_xls or write_txt):
            raise QgsProcessingException(
                self.tr('Unsupported output file type: "{}". Use a .xls or .txt file.').format(output_res))

        # Columns to read from the layer. The equality field is optional, so
        # it is only requested when one was chosen. Names are de-duplicated
        # (order preserved) because the same field may be selected in more
        # than one role and duplicate DataFrame columns would make
        # ``df[name]`` return a DataFrame instead of a Series.
        requested = [value_field_name] + category_field_names + numerical_field_names
        if pop_field:
            requested.insert(0, pop_field)
        cols = []
        for col in requested:
            if col not in cols:
                cols.append(col)

        # create the geo-detector raw data
        features = source.getFeatures()
        data_gen = ([f[col] for col in cols] for f in features)
        df = pd.DataFrame.from_records(data=data_gen, columns=cols)

        # two reported steps: stratification, then the geo-detector itself
        feedback = QgsProcessingMultiStepFeedback(2, feedback)
        feedback.setCurrentStep(0)

        # checking the input data
        y = df[value_field_name]
        if not pd.api.types.is_numeric_dtype(y):
            raise TypeError('Study variable is not of a numeric type')

        pop_data = None
        if pop_field:
            pop_series = df[pop_field]
            if not pd.api.types.is_numeric_dtype(pop_series):
                raise TypeError('Equality variable is not of a numeric type')
            pop_data = pop_series.to_numpy()
        # NOTE(review): assumes op_st.optimal_geo_detector accepts
        # pop_data=None when no equality field is chosen - confirm in gd_core.

        # stratification of the numeric explanatory fields
        y_values = y.to_numpy()
        for x in numerical_field_names:
            if not pd.api.types.is_numeric_dtype(df[x]):
                feedback.reportError(
                    self.tr('\nERROR: Field ' + x + ' is not of a numeric type. We cannot continue...\n'))
                raise TypeError('Field ' + x + ' is not of a numeric type')

            gd_x = op_st.optimal_geo_detector(x=df[x].to_numpy(), y=y_values, min_group=min_group,
                                              min_samples_group=min_sample,
                                              max_group=max_group, pop_data=pop_data,
                                              pop_threshold=pop_threshold,
                                              cv_seed=cv_random_seed, cv_fold=cv_fold, cv_times=cv_rep,
                                              min_delta_q=inc_q)
            # the stratified version of the field joins the categorical ones
            cat_x_name = 'Cat_' + x
            df[cat_x_name] = gd_x.group_interval
            category_field_names.append(cat_x_name)

        # sanity checks on every categorical column
        row_shape = df.shape[0]
        for cate_name in category_field_names:
            uniq_val, freq_val = np.unique(df[cate_name], return_counts=True)
            # reject a field whose number of groups exceeds 1/3 of the rows
            if len(freq_val) > (row_shape / 3):
                feedback.reportError(self.tr('\nERROR:  There are too many groups in ' + cate_name +
                                             '. We cannot continue...\n'))
                raise TypeError('There are too many groups in ' + cate_name + '.')
            # warn about the first group that holds a single element
            singletons = uniq_val[freq_val == 1]
            if len(singletons) > 0:
                feedback.pushWarning(self.tr('\nWarnings: The group ' + str(singletons[0]) + ' of ' +
                                             cate_name + ' has only one element\n'))

        feedback.setCurrentStep(1)
        gd_result = geodetector.GeoDetector(df, category_field_names, value_field_name)

        if write_xls:
            gd_result.save_to_xls(output_res)
        else:
            gd_result.print_result(output_res)

        return {self.OUTPUT: output_res}

    def name(self):
        """
        Returns the algorithm name, used for identifying the algorithm. This
        string should be fixed for the algorithm, and must not be localised.
        The name should be unique within each provider. Names should contain
        lowercase alphanumeric characters only and no spaces or other
        formatting characters.

        The current value does not follow the lowercase convention; it is
        left unchanged because it is the identifier already in use.
        """
        return 'Q_GD'

    def displayName(self):
        """
        Returns the translated algorithm name, which should be used for any
        user-visible display of the algorithm name.
        """
        return self.tr(self.name())

    def group(self):
        """
        Returns the name of the group this algorithm belongs to. This string
        should be localised.
        """
        return self.tr(self.groupId())

    def groupId(self):
        """
        Returns the unique ID of the group this algorithm belongs to. This
        string should be fixed for the algorithm, and must not be localised.
        The group id should be unique within each provider. Group id should
        contain lowercase alphanumeric characters only and no spaces or other
        formatting characters.

        An empty string is returned here.
        """
        return ''

    def tr(self, string):
        """Translate ``string`` in the 'Processing' translation context."""
        return QCoreApplication.translate('Processing', string)

    def createInstance(self):
        """Return a new instance of this algorithm class."""
        return Geo_detectorAlgorithm()
