# Needed to support type hint LandsklimConfiguration
# Convert type hint as str to avoid Class Not Found exceptions
from __future__ import annotations

import cProfile
import csv
import json
from enum import Enum
from typing import List, TYPE_CHECKING, Callable, Optional, Tuple, Union, Dict
import os
import shutil
import time
import numpy as np
import unicodedata
import re

from osgeo import gdal, gdal_array
from qgis import processing
from scipy.stats import pearsonr

from landsklim.lk.regression_model import TooFewSamplesException
from landsklim.lk import environment
from landsklim.lk.logger import Log

try:
    import pandas as pd
    Log.info("pandas version", pd.__version__)
except ImportError:
    Log.critical("pandas not available")
from qgis._core import QgsGeometry, QgsFeature, QgsRasterLayer, QgsRaster, QgsCoordinateTransform, QgsProject, \
    QgsRectangle, QgsPointXY, QgsPoint, QgsCoordinateReferenceSystem

# To avoid cyclic import of LandsklimConfiguration as this import is mainly used for type hinting
if TYPE_CHECKING:
    from landsklim.lk.landsklim_configuration import LandsklimConfiguration

from landsklim.lk.map_layer import RasterLayer, VectorLayer
from landsklim.lk.utils import LandsklimUtils
from landsklim.lk.landsklim_interpolation import LandsklimInterpolation
from landsklim.lk.landsklim_constants import LAYER_TYPE_PATH, LandsklimLayerType, LANDSKLIM_ANALYSIS_PATH, DATASET_RESPONSE_VARIABLE, \
    DATASET_COLUMN_X, DATASET_COLUMN_Y
from landsklim.lk.cache import qgis_project_cache
from landsklim.lk.phase import IPhase
from landsklim.lk.regressor import Regressor
from landsklim.lk.phase_factory import PhaseFactory
from landsklim.lk.phase_multiple_regression import PhaseMultipleRegression
from landsklim.lk.phase_composite import PhaseComposite
from landsklim.processing.algorithm_polygons import PolygonsProcessingAlgorithm
from landsklim.lk.polygons_definition import PolygonsDefinition
import landsklim.lk.cache as lkcache
import traceback


class LandsklimAnalysisMode(Enum):
    """
    Analysis mode of a Landsklim analysis.

    Member values are booleans: ``Global`` is ``False`` and ``Local`` is ``True``.
    """

    #: Represents a global analysis
    Global = False

    #: Represents a local analysis
    Local = True


class LandsklimAnalysis:
    """
    Represents an analysis. Is linked to a configuration and a stations shapefile.
    Predictive variables are analyzed. A regression is computed for each situation (a day, for example)
    and weights are stored.

    :param configuration: Reference to configuration containing this analysis
    :type configuration: LandsklimConfiguration

    :param analysis_mode: If local (true), this analysis will produce a local interpolation.
        If global (false), only one interpolation model is computed on the extent
    :type analysis_mode: LandsklimAnalysisMode

    :param neighborhood_size: Neighborhood size in the case of a local analysis
    :type neighborhood_size: int

    :param stations: Vector layer of points containing the records
    :type stations: VectorLayer

    :param station_situations: List of situations as index of stations shapefile columns
    :type station_situations: List[int]

    :param use_partial_correlations: TODO
    :type use_partial_correlations: bool

    :param phase_1: First phase of the regression model
    :type phase_1: str

    :param phase_2: Second phase of the regression model
    :type phase_2: str

    :param regressors: Explanatory variables used to build the regression model
    :type regressors: List[Regressor]

    :ivar str _name: Name of the analysis

    :ivar LandsklimConfiguration _configuration: Reference to configuration containing this analysis

    :ivar int _neighborhood_size: Neighborhood size in the case of a local analysis

    :ivar VectorLayer _stations: Vector layer of points containing the records

    :ivar List[int] _station_situations: List of situations as index of stations shapefile columns

    :ivar bool use_partial_correlations: Not developed yet.

    :ivar List[LandsklimInterpolation] _interpolations: List of interpolations based on this analysis

    :ivar str _phase_1: First phase of the regression model

    :ivar str _phase_2: Second phase of the regression model

    :ivar List[Regressor] _regressors: Explanatory variables used to build the regression model

    :ivar Dict[int, pd.DataFrame] _datasets: For each situation, dataset used to fit models.
        The rows of the dataset are the points and the columns of the dataset are the regressors values at these points

    :ivar Optional[pd.DataFrame] _regressors_dataset: Dataset of points with matching regressor values

    :ivar Dict[int, RasterLayer] _kriging_layers: Kriging made with this analysis

    :ivar Optional[PolygonsDefinition] _polygons: Polygons definition

    :ivar Optional[pd.DataFrame] _correlation_table: Correlation table between each regressors

    :ivar Optional[Dict[int, Dict[Regressor, Tuple[float, float]]]] _pearson_correlation:
        Contains for each situation the pearson r (with its p-value) between regressors and response variable

    :ivar Dict[int, List[IPhase]] _models:
        Define phases for each situation.
        Each situation is defined by a List of phases

    """

    def __init__(self, name: str, configuration: "LandsklimConfiguration", analysis_mode: LandsklimAnalysisMode, neighborhood_size: int, stations: VectorLayer, station_situations: List[int], use_partial_correlations: bool, phase_1: str, phase_2: str, regressors: List[Regressor], stations_no_data: Optional[float]):
        self._name: str = name
        self._configuration: LandsklimConfiguration = configuration
        self._analysis_mode: LandsklimAnalysisMode = analysis_mode
        self._neighborhood_size: int = neighborhood_size
        self._stations: VectorLayer = stations
        self._station_situations: List[int] = sorted(station_situations)
        self._use_partial_correlations: bool = use_partial_correlations
        self._interpolations: List[LandsklimInterpolation] = []
        self._phase_1: str = phase_1
        self._phase_2: str = phase_2
        self._regressors: List[Regressor] = regressors
        self._datasets: Dict[int, pd.DataFrame] = {}
        self._regressors_dataset: Optional[pd.DataFrame] = None
        self._kriging_layers: Dict[int, RasterLayer] = {}
        # TODO: Store polygones raster under Analysis folder ? (other polygons with other stations ...)
        self._polygons: Optional[PolygonsDefinition] = None

        self._stations_no_data: Optional[float] = stations_no_data

        """TODO: Eventually move data-related attributes to a separated structure for readability purpose"""

        self._correlation_table: Optional[pd.DataFrame] = None
        self._pearson_correlation: Optional[Dict[int, Dict[Regressor, Tuple[float, float]]]] = None

        self._models: Dict[int, List[IPhase]] = {}

        # Once polygons are created, we can initialize self._models with every phases involved in the
        # interpolation process.
        for situation in self._station_situations:
            # Only Kriging phase need layer_path for now but gives info to all phase, only PhaseKriging will process it
            # phase_1_args = {**self._phase_1.get_kwargs(), **{"layer_path": self.get_kriging_layer_path(situation)}}
            # phase_2_args = {**self._phase_2.get_kwargs(), **{"layer_path": self.get_kriging_layer_path(situation)}}
            phase_1: IPhase = PhaseFactory.get_phase(self._phase_1, layer_path=self.get_kriging_layer_path(situation), crs=self.get_crs().authid(), dem=self.get_dem())
            phase_2: IPhase = PhaseFactory.get_phase(self._phase_2, layer_path=self.get_kriging_layer_path(situation), crs=self.get_crs().authid(), dem=self.get_dem())
            self._models[situation] = [phase_1,
                                       phase_2,
                                       PhaseFactory.get_phase(PhaseComposite.class_name(), phase1=phase_1, phase2=phase_2, crs=self.get_crs().authid())]


        self._on_polygons_start: Optional[Callable[[str], None]] = None
        self._on_polygons_end: Optional[Callable[[], None]] = None
        self._on_polygons_failed: Optional[Callable[[LandsklimAnalysis], None]] = None

        self._on_interpolation_computation_started: Optional[Callable[[int], None]] = None
        self._on_interpolation_computation_finished: Optional[Callable[[], None]] = None
        self._on_interpolation_computation_step: Optional[Callable[[str, int, int], None]] = None
        self._on_interpolation_computation_failed: Optional[Callable[[LandsklimAnalysis, LandsklimInterpolation], None]] = None

        self._on_models_computation_started: Optional[Callable[[int], None]] = None
        self._on_models_computation_finished: Optional[Callable[[], None]] = None
        self._on_models_computation_fail: Optional[Callable[[LandsklimAnalysis, str], None]] = None
        self._on_models_computation_step: Optional[Callable[[str, int], None]] = None

    def __getstate__(self):
        state_dict: Dict = self.__dict__.copy()
        state_dict.pop("_on_polygons_start", None)
        state_dict.pop("_on_polygons_end", None)
        state_dict.pop("_on_polygons_failed", None)
        state_dict.pop("_on_interpolation_computation_started", None)
        state_dict.pop("_on_interpolation_computation_finished", None)
        state_dict.pop("_on_interpolation_computation_step", None)
        state_dict.pop("_on_interpolation_computation_failed", None)
        state_dict.pop("_on_models_computation_started", None)
        state_dict.pop("_on_models_computation_finished", None)
        state_dict.pop("_on_models_computation_fail", None)
        state_dict.pop("_on_models_computation_step", None)
        return state_dict

    def __setstate__(self, state):
        """
        Restore the instance from a pickled state.

        Each attribute name goes through ``LandsklimUtils.rename_attr`` with the former class name
        ``LISDQSAnalysis`` and the current one before being set. Callbacks are not part of the
        state, they are all reset to None.

        :param state: Mapping of attribute names to values
        """
        for attribute_name, attribute_value in state.items():
            current_name = LandsklimUtils.rename_attr("LISDQSAnalysis", "LandsklimAnalysis", attribute_name)
            setattr(self, current_name, attribute_value)

        # Callbacks about polygons
        self._on_polygons_start = self._on_polygons_end = self._on_polygons_failed = None

        # Callbacks about interpolations
        self._on_interpolation_computation_started = None
        self._on_interpolation_computation_finished = None
        self._on_interpolation_computation_step = None
        self._on_interpolation_computation_failed = None

        # Callbacks about models
        self._on_models_computation_started = None
        self._on_models_computation_finished = None
        self._on_models_computation_fail = None
        self._on_models_computation_step = None

    def get_name(self) -> str:
        """
        :returns: Name of the analysis
        :rtype: str
        """
        return self._name

    def get_configuration_name(self) -> str:
        """
        :returns: Name of the configuration containing this analysis
        :rtype: str
        """
        return self._configuration.get_name()

    def is_local(self) -> bool:
        """
        :returns: True if the analysis mode is ``LandsklimAnalysisMode.Local``
        :rtype: bool
        """
        return self._analysis_mode == LandsklimAnalysisMode.Local

    def handle_on_polygons_start(self, handle_function: Callable[[str], None]):
        """
        Register the callback stored as ``_on_polygons_start``.

        :param handle_function: Callback taking one string argument
        :type handle_function: Callable[[str], None]
        """
        self._on_polygons_start = handle_function

    def handle_on_polygons_end(self, handle_function: Callable[[str], None]):
        """
        Register the callback stored as ``_on_polygons_end``.

        :param handle_function: Callback to register
        :type handle_function: Callable[[str], None]
        """
        # NOTE(review): the attribute is annotated Optional[Callable[[], None]] in __init__ while this
        # parameter is annotated Callable[[str], None] - confirm the real signature against the call site.
        self._on_polygons_end = handle_function

    def handle_on_polygons_failed(self, handle_function: Callable[[LandsklimAnalysis], None]):
        """
        Register the callback stored as ``_on_polygons_failed``.

        :param handle_function: Callback taking an analysis as argument
        :type handle_function: Callable[[LandsklimAnalysis], None]
        """
        self._on_polygons_failed = handle_function

    """def handle_on_explicative_variables_compute_started(self, handle_function: Callable[[int], None]):
        self._on_explicative_variables_compute_started = handle_function

    def handle_on_regressor_compute_finished(self, handle_function: Callable[[Regressor, int, int, int], None]):
        self._on_regressor_compute_finished = handle_function

    def handle_on_explicative_variables_compute_finished(self, handle_function: Callable[[], None]):
        self._on_explicative_variables_compute_finished = handle_function"""

    def handle_on_interpolation_computation_started(self, handle_function: Callable[[int], None]):
        """
        Register the callback called by ``compute_interpolations`` before the first situation is processed.

        :param handle_function: Callback receiving the number of situations
        :type handle_function: Callable[[int], None]
        """
        self._on_interpolation_computation_started = handle_function

    def handle_on_interpolation_computation_step(self, handle_function: Callable[[str, int, int], None]):
        """
        Register the callback called by ``compute_interpolations`` before each situation is interpolated.

        :param handle_function: Callback receiving the situation name, the 1-based rank of the situation
            and the number of situations
        :type handle_function: Callable[[str, int, int], None]
        """
        self._on_interpolation_computation_step = handle_function

    def handle_on_interpolation_computation_finished(self, handle_function: Callable[[], None]):
        """
        Register the callback stored as ``_on_interpolation_computation_finished``.

        :param handle_function: Callback taking no argument
        :type handle_function: Callable[[], None]
        """
        self._on_interpolation_computation_finished = handle_function

    def handle_on_interpolation_computation_failed(self, handle_function: Callable[[LandsklimAnalysis, LandsklimInterpolation], None]):
        """
        Register the callback stored as ``_on_interpolation_computation_failed``.

        :param handle_function: Callback taking an analysis and an interpolation as arguments
        :type handle_function: Callable[[LandsklimAnalysis, LandsklimInterpolation], None]
        """
        self._on_interpolation_computation_failed = handle_function

    def handle_on_models_computation_started(self, handle_function: Callable[[int], None]):
        """
        Register the callback called at the beginning of ``construct_datasets``.

        :param handle_function: Callback receiving the number of situations
        :type handle_function: Callable[[int], None]
        """
        self._on_models_computation_started = handle_function

    def handle_on_models_computation_step(self, handle_function: Callable[[str, int], None]):
        """
        Register the callback called for each step of the models computation.

        :param handle_function: Callback receiving a message describing the step and the step index
        :type handle_function: Callable[[str, int], None]
        """
        self._on_models_computation_step = handle_function

    def handle_on_models_computation_finished(self, handle_function: Callable[[], None]):
        """
        Register the callback called by ``callback_computation_finished``.

        :param handle_function: Callback taking no argument
        :type handle_function: Callable[[], None]
        """
        self._on_models_computation_finished = handle_function

    def handle_on_models_computation_fail(self, handle_function: Callable[[LandsklimAnalysis, str], None]):
        """
        Register the callback called by ``callback_computation_failed``.

        :param handle_function: Callback receiving this analysis and a string describing the invalid fields
        :type handle_function: Callable[[LandsklimAnalysis, str], None]
        """
        self._on_models_computation_fail = handle_function

    def get_dem(self) -> RasterLayer:
        """
        :returns: The DEM of the project the configuration of this analysis belongs to
        :rtype: RasterLayer
        """
        return self._configuration.get_project().get_dem()

    def slugify(self, value: str):
        """
        Make a string path-compatible
        From https://stackoverflow.com/a/295466
        """
        value = str(value)
        value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
        value = re.sub(r'[^\w\s-]', '', value)  # , value.lower()
        return re.sub(r'[-\s]+', '-', value).strip('-_')

    def slugify_configuration(self) -> str:
        """
        :returns: Path-compatible version of the string representation of the configuration
        :rtype: str
        """
        return self.slugify(self._configuration.to_string())

    def slugify_analysis(self) -> str:
        """
        :returns: Path-compatible version of the string representation of this analysis
        :rtype: str
        """
        return self.slugify(self.to_string())

    def get_path(self) -> str:
        """
        Get the folder of this analysis, located under the 'Landsklim' folder of the QGIS project home path.

        :returns: Path of the analysis folder
        :rtype: str
        """
        analysis_folder: str = LANDSKLIM_ANALYSIS_PATH.format(self.slugify_configuration(), self.slugify_analysis())
        project_home: str = qgis_project_cache().homePath()
        return os.path.join(project_home, 'Landsklim', analysis_folder)

    def get_relative_path(self) -> str:
        """
        Get the folder of this analysis without the QGIS project home path prefix.

        :returns: Path of the analysis folder, starting at the 'Landsklim' folder
        :rtype: str
        """
        analysis_folder: str = LANDSKLIM_ANALYSIS_PATH.format(self.slugify_configuration(), self.slugify_analysis())
        return os.path.join('Landsklim', analysis_folder)

    def get_json_polygons_path(self, relative: bool) -> str:
        """
        Returns the path where polygons data are stored

        :param relative: Returns relative path from QGIS project home path
        :type relative: bool
        """
        path: str = self.get_relative_path() if relative else self.get_path()
        return os.path.join(path, "polygons.json")

    def get_json_models_path(self, relative: bool) -> str:
        """
        Returns the path where models data are stored

        :param relative: Returns relative path from QGIS project home path
        :type relative: bool
        """
        path: str = self.get_relative_path() if relative else self.get_path()
        return os.path.join(path, "models.json")

    def get_polygons_layer_source_name(self) -> str:
        """
        Get the polygons layer filename
        """
        return "polygons_{0}.tif".format(self._neighborhood_size)

    def get_polygons_layer_displayed_name(self) -> str:
        """
        Get the displayed polygons layer name
        """
        return "{0}_{1}".format(self.get_name(), self.get_polygons_layer_source_name())

    def get_neighborhood_size(self) -> int:
        """
        :returns: Neighborhood size in the case of a local analysis
        :rtype: int
        """
        return self._neighborhood_size

    def get_polygons_raster_path(self) -> str:
        """
        Get the path of the polygons raster, located under the 'Landsklim' folder of the QGIS project home path

        :returns: Path of the polygons raster file
        :rtype: str
        """
        project_home_path: str = qgis_project_cache().homePath()
        polygons_folder: str = LAYER_TYPE_PATH[LandsklimLayerType.Polygons].format(self.slugify_configuration(), self.slugify_analysis())
        raster_filename: str = self.get_polygons_layer_source_name()
        return os.path.join(project_home_path, 'Landsklim', polygons_folder, raster_filename)

    def get_station_data(self, situation: int, station_index: int):
        """
        Extract, from the dataset of a situation, the data of a single station

        :param situation: Situation
        :type situation: int

        :param station_index: Position of the station in the dataset of the situation (in [0, n])
        :type station_index: int

        :returns: One-row features dataset (without the response variable column) and
            the response variable column of this row
        :rtype: Tuple[pd.DataFrame, pd.Series]
        """
        # Selecting with a list keeps a one-row DataFrame instead of a Series
        station_row = self._datasets[situation].iloc[[station_index]]
        features = station_row.drop(columns=DATASET_RESPONSE_VARIABLE)
        response = station_row[DATASET_RESPONSE_VARIABLE]
        return features, response

    def phase_to_situation(self, phase: IPhase) -> int:
        """
        Get the situation number from an IPhase object

        :param phase: Phase
        :type phase: IPhase

        :returns: Situation corresponding to the phase (-1 if not found)
        :rtype: int
        """
        situation_res: int = -1
        for situation in self._station_situations:
            if phase in self.get_phases(situation):
                situation_res = situation
        return situation_res

    def regressor_from_name(self, regressor_layer_name: str) -> Optional["Regressor"]:
        """
        Get a regressor from its layer name. Layer name are expected to be unique

        :param regressor_layer_name: Layer name of the expected regressor
        :type regressor_layer_name: str

        :returns: The regressor object corresponding to the name
        :rtype: Optional["Regressor"]
        """
        matching_regressor: Optional["Regressor"] = None
        for regressor in self.get_regressors():  # type: "Regressor"
            if regressor.layer_name() == regressor_layer_name:
                matching_regressor = regressor
        return matching_regressor

    def get_phases(self, situation: int) -> List[IPhase]:
        """
        :param situation: Situation
        :type situation: int

        :returns: Phases of the situation (a KeyError is raised for an unknown situation)
        :rtype: List[IPhase]
        """
        return self._models[situation]

    def construct_models(self) -> None:
        """
        Build models for each situation

        Every phase first receives the polygons definition and its kriging estimation neighborhood
        (half the neighborhood size for a local analysis, half the number of complete rows of the
        situation dataset otherwise). The analysis folder is created, correlations are computed, then
        for each situation:

        - the first phase is fitted on the dataset of the situation,
        - the second phase is fitted on the rows without NaN of this dataset, where the response
          variable is replaced by the residuals of the first phase,
        - the third (composite) phase is fitted on the dataset of the situation.

        If a phase raises TooFewSamplesException, the failure callback is called with an empty string and
        remaining situations are skipped. Components are serialized and the end of the computation
        is notified only when every situation succeeded (the failure callback notifies the end itself).
        """
        # First step, tell each phases what the polygons are
        for situation in self.get_station_situations():  # type: int
            # The size is the same for every phase of a situation: compute it (and its dropna) only once
            dataset_size: int = self._neighborhood_size if self.is_local() else len(self._datasets[situation].dropna())
            for phase in self.get_phases(situation):  # type: IPhase
                phase.set_polygons(self._polygons)
                phase.update_parameters(kriging_estimation_neighborhood=dataset_size/2)

        # exist_ok avoids the race between the existence check and the creation
        os.makedirs(self.get_path(), exist_ok=True)

        self.compute_correlations()

        failed: bool = False
        for i, situation in enumerate(self._station_situations):
            self.callback_computation_step(situation, i)
            try:
                phases: List[IPhase] = self._models[situation]
                situation_dataset: pd.DataFrame = self._datasets[situation]
                phases[0].construct_model(situation_dataset)
                residuals: np.ndarray = phases[0].get_residuals_for_phase_2()
                # Second phase learns the residuals of the first one, on complete rows only
                dataframe_residuals: pd.DataFrame = situation_dataset.copy().dropna()
                dataframe_residuals[DATASET_RESPONSE_VARIABLE] = residuals
                phases[1].construct_model(dataframe_residuals)
                phases[2].construct_model(situation_dataset)
            except TooFewSamplesException:
                failed = True
                self.callback_computation_failed("")
                break

        if not failed:
            self.serialize_components()
            self.callback_computation_finished()

    def callback_computation_step(self, situation: int, i: int):
        if self._on_models_computation_step is not None:
            self._on_models_computation_step("Situation : {0} ({1}/{2})".format(self.get_situation_name(situation), i + 1, len(self._station_situations)), i)

    def callback_computation_failed(self, invalid_fields: str):
        if self._on_models_computation_fail is not None:
            self._on_models_computation_fail(self, invalid_fields)
        self.callback_computation_finished()

    def callback_computation_finished(self):
        if self._on_models_computation_finished is not None:
            self._on_models_computation_finished()

    def get_variables_dataset_from_points(self, points_layer: VectorLayer, response_variable_index: Optional[int], drop_na: Optional[bool] = True) -> pd.DataFrame:
        """
        Get predictors dataset for each points

        Each row holds the coordinates of a point (transformed to the CRS of the DEM), the value of
        each regressor raster (band 1) at this position and, if requested, the response variable.

        :param points_layer: Source points layer
        :type points_layer: VectorLayer

        :param response_variable_index: if not None, add the feature at the index specified by response_variable from VectorLayer source as the response variable
        :type response_variable_index: Optional[int]

        :param drop_na: If True, rows with NaN values are dropped from the returned dataset
            and the dataset is converted to float
        :type drop_na: Optional[bool]

        :returns: Dataset. It has no row, but still every column, when the points layer has no feature
        :rtype: pd.DataFrame
        """
        variable_names: List[str] = [DATASET_COLUMN_X, DATASET_COLUMN_Y]
        variable_names.extend(regressor.layer_name() for regressor in self._regressors)
        if response_variable_index is not None:
            variable_names.append(DATASET_RESPONSE_VARIABLE)

        dem: RasterLayer = self._configuration.get_project().get_dem()
        # Transform from points layer to regressor layers
        transform = QgsCoordinateTransform(points_layer.qgis_layer().crs(),
                                           dem.qgis_layer().crs(),
                                           qgis_project_cache())

        # NOTE(review): regressor values are compared with the no-data value of the DEM, whereas
        # get_variables_dataset uses the no-data value of each regressor raster - confirm they always match.
        dem_no_data = dem.no_data()

        # Raster layers do not depend on the point: resolve them once instead of once per feature
        regressor_layers: List[QgsRasterLayer] = [regressor.get_raster_layer().qgis_layer() for regressor in self._regressors]

        predictors = []
        # For each point
        for feat in points_layer.qgis_layer().getFeatures():  # type: QgsFeature
            geometry: QgsGeometry = feat.geometry()
            # Transformed once per point and reused for every regressor
            geographic_position: QgsPointXY = transform.transform(geometry.asPoint())

            station_predictors: List[float] = [geographic_position.x(), geographic_position.y()]
            # For each explicative variable
            for layer in regressor_layers:
                data = layer.dataProvider().identify(geographic_position, QgsRaster.IdentifyFormatValue)
                regressor_value = np.nan
                if data.isValid():
                    results = data.results()
                    # Band 1 holds the value; a missing or null result is kept as NaN
                    if 1 in results and results[1] is not None and results[1] != dem_no_data:
                        regressor_value = results[1]
                station_predictors.append(regressor_value)
            if response_variable_index is not None:
                response_variable = feat.attributes()[response_variable_index]
                # TODO: Manage non trivial data type (empty cell, str)
                station_predictors.append(response_variable if response_variable != self._stations_no_data else np.nan)
            predictors.append(station_predictors)

        # The reshape is a no-op when there are points and gives a (0, n_columns) array when there are none,
        # a case where the DataFrame constructor would otherwise reject the shape of the data
        values: np.ndarray = np.array(predictors).reshape(-1, len(variable_names))
        dataset: pd.DataFrame = pd.DataFrame(values, columns=variable_names)

        if drop_na:
            dataset = dataset.dropna().astype(float)
        return dataset

    def __check_nan_fields(self) -> List[str]:
        """
        Get the columns of the dataset where all values are np.nan

        :returns: List of columns where all values are np.nan
        :rtype: List[str]
        """
        dataset = self.get_variables_dataset_from_points(self._stations, response_variable_index=None, drop_na=False)
        null_columns = np.all(np.isnan(dataset), axis=0)
        return null_columns[null_columns].index.values

    def construct_datasets(self) -> None:
        """
        Build dataset for each situation before creating the regression model
        """
        if self._on_models_computation_started is not None:
            self._on_models_computation_started(len(self._station_situations))
        if self._on_models_computation_step is not None:
            self._on_models_computation_step("Build dataset", 0)
        incorrect_features: List[str] = self.__check_nan_fields()
        if len(incorrect_features) == 0:
            self._datasets = {}
            self._regressors_dataset = self.get_variables_dataset_from_points(self._stations, response_variable_index=None).drop([DATASET_COLUMN_X, DATASET_COLUMN_Y], axis=1)
            # For each day
            for situation in self._station_situations:
                # For each station
                self._datasets[situation] = self.get_variables_dataset_from_points(self._stations, situation)
        else:
            self.callback_computation_failed(", ".join(incorrect_features))

    def get_variables_dataset(self, extent: QgsRectangle) -> pd.DataFrame:
        """
        Get predictors dataset for each pixel from regressors
        Take from 1 to 10 sec to construct the dataset

        Rows follow the flattened order of the clipped rasters: pixels of the first row, then
        pixels of the second row, and so on. The elapsed time is printed.

        :param extent: Extent used to clip the DEM and the regressors
        :type extent: QgsRectangle

        :returns: Dataset with the coordinates columns followed by one column per regressor,
            where no-data values of each regressor are replaced by NaN
        :rtype: pd.DataFrame
        """
        time_start = time.perf_counter()

        dem_layer: RasterLayer = self.get_dem()
        qgis_dem: QgsRasterLayer = dem_layer.qgis_layer()
        dem_array: np.ndarray = dem_layer.clip(extent)
        rows_count: int = dem_array.shape[0]
        columns_count: int = dem_array.shape[1]

        # Coordinates of each column and of each row (from the top of the extent to its bottom)
        # TODO: Geographic coordinates ?
        x_values: np.ndarray = np.linspace(extent.xMinimum(), extent.xMaximum()-qgis_dem.rasterUnitsPerPixelX(), columns_count)
        y_values: np.ndarray = np.linspace(extent.yMaximum(), extent.yMinimum()+qgis_dem.rasterUnitsPerPixelY(), rows_count)

        # x changes at every pixel while y changes at every row
        dataset: pd.DataFrame = pd.DataFrame({DATASET_COLUMN_X: np.tile(x_values, rows_count),
                                              DATASET_COLUMN_Y: np.repeat(y_values, columns_count)})

        # Add predictors
        for regressor in self._regressors:
            regressor_layer: RasterLayer = regressor.get_raster_layer()
            no_data: Union[int, float] = regressor_layer.no_data()
            regressor_values: np.ndarray = regressor_layer.clip(extent).ravel()
            regressor_values[regressor_values == no_data] = np.nan
            dataset[regressor.layer_name()] = regressor_values

        # layer.clip(extent) take 85% of time
        elapsed: float = time.perf_counter() - time_start
        print("[get_variables_dataset] Took {0:.3f}s".format(elapsed))
        return dataset

    def get_valid_stations(self, situation: int) -> np.ndarray:
        """
        Return indices in [0, n[ of valid stations : stations not positioned on the NO_DATA
        and where measure is not NO_DATA

        The indices are the index labels left in the dataset of the stations once rows
        containing NaN have been dropped.

        :param situation: Situation used as response variable
        :type situation: int

        :returns: NumPy array with indices of stations not located on the NO_DATA
        :rtype: np.ndarray
        """
        return self.get_variables_dataset_from_points(self._stations, situation).index.values

    def get_stations_names(self, attribute: Optional[str] = None) -> List[str]:
        """
        Get stations names (identifiers) used for this analysis.
        Stations names are got from a field if given, else it's fid.
        :returns: List of stations names
        :rtype: List[str]
        """
        station_names: List[str]
        if attribute is not None:
            field_names: List[str] = [f.name() for f in self._stations.qgis_layer().fields()]
            id_column = field_names.index(attribute)
            station_names = list(map(str, [f.attributes()[id_column] for f in self._stations.qgis_layer().getFeatures()]))
        else:
            station_names = list(map(str, [str(f.id()) for f in self._stations.qgis_layer().getFeatures()]))
        return station_names

    def get_stations_position(self) -> List[QgsGeometry]:
        """
        Get the geographic position of each station

        :returns: List of the geometries of the features of the stations layer
        :rtype: List[QgsGeometry]
        """
        return [f.geometry() for f in self._stations.qgis_layer().getFeatures()]

    def get_stations_count(self) -> int:
        """
        Get the number of stations

        :returns: The number of features of the stations layer
        :rtype: int
        """
        return self._stations.qgis_layer().featureCount()

    def get_crs(self) -> QgsCoordinateReferenceSystem:
        """
        Get the coordinate reference system of this analysis, which is the one of the DEM

        :returns: The coordinate reference system of DEM used on this analysis
        :rtype: QgsCoordinateReferenceSystem
        """
        return self.get_dem().qgis_layer().crs()
        # Alternative kept for reference: return self._stations.qgis_layer().crs()

    def get_situation_name(self, situation: int) -> str:
        """
        Get name of a specific situation where an analysis was made

        :param situation: Index of the field of the stations layer
        :type situation: int

        :returns: Name of the field of the stations layer at this index
        :rtype: str
        """
        qgis_layer = self._stations.qgis_layer()
        return qgis_layer.fields()[situation].name()

    def get_situations_names(self) -> List[str]:
        """
        Get name of situations where an analysis must be made
        """
        qgis_layer = self._stations.qgis_layer()
        names: List[str] = []
        if qgis_layer is not None:
            for situation in self.get_station_situations():
                names.append(qgis_layer.fields()[situation].name())
        return names

    def get_station_situations(self) -> List[int]:
        """
        Get situations where an analysis must be made

        :returns: Situations, as indices of the fields of the stations layer
        :rtype: List[int]
        """
        return self._station_situations

    def get_situation_vector_field_name(self, situation: int) -> str:
        return "int_{0}".format(self.get_situation_name(situation))

    def get_kriging_layer(self, situation: int) -> Optional[RasterLayer]:
        return self._kriging_layers[situation] if situation in self._kriging_layers else None

    def set_kriging_layer(self, layer: RasterLayer, situation: int):
        """
        Register the kriging layer of a situation, replacing the one already registered if any

        :param layer: Kriging layer
        :type layer: RasterLayer

        :param situation: Situation
        :type situation: int
        """
        self._kriging_layers[situation] = layer

    def get_polygons(self) -> Optional[PolygonsDefinition]:
        """
        :returns: Polygons definition of this analysis (the attribute is initialised to None by the constructor)
        :rtype: Optional[PolygonsDefinition]
        """
        return self._polygons

    def get_kriging_layer_path(self, situation: int) -> str:
        return os.path.join(self.get_path(), "kriging-{0}.tif".format(self.get_situation_name(situation)))

    def get_correlation_table(self) -> Optional[pd.DataFrame]:
        """
        :returns: Correlation table between each regressors (the attribute is initialised to None by the constructor)
        :rtype: Optional[pd.DataFrame]
        """
        return self._correlation_table

    def get_pearson_correlations(self, situation: int) -> Dict[Regressor, Tuple[float, float]]:
        """
        Get the pearson r (with its p-value) between regressors and response variable for a situation

        :param situation: Situation
        :type situation: int

        :returns: Pearson r and p-value of each regressor
        :rtype: Dict[Regressor, Tuple[float, float]]
        """
        # The attribute is initialised to None by the constructor: indexing fails until it is filled
        return self._pearson_correlation[situation]

    def get_regression_coefficients(self, situation: int) -> Dict[Regressor, List[float]]:
        """
        Get coefficients of regressors used on the multiple regression models

        The multiple regression phase is looked for among the phases of the situation (the last one
        is used if there are several). Regressors whose coefficients are all NaN are left out.

        :param situation: Get the regression of this situation
        :type situation: int

        :returns: Dictionary mapping regressor with its coefficient on the multiple regression, for each model.
            Each value is a column of the coefficients array of the phase.
            The dictionary is empty if the situation has no multiple regression phase
        :rtype: Dict[Regressor, List[float]]
        """
        # TODO: Untestable for now
        regression_phases: List[IPhase] = [
            phase
            for phase in self.get_phases(situation)
            if phase.class_name() == PhaseMultipleRegression.class_name()
        ]
        if not regression_phases:
            return {}

        regression_phase: PhaseMultipleRegression = regression_phases[-1]
        regressor_names: List[str] = regression_phase.get_regressors_name()
        regression_coefs: np.ndarray = regression_phase.get_coefficients_array()

        # Once transposed, the coefficients array is iterated regressor by regressor
        return {
            self.layer_name_to_regressor(regressor_name): regressor_coefficients
            for regressor_name, regressor_coefficients in zip(regressor_names, regression_coefs.T)
            if not np.isnan(regressor_coefficients).all()
        }

    def layer_name_to_regressor(self, layer_name: str) -> Optional[Regressor]:
        """
        Look up the regressor of the analysis whose layer name is the one given

        :param layer_name: Layer name
        :type layer_name: str

        :returns: Regressor object matching layer name (the last one if several match).
                  None if layer name doesn't match a regressor.
        :rtype: Optional[Regressor]
        """
        matches: List[Regressor] = [
            candidate for candidate in self._regressors
            if candidate.layer_name() == layer_name
        ]
        return matches[-1] if matches else None

    def compute_interpolations(self, interpolation: LandsklimInterpolation):
        """
        Compute interpolations for each situation from LandsklimInterpolation object specified

        The interpolation cache is enabled for the duration of the computation and is always disabled
        afterwards, even if a callback raises. If the interpolation of a situation raises, the
        composite phases cache is deleted, the traceback is printed and the 'failed' callback is called;
        remaining situations are not processed. The 'finished' callback is called in both cases.

        :param interpolation: Interpolation to compute
        :type interpolation: LandsklimInterpolation
        """
        situations: List[int] = self.get_station_situations()
        total = len(situations)
        if self._on_interpolation_computation_started is not None:
            self._on_interpolation_computation_started(total)

        # Enable cache aiming to keep trace of polygons of each predicted points, as the extent for each situation is the same
        lkcache.enable_interpolation_cache()
        try:
            for i, situation in enumerate(situations):  # type: int
                if self._on_interpolation_computation_step is not None:
                    # Steps are reported 1-based
                    self._on_interpolation_computation_step(self.get_situation_name(situation), i+1, total)
                interpolation.make_interpolation(situation)
        except Exception:
            self.delete_phases_composite_cache()
            traceback.print_exc()
            if self._on_interpolation_computation_failed is not None:
                self._on_interpolation_computation_failed(self, interpolation)
        finally:
            # Never leave the cache enabled, whatever happened above
            lkcache.disable_interpolation_cache()

        if self._on_interpolation_computation_finished is not None:
            self._on_interpolation_computation_finished()

    def compute_correlations(self):
        """
        Compute the correlations of the analysis

        - ``self._correlation_table`` receives the Pearson correlation between the columns of the
          regressors dataset (rows containing missing values are ignored).
        - ``self._pearson_correlation`` receives, for each situation and each regressor, the tuple
          (correlation coefficient, p-value) between the regressor and the response variable.

        If the regressors dataset has no complete row, the correlation table is left empty and
        ``self._pearson_correlation`` is not modified.
        """
        # Compute correlation between regressors
        self._correlation_table = pd.DataFrame()
        dataset: pd.DataFrame = self._regressors_dataset.dropna()
        if len(dataset) == 0:
            return

        self._correlation_table = dataset.corr(method="pearson")

        # Compute corr. coef and p-value between regressors and response variable for each situation
        self._pearson_correlation = {}
        for situation in self._station_situations:
            correlations: Dict[Regressor, Tuple[float, float]] = {}
            self._pearson_correlation[situation] = correlations
            # Rows with missing values are dropped once per situation, not once per regressor
            situation_dataset: pd.DataFrame = self._datasets[situation].dropna()
            y = situation_dataset[DATASET_RESPONSE_VARIABLE].values
            for regressor in self._regressors:
                x = situation_dataset[regressor.layer_name()].values
                # Single call: the result holds both the coefficient and the p-value
                result = pearsonr(x, y)
                correlations[regressor] = (result[0], result[1])

    """def create_explicative_variables(self):
        # Create explicative variables defined on self._regressors

        total = len(self._regressors)
        i = 0
        if self._on_explicative_variables_compute_started is not None:
            self._on_explicative_variables_compute_started(total)
        for regressor in self._regressors:
            if self._on_regressor_compute_finished is not None:
                self._on_regressor_compute_finished(regressor, regressor.get_windows(), i+1, total)
            # Get source raster according to regressor "policy". Default regressors uses DEM, but RegressorRasterVariable use source variables
            source_raster: QgsRasterLayer = self._configuration.get_project().get_dem().qgis_layer() if regressor.specific_source_raster() is None else regressor.specific_source_raster().qgis_layer()
            regressor.compute(source_raster)
            i = i + 1

        if self._on_explicative_variables_compute_finished is not None:
            self._on_explicative_variables_compute_finished()"""

    def serialize_components(self):
        """
        Write the components of the analysis in their own JSON files

        - JSON serialization of polygons
        - JSON serialization of models

        The directory of the polygons file is created if needed. Time spent writing both files is printed.
        """
        from landsklim.serialization.json_encoder import LandsklimEncoder

        polygons_path = self.get_json_polygons_path(relative=False)
        os.makedirs(os.path.dirname(polygons_path), exist_ok=True)

        started_at = time.perf_counter()
        with open(polygons_path, "w") as polygons_file:
            json.dump(self._polygons, fp=polygons_file, cls=LandsklimEncoder, indent=2)

        models_path = self.get_json_models_path(relative=False)
        with open(models_path, "w") as models_file:
            json.dump(self._models, fp=models_file, cls=LandsklimEncoder, indent=2)

        elapsed = time.perf_counter() - started_at
        print("[Landsklim][JSON][Encoder][Components] {0:.3f}s".format(elapsed))

    def create_polygons(self):
        """
        Build the polygons definition of the analysis and store it in ``self._polygons``

        - Local analysis: polygons are computed by the "landsklim:polygons" processing algorithm,
          which also writes the polygons raster. Any exception raised while doing so is printed and
          reported through the 'failed' callback instead of being propagated.
        - Global analysis: a single polygon containing every station is created.

        The 'start' callback is called first; then either the 'end' or the 'failed' callback is called.
        """
        if self._on_polygons_start is not None:
            self._on_polygons_start("Polygon calculation ...")

        succeeded: bool = True
        if self._analysis_mode == LandsklimAnalysisMode.Local:
            raster_path: str = self.get_polygons_raster_path()
            # One field per situation
            situation_fields: List[str] = [
                self.get_situation_name(situation) for situation in self._station_situations
            ]

            algorithm_parameters = {
                PolygonsProcessingAlgorithm.INPUT_N: self._neighborhood_size,
                PolygonsProcessingAlgorithm.INPUT_MASK: self.get_dem().qgis_layer(),
                PolygonsProcessingAlgorithm.INPUT_POINTS_SHAPEFILE: self._stations.qgis_layer(),
                PolygonsProcessingAlgorithm.INPUT_POINTS_FIELDS: situation_fields,
                PolygonsProcessingAlgorithm.INPUT_NO_DATA: self._stations_no_data,
                PolygonsProcessingAlgorithm.INPUT_RETURN_METADATA: True,
                PolygonsProcessingAlgorithm.OUTPUT_RASTER: raster_path
            }

            # The output directory must exist before the algorithm runs
            raster_directory = os.path.dirname(raster_path)
            if not os.path.exists(raster_directory):
                os.makedirs(raster_directory)

            try:
                outputs = processing.run("landsklim:polygons", algorithm_parameters)
                definition: List[List[int]] = outputs[PolygonsProcessingAlgorithm.OUTPUT_POLYGONS_DEFINITION]
                connectedness: List[List[int]] = outputs[PolygonsProcessingAlgorithm.OUTPUT_POLYGONS_CONNECTED_SPACE]
                centroids: np.ndarray = outputs[PolygonsProcessingAlgorithm.OUTPUT_POLYGONS_CENTER_OF_GRAVITY]
                self._polygons = PolygonsDefinition(definition, connectedness, centroids)
                self._polygons.set_polygon_path(self.get_polygons_raster_path())
            except Exception as error:
                succeeded = False
                traceback.print_exc()
                print("[Exception]", error)

        if self._analysis_mode == LandsklimAnalysisMode.Global:
            stations_count = self.get_stations_count()
            # Single row listing every station index: 0 .. stations_count - 1
            all_stations = np.linspace(0, stations_count - 1, stations_count, dtype=int).reshape(1, -1)
            self._polygons = PolygonsDefinition(all_stations, [[]], None)

        if succeeded:
            if self._on_polygons_end is not None:
                self._on_polygons_end()
        elif self._on_polygons_failed is not None:
            self._on_polygons_failed(self)

    def add_interpolation(self, interpolation: LandsklimInterpolation):
        """
        Append an interpolation to the interpolations of the analysis

        :param interpolation: Interpolation to add
        :type interpolation: LandsklimInterpolation
        """
        interpolations = self._interpolations
        interpolations.append(interpolation)

    def get_interpolations(self) -> List[LandsklimInterpolation]:
        """
        Return the interpolations of the analysis

        :returns: List of interpolations stored on the analysis (not a copy)
        :rtype: List[LandsklimInterpolation]
        """
        interpolations: List[LandsklimInterpolation] = self._interpolations
        return interpolations

    def get_regressors(self) -> List[Regressor]:
        """
        Return the regressors of the analysis

        :returns: List of regressors stored on the analysis (not a copy)
        :rtype: List[Regressor]
        """
        regressors: List[Regressor] = self._regressors
        return regressors

    def polygons_count(self) -> int:
        """
        Count the polygons of the analysis, as reported by its polygons definition

        :returns: Returns the number of polygons in the analysis. If it's a global analysis, there is only 1 polygon.
        :rtype: int
        """
        polygons = self._polygons
        return polygons.polygons_count()

    def to_string(self) -> str:
        """
        Textual representation of the analysis

        :returns: Name of the analysis
        :rtype: str
        """
        name: str = self.get_name()
        return name

    def get_dataset_regression(self) -> pd.DataFrame:
        """
        Build the table of regression coefficients, with one row per (situation, polygon)

        :returns: DataFrame indexed by ('situation', 'polygon'), polygons being numbered from 1,
                  with one column per regressor layer name. Regressors without coefficients for a
                  situation are filled with NaN.
        :rtype: pd.DataFrame
        """
        polygons_count = self.polygons_count()
        coefficients_by_regressor: Dict[Regressor, List[float]] = {
            regressor: [] for regressor in self._regressors
        }
        situation_index = []
        polygon_index = []

        for situation in self._station_situations:
            situation_coefficients: Dict[Regressor, List[float]] = self.get_regression_coefficients(situation)
            for regressor in self._regressors:
                if regressor in situation_coefficients:
                    values = situation_coefficients[regressor]
                else:
                    # Regressor has no coefficients on this situation
                    values = [np.nan] * polygons_count
                coefficients_by_regressor[regressor].extend(values)

            polygon_index.extend(np.arange(1, polygons_count + 1))
            situation_index.extend([self.get_situation_name(situation)] * polygons_count)

        columns = {'situation': situation_index, 'polygon': polygon_index}
        for regressor, values in coefficients_by_regressor.items():
            columns[regressor.layer_name()] = values

        return pd.DataFrame(columns).set_index(['situation', 'polygon'])

    def get_dataset_coefficients(self) -> pd.DataFrame:
        """
        Build the table of Pearson correlation coefficients of the regressors, with one row per
        (situation, polygon)

        Coefficients are read from each model of the multiple regression phase of each situation.
        A regressor for which a model reports no correlation gets NaN on that row, so every column
        keeps one value per model.

        :returns: DataFrame indexed by ('situation', 'polygon'), polygons being numbered from 1,
                  with one column per regressor layer name.
        :rtype: pd.DataFrame

        :raises IndexError: If a situation has no multiple regression phase
        :raises KeyError: If a model reports a correlation for a name which is not a regressor layer name
        """
        indexes_situations = []
        indexes_polygons = []

        polygons_count = self.polygons_count()
        coefficients: Dict[str, List[float]] = {regressor.layer_name(): [] for regressor in self._regressors}

        for situation in self._station_situations:
            multiple_regression: PhaseMultipleRegression = [phase for phase in self.get_phases(situation) if phase.class_name() == PhaseMultipleRegression.class_name()][0]
            for polygon_model in multiple_regression.get_model():
                correlations = polygon_model.get_pearson_correlations()
                # Start from NaN for every regressor, so a regressor missing from this model
                # doesn't shift the values of the following rows
                row: Dict[str, float] = dict.fromkeys(coefficients, np.nan)
                for reg_name, (corr, _) in correlations.items():
                    if reg_name not in row:
                        raise KeyError(reg_name)
                    row[reg_name] = corr
                for reg_name, corr in row.items():
                    coefficients[reg_name].append(corr)

            indexes_polygons.extend(np.arange(polygons_count)+1)
            indexes_situations.extend([self.get_situation_name(situation)] * polygons_count)

        coefficients['situation'] = indexes_situations
        coefficients['polygon'] = indexes_polygons

        dataframe = pd.DataFrame(coefficients)
        dataframe.set_index(['situation', 'polygon'], inplace=True)
        return dataframe

    def get_dataset_metrics(self) -> pd.DataFrame:
        """
        Build the table of metrics of the analysis, with one row per situation

        For each situation: r2 and adjusted r2 of the first three phases, residuals autocorrelation of
        the first and third phases, residuals standard deviation and RMSE of the multiple regression phase.
        Each situation is expected to have at least three phases, one of them being a multiple regression.

        :returns: DataFrame indexed by 'situation', with one numeric column per metric.
                  Empty (columns only) if the analysis has no situation.
        :rtype: pd.DataFrame

        :raises IndexError: If a situation has less than three phases or no multiple regression phase
        """
        header = ["situation", "r2_phase1", "r2_phase2", "r2_phase3", "ar2_phase1", "ar2_phase2", "ar2_phase3", "autocorrelation_phase1", "autocorrelation_phase3", "std_regression", "rmse_regression"]
        metrics = []
        for situation in self._station_situations:
            phases: List[IPhase] = self.get_phases(situation)
            r2p1, ar2p1 = phases[0].r2(), phases[0].get_adjusted_r2()
            r2p2, ar2p2 = phases[1].r2(), phases[1].get_adjusted_r2()
            r2p3, ar2p3 = phases[2].r2(), phases[2].get_adjusted_r2()
            autocor_p1 = phases[0].get_residuals_autocorrelation()
            autocor_p3 = phases[2].get_residuals_autocorrelation()
            multiple_regression: PhaseMultipleRegression = [phase for phase in phases if phase.class_name() == PhaseMultipleRegression.class_name()][0]
            std_reg = multiple_regression.get_residuals_standard_deviation()
            rmse_reg = multiple_regression.get_rmse()
            metrics.append([self.get_situation_name(situation), r2p1, r2p2, r2p3, ar2p1, ar2p2, ar2p3, autocor_p1, autocor_p3, std_reg, rmse_reg])

        # Build the frame straight from the rows: going through np.array() would turn every value
        # (situation name and metrics alike) into a string, and fails when there is no row at all
        df: pd.DataFrame = pd.DataFrame(metrics, columns=header)
        df.set_index("situation", inplace=True)
        # Guarantee numeric columns, whatever the type of the values returned by the phases
        for col in header[1:]:
            df[col] = pd.to_numeric(df[col])
        return df

    def delete_phases_composite_cache(self):
        """
        Remove the cache of every composite phase, for all situations of the analysis
        """
        for situation in self._station_situations:
            for phase in self.get_phases(situation):
                # Only composite phases are concerned
                if phase.class_name() != PhaseComposite.class_name():
                    continue
                phase.remove_cache()

    def delete_interpolation(self, interpolation: LandsklimInterpolation) -> str:
        """
        Delete an interpolation

        The interpolation is removed from the analysis, its layers are removed from the QGIS project
        and their source files are deleted. The interpolation directory is deleted too if it ends up empty.

        :param interpolation: Interpolation to delete
        :type interpolation: LandsklimInterpolation

        :returns: Deleted interpolation path
        :rtype: str

        :raises ValueError: If the interpolation is not part of the analysis
        """
        interpolation_path = interpolation.get_path()
        self._interpolations.remove(interpolation)
        for itype in interpolation.get_interpolation_types():
            for layer in interpolation.get_layers(itype).values():
                qgis_layer = layer.qgis_layer()
                # Read the source before the layer leaves the project: the project may delete
                # the layer object on removal, so it must not be used afterwards
                source = qgis_layer.source()
                QgsProject.instance().removeMapLayer(qgis_layer)
                LandsklimUtils.free_path(source)
                if os.path.exists(source):
                    os.remove(source)

        if os.path.isdir(interpolation_path) and not os.listdir(interpolation_path):  # If the directory is empty, delete it
            shutil.rmtree(interpolation_path)

        return interpolation_path

    def delete(self) -> str:
        """
        Delete analysis content: JSON files of polygons and models, every interpolation and,
        for a local analysis with polygons, the polygons file

        :returns: Analysis path
        :rtype: str
        """
        analysis_path = self.get_path()

        # JSON files: polygons first, then models
        for get_json_path in (self.get_json_polygons_path, self.get_json_models_path):
            json_path = get_json_path(relative=False)
            if os.path.exists(json_path):
                os.remove(json_path)

        # Iterate over a copy, from last to first, as each deletion shrinks the list of interpolations
        for interpolation in reversed(list(self.get_interpolations())):
            self.delete_interpolation(interpolation)

        if self.is_local() and self._polygons is not None:
            # source = self._polygons.polygons_layer().qgis_layer().source()
            polygons_source = self._polygons.polygons_path()
            LandsklimUtils.free_path(polygons_source)
            if os.path.exists(polygons_source):
                os.remove(polygons_source)
        return analysis_path


    def to_json(self) -> Dict:
        """
        Build the dictionary used to serialize the analysis in JSON

        Starting from the state of the object:

        - the configuration is removed
        - polygons and models are removed and replaced by the relative paths of their own JSON files
          ("json_polygons" and "json_models")
        - correlations are keyed by the index of the regressor in the regressors list instead of
          the regressor itself

        The state of the analysis itself is left untouched.

        :returns: Dictionary describing the analysis
        :rtype: Dict

        :raises ValueError: If correlations reference a regressor which is not part of the analysis
        """
        # Shallow copy: keys are removed and replaced below, which must never alter the instance
        # even if __getstate__() hands over its own dictionary
        state_dict: Dict = dict(self.__getstate__())
        for excluded_key in ("_configuration", "_polygons", "_models"):
            state_dict.pop(excluded_key, None)

        state_dict["json_polygons"] = self.get_json_polygons_path(relative=True)
        state_dict["json_models"] = self.get_json_models_path(relative=True)

        state_correlations = {}
        for situation, dict_correlations in state_dict["_pearson_correlation"].items():  # type: int, Dict[Regressor, "PearsonRResult"]
            state_correlations[situation] = {
                self._regressors.index(regressor): values
                for regressor, values in dict_correlations.items()
            }
        state_dict['_pearson_correlation'] = state_correlations
        return state_dict
