import time
from enum import Enum
from math import ceil
from typing import Dict, Any, Union, Optional, Tuple, List, Iterable

from PyQt5.QtCore import QVariant
from qgis._core import QgsVectorLayer, QgsFeature, QgsRectangle
from scipy import interpolate, stats
from scipy.linalg import lstsq, lu_solve, lu_factor

from landsklim.lk import environment
from landsklim.lk.landsklim_interpolation import LandsklimRectangle
from landsklim.lk.utils import LandsklimUtils
from landsklim.lk.logger import Log
from landsklim.lk.map_layer import VectorLayer
from landsklim.processing.algorithm_moran_i import MoranIProcessingAlgorithm
from landsklim.lk.landsklim_constants import DATASET_RESPONSE_VARIABLE, DATASET_COLUMN_X, DATASET_COLUMN_Y
from landsklim.lk.regressor import Regressor
from copy import deepcopy

try:
    import pandas as pd
except ImportError:
    Log.critical("pandas not available")

import numpy as np
from scipy.stats import pearsonr

from landsklim.lk.phase import IPhase

# Prevent error when launching plugin for the first time on a Python installation without sklearn.
# Sklearn will be installed when instantiating the plugin
try:
    from sklearn.linear_model import LinearRegression
    from sklearn.base import clone
    from sklearn.preprocessing import StandardScaler
    import sklearn
except ImportError:
    Log.critical("sklearn not available")


class SmoothingMode(Enum):
    """
    Strategy used to smooth values while interpolating
    """

    #: Smooth values following the "global" method
    Global = 0

    #: Smooth values following the "local" method
    Local = 1


class TooFewSamplesException(Exception):
    """
    Raised when a dataset does not hold enough samples to build a regression model
    """

class MultipleRegressionModel:
    r"""
    Represents an individual multiple regression model, working with one dataset, used by PhaseMultipleRegression

    :param standardize: If True, standardize (center reduce) features and response variable.
    :type standardize: bool

    :ivar List[str] _labels: Names of regressors (headers of dataset features) kept on the final model

    :ivar Optional[sklearn.linear_model.LinearRegression] _model: An instance of :class:`sklearn.linear_model.LinearRegression`. Default: ``None``

    :ivar Optional[np.ndarray] _unstandardized_coefficients: Numpy array containing the intercept followed by the coefficients fitted with unstandardized data

    :ivar Optional[pd.DataFrame] _dataset: Dataset used to build the model

    :ivar Optional[pd.Series] _scale_means: Pandas Series mapping each feature to its mean, used for standardisation

    :ivar Optional[pd.Series] _scale_std: Pandas Series mapping each feature to its standard deviation, used for standardisation

    :ivar bool _standardize: Standardize the features and the response variable if True

    :ivar bool _predictors_are_integers: If True, predictors are converted to int

    :ivar Optional[np.ndarray] __residuals_cv: Residuals computed through cross-validation

    :ivar Optional[np.ndarray] __predictions_cv: Predictions computed through cross-validation

    :ivar Optional[Dict[str, Tuple[float, float]]] _pearson_correlations: Pearson correlation (coefficient, p-value)
        between each regressor and the response variable.

    :ivar SmoothingMode __smoothing_mode:

        .. deprecated:: 0.4.4.
            ``__smoothing_mode`` is no longer used. Smoothing behaviours were uniformized.

        How to smooth out-of-bound predictors.

        Global : Out-of-bound predictors, defined by the extrapolation margin, are smoothed according to the values of
        the dataset used to build the model

        .. math::

            v_{min} = d_{min} - (d_{max} * \text{tolerance})

            v_{max} = d_{max} + (d_{max} * \text{tolerance})

            d_{min} \small \text{ is the minimum value of the dataset used to build the model}

            d_{max} \small \text{ is the maximum value of the dataset used to build the model}

        Predictors are smoothed according to the following formula

        .. math::

            v = v - log(abs(v - v_{min})) + 1

            \forall v < v_{min}

            v = v + log(abs(v + v_{max})) + 1

            \forall v > v_{max}

        Local : There is an extra step when smoothing predictors for local interpolation

        .. math::

            v = v + log(abs(v + v_{max})) + 1

        This formula is applied before the classic smoothing, implying predictors are smoothed two times on the
        upper edge and one time on the lower edge.
        The formula to get the lower bound is different, written as

        .. math::

            v_{min} = d_{min} - (d_{min} * \text{tolerance})

            d_{min} \small \text{ is the minimum value of the dataset used to build the model}

        Default : ``SmoothingMode.Global``
    """

    # Default value of the ``standardize`` constructor argument
    STANDARDIZE_DEFAULT_VALUE = True

    def __init__(self, standardize: bool = STANDARDIZE_DEFAULT_VALUE):
        """
        Create an empty (unfitted) model

        :param standardize: If True, features and response variable are standardized before fitting
        :type standardize: bool
        """
        # Behaviour switches
        self._standardize: bool = standardize
        self._predictors_are_integers: bool = False
        # Deprecated, only kept so that set_smoothing_mode() keeps working
        self.__smoothing_mode: SmoothingMode = SmoothingMode.Global

        # Fitted model: selected regressors and their coefficients
        self._labels: List[str] = []
        self._model: Optional[LinearRegression] = None
        self._model_lstsq: Optional[np.ndarray] = None
        self._unstandardized_coefficients: Optional[np.ndarray] = None

        # Description of the dataset the model was built on
        self._dataset_mins: Optional[np.ndarray] = None
        self._dataset_maxs: Optional[np.ndarray] = None
        self._scale_means: Optional[pd.Series] = None
        self._scale_std: Optional[pd.Series] = None
        self._scale_column_names: Optional[List[str]] = None
        self._pearson_correlations: Optional[Dict[str, Tuple[float, float]]] = None

        # Quality metrics, filled while the model is constructed
        self.__adjusted_r2: Optional[float] = None
        self.__mse: Optional[float] = None
        self.__residuals: Optional[np.ndarray] = None
        self.__residuals_cv: Optional[np.ndarray] = None
        self.__predictions_cv: Optional[np.ndarray] = None

    def set_smoothing_mode(self, smoothing_mode: SmoothingMode):
        """
        Store the smoothing mode of the model.

        The class documentation flags the smoothing mode as deprecated since 0.4.4.

        :param smoothing_mode: Smoothing mode to store
        :type smoothing_mode: SmoothingMode
        """
        self.__smoothing_mode = smoothing_mode

    def clone(self) -> "MultipleRegressionModel":
        """
        Clone a MultipleRegressionModel instance

        Every piece of state is duplicated so that the clone can be altered without touching
        the source model. This includes the least-squares coefficients, the integer-predictors flag,
        the Pearson correlations and the smoothing mode, which were previously left at their
        constructor defaults on the clone.

        :returns: An independent copy of this model
        :rtype: MultipleRegressionModel
        """
        def copy_or_none(value):
            # Objects exposing .copy() (lists, dicts, pandas objects, numpy arrays)
            return value.copy() if value is not None else None

        def array_or_none(value):
            # np.array() always allocates a new array
            return np.array(value) if value is not None else None

        cloned: MultipleRegressionModel = MultipleRegressionModel(standardize=self._standardize)
        cloned._labels = self._labels.copy()
        cloned._model = deepcopy(self._model) if self._model is not None else None
        cloned._model_lstsq = array_or_none(self._model_lstsq)
        cloned._unstandardized_coefficients = array_or_none(self._unstandardized_coefficients)
        cloned._dataset_mins = copy_or_none(self._dataset_mins)
        cloned._dataset_maxs = copy_or_none(self._dataset_maxs)
        cloned._scale_means = copy_or_none(self._scale_means)
        cloned._scale_std = copy_or_none(self._scale_std)
        cloned._scale_column_names = copy_or_none(self._scale_column_names)
        cloned._predictors_are_integers = self._predictors_are_integers
        # Values are immutable tuples: a shallow copy of the mapping is enough
        cloned._pearson_correlations = copy_or_none(self._pearson_correlations)
        cloned.__smoothing_mode = self.__smoothing_mode
        cloned.__residuals_cv = array_or_none(self.__residuals_cv)
        cloned.__residuals = array_or_none(self.__residuals)
        cloned.__predictions_cv = array_or_none(self.__predictions_cv)
        cloned.__adjusted_r2 = self.__adjusted_r2
        cloned.__mse = self.__mse
        return cloned

    def get_mins_and_maxs(self, dataset: pd.DataFrame):
        """
        Keep trace of mins and maxs of the dataset used by, in the same order as self._scale_column_names
        """
        self._dataset_mins = np.nanmin(dataset.values, axis=0)
        self._dataset_maxs = np.nanmax(dataset.values, axis=0)

    def create_standardization_params(self, dataset: pd.DataFrame):
        """
        Create standardization params to reduce center features

        :param dataset: Source dataset to compute params on
        :type dataset: pd.DataFrame
        """
        self._scale_means = dataset.mean()
        self._scale_std = dataset.std(ddof=0)
        self._scale_std[self._scale_std == 0] = 1
        self._scale_column_names = list(dataset.columns)

    def get_model(self) -> LinearRegression:
        """
        Get the underlying linear regression estimator

        :returns: The stored estimator (``None`` until a model has been constructed)
        """
        return self._model

    def get_unstandardized_coefficients(self) -> np.ndarray:
        """
        Coefficients of the unstandardized model.

        The first coefficient is the intercept.
        The other coefficients are in the same order as self._labels

        :returns: The stored coefficients (``None`` until a model has been constructed)
        :rtype: np.ndarray
        """
        return self._unstandardized_coefficients

    def get_labels(self) -> List[str]:
        """
        Get the names of the regressors kept in the final model

        :returns: The list itself (not a copy)
        :rtype: List[str]
        """
        return self._labels

    def get_coefficients(self, unstandardized: bool = False) -> Dict[str, float]:
        """
        Get regression coefficients

        :param unstandardized: Get unstandardized coefficients if True
        :type unstandardized: True
        """
        coefs = {}
        for i in range(len(self._model.coef_)):
            coefs[self._labels[i]] = self._model.coef_[i]
        return coefs

    def get_formula(self, unstandardized: bool) -> str:
        if unstandardized:
            intercept = self._unstandardized_coefficients[0]
            coefs = self._unstandardized_coefficients[1:]
        else:
            intercept = self._model.intercept_
            coefs = self._model.coef_
        res = str(round(intercept, 4))
        for i, coef in enumerate(coefs):
            res = "{0} + {1} x {2}".format(res, round(coef, 4), self._labels[i])
        return res

    def get_adjusted_r2(self) -> Optional[float]:
        """
        Get the adjusted R2 of the model

        :returns: The stored value (``None`` until it has been computed)
        :rtype: Optional[float]
        """
        return self.__adjusted_r2

    def __compute_adjusted_r2(self, X: pd.DataFrame, y: pd.DataFrame) -> None:
        """
        Compute the adjusted R2 of the fitted estimator and store it (nothing is returned).

        The score of the estimator is adjusted by ``LandsklimUtils.adjusted_r2``
        with ``n`` the number of samples and ``p`` the number of columns of ``X``.

        :param X: Features the estimator is scored on (only ``shape[1]`` is read besides scoring)
        :param y: Response values matching ``X``
        """
        self.__adjusted_r2 = LandsklimUtils.adjusted_r2(self._model.score(X, y), n=len(y), p=X.shape[1])

    def create_sublists(self, lst, index=0, current=[], output=[]):  # pragma: no cover
        """
        Create every combination possible from a list

        :param lst: Input list to create combinations from
        :type lst: List

        :param output: Output list where all combinations are stored
        :type output: List
        """
        if index == len(lst):
            if len(current) > 0:
                output.append(current)
            return
        self.create_sublists(lst, index + 1, current, output)
        self.create_sublists(lst, index + 1, current + [lst[index]], output)

    def rss(self, y, y_hat) -> float:
        """
        Residual sum of squares between observed values and predictions

        :param y: Observed values
        :param y_hat: Predicted values

        :returns: Dot product of the residuals with themselves
        """
        error = y - y_hat
        return np.dot(error, error)

    def likelihood(self, rss, n):
        """
        Likelihood term used by the AIC computations: ``n * log(rss / n)``

        :param rss: Residual sum of squares
        :param n: Number of samples
        """
        mean_squared_residual = rss / n
        return n * np.log(mean_squared_residual)

    def aicc(self, likelihood, k, n):
        aic = self.aic(likelihood, k)
        aicc = aic + ((2*k*(k+1))/(n-k-1))
        return aicc

    def aic(self, likelihood, k):
        """
        Akaike information criterion: the likelihood term plus a penalty of two per parameter.

        The ``likelihood`` argument is added as is (it is not multiplied by -2).

        :param likelihood: Likelihood term, as computed by :meth:`likelihood`
        :param k: Number of estimated parameters
        """
        penalty = 2 * k
        return likelihood + penalty

    """def aic_estimator(self, estimator, X, y):
        n_samples, n_features = X.shape
        y_pred = estimator.predict(X)
        rss = np.sum((y - y_pred) ** 2)
        k = n_features
        aic = n_samples * np.log(rss / n_samples) + 2 * k
        return -aic"""

    def regressor_class_name(self, regressor_name: str) -> str:
        """
        Strip the last ``_``-separated suffix of a regressor name (``altit_3`` gives ``altit``).

        Names without any underscore are returned unchanged.

        :param regressor_name: Full regressor name
        :type regressor_name: str

        :returns: The part of the name located before its last underscore
        :rtype: str
        """
        prefix = regressor_name.rsplit('_', 1)[0]
        return prefix

    """def old_standardize(self, dataset: pd.DataFrame) -> pd.DataFrame:
        if self._standardize:
            columns = dataset.columns
            res = ((dataset - self._scale_means[columns]) / self._scale_std[columns])
            return res

        return dataset"""

    def columns_to_indices(self, columns: Iterable[str], all_columns: List[str]) -> np.ndarray:
        """
        Translate column names into their positions inside the entire dataset.

        .. code-block:: python

           self.columns_to_indices(['altit_3', 'slope_5'], ['X', 'Y', 'altit_1', 'altit_3', 'altit_5', 'slope_3', 'slope_5', 'orien_3'])
           np.array([3, 6])

        :param columns: Columns of the dataset to locate
        :type columns: Iterable[str]

        :param all_columns: List of columns in the source dataset (usually list(self._dataset.columns))
        :type all_columns: List[str]

        :returns: Position of each requested column, in the order of ``columns``
        :rtype: np.ndarray

        :raises ValueError: If a requested column is not part of ``all_columns``
        """
        return np.array([all_columns.index(name) for name in columns])

    def standardize(self, dataset: pd.DataFrame) -> pd.DataFrame:
        """
        Standardize each columns of a dataset

        :param dataset: Dataset to standardize
        :type dataset: pd.DataFrame

        :returns: The standardized dataset
        :rtype: pd.DataFrame
        """
        if self._standardize:
            columns = dataset.columns
            indices = self.columns_to_indices(columns, self._scale_column_names)
            res = pd.DataFrame((dataset.values - self._scale_means.values[indices]) / self._scale_std.values[indices], index=dataset.index, columns=columns)
            return res

        return dataset

    def inverse_standardize(self, array: np.ndarray, scaler_name: str) -> np.array:
        """
        Inverse standardize an array.

        :param array: 1D array
        :type array: np.ndarray

        :param scaler_name: Feature name
        :type scaler_name: str

        :returns: Inverse standardized 1D array
        :rtype: np.ndarray
        """
        return (array * self._scale_std[scaler_name] + self._scale_means[scaler_name]) if self._standardize else array

    def remove_geographic_features(self, dataset: pd.DataFrame) -> pd.DataFrame:
        """
        Drop the coordinate columns (``DATASET_COLUMN_X`` and ``DATASET_COLUMN_Y``) of a dataset

        :param dataset: Dataset holding both coordinate columns
        :type dataset: pd.DataFrame

        :returns: The result of ``DataFrame.drop``; the input is not modified in place
        :rtype: pd.DataFrame
        """
        return dataset.drop([DATASET_COLUMN_X, DATASET_COLUMN_Y], axis=1)

    def _split_dataset(self, dataset: pd.DataFrame, drop_na: bool = True) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Split a dataset into features and response variable.

        :param dataset: Dataset holding the features and the response variable column
        :type dataset: pd.DataFrame

        :param drop_na: If True, rows containing missing data are removed first.
                        Otherwise the split is made on a copy of the dataset
        :type drop_na: bool

        :returns: Features and response variable (as a one-column dataframe).
                  Both are truncated when predictors are integers
        :rtype: Tuple[pd.DataFrame, pd.DataFrame]
        """
        frame = dataset.dropna() if drop_na else dataset.copy()
        features = frame.drop(DATASET_RESPONSE_VARIABLE, axis=1)
        response = frame[[DATASET_RESPONSE_VARIABLE]]
        if self._predictors_are_integers:
            features = np.trunc(features)
            response = np.trunc(response)
        return features, response

    def split_dataset(self, dataset: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Split the dataset into two distinct dataframes (one for the features, the second with the response variable)
        and remove rows containing missing data

        :param dataset: Dataset holding the features and the response variable column
        :type dataset: pd.DataFrame

        :returns: Features and response variable
        :rtype: Tuple[pd.DataFrame, pd.DataFrame]
        """
        return self._split_dataset(dataset, drop_na=True)

    def __pearson_correlations(self, X: np.ndarray, y: np.ndarray, columns: Iterable[str]) -> Dict[str, Tuple[float, float]]:
        """pearson_correlation: Dict[str, Tuple[float, float]] = {}
        for i, column in enumerate(columns):  # type: str
            x = X[:, i]
            if self._predictors_are_integers:
                x, y = np.trunc(x), np.trunc(y)
            result: PearsonRResult = pearsonr(x, y)
            pearson_correlation[column] = (result.statistic, result.pvalue)"""

        # faster than pearsonr() because pearson() have a big overhead cost
        correlations = self.__corrwith(X.T, y.reshape(1, -1))
        ab = len(X) / 2 - 1
        p_values = 2 * stats.beta(ab, ab, loc=-1, scale=2).sf(np.abs(correlations))
        pearson_correlation: Dict[str, Tuple[float, float]] = {cl: (co, p) for co, p, cl in zip(correlations, p_values, columns)}
        return pearson_correlation

    def correlations(self, X: pd.DataFrame, y: np.ndarray):
        y = y.ravel()
        y_hat_collection = []
        X_array = X.values
        columns_count = len(X.columns)
        # for column, x in X.items():  # type: Tuple[str, pd.Series]
        for i in range(columns_count):
            x = X_array[:, i].reshape(-1, 1)
            coefs = lstsq(x, y, lapack_driver='gelsy', check_finite=False)[0]
            y_hat = np.sum(coefs * x, axis=1)
            y_hat_collection.append(y_hat.ravel())

        corr_array = self.__corrwith(np.concatenate(y_hat_collection).reshape(len(X.columns), -1), y.reshape(1, -1))  # faster than df.corrwith
        corr = {k: v for k, v in zip(X.columns, corr_array) if np.isfinite(v)}
        return corr

    def __corrwith(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
        """
        Compute correlation between each X feature with y array.

        :param X: Features array
        :type X: np.ndarray

        :param y: Response array
        :type y: np.ndarray

        :returns: Pearson's correlation coefficient between each feature of X and y.
        :rtype: np.ndarray
        """
        X = np.atleast_2d(X)
        y = np.atleast_2d(y)
        mean1 = X.mean(axis=1)
        mean2 = y.mean(axis=1)
        std1 = X.std(axis=1)
        std2 = y.std(axis=1)
        corr = ((X * y).mean(axis=1) - mean1 * mean2) / (std1 * std2)
        return corr

    """def get_rse(self) -> float:
        # Residual Standard Error
        
        residuals = self.get_residuals()
        n = len(residuals)
        return LandsklimUtils.rse(residuals, n, len(self._labels))"""

    def get_mse(self) -> float:
        """
        Mean Squared Error
        """
        """X, y = self.split_dataset()
        y = y[DATASET_RESPONSE_VARIABLE]
        y_hat = self.predict(X)
        n = len(X)
        return (1/n) * np.sum(np.power((y - y_hat), 2))"""
        return self.__mse

    def __compute_mse(self, X: pd.DataFrame, us_y: np.ndarray):
        """
        Compute the Mean Squared Error of the fitted estimator and store it.

        Predictions are brought back to the scale of the response variable before being
        compared with the unstandardized response.

        :param X: Features handed to the estimator
        :param us_y: Unstandardized response values (flattened internally)
        :type us_y: np.ndarray
        """
        predicted = self.inverse_standardize(self._model.predict(X), DATASET_RESPONSE_VARIABLE)
        sample_count = len(X)
        squared_errors = np.square(us_y.ravel() - predicted)
        self.__mse = (1 / sample_count) * np.sum(squared_errors)

    """def get_residuals(self) -> np.ndarray:
        # Get residuals of the dataset on the full model
        return self.__residuals

    def __compute_residuals(self, X: pd.DataFrame, y: pd.DataFrame):
        y_hat: np.ndarray = self.predict(X)
        self.__residuals = y.values - y_hat"""

    def __compute_cv(self, X: pd.DataFrame, y: pd.DataFrame):
        """
        Leave-one-out cross-validation of the regression.

        Each sample is predicted by a least-squares model (with intercept) fitted on all the other samples.

        :param X: Features, one row per sample
        :param y: Response values (read through ``y.values``)

        :returns: Predictions and residuals (prediction minus truth), both on the scale of the response variable
        :rtype: Tuple[np.ndarray, np.ndarray]
        """
        sample_count = len(X)
        observed, estimated = np.zeros(sample_count), np.zeros(sample_count)

        # Columns are: features | constant term | response.
        # The first row is the tested sample of each round; rolling the table by one row after
        # each round makes every sample take the first place in turn.
        table = np.hstack((X, np.ones((sample_count, 1)), y.values.reshape(-1, 1)))

        for round_index in range(sample_count):
            held_out = table[0]
            training = table[1:]
            weights = lstsq(training[:, :-1], training[:, -1], lapack_driver='gelsy', check_finite=False)[0]
            # For a single prediction, the raw formula is enough
            estimated[round_index] = np.dot(weights, held_out[:-1]).ravel()[0]
            observed[round_index] = held_out[-1]
            table = np.roll(table, -1, axis=0)

        observed = self.inverse_standardize(observed, DATASET_RESPONSE_VARIABLE)
        estimated = self.inverse_standardize(estimated, DATASET_RESPONSE_VARIABLE)
        return estimated, estimated - observed

    def get_predictions_cv(self) -> Optional[np.ndarray]:
        """
        Get predictions of the dataset through cross-validation

        :returns: The stored predictions (``None`` until they have been computed)
        :rtype: Optional[np.ndarray]
        """
        return self.__predictions_cv

    def get_residuals_cv(self) -> Optional[np.ndarray]:
        """
        Get residuals of the dataset through cross-validation

        :returns: The stored residuals (``None`` until they have been computed)
        :rtype: Optional[np.ndarray]
        """
        return self.__residuals_cv

    def get_residuals_standard_deviation(self) -> float:
        """
        Compute model error from the cross-validation residuals

        :returns: Result of ``LandsklimUtils.unbiased_estimate_standard_deviation`` applied to
                  the cross-validation residuals
        :rtype: float
        """
        return LandsklimUtils.unbiased_estimate_standard_deviation(self.get_residuals_cv())

    def get_pearson_correlations(self) -> Dict[str, Tuple[float, float]]:
        """
        Get the Pearson correlation between each regressor and the response variable

        :returns: Mapping of each regressor name to (coefficient, p-value).
                  ``None`` until a model has been constructed
        :rtype: Dict[str, Tuple[float, float]]
        """
        return self._pearson_correlations

    def valid_interpolation_mask(self, points: pd.DataFrame, extrapolation_margin: float) -> np.ndarray:
        """
        Get a prediction validity mask according to values of predictors and predicted variable

        :param points: List of predictors
        :type points: pd.DataFrame

        :param extrapolation_margin: Extrapolation margin.
                                     Accept predictions where predictors (and predictions) are in the range of values
                                     used to build model + a margin in percentage, specified by extrapolation_margin
        :type extrapolation_margin: float

        :returns: Prediction validity mask
        :rtype: np.ndarray[bool]
        """
        mask = np.ones(len(points), dtype=bool)
        for column in self.get_labels():
            i_scalers: int = self._scale_column_names.index(column)
            dataset_min, dataset_max = self._dataset_mins[i_scalers], self._dataset_maxs[i_scalers]
            min_allowed = dataset_min - np.abs(dataset_max * extrapolation_margin)  # dataset_min - (dataset_min * extrapolation_margin)
            max_allowed = dataset_max + np.abs(dataset_max * extrapolation_margin)  # dataset_max * (1 + extrapolation_margin)
            mask = mask & ((points[column] >= min_allowed) & (points[column] <= max_allowed)).values

        return mask

    def t_pearson(self, n: int) -> float:  # pragma: no cover
        """
        Look up a threshold in a hard-coded table indexed by ``n``, linearly interpolating between entries.

        :param n: Lookup key; must lie within [0, 1000], the span covered by the table
        :type n: int

        :returns: The interpolated table value, as returned by the scipy interpolator
        """
        table_keys = np.array([
            0, *range(2, 31),
            35, 40, 45, 50, 60, 70, 80, 90, 100, 200, 300, 400, 500, 1000,
        ])
        table_values = np.array([
            0.99, 0.98, 0.96, 0.95, 0.878, 0.811, 0.754, 0.707, 0.666, 0.632,
            0.602, 0.576, 0.553, 0.532, 0.514, 0.497, 0.482, 0.468, 0.456, 0.444,
            0.433, 0.423, 0.413, 0.404, 0.396, 0.388, 0.381, 0.374, 0.367, 0.361,
            0.334, 0.312, 0.294, 0.279, 0.254, 0.235, 0.220, 0.207, 0.197, 0.139,
            0.113, 0.098, 0.088, 0.062,
        ])
        lookup = interpolate.interp1d(table_keys, table_values)
        return lookup(n)

    def construct_model(self, dataset: pd.DataFrame) -> Tuple[float, float, float, float]:
        """
        Construct multiple regression model.

        1. Select the best window for each regressor
        2. Upward feature selection to keep the most relevant features
        3. Fit a multiple linear regression model
        4. Compute model error through cross-validation

        The input dataset is not modified: the work is done on a copy, without its coordinate columns.

        :param dataset: Input dataset for building the model
        :type dataset: pd.DataFrame

        :returns: Time spent (in seconds) on initialisation, feature selection, model fitting and cross-validation
        :rtype: Tuple[float, float, float, float]

        :raises TooFewSamplesException: If the dataset has fewer than 3 rows, or if the number of distinct
                                        column classes (see :meth:`regressor_class_name`) reaches the number of rows minus one
        """
        time_init = time.perf_counter()
        dataset = self.remove_geographic_features(dataset.copy())

        # Columns only differing by their last "_" suffix count as a single regressor
        regressor_count = len(set(map(self.regressor_class_name, dataset.columns)))
        if regressor_count >= (dataset.shape[0]-1) or dataset.shape[0] < 3:
            raise TooFewSamplesException()

        # Ranges and standardization parameters are computed on every remaining column
        self.get_mins_and_maxs(dataset)
        self.create_standardization_params(dataset)
        X, y = self.split_dataset(dataset)

        self._pearson_correlations = self.__pearson_correlations(X.values, y.values.ravel(), X.columns)
        if self._predictors_are_integers:
            X, y = np.trunc(X), np.trunc(y)
        us_X, us_y = X.copy(), y.copy().values  # Keep unstandardized dataset
        X = self.standardize(X)
        y = self.standardize(y)[DATASET_RESPONSE_VARIABLE]

        # Get best window of each regressor
        regressor_correlations: Dict[str, float] = self.correlations(us_X, us_y)  # TODO: self.pearson_correlations()

        # self.___compare_coef_dicts(regressor_correlations, new_correlations)

        if self._predictors_are_integers:
            regressor_correlations = {k: int(v*100)/100 for k, v in regressor_correlations.items()}
        # For each regressor class, keep the column with the highest (signed) correlation
        best_window_by_regressor: Dict[str, str] = {}
        for regressor, metric in regressor_correlations.items():  # type: str, float
            regressor_name: str = self.regressor_class_name(regressor)
            if not np.isnan(metric):
                if regressor_name not in best_window_by_regressor or metric > regressor_correlations[best_window_by_regressor[regressor_name]]:
                    best_window_by_regressor[regressor_name] = regressor

        # Sort candidate regressors by decreasing absolute correlation
        regressors_pool: List[Tuple[str, float]] = []
        for regressor_name, regressor in best_window_by_regressor.items():  # type: str, str
            regressors_pool.append((regressor, regressor_correlations[regressor]))

        regressors_pool = sorted(regressors_pool, key=lambda x: abs(x[1]), reverse=True)
        # Create regression model for each possible regressors combination and keep the best with the AIC
        time_init = time.perf_counter() - time_init

        """if len(X) > 2:
            if len(X) > 1000:
                selected_variables = self.greedy_feature_selection(X, y, regressors_pool.copy())
            elif len(X) <= 45:
                selected_variables = self.local_feature_selection(X, y, regressors_pool)
            else:
                selected_variables = self.upward_feature_selection(X, y, regressors_pool.copy(), use_aic=True)
                # selected_variables = self.global_naive_upward_feature_selection_aic(X, y, regressors_pool.copy())"""

        time_feature_selection = time.perf_counter()

        # Response variable is constant
        if np.all(np.isclose(y.values, y.values[0])):
            # TODO: Handle model without predictors in case of constant response variable
            selected_variables = [X.columns[0]]
        else:
            selected_variables = self.upward_feature_selection(X, y, regressors_pool.copy(), use_aic=True)
        time_feature_selection = time.perf_counter() - time_feature_selection

        time_model = time.perf_counter()

        self._labels = selected_variables
        column_indices = self.columns_to_indices(self._labels, list(X.columns))
        # Create model with the selected variables
        X_selected = X.values[:, column_indices] # X[self._labels].values
        self._model = LinearRegression()
        self._model.fit(X_selected, y)

        # Same fit solved by least squares; the trailing column of ones carries the intercept
        self._model_lstsq = lstsq(np.hstack((X_selected, np.ones((len(X), 1)))), y, lapack_driver='gelsy', check_finite=False)[0]

        self.__create_unstandardized_model(us_X.values[:, column_indices], us_y)
        self.__compute_adjusted_r2(X_selected, y)
        self.__compute_mse(X_selected, us_y)
        # self.__compute_residuals(us_X, us_y)

        time_model = time.perf_counter() - time_model

        time_cv = time.perf_counter()
        # NOTE(review): compute_residuals_cv is defined outside this part of the file;
        # presumably it stores the cross-validation predictions and residuals - confirm
        self.compute_residuals_cv(X.copy().values[:, column_indices], y.copy())
        time_cv = time.perf_counter() - time_cv

        # self.force_model_params(['altit_1'], 49.8063, np.array([0.00182280]))
        # self.force_model_params(['altit_1', 'encai_3'], 52.034584, np.array([0.000634297, 0.591040]))
        return time_init, time_feature_selection, time_model, time_cv

    def greedy_feature_selection(self, X: pd.DataFrame, y: pd.DataFrame, pool: List[Tuple[str, float]]):  # pragma: no cover
        """
        Exhaustive feature selection: fit a linear regression on every non-empty combination
        of candidates and keep the combination with the lowest AICc.

        :param X: Features
        :type X: pd.DataFrame

        :param y: Response variable
        :type y: pd.DataFrame

        :param pool: Candidates, as (column name, score) pairs. Only the names are used
        :type pool: List[Tuple[str, float]]

        :returns: Names of the regressors of the best combination
        :rtype: List[str]
        """
        Log.info("[greedy_feature_selection]")
        fit_seconds = 0
        metrics_seconds = 0
        candidates = []
        self.create_sublists(pool, output=candidates)
        score_by_combination = {}
        Log.info("There is", len(candidates), "combinations")
        for candidate in candidates:  # type: List[Tuple[str, float]]
            names = [entry[0] for entry in candidate]
            design = X[names].values

            fit_started = time.perf_counter()
            estimator = LinearRegression()
            estimator.fit(design, y)
            fit_seconds += time.perf_counter() - fit_started

            metrics_started = time.perf_counter()
            fitted_values = estimator.predict(design)
            residual_sum = self.rss(y, fitted_values)
            likelihood_term = self.likelihood(residual_sum, len(design))
            # Parameters are the regressors plus the intercept
            candidate_score = self.aicc(likelihood_term, len(names) + 1, len(design))
            metrics_seconds += time.perf_counter() - metrics_started
            score_by_combination[tuple(names)] = candidate_score
        Log.info("[time fit] {0:.3f}s".format(fit_seconds))
        Log.info("[time metrics] {0:.3f}s".format(metrics_seconds))

        # Select the best combination: lowest score wins
        best_names, _ = min(score_by_combination.items(), key=lambda item: item[1])
        return list(best_names)

    def upward_feature_selection(self, X: pd.DataFrame, y: pd.DataFrame, pool: List[Tuple[str, float]], use_aic: bool = False) -> List[str]:
        """
        Select the regressors to keep by scoring combinations of candidates.

        Each tested combination is fitted by least squares on the matching columns of ``X``
        (no intercept column is added) and scored on its fitted values. The best combination is returned.

        NOTE(review): combinations are produced by nested index loops over the pool; the exact set of
        combinations visited is not obvious from the code - confirm before relying on exhaustiveness.

        :param X: Features. Columns are read by position through ``X.values``
        :type X: pd.DataFrame

        :param y: Response variable (read through ``y.values``)
        :type y: pd.DataFrame

        :param pool: Candidates, as (column name, score) pairs. Only the names are used
        :type pool: List[Tuple[str, float]]

        :param use_aic: If True, combinations are scored with the AICc and the lowest score wins.
                        Otherwise they are scored with the Pearson correlation between fitted and observed
                        values, and a combination must beat the current best by more than 0.01
        :type use_aic: bool

        :returns: Names of the selected regressors. A pool of one candidate is returned as is
        :rtype: List[str]

        :raises RuntimeError: If a tested combination holds the same column twice
        """
        if len(pool) == 1:
            return [pool[0][0]]
        n = len(X)
        y = y.values
        # Counters and timers below are accumulated but not returned
        models = 0
        time_models = 0
        time_metrics = 0
        n_var_cor = len(pool)
        X_array = X.values
        # Names of the regressors of the combination being built
        v_exp = np.empty(n_var_cor, dtype=object)
        v_exp[:] = np.nan

        # Positions in X of the regressors of the combination being built
        v_exp_i = np.empty(n_var_cor, dtype=int)
        v_exp_i[:] = -1
        pool_i = []
        for v in pool:
            pool_i.append(X.columns.get_loc(v[0]))

        # Names of the best combination found so far (padded with NaN)
        best_combination = np.empty(n_var_cor, dtype=object)
        best_combination[:] = np.nan
        # Score of the best combination found so far
        corr_trav = None
        tested_combinations = []
        # For each explanatory variable
        for k1 in range(n_var_cor):
            v_exp[0] = pool[k1][0]
            v_exp_i[0] = pool_i[k1]
            nouv_k1 = True
            for k2 in range(k1 + 1, n_var_cor + 1):
                f1 = 1
                if k2 > 1 and not nouv_k1:
                    f1 = 2
                nouv_k1 = False
                for nk1 in range(f1, n_var_cor + 1 - k2):
                    num_ord_fonc = 0
                    max_cst = -1
                    for cst in range(nk1 - 1):
                        num_ord_fonc += 1
                        v_exp[num_ord_fonc] = pool[k2 + cst][0]
                        v_exp_i[num_ord_fonc] = pool_i[k2 + cst]
                        max_cst = cst
                    max_cst += 1

                    num_ord_fonc += 1
                    for k3 in range(n_var_cor - (k2 + max_cst)):
                        v_exp[num_ord_fonc] = pool[k2 + max_cst + k3][0]
                        v_exp_i[num_ord_fonc] = pool_i[k2 + max_cst + k3]

                        # regressors = v_exp[:num_ord_fonc + 1]
                        regressors_i = v_exp_i[:num_ord_fonc + 1]

                        nk = num_ord_fonc + 2
                        n_var = nk
                        time_model = time.perf_counter()
                        X_step = X_array[:, regressors_i]
                        coefs = lstsq(X_step, y, lapack_driver='gelsy', check_finite=False)[0]
                        y_hat = np.sum(coefs * X_step, axis=1)

                        if len(set(list(regressors_i))) != len(regressors_i):
                            raise RuntimeError("Duplicate regressor selected")
                        models += 1
                        time_models += (time.perf_counter() - time_model)

                        time_metric = time.perf_counter()
                        is_the_best_model: bool = False
                        if use_aic:
                            rss = self.rss(y, y_hat)
                            llf = self.likelihood(rss, n)
                            # Parameters are the regressors plus one
                            model_score = self.aicc(llf, len(regressors_i) + 1, n)
                            is_the_best_model = corr_trav is None or model_score < corr_trav
                        else:
                            model_score = pearsonr(y_hat, y)[0]
                            is_the_best_model = corr_trav is None or (model_score > corr_trav + 0.01)
                        time_metrics += (time.perf_counter() - time_metric)

                        # Log.info("Score : ", model_score)
                        if is_the_best_model:
                            best_combination[:] = np.nan
                            corr_trav = model_score
                            best_combination[:n_var - 1] = v_exp[:n_var - 1]

        # Drop the NaN padding to keep only the selected names
        selected_variables = best_combination[~pd.isnull(best_combination)]
        return list(selected_variables)

    def global_naive_upward_feature_selection_aic(self, X: pd.DataFrame, y: pd.DataFrame, pool: List[Tuple[str, float]]):  # pragma: no cover
        """
        Greedy forward ("upward") selection of regressors.

        At each step, every remaining candidate is added in turn to the regressors already kept,
        a ``LinearRegression`` is fitted on these columns and the model is scored.
        The best candidate of the step is kept only if its score exceeds the current best score
        by more than 0.01 (the first step always keeps its best candidate).
        Selection stops at the first step without such a gain, or when no candidate is left.

        Despite the name of the method, models are ranked on the Pearson correlation between
        fitted and observed values (higher is better), not on an AIC.

        :param X: Features. Must contain one column per candidate name
        :type X: pd.DataFrame
        :param y: Response variable
        :type y: pd.DataFrame
        :param pool: Candidates as (name, score) pairs. Only names are used; a name listed several times is considered once
        :type pool: List[Tuple[str, float]]
        :returns: Names of the selected regressors, in the order they were added. Empty if pool is empty
        :rtype: List[str]
        """
        Log.info("[global_naive_upward_feature_selection_aic]")
        # None means "no model kept yet"
        current_best: Optional[float] = None
        current_pool: List[str] = []
        # dict() keeps the first position of each name and drops duplicated names
        remaining: List[str] = list(dict(pool))
        while remaining:
            step_scores = []
            for candidate in remaining:
                model_pool = current_pool + [candidate]
                X_step = X[model_pool].values
                Log.info("[New regression model]", model_pool)
                model_step = LinearRegression()
                model_step.fit(X_step, y)
                y_hat = model_step.predict(X_step)
                # Only the Pearson correlation drives the selection
                model_score = pearsonr(y_hat, y)[0]
                Log.info("Score : ", model_score)
                step_scores.append((candidate, model_score))
            # Stable descending sort: on ties, the candidate listed first in the pool wins
            step_scores = sorted(step_scores, key=lambda x: -x[1])
            step_best_added_variable, step_best_score = step_scores[0]
            if current_best is not None and not step_best_score > current_best + 0.01:
                # No significant gain: stop building the model
                break
            current_best = step_best_score
            current_pool.append(step_best_added_variable)
            remaining.remove(step_best_added_variable)
        return current_pool

    def local_feature_selection(self, X: pd.DataFrame, y: pd.DataFrame, pool: List[Tuple[str, float]]) -> List[str]:  # pragma: no cover
        """
        Select regressors whose correlation coefficient is strictly above a significance threshold.

        The threshold is given by ``self.t_pearson`` for ``len(X) - 2`` degrees of freedom.
        If no regressor passes the threshold, the first regressor of the pool is returned alone.

        :param X: Features. Only the number of rows is used
        :type X: pd.DataFrame
        :param y: Response variable. Not used by this method
        :type y: pd.DataFrame
        :param pool: Candidates as (name, correlation) pairs, expected to be sorted from best to worst
        :type pool: List[Tuple[str, float]]
        :returns: Names of the selected regressors, in pool order
        :rtype: List[str]
        """
        degrees_of_freedom: int = len(X) - 2
        significance_threshold: float = self.t_pearson(degrees_of_freedom)
        Log.info("[local_feature_selection]. Threshold ({0}) : {1}".format(degrees_of_freedom, significance_threshold))

        kept: List[str] = [name for name, correlation in pool if correlation > significance_threshold]
        if not kept:
            # Nothing is significant: fall back on the head of the pool (the best candidate when the pool is sorted)
            return [pool[0][0]]

        Log.info("Pool (Selected features) : {0} ({1})".format(pool, kept))
        return kept

    def __create_unstandardized_model(self, us_X_selected: np.ndarray, us_y: np.ndarray):
        x = np.hstack((np.ones((len(us_X_selected), 1)), us_X_selected))
        self._unstandardized_coefficients = lstsq(x, us_y, lapack_driver='gelsy', check_finite=False)[0].ravel()

    def compute_residuals_cv(self, X: pd.DataFrame, y: pd.DataFrame):
        """
        Compute cross-validated predictions and residuals and store them on the model.

        When predictors are integers, both arrays are truncated toward zero.

        :param X: Features
        :type X: pd.DataFrame
        :param y: Response variable
        :type y: pd.DataFrame
        """
        cv_predictions, cv_residuals = self.__compute_cv(X, y)
        self.__predictions_cv, self.__residuals_cv = cv_predictions, cv_residuals
        if not self._predictors_are_integers:
            return

        truncated = np.trunc(cv_predictions), np.trunc(cv_residuals)
        self.__predictions_cv, self.__residuals_cv = truncated
        # NOTE: the standard deviation of residuals is not cached here (get_residuals_standard_deviation() was judged
        # fast enough). FIXME kept from the original author: check again with a bigger dataset

    def force_model_params(self, labels: List[str], intercept: float, coefficients: np.ndarray, dataset: pd.DataFrame):
        """
        Impose the regressors, intercept and coefficients of the model instead of fitting it.
        For tests purposes.

        Standardization is switched off, Pearson correlations are recomputed from the dataset
        and cross-validated residuals are recomputed with the forced model.

        :param labels: Names of the regressors of the model
        :type labels: List[str]
        :param intercept: Intercept of the model
        :type intercept: float
        :param coefficients: Coefficients of the model, converted to a float array
        :type coefficients: np.ndarray
        :param dataset: Dataset the forced model is evaluated on. Geographic features are removed before use
        :type dataset: pd.DataFrame
        """
        self._standardize = False
        self._labels = labels

        # The estimator is never fitted: its parameters are assigned directly
        self._model = LinearRegression()
        self._model.coef_ = np.array(coefficients, dtype=float)
        self._model.intercept_ = intercept

        features, response = self.split_dataset(self.remove_geographic_features(dataset))
        self._pearson_correlations = self.__pearson_correlations(features.values, response.values.ravel(), features.columns)

        model_features = self.standardize(features)[self._labels]
        model_response = self.standardize(response)[DATASET_RESPONSE_VARIABLE]
        self.compute_residuals_cv(model_features, model_response)

    def local_prior_correction(self, dataset: pd.DataFrame, extrapolation_margin: float):  # pragma: no cover
        maxs: np.ndarray = self._dataset_maxs
        maxs = np.fix(maxs) if self._predictors_are_integers else maxs
        upper_bounds = maxs + np.abs(maxs * extrapolation_margin)
        upper_bounds = np.fix(upper_bounds) if self._predictors_are_integers else upper_bounds
        if self.debug > 0:
            Log.info("[local_prior_correction] maxs :", maxs[dataset.columns].values.tolist(), "upper bounds:", upper_bounds[dataset.columns].values.tolist())

        # dataset = dataset.copy()
        array = dataset.values
        data: np.ndarray = np.empty(dataset.shape)
        for dcol_i, column in enumerate(dataset.columns):
            i: int = self._scale_column_names.index(column)
            column_values = array[:, dcol_i]
            upper_bound = upper_bounds[i]
            is_upper_bound = column_values > upper_bound
            column_values[column_values > upper_bound] = upper_bound + (np.log(np.abs(column_values[is_upper_bound] - upper_bound)) + 1)
            # dataset[column] = np.fix(column_values) if self._predictors_are_integers else column_values
            data[:, dcol_i] = np.fix(column_values) if self._predictors_are_integers else column_values

        dataset = pd.DataFrame(data, index=dataset.index, columns=dataset.columns)

        if self.debug > 0:
            Log.info("[local_prior_correction]", dataset.iloc[self.debug].values)
        return dataset

    def smooth_dataframe(self, dataset: pd.DataFrame, extrapolation_margin: float) -> pd.DataFrame:
        maxs: np.ndarray = self._dataset_maxs
        mins: np.ndarray = self._dataset_mins
        amplitude_tolerance: np.ndarray = (self._dataset_maxs - self._dataset_mins) * extrapolation_margin

        maxs, mins = (np.fix(maxs), np.fix(mins)) if self._predictors_are_integers else (maxs, mins)
        lower_bounds = mins - amplitude_tolerance
        upper_bounds = maxs + amplitude_tolerance
        """if environment.TEST_MODE:
            lower_bounds = (mins - np.abs(mins * extrapolation_margin))# if self.__smoothing_mode == SmoothingMode.Local else (mins - np.abs(maxs * extrapolation_margin))
            upper_bounds = maxs + np.abs(maxs * extrapolation_margin)"""
        lower_bounds, upper_bounds = (np.fix(lower_bounds), np.fix(upper_bounds)) if self._predictors_are_integers else (lower_bounds, upper_bounds)
        if self.debug > 0:
            Log.info("[smooth_dataframe] bounds : {0} < v < {1}".format(upper_bounds[dataset.columns].values.tolist(), lower_bounds[dataset.columns].values.tolist()))

        # dataset = dataset.copy()
        data: np.ndarray = np.empty(dataset.shape)
        array = dataset.values
        for dcol_i, column in enumerate(dataset.columns):
            i: int = self._scale_column_names.index(column)
            column_values = array[:, dcol_i]  # dataset[column].values
            lower_bound, upper_bound = lower_bounds[i], upper_bounds[i]
            is_lower_bound = column_values < lower_bound
            is_upper_bound = column_values > upper_bound
            column_values[is_lower_bound] = lower_bound - (np.log(np.abs(column_values[is_lower_bound] - lower_bound)) + 1)
            column_values[is_upper_bound] = upper_bound + (np.log(np.abs(column_values[is_upper_bound] - upper_bound)) + 1)
            # dataset[column] = np.fix(column_values) if self._predictors_are_integers else column_values
            data[:, dcol_i] = np.fix(column_values) if self._predictors_are_integers else column_values
            if self.debug > 0:
                Log.info("[smooth_dataframe][{0}][{1:.2f} to {2:.2f}]".format(column, lower_bound, upper_bound))

        dataset = pd.DataFrame(data, index=dataset.index, columns=dataset.columns)

        if self.debug > 0:
            Log.info(dataset.iloc[self.debug])
        return dataset

    def predict(self, points: pd.DataFrame, extrapolation_margin: Optional[float] = None, no_data: Optional[Union[int, float]] = None, debug: int = -1) -> np.ndarray:
        """
        Predict values

        :param points: List of points to predict. Points must contain features used to build the model
        :type points: pd.DataFrame
        :param extrapolation_margin: When predicting values, smooth encountered predictors lying outside of the values
               used to build the model, extended by this margin (fraction of the amplitude of each predictor).
               If None, predictors are not smoothed.
               For each predictor p, tolerance = (max(p) - min(p)) * margin
               For each predictor, upper_bound = max(p) + tolerance
               For each predictor, lower_bound = min(p) - tolerance
               Out of bounds predictors are replaced by bound +- (log(abs(p - bound)) + 1), see smooth_dataframe
        :type extrapolation_margin: Optional[float]
        :param no_data: Value assigned to the prediction of points having at least one missing predictor.
               If None, these points are predicted with their missing (standardized) predictors replaced by 0
        :type no_data: Union[int, float]
        :param debug: Stored in ``self.debug``. A positive value enables debug logs during smoothing and is used
               as the position of the row to log
        :type debug: int
        :returns: List of predicted value
        :rtype: np.ndarray
        """
        self.debug = debug

        # Positional selection of the model regressors: faster than points[self._labels]
        # (original author's note: not tested if correct)
        columns_indices: np.ndarray = self.columns_to_indices(self._labels, list(points.columns))
        points = points.iloc[:, columns_indices]

        if self._predictors_are_integers:
            points = self._truncate_complete_rows(points)

        if extrapolation_margin is not None:
            points = self.smooth_dataframe(points, extrapolation_margin)
        if self._predictors_are_integers:
            # Smoothed predictors are truncated again so the model only sees integer values
            points = self._truncate_complete_rows(points)

        points = self.standardize(points)
        # The model cannot handle NaN: missing predictors are replaced by 0 for the computation
        points_array = points.fillna(0).values
        prediction = self._model.predict(points_array)
        prediction = self.inverse_standardize(prediction, DATASET_RESPONSE_VARIABLE)
        if no_data is not None:
            # Predictions of points with at least one missing predictor are meaningless
            prediction[np.nonzero(np.any(points.isna().values, axis=1))[0]] = no_data

        return prediction

    @staticmethod
    def _truncate_complete_rows(points: pd.DataFrame) -> pd.DataFrame:
        """
        Return a copy of points where rows without missing value are truncated to integers (kept as floats).

        Rows having at least one missing value are left as they are.
        Working on an explicit copy avoids the pandas SettingWithCopyWarning.

        :param points: Predictors
        :type points: pd.DataFrame
        :returns: Copy of points with complete rows truncated
        :rtype: pd.DataFrame
        """
        points = points.copy()
        complete_rows = ~points.isnull().any(axis=1)
        points.loc[complete_rows] = points.loc[complete_rows].astype(int).astype(float)
        return points

    """def prediction_lstsq(self, points):
        points = np.hstack((points, np.ones(len(points)).reshape(-1, 1)))
        return np.sum(self._model_lstsq * points, axis=1).reshape(1, -1)"""

    def __transform_pearson_correlations(self):
        pearsons_correlation = []
        if self._pearson_correlations is not None:
            for c in self._scale_column_names:
                if c != DATASET_RESPONSE_VARIABLE:
                    pearsons_correlation.extend(self._pearson_correlations[c])
        return pearsons_correlation

    def to_json(self) -> Dict:
        state_dict: Dict = self.__dict__.copy()
        state_dict.pop("_predictors_are_integers", None)
        state_dict.pop("_standardize", None)
        state_dict.pop("debug", None)
        state_dict.pop("_MultipleRegressionModel__residuals", None)

        # These attributes are stored as pd.DataFrame in PhaseMultipleRegression json representation
        state_dict.pop("_scale_std", None)
        state_dict.pop("_scale_means", None)
        state_dict.pop("_scale_column_names", None)
        state_dict.pop("_pearson_correlations", None)
        state_dict.pop("_dataset_mins", None)
        state_dict.pop("_dataset_maxs", None)
        state_dict["pearson_correlations"] = self.__transform_pearson_correlations()

        return state_dict
