import math

# Column-name prefixes of the indicator layers. A column is treated as part of
# a layer when its name starts with "<prefix>_". The prefixes presumably stand
# for coping, exposure and vulnerability.
LAYER_PREFIXES = ["cop", "exp", "vul"]


def normalize_indicators(indicators_df):
    """Min-max scale the indicator columns of every layer, in place.

    Every column named ``<prefix>_...`` for a prefix in ``LAYER_PREFIXES`` is
    mapped to ``(value - min) / (max - min)`` using that column's own minimum
    and maximum. A column whose minimum or maximum is NaN is left untouched.

    Args:
        indicators_df: DataFrame containing the indicator columns. It is
            modified in place.

    Returns:
        The same ``indicators_df`` object with its layer columns rescaled.
    """

    def rescale(column):
        lowest = column.min()
        highest = column.max()
        if not (math.isnan(lowest) or math.isnan(highest)):
            # NOTE(review): for a constant column highest == lowest, so the
            # denominator is zero; presumably the whole column then becomes
            # NaN -- confirm that this is the intended outcome.
            return (column - lowest) / (highest - lowest)
        # No usable range could be determined: hand the column back unchanged.
        return column

    for prefix in LAYER_PREFIXES:
        marker = f"{prefix}_"
        layer_columns = [
            name for name in indicators_df.columns if name.startswith(marker)
        ]
        rescaled = indicators_df[layer_columns].apply(rescale, axis=0)
        indicators_df[layer_columns] = rescaled

    return indicators_df


def guess_missing_indicators(indicators_df):
    """Fill missing coping and vulnerability indicators with a guess, in place.

    Missing indicators lead to problems in the score calculation, so they are
    guessed with a heuristic. Current heuristic: use the worst value possible.

    * coping (``cop_`` columns): low values are bad, so NaN becomes 0.
    * vulnerability (``vul_`` columns): high values are bad, so NaN becomes 1.
    * exposure: NA or 0 is taken to mean that there is no exposure, or that no
      information about any exposure is available, so nothing is guessed.

    Args:
        indicators_df: DataFrame containing the indicator columns. It is
            modified in place.

    Returns:
        The same ``indicators_df`` object with the gaps filled.
    """
    # (column marker, worst-case value) per layer, processed in this order.
    worst_case_by_layer = (
        ("cop_", 0),
        ("vul_", 1),
    )

    for marker, worst_value in worst_case_by_layer:
        layer_columns = [
            name for name in indicators_df.columns if name.startswith(marker)
        ]
        filled = indicators_df[layer_columns].fillna(worst_value)
        indicators_df[layer_columns] = filled

    return indicators_df


def calculate_scores(indicators_df, weights_df):
    """Compute one weighted score per layer for every row of indicators.

    For each prefix in ``LAYER_PREFIXES`` a row's score is the sum of
    ``value * weight`` over all weights whose ``variable_name`` is a
    ``<prefix>_`` column of ``indicators_df``. A value whose weight has
    ``direction == -1`` is flipped to ``1 - value`` first (this presumably
    relies on the indicators being normalized to [0, 1] -- confirm with the
    caller). NaN values are skipped, and a row without any usable value for a
    layer gets NaN as that layer's score.

    Args:
        indicators_df: DataFrame containing the indicator columns. It is not
            modified; the scores are added to a copy.
        weights_df: DataFrame with the columns ``variable_name``,
            ``direction`` and ``weight``.

    Returns:
        A tuple ``(scores, message)``. ``scores`` is a copy of
        ``indicators_df`` with one extra column per layer prefix.
        ``message`` names the weights that start with a layer prefix but have
        no matching indicator column, or is ``""`` when there are none.
    """
    scores = indicators_df.copy()
    missing_variables = set()  # weights without a matching indicator column

    # The weights table is the same for every indicator row, so it is walked
    # once here instead of once per indicator row.
    weight_entries = [
        (f"{weight_row['variable_name']}", weight_row)
        for _, weight_row in weights_df.iterrows()
    ]

    for prefix in LAYER_PREFIXES:
        columns_to_score = {
            col for col in indicators_df.columns if col.startswith(prefix + "_")
        }

        # Resolve which weights apply to this layer before touching any row.
        # NOTE: weights are matched on the bare prefix (no underscore), so a
        # name such as "<prefix>x" is reported as not applicable, not ignored.
        applicable_weights = []
        for column_name, weight_row in weight_entries:
            if not column_name.startswith(prefix):
                continue
            if column_name not in columns_to_score:
                missing_variables.add(column_name)
                continue
            applicable_weights.append(
                (column_name, weight_row["direction"], weight_row["weight"])
            )

        weighted_sums = []  # one score per indicator row for this layer
        for _, indicator_row in indicators_df.iterrows():
            weighted_sum = 0
            not_nan_count = 0
            for column_name, direction, weight in applicable_weights:
                value = indicator_row[column_name]
                if math.isnan(value):
                    continue
                if direction == -1:
                    value = 1 - value
                weighted_sum += value * weight
                not_nan_count += 1

            # Without a single usable value the score is undefined, not zero.
            weighted_sums.append(weighted_sum if not_nan_count else math.nan)

        scores[prefix] = weighted_sums

    # Report all non-applicable weights once, alongside the scores.
    if missing_variables:
        return (
            scores,
            f"The following weights are not applicable: {missing_variables}\n\n",
        )
    return scores, ""


def calculate_geometric_mean(col1, col2):
    """Return the element-wise geometric mean of two equally long columns.

    Elements are paired by position: the first element of ``col1`` with the
    first element of ``col2``, and so on.

    Args:
        col1: Sized iterable of numbers, e.g. a list or a pandas Series.
        col2: Sized iterable of numbers with the same length as ``col1``.

    Returns:
        A list with ``sqrt(a * b)`` for every pair ``(a, b)``.

    Raises:
        ValueError: If ``col1`` is empty, if the lengths differ, or (raised by
            ``math.sqrt``) if a product is negative.
    """
    # len() instead of a truthiness test: the truth value of a pandas Series
    # is ambiguous and raises.
    n = len(col1)
    if n == 0:
        raise ValueError("The input must not be empty.")

    if len(col2) != n:
        raise ValueError("The inputs must have the same length.")

    # zip pairs the elements positionally. Indexing with col[i] would be a
    # label lookup on a pandas Series, which fails or returns the wrong rows
    # when the index is not 0..n-1 (e.g. after filtering or sorting).
    return [math.sqrt(a * b) for a, b in zip(col1, col2)]
