import json
import logging
import re
import time
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import pyproj
import requests
from osgeo import gdal, ogr

from ..utils.codigos import departamentos, municipios
from ..utils.config import EPSG_ORIGEN, EPSG_DESTINO

# Attribute mapping for marine fauna.
# Keys are column names of the intermediate table: GDB attributes, plus
# "latitud"/"longitud" added by extraer_coordenadas and the taxonomy columns
# added by agregar_datos_api_a_excel. Values are the output column names they
# are copied to in procesar_fauna_marina (these look like Darwin Core terms).
# A key that is absent from the intermediate table is simply skipped there.
mapeo = {
    "OPERADOR": "institutionCode",
    "ABUND_ABS": "individualCount",
    "ID_MUEST_P": "eventID",
    "NOMBRE": "fieldNumber",
    "HABITAT": "habitat",
    #"DESCRIP": "samplingProtocol",
    "CUERPO_AGU": "waterBody",
    "VEREDA": "locality",
    "latitud": "decimalLatitude",
    "longitud": "decimalLongitude",
    "COOR_NORTE": "verbatimLatitude",
    "COOR_ESTE": "verbatimLongitude",
    "ESPECIE": "scientificName",
    "canonicalName": "acceptedNameUsage",
    "scientificNameAuthorship": "scientificNameAuthorship",
    "taxonomicStatus": "taxonomicStatus",
    "DIVISION": "phylum",
    "CLASE": "class",
    "ORDEN": "order",
    "FAMILIA": "family",
    "GENERO": "genus",
    "taxonRank": "taxonRank",
    #"N_COMUN": "vernacularName",
    #"PROF_MUES": "verbatimDepth",
    "OBSERV": "eventRemarks",
    "T_ESF_MUEST": "samplingEffort",
    "PROYECTO": "datasetName"
}

# Constant values for marine fauna.
# procesar_fauna_marina writes each of these to every row of the final table,
# under the output column named by the key. "verbatimSRS" carries the source
# CRS identifier imported from the project configuration (EPSG_ORIGEN).
valores_constantes = {
    "occurrenceStatus": "present",
    "language": "es",
    "continent": "América del Sur",
    "country": "Colombia",
    "countryCode": "CO",
    "geodeticDatum": "WGS84",
    "verbatimSRS": EPSG_ORIGEN,
    "verbatimCoordinateSystem": "Coordenadas proyectadas"
}

# Full list of attributes in the final file.
# Used, in this order, as the column set of the final DataFrame built in
# procesar_fauna_marina; columns that never receive data stay empty.
# NOTE(review): procesar_fauna_marina also assigns "dynamicProperties", which
# is not listed here, so it ends up appended after these columns — confirm
# whether it should be part of this list.
lista_atributos = [
    "id", "type", "language", "institutionID", "institutionCode", "datasetName",
    "basisOfRecord", "occurrenceID", "recordNumber", "individualCount",
    "occurrenceStatus", "occurrenceRemarks", "organismRemarks", "eventID",
    "parentEventID", "fieldNumber", "eventDate", "year", "month", "day",
    "verbatimEventDate", "habitat", "samplingProtocol", "sampleSizeValue", "sampleSizeUnit",
    "eventRemarks", "continent", "waterBody", "country",
    "countryCode", "stateProvince", "county", "municipality", "locality",
    "verbatimDepth", "samplingEffort",
    "locationRemarks", "decimalLatitude", "decimalLongitude", "geodeticDatum",
    "verbatimCoordinates", "verbatimLatitude", "verbatimLongitude",
    "verbatimCoordinateSystem", "verbatimSRS", "footprintWKT", "footprintSRS",
    "verbatimIdentification", "identificationQualifier", "scientificName",
    "acceptedNameUsage", "higherClassification", "kingdom", "phylum", "class",
    "order", "family", "genus", "specificEpithet", "infraspecificEpithet",
    "taxonRank", "verbatimTaxonRank", "scientificNameAuthorship", "vernacularName",
    "taxonomicStatus"
]


def consultar_taxon_rank(especie, timeout=30):
    """Look up one scientific name in the WoRMS ``AphiaRecordsByMatchNames`` service.

    Args:
        especie: Scientific name to match. ``None``, NaN and blank names are
            not sent to the service.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys ``taxonRank``, ``canonicalName``,
        ``scientificNameAuthorship``, ``taxonomicStatus``,
        ``scientificNameID``, ``nameAccordingTo`` and ``nameAccordingToID``.
        When the service answers 200 without a match, the looked-up values
        are ``None`` and only the two ``nameAccordingTo*`` constants are set.
        Returns ``None`` for a blank name, a non-200 response, or any
        request/parsing failure (the lookup is best-effort and never raises
        for those).
    """
    if especie is None or pd.isna(especie) or not str(especie).strip():
        return None

    url = "http://www.marinespecies.org/rest/AphiaRecordsByMatchNames"
    # Let requests build the query string so characters such as '&', '#' or
    # '+' inside a name cannot corrupt the request.
    parametros = {
        "scientificnames[]": str(especie).strip(),
        "marine_only": "false",
    }

    try:
        # One-second pause per call to throttle requests against the service.
        time.sleep(1)
        response = requests.get(url, params=parametros, timeout=timeout)

        if response.status_code != 200:
            return None

        data = response.json()
        # The first match of the first (only) requested name, if any.
        especie_data = data[0][0] if data and data[0] else {}
        aphia_id = especie_data.get("AphiaID")

        return {
            "taxonRank": especie_data.get("rank", None),
            "canonicalName": especie_data.get("valid_name", None),
            "scientificNameAuthorship": especie_data.get("authority", None),
            "taxonomicStatus": especie_data.get("status", None),
            "scientificNameID": f"https://www.marinespecies.org/aphia.php?p=taxdetails&id={aphia_id}" if aphia_id else None,
            "nameAccordingTo": "World Register of Marine Species",
            "nameAccordingToID": "https://www.marinespecies.org"
        }

    except (requests.RequestException, ValueError, LookupError, TypeError, AttributeError) as error:
        # Network errors, invalid JSON or an unexpected payload shape: keep
        # the best-effort contract but leave a trace of what went wrong.
        logging.getLogger(__name__).warning(
            "Consulta a WoRMS fallida para %r: %s", especie, error
        )
        return None


def agregar_datos_api_a_excel(ruta_excel_fauna_marina, max_workers=32):
    """Enrich the intermediate Excel file with taxonomy data, in place.

    Reads the workbook, looks up every distinct non-null ``ESPECIE`` value
    with ``consultar_taxon_rank`` and writes the workbook back with the
    columns ``taxonRank``, ``canonicalName``, ``scientificNameAuthorship``,
    ``taxonomicStatus``, ``scientificNameID``, ``nameAccordingTo`` and
    ``nameAccordingToID``. Rows whose lookup failed get empty values in
    those columns.

    Args:
        ruta_excel_fauna_marina: Path of the Excel file to read and overwrite.
        max_workers: Upper bound on concurrent lookups.

    Returns:
        Always ``None``. Failures (unreadable file, missing ``ESPECIE``
        column, write error) are logged and the file is left untouched.
    """
    columnas_api = (
        "taxonRank",
        "canonicalName",
        "scientificNameAuthorship",
        "taxonomicStatus",
        "scientificNameID",
        "nameAccordingTo",
        "nameAccordingToID",
    )

    try:
        df = pd.read_excel(ruta_excel_fauna_marina)

        if "ESPECIE" not in df.columns:
            raise ValueError("❌ ERROR: La columna 'ESPECIE' no está en el archivo Excel.")

        especies = df["ESPECIE"].tolist()

        # Query each distinct name once; the same species usually repeats
        # across many rows and every lookup costs at least one second.
        especies_unicas = list(dict.fromkeys(e for e in especies if pd.notna(e)))

        respuestas = []
        if especies_unicas:
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                respuestas = list(executor.map(consultar_taxon_rank, especies_unicas))

        # Keep only successful lookups; anything else maps to "no data".
        datos_por_especie = {
            especie: respuesta
            for especie, respuesta in zip(especies_unicas, respuestas)
            if isinstance(respuesta, dict)
        }
        datos_por_fila = [datos_por_especie.get(especie, {}) for especie in especies]

        # Build every column explicitly so the columns exist even when no
        # lookup succeeded at all.
        for columna in columnas_api:
            df[columna] = [datos.get(columna) for datos in datos_por_fila]

        df.to_excel(ruta_excel_fauna_marina, index=False)

    except Exception:
        # Best-effort step: report the problem and let the caller continue
        # with the file as it was.
        logging.getLogger(__name__).exception(
            "No se pudieron agregar los datos taxonómicos a %s", ruta_excel_fauna_marina
        )

    return None

# Extract point coordinates (and attributes) from a layer
def extraer_coordenadas(layer):
    """Collect the attributes of every point feature plus its transformed coordinates.

    Each point is transformed from ``EPSG_ORIGEN`` to ``EPSG_DESTINO`` and the
    result is stored under the keys ``"longitud"`` (first output axis) and
    ``"latitud"`` (second output axis) next to the feature's own attributes.

    Args:
        layer: Iterable of OGR features, or ``None``.

    Returns:
        A list with one attribute dict per point feature. Features without
        geometry, with an empty geometry, or with a non-point geometry are
        skipped. An absent layer yields an empty list.
    """
    resultados = []
    if layer is None:
        return resultados

    transformer = pyproj.Transformer.from_crs(EPSG_ORIGEN, EPSG_DESTINO, always_xy=True)

    for feature in layer:
        geom = feature.GetGeometryRef()
        if geom is None or geom.IsEmpty():
            continue

        # Flatten the type so Z/M-enabled points (wkbPoint25D, wkbPointM,
        # wkbPointZM) are accepted as well as plain 2D points.
        if ogr.GT_Flatten(geom.GetGeometryType()) != ogr.wkbPoint:
            continue

        lon, lat = transformer.transform(geom.GetX(), geom.GetY())

        atributos = feature.items()
        atributos["latitud"] = lat
        atributos["longitud"] = lon
        resultados.append(atributos)

    return resultados


# Join the layer records with the table records
def realizar_join(capa, tabla, enlace_fauna_marina):
    """Inner-join two record collections on a shared key.

    Args:
        capa: Records of the spatial layer (anything ``pd.DataFrame`` accepts).
        tabla: Records of the attribute table (anything ``pd.DataFrame`` accepts).
        enlace_fauna_marina: Column name (or names) present in both inputs.

    Returns:
        The merged DataFrame, or ``None`` when either input is empty, the
        join matches no rows, or the merge raises for any reason.
    """
    try:
        izquierda = pd.DataFrame(capa)
        derecha = pd.DataFrame(tabla)

        # Nothing to join if either side has no records.
        if izquierda.empty or derecha.empty:
            return None

        combinado = izquierda.merge(derecha, how="inner", on=enlace_fauna_marina)
        return None if combinado.empty else combinado

    except Exception:
        return None

# 🔹 Export a DataFrame to Excel
def exportar_excel(dataframe, ruta_salida):
    """Write ``dataframe`` to ``ruta_salida`` as an Excel file without the index.

    The export is best-effort: a failure is logged with its traceback instead
    of being raised, so callers are never interrupted by it.

    Args:
        dataframe: Object exposing ``to_excel`` (a pandas DataFrame).
        ruta_salida: Destination path of the workbook.

    Returns:
        Always ``None``.
    """
    try:
        dataframe.to_excel(ruta_salida, index=False)
    except Exception:
        # Previously swallowed silently, which hid missing output files.
        logging.getLogger(__name__).exception(
            "No se pudo exportar el archivo Excel %s", ruta_salida
        )
    return None


# Resolve department and municipality codes to their names
def convertir_codigos_nombres(df):
    """Add ``stateProvince`` and ``county`` name columns derived from code columns.

    ``DEPTO`` codes are looked up in ``departamentos`` and ``MUNICIPIO`` codes
    in ``municipios``; a code without an entry becomes ``"Desconocido"``.
    When a code column is absent, the corresponding name column is filled
    with ``None``. The DataFrame is modified in place and also returned.
    """
    conversiones = (
        ("DEPTO", "stateProvince", departamentos),
        ("MUNICIPIO", "county", municipios),
    )

    for columna_codigo, columna_nombre, catalogo in conversiones:
        if columna_codigo not in df.columns:
            df[columna_nombre] = None
            continue

        df[columna_nombre] = df[columna_codigo].apply(
            lambda codigo, catalogo=catalogo: catalogo.get(codigo, "Desconocido")
        )

    return df

# Derive the calculated output fields

# Strings that count as "no value" when assembling text fields.
_TEXTOS_VACIOS = ("", "none", "nan", "<null>")

# DETERM domain code -> `type`.
_TIPO_POR_DETERM = {
    "411": "Event",
    "413": "Event",
    "414": "PhysicalObject",
    "415": "Sound",
    "416": "PhysicalObject",
    "417": "PhysicalObject",
    "418": "PhysicalObject",
    "419": "Otro"
}

# DETERM domain code -> text used as the first part of `occurrenceRemarks`.
_OBSERVACION_POR_DETERM = {
    "411": "Captura de individuos",
    "413": "Observación",
    "414": "Marcas de Individuos",
    "415": "Detección auditiva",
    "416": "Huellas",
    "417": "Heces",
    "418": "Pelos",
    "419": "Otro"
}

# `type` -> `basisOfRecord`; any other type becomes "Desconocido".
_BASE_POR_TIPO = {
    "Event": "HumanObservation",
    "StillImage": "MachineObservation",
    "PhysicalObject": "MaterialSample",
    "Sound": "HumanObservation"
}

# ESTACIONAL domain code -> season label.
_EPOCA_POR_ESTACIONAL = {
    "401": "Seca",
    "402": "Lluvias",
    "403": "Transición",
    "404": "Veranillo de San Juan"
}

# INS_MUEST domain code -> `samplingProtocol`.
_PROTOCOLO_POR_INS_MUEST = {
    "501": "ADCP",
    "502": "Botella Go Flo",
    "503": "Botella Nansen",
    "504": "Botella Niskin",
    "505": "Botella Routner",
    "506": "Box corer",
    "507": "Censo visual",
    "508": "CTD",
    "509": "CTDO",
    "510": "Cuadrante",
    "511": "Draga",
    "512": "Draga Van Veen",
    "513": "HADCP",
    "514": "LADCP",
    "515": "Nasas",
    "516": "Nucleador (Corazonador)",
    "517": "Observación ",
    "518": "Otro",
    "519": "Palangre horizontal ",
    "520": "Palangre vertical",
    "521": "Piston corer",
    "522": "Recolección directa",
    "523": "Red cónica",
    "524": "Red de arrastre",
    "525": "Red de encierro",
    "526": "Red de enmalle",
    "527": "Transecto con punto intercepto",
    "528": "Transectos con cadena intercepto",
    "529": "Transectos en línea"
}


def _tiene_valor(valor):
    """Return True when ``valor`` is neither null nor a blank/placeholder string."""
    return bool(pd.notna(valor)) and str(valor).strip().lower() not in _TEXTOS_VACIOS


def _codigos_a_texto(serie):
    """Normalize numeric codes to strings (411.0 -> "411"); null, zero or non-numeric -> ""."""
    numeros = pd.to_numeric(serie, errors="coerce").fillna(0)
    return numeros.astype(int).astype(str).replace({"0": ""})


def _copiar_columna(df, destino, origen):
    """Copy column ``origen`` into ``destino``, or fill ``destino`` with None if absent."""
    df[destino] = df[origen] if origen in df.columns else None


def _mapear_codigos(df, columna, destino, tabla):
    """Normalize code column ``columna`` in place and write its labels to ``destino``."""
    if columna in df.columns:
        # Safe to call more than once on the same column: already
        # normalized values (including "") normalize to themselves.
        df[columna] = _codigos_a_texto(df[columna])
        df[destino] = df[columna].map(tabla).fillna("")
    else:
        df[destino] = None


def _unir_no_vacios(valores):
    """Join the non-null, non-blank values with " | "; None when nothing is left."""
    partes = [str(v) for v in valores if pd.notna(v) and str(v).strip()]
    return " | ".join(partes) if partes else None


def _texto_coordenadas(este, norte):
    """Return "east, north" with decimal points, or None if either value is missing."""
    if pd.isna(este) or pd.isna(norte):
        return None
    return str(este).replace(",", ".") + ", " + str(norte).replace(",", ".")


def _texto_profundidad(profun, prof_mues):
    """Build the `verbatimDepth` text from PROFUN and PROF_MUES; None when both are null."""
    # NOTE(review): the "adultos"/"juveniles" labels look copied from a count
    # field rather than a depth field; kept as-is — confirm the intended text.
    partes = []
    if pd.notna(profun):
        partes.append(f"{int(profun)} adultos")
    if pd.notna(prof_mues):
        partes.append(f"{int(prof_mues)} juveniles")
    return " | ".join(partes) if partes else None


def procesar_campos_especificos(df):
    """Compute the derived output fields, in dependency order.

    The DataFrame is modified in place and returned. ``None`` or an empty
    DataFrame is returned unchanged.

    Columns read when present: ``FECHA_MFA``, ``COOR_NORTE``, ``COOR_ESTE``,
    ``OBJECTID``, ``DETERM``, ``OPERADOR``, ``OBSERV``, ``ESPECIE``,
    ``ESTACIONAL``, ``DESCR_EPOC``, ``INS_MUEST``, ``PROFUN``, ``PROF_MUES``,
    ``GRUPO_x``, ``GRUPO_y`` and ``N_COMUN``. The code columns ``DETERM``,
    ``ESTACIONAL`` and ``INS_MUEST`` are rewritten as normalized strings.

    Columns written: ``verbatimEventDate``, ``verbatimLatitude``,
    ``verbatimLongitude``, ``recordNumber``, ``type``, ``occurrenceID``,
    ``id``, ``basisOfRecord``, ``occurrenceRemarks``, ``eventDate``,
    ``year``, ``month``, ``day``, ``verbatimCoordinates``,
    ``verbatimIdentification``, ``dynamicProperties``, ``samplingProtocol``,
    ``verbatimDepth`` and ``vernacularName``, plus the helper columns
    ``temp_occurrenceRemarks`` and ``temp_estacional``. A field whose source
    column is absent is filled with ``None``.

    Raises:
        ValueError: If ``PROFUN`` or ``PROF_MUES`` holds a non-null value
            that cannot be converted to ``int``.
    """
    if df is None or df.empty:
        return df

    # Straight copies of source columns.
    _copiar_columna(df, "verbatimEventDate", "FECHA_MFA")
    _copiar_columna(df, "verbatimLatitude", "COOR_NORTE")
    _copiar_columna(df, "verbatimLongitude", "COOR_ESTE")
    _copiar_columna(df, "recordNumber", "OBJECTID")

    # `type` comes first because `basisOfRecord` depends on it.
    _mapear_codigos(df, "DETERM", "type", _TIPO_POR_DETERM)

    # `occurrenceID`: operator stripped to [A-Za-z0-9] + fixed tag + record number.
    if "OPERADOR" in df.columns:
        df["occurrenceID"] = [
            # A null operator contributes an empty prefix instead of crashing.
            (re.sub(r'[^A-Za-z0-9]', '', str(operador)) if pd.notna(operador) else "")
            + ":fauna_marina:" + str(numero)
            for operador, numero in zip(df["OPERADOR"], df["recordNumber"])
        ]
    else:
        df["occurrenceID"] = None

    df["id"] = df["occurrenceID"]

    df["basisOfRecord"] = df["type"].map(_BASE_POR_TIPO).fillna("Desconocido")

    # `occurrenceRemarks`: DETERM label and free-text OBSERV, joined when both exist.
    _mapear_codigos(df, "DETERM", "temp_occurrenceRemarks", _OBSERVACION_POR_DETERM)
    if "OBSERV" in df.columns:
        df["occurrenceRemarks"] = [
            _unir_no_vacios(par)
            for par in zip(df["temp_occurrenceRemarks"], df["OBSERV"])
        ]
    else:
        df["occurrenceRemarks"] = None

    # Parse the date once; unparseable values become missing in all four fields.
    fechas = pd.to_datetime(df["verbatimEventDate"], errors='coerce')
    df["eventDate"] = fechas.dt.strftime('%Y-%m-%d')
    df["year"] = fechas.dt.year
    df["month"] = fechas.dt.month
    df["day"] = fechas.dt.day

    # `verbatimCoordinates`: "COOR_ESTE, COOR_NORTE".
    df["verbatimCoordinates"] = [
        _texto_coordenadas(este, norte)
        for este, norte in zip(df["verbatimLongitude"], df["verbatimLatitude"])
    ]

    _copiar_columna(df, "verbatimIdentification", "ESPECIE")

    # `dynamicProperties`: JSON object with the season label and DESCR_EPOC,
    # keyed by column name; placeholder values are left out.
    _mapear_codigos(df, "ESTACIONAL", "temp_estacional", _EPOCA_POR_ESTACIONAL)
    campos_dinamicos = ["temp_estacional", "DESCR_EPOC"]
    for col in campos_dinamicos:
        if col not in df.columns:
            df[col] = None

    df["dynamicProperties"] = [
        json.dumps(
            {campo: valor for campo, valor in zip(campos_dinamicos, valores) if _tiene_valor(valor)},
            ensure_ascii=False
        )
        for valores in zip(*(df[col] for col in campos_dinamicos))
    ]

    _mapear_codigos(df, "INS_MUEST", "samplingProtocol", _PROTOCOLO_POR_INS_MUEST)

    # `verbatimDepth` needs both source columns to exist.
    if "PROFUN" in df.columns and "PROF_MUES" in df.columns:
        df["verbatimDepth"] = [
            _texto_profundidad(profun, prof_mues)
            for profun, prof_mues in zip(df["PROFUN"], df["PROF_MUES"])
        ]
    else:
        df["verbatimDepth"] = None

    # `vernacularName`: usable values of the group/common-name columns joined
    # with " | " ("" when none is usable).
    campos_nombre = ["GRUPO_x", "GRUPO_y", "N_COMUN"]
    for col in campos_nombre:
        if col not in df.columns:
            df[col] = None

    df["vernacularName"] = [
        " | ".join(str(valor) for valor in valores if _tiene_valor(valor))
        for valores in zip(*(df[col] for col in campos_nombre))
    ]

    return df



# Main entry point for processing marine fauna
def procesar_fauna_marina(ruta_gdb, capa_fauna_marina, tabla_fauna_marina, enlace_fauna_marina, ruta_excel_fauna_marina, archivo_entrada_fauna_marina, archivo_salida_fauna_marina):
    """Run the whole marine-fauna pipeline from a geodatabase to the final Excel file.

    Steps: open the GDB, read the point layer (with transformed coordinates)
    and the attribute table, inner-join them, write the join to an
    intermediate Excel file, enrich that file with taxonomy data, compute the
    derived fields, and export the final table.

    Args:
        ruta_gdb: Path of the geodatabase.
        capa_fauna_marina: Name of the point layer.
        tabla_fauna_marina: Name of the attribute table.
        enlace_fauna_marina: Join column shared by layer and table.
        ruta_excel_fauna_marina: Path of the intermediate Excel file.
        archivo_entrada_fauna_marina: Not used by this function; kept for
            interface compatibility.
        archivo_salida_fauna_marina: Path of the final Excel file.

    Returns:
        Always ``None``. Any failure is logged with its traceback and the
        function returns without raising; an empty join is logged as a
        warning and produces no output.
    """
    registro = logging.getLogger(__name__)

    try:
        # Open the geodatabase.
        gdb = gdal.OpenEx(ruta_gdb, gdal.OF_VECTOR)
        if not gdb:
            raise RuntimeError(f"❌ No se pudo abrir la GDB en {ruta_gdb}")

        capa = gdb.GetLayerByName(capa_fauna_marina)
        tabla = gdb.GetLayerByName(tabla_fauna_marina)
        if capa is None or tabla is None:
            raise RuntimeError(
                f"❌ No se encontró la capa '{capa_fauna_marina}' o la tabla "
                f"'{tabla_fauna_marina}' en {ruta_gdb}"
            )

        # Point features with transformed coordinates, and plain table rows.
        datos_capa = extraer_coordenadas(capa)
        datos_tabla = [feature.items() for feature in tabla]

        resultado = realizar_join(datos_capa, datos_tabla, enlace_fauna_marina)
        if resultado is None or resultado.empty:
            registro.warning(
                "El join entre '%s' y '%s' no produjo registros; no se genera salida.",
                capa_fauna_marina, tabla_fauna_marina
            )
            return None

        # Round-trip through the intermediate Excel file: write the join,
        # enrich it in place with taxonomy data, then read it back.
        exportar_excel(resultado, ruta_excel_fauna_marina)
        agregar_datos_api_a_excel(ruta_excel_fauna_marina)
        df_intermedio = pd.read_excel(ruta_excel_fauna_marina)

        df_intermedio = procesar_campos_especificos(df_intermedio)

        # Give the final table its rows up front so constants and copied
        # columns fill every row regardless of assignment order.
        df_final = pd.DataFrame(index=df_intermedio.index, columns=lista_atributos)

        # Direct source-column -> output-column copies.
        for columna_intermedia, columna_final in mapeo.items():
            if columna_intermedia in df_intermedio.columns:
                df_final[columna_final] = df_intermedio[columna_intermedia]

        # Constants applied to every row.
        for clave, valor in valores_constantes.items():
            df_final[clave] = valor

        # Calculated fields; they take precedence over the direct copies.
        # NOTE(review): stateProvince/county are not populated here and
        # convertir_codigos_nombres is not called in this flow — confirm
        # whether that is intended.
        campos_calculados = (
            "type", "recordNumber", "basisOfRecord", "occurrenceRemarks",
            "eventDate", "year", "month", "day", "verbatimEventDate",
            "verbatimCoordinates", "verbatimIdentification", "occurrenceID",
            "id", "dynamicProperties", "samplingProtocol", "verbatimDepth",
            "vernacularName",
        )
        for campo in campos_calculados:
            if campo in df_intermedio.columns:
                df_final[campo] = df_intermedio[campo]

        # Export the final table.
        exportar_excel(df_final, archivo_salida_fauna_marina)

    except Exception:
        # Top-level boundary: keep the "never raises" contract but make the
        # failure visible.
        registro.exception("No se pudo procesar fauna marina desde %s", ruta_gdb)

    return None