import logging
import re
import time
from concurrent.futures import ThreadPoolExecutor

import pyproj
import pandas as pd
import requests
from osgeo import gdal, ogr

from ..utils.codigos import departamentos, municipios
from ..utils.config import EPSG_DESTINO

# Attribute mapping for otrafauna_OFM.
# Keys are column names of the intermediate DataFrame (source fields, the
# "latitud"/"longitud" columns added by extraer_coordenadas, and the taxonomy
# columns added by agregar_datos_api_a_excel); values are the names of the
# output columns they are copied into by procesar_otrafauna_OFM.
mapeo = {
    "OPERADOR": "institutionCode",
    # Disabled entry: "fieldNumber" is filled from "ID_AVISTAM" below.
    #"NOMBRE": "fieldNumber",
    "latitud": "decimalLatitude",
    "longitud": "decimalLongitude",
    "ID_INGRESO": "eventID",
    "ID_AVISTAM": "fieldNumber",
    "FECHA": "verbatimEventDate",
    "PROFUND_m": "verbatimDepth",
    "ORDEN": "order",
    "FAMILIA": "family",
    "GENERO": "genus",
    # NOTE: procesar_otrafauna_OFM later overwrites "scientificName" with the
    # value computed by procesar_campos_especificos.
    "ESPEC_NOMB": "scientificName",
    "NOMB_COMUN": "vernacularName",
    "taxonRank": "taxonRank",
    "canonicalName": "acceptedNameUsage",
    "scientificNameAuthorship": "scientificNameAuthorship",
    "taxonomicStatus": "taxonomicStatus",
    "scientificNameID": "scientificNameID",
    "nameAccordingTo": "nameAccordingTo",
    "nameAccordingToID": "nameAccordingToID",
    "NUM_ANIMAL": "individualCount",
    "PROYECTO": "datasetName"

}



# Complete list of attributes (columns) of the final file.
# procesar_otrafauna_OFM uses it as the column set of the output DataFrame, so
# the order here is the column order of the exported Excel file; columns that
# are never assigned stay empty.
lista_atributos = [
    "id", "type", "language", "institutionID", "institutionCode", "datasetName", "basisOfRecord", "occurrenceID", "recordedBy", 
    "individualCount", "lifeStage", "organismQuantity", "organismQuantityType", "behavior", "occurrenceStatus", "occurrenceRemarks", 
    "eventID", "parentEventID", "fieldNumber", "eventDate", "eventTime", "year", "month", "day", "verbatimEventDate", "samplingProtocol", 
    "samplingEffort", "eventRemarks", "locationID", "continent", "waterBody", "country", "countryCode", "stateProvince", "minimumDepthInMeters", "maximumDepthInMeters",
    "verbatimDepth", "decimalLatitude", "decimalLongitude", "geodeticDatum", "verbatimCoordinates", "verbatimLatitude", "verbatimLongitude", "verbatimCoordinateSystem",
    "verbatimSRS", "verbatimIdentification", "identificationQualifier", "dateIdentified", "scientificNameID", "nameAccordingToID", "scientificName", "acceptedNameUsage", 
    "nameAccordingTo", "higherClassification", "kingdom", "phylum", "class", "order", "family", "genus", "specificEpithet", "taxonRank", "verbatimTaxonRank", 
    "scientificNameAuthorship", "vernacularName", "taxonomicStatus", "identificationVerificationStatus", "measurementValue", "measurementType", 
    "measurementValue_1", "measurementType_1", "measurementValue_2", "measurementType_2", "measurementUnit_2", "measurementValue_3", "measurementType_3", "measurementUnit_3"
]


def consultar_taxon_rank(especie, timeout=30):
    """Look up a scientific name in the WoRMS REST API and return taxonomy fields.

    Args:
        especie: Scientific name to match. Missing values (None/NaN) and the
            placeholder strings "", "none", "nan" and "<null>" are not sent to
            the API.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys "taxonRank", "canonicalName",
        "scientificNameAuthorship", "taxonomicStatus", "scientificNameID",
        "nameAccordingTo" and "nameAccordingToID", taken from the first match
        of the first result group. Returns None when the name is blank, the
        response status is not 200, or the request/response cannot be
        processed.
    """
    # Never query the service for missing or placeholder names.
    if especie is None or pd.isna(especie):
        return None
    nombre = str(especie).strip()
    if nombre.lower() in ("", "none", "nan", "<null>"):
        return None

    url = "https://www.marinespecies.org/rest/AphiaRecordsByMatchNames"
    # Let requests URL-encode the name (spaces, accents, '&', ...).
    params = {"scientificnames[]": nombre, "marine_only": "false"}

    try:
        # Pause before every request; presumably this throttles calls to the
        # public API -- confirm before changing it.
        time.sleep(1)
        response = requests.get(url, params=params, timeout=timeout)

        if response.status_code != 200:
            return None

        data = response.json()
        especie_data = data[0][0] if data and data[0] else {}
        aphia_id = especie_data.get("AphiaID")
    except (requests.RequestException, ValueError, LookupError, TypeError, AttributeError) as exc:
        # Best effort: one failed lookup must not abort the whole batch.
        logging.getLogger(__name__).warning(
            "No se pudo consultar la especie '%s' en WoRMS: %s", nombre, exc
        )
        return None

    return {
        "taxonRank": especie_data.get("rank", None),
        "canonicalName": especie_data.get("valid_name", None),
        "scientificNameAuthorship": especie_data.get("authority", None),
        "taxonomicStatus": especie_data.get("status", None),
        "scientificNameID": (
            f"https://www.marinespecies.org/aphia.php?p=taxdetails&id={aphia_id}"
            if aphia_id else None
        ),
        "nameAccordingTo": "World Register of Marine Species",
        "nameAccordingToID": "https://www.marinespecies.org",
    }


def agregar_datos_api_a_excel(ruta_excel_otrafauna_OFM):
    """Add taxonomy columns from the WoRMS lookup to an Excel file, in place.

    Reads the workbook, looks up every distinct value of the "ESPEC_NOMB"
    column with consultar_taxon_rank (in parallel threads), adds the columns
    "taxonRank", "canonicalName", "scientificNameAuthorship",
    "taxonomicStatus", "scientificNameID", "nameAccordingTo" and
    "nameAccordingToID", and writes the workbook back to the same path.
    Rows whose lookup failed get empty values in those columns.

    Args:
        ruta_excel_otrafauna_OFM: Path of the Excel file to read and overwrite.

    Returns:
        None. Failures are logged and leave the file untouched.
    """
    columnas_api = (
        "taxonRank",
        "canonicalName",
        "scientificNameAuthorship",
        "taxonomicStatus",
        "scientificNameID",
        "nameAccordingTo",
        "nameAccordingToID",
    )
    log = logging.getLogger(__name__)

    try:
        df = pd.read_excel(ruta_excel_otrafauna_OFM)

        if "ESPEC_NOMB" not in df.columns:
            log.error("❌ ERROR: La columna 'ESPEC_NOMB' no está en el archivo Excel.")
            return None

        especies = df["ESPEC_NOMB"].tolist()

        # Query each distinct name once instead of once per row; this keeps
        # the number of requests (and the 1 s pause per request) to a minimum.
        especies_unicas = list(dict.fromkeys(e for e in especies if not pd.isna(e)))

        # Run the API queries in parallel.
        with ThreadPoolExecutor(max_workers=50) as executor:
            respuestas = list(executor.map(consultar_taxon_rank, especies_unicas))

        datos_por_especie = {
            especie: respuesta
            for especie, respuesta in zip(especies_unicas, respuestas)
            if isinstance(respuesta, dict)
        }

        # Always create every column, even when all lookups failed, so the
        # result does not depend on which rows happened to succeed.
        for columna in columnas_api:
            df[columna] = [
                datos_por_especie[e].get(columna) if e in datos_por_especie else None
                for e in especies
            ]

        df.to_excel(ruta_excel_otrafauna_OFM, index=False)

    except Exception:
        # Best-effort step: callers continue with the file as it was, but the
        # failure must be visible in the log.
        log.exception(
            "❌ No se pudieron agregar los datos taxonómicos a %s", ruta_excel_otrafauna_OFM
        )
        return None

# Extract point coordinates (and attributes) from a layer
def extraer_coordenadas(layer, epsg_origen):
    """Return the attributes of every point feature plus reprojected coordinates.

    Args:
        layer: OGR layer to iterate, or None (e.g. when the layer lookup
            failed), in which case an empty list is returned.
        epsg_origen: Source CRS, in any form accepted by
            pyproj.Transformer.from_crs; coordinates are transformed from it
            to EPSG_DESTINO.

    Returns:
        A list with one dict per point feature: the feature's attributes
        (feature.items()) plus "latitud" and "longitud" with the transformed
        coordinates. Features without geometry, with an empty geometry, or
        with a non-point geometry are skipped.
    """
    if layer is None:
        logging.getLogger(__name__).warning(
            "No se recibió una capa válida para extraer coordenadas."
        )
        return []

    # always_xy=True: input and output are (x/lon, y/lat) regardless of the
    # axis order declared by the CRS.
    transformer = pyproj.Transformer.from_crs(epsg_origen, EPSG_DESTINO, always_xy=True)

    resultados = []
    for feature in layer:
        geom = feature.GetGeometryRef()
        if geom is None or geom.IsEmpty():
            continue
        # Flatten so that PointZ / PointM / PointZM features are accepted too;
        # only X and Y are used.
        if ogr.GT_Flatten(geom.GetGeometryType()) != ogr.wkbPoint:
            continue

        lon, lat = transformer.transform(geom.GetX(), geom.GetY())

        atributos = feature.items()
        atributos["latitud"] = lat
        atributos["longitud"] = lon
        resultados.append(atributos)

    return resultados


# Join the layer records with the table records
def realizar_join(capa, tabla, enlace_otrafauna_OFM):
    """Inner-join two record collections on a shared key column.

    Args:
        capa: Records of the layer (anything pandas.DataFrame accepts, e.g. a
            list of dicts).
        tabla: Records of the related table, same format.
        enlace_otrafauna_OFM: Column name (or list of names) present in both
            inputs to join on.

    Returns:
        The merged DataFrame, or None when either input is empty, the join
        produces no rows, or the join cannot be performed (for example the
        key column is missing). Columns present in both inputs other than the
        key receive pandas' default "_x" / "_y" suffixes.
    """
    log = logging.getLogger(__name__)

    try:
        capa_df = pd.DataFrame(capa)
        tabla_df = pd.DataFrame(tabla)
    except (ValueError, TypeError) as exc:
        log.error("❌ No se pudieron convertir los datos a DataFrame: %s", exc)
        return None

    if capa_df.empty or tabla_df.empty:
        log.warning("La capa o la tabla no tienen registros; no se realiza el join.")
        return None

    try:
        resultado_df = pd.merge(capa_df, tabla_df, on=enlace_otrafauna_OFM, how="inner")
    except (KeyError, ValueError, TypeError) as exc:
        # KeyError: key column missing; ValueError (incl. MergeError): key
        # dtypes cannot be merged.
        log.error("❌ No se pudo realizar el join por '%s': %s", enlace_otrafauna_OFM, exc)
        return None

    if resultado_df.empty:
        log.warning("El join por '%s' no produjo registros.", enlace_otrafauna_OFM)
        return None

    return resultado_df



# Export a DataFrame to Excel
def exportar_excel(dataframe, ruta_salida):
    """Write *dataframe* to an Excel file without the index column.

    Args:
        dataframe: pandas DataFrame to export; None is reported and ignored.
        ruta_salida: Destination path of the Excel file.

    Returns:
        None. A failed export is logged instead of raised, so callers keep
        their best-effort behaviour.
    """
    log = logging.getLogger(__name__)

    if dataframe is None:
        log.error("❌ No hay datos para exportar a %s", ruta_salida)
        return None

    try:
        dataframe.to_excel(ruta_salida, index=False)
    except Exception:
        # Writers can fail in many ways (path, permissions, missing engine,
        # illegal characters); keep the broad catch but never hide the error.
        log.exception("❌ No se pudo exportar el archivo Excel %s", ruta_salida)
        return None


# Compute the derived output fields

# Strings that count as "no value" once stripped and lower-cased.
_VALORES_VACIOS = ("", "none", "nan", "<null>")

# EVIDENCIA code -> value written to `type`.
_TIPO_POR_EVIDENCIA = {
    181: "StillImage",
    182: "MovingImage",
    183: "MovingImage",
    184: "Event",
}

# `type` -> value written to `basisOfRecord`.
_BASE_POR_TIPO = {
    "StillImage": "MachineObservation",
    "MovingImage": "MachineObservation",
    "Event": "HumanObservation",
}

# EVIDENCIA code -> text used in `occurrenceRemarks`.
_OBSERVACION_POR_EVIDENCIA = {
    181: "Fotografía",
    182: "video",
    183: "Video y fotografía",
    184: "Solo observación",
}

# DIRE_GRUPO code (as text) -> value written to `measurementValue`.
_DIRECCION_POR_CODIGO = {
    "171": "Towards (T)",
    "172": "Away (A)",
    "173": "Same direction (S)",
    "174": "Opposite direction (O)",
    "175": "Crossing perpendicular (C)",
    "176": "Variable (V)",
}

_TIPO_MEDICION = "Comportamiento de viaje de los individuos respecto a la plataforma de observación"


def _es_vacio(valor):
    """Return True when *valor* is missing or one of the blank placeholders."""
    return bool(pd.isna(valor)) or str(valor).strip().lower() in _VALORES_VACIOS


def _limpiar_operador(valor):
    """Return *valor* as text with every non-alphanumeric character removed."""
    return "" if pd.isna(valor) else re.sub(r"[^A-Za-z0-9]", "", str(valor))


def _formatear_coordenadas(este, norte):
    """Return "este, norte" with decimal points, or None if either is missing."""
    if pd.isna(este) or pd.isna(norte):
        return None
    return str(este).replace(",", ".") + ", " + str(norte).replace(",", ".")


def _combinar_observaciones(evidencia, observacion):
    """Join the non-blank remarks with " | "; return None when both are blank."""
    partes = [
        str(valor)
        for valor in (evidencia, observacion)
        if not pd.isna(valor) and str(valor).strip() != ""
    ]
    return " | ".join(partes) if partes else None


def _formatear_hora(duracion):
    """Format a Timedelta as HH:MM:SS; return None for missing values."""
    if pd.isna(duracion):
        return None
    partes = duracion.components
    return f"{int(partes.hours):02}:{int(partes.minutes):02}:{int(partes.seconds):02}"


def procesar_campos_especificos(df):
    """Add the derived output columns to *df* and return it.

    The DataFrame is modified in place. Columns are computed in dependency
    order, and every derived column is always created (with empty values when
    its source column is absent), so callers can read them unconditionally:

    - verbatimLatitude / verbatimLongitude / recordNumber: copies of
      COOR_NORTE / COOR_ESTE / OBJECTID.
    - occurrenceID and id: "<OPERADOR alphanumerics>:otrafaunaOFM:<recordNumber>".
    - verbatimCoordinates: "<east>, <north>" with decimal points; empty when
      either coordinate is missing.
    - type, basisOfRecord, occurrenceRemarks: derived from the EVIDENCIA code;
      occurrenceRemarks also includes OBSERVACIO_y when that column exists.
    - eventTime: HORA (interpreted as a number of hours) as HH:MM:SS.
    - higherClassification: non-blank ORDEN, SUBORDEN, FAMILIA, GENERO joined
      with " | " (missing ones are added to *df* as empty columns).
    - scientificName: ESPEC_NOMB, or the last element of
      higherClassification when ESPEC_NOMB is blank.
    - measurementValue / measurementType: derived from the DIRE_GRUPO code;
      DIRE_GRUPO itself is rewritten as text codes.

    The helper columns temp_occurrenceRemarks and temp_measurementValue are
    left in the result.

    Args:
        df: Intermediate DataFrame, or None.

    Returns:
        The same DataFrame with the columns above added; *df* unchanged when
        it is None or has no rows.
    """
    if df is None or df.empty:
        return df

    # Straight copies of source columns.
    for destino, origen in (
        ("verbatimLatitude", "COOR_NORTE"),
        ("verbatimLongitude", "COOR_ESTE"),
        ("recordNumber", "OBJECTID"),
    ):
        df[destino] = df[origen] if origen in df.columns else None

    # occurrenceID depends on recordNumber; id is a copy of occurrenceID.
    if "OPERADOR" in df.columns:
        df["occurrenceID"] = [
            _limpiar_operador(operador) + ":otrafaunaOFM:" + str(numero)
            for operador, numero in zip(df["OPERADOR"], df["recordNumber"])
        ]
    else:
        df["occurrenceID"] = None
    df["id"] = df["occurrenceID"]

    df["verbatimCoordinates"] = [
        _formatear_coordenadas(este, norte)
        for este, norte in zip(df["verbatimLongitude"], df["verbatimLatitude"])
    ]

    # type, basisOfRecord and the evidence remark all come from EVIDENCIA.
    if "EVIDENCIA" in df.columns:
        df["type"] = df["EVIDENCIA"].map(_TIPO_POR_EVIDENCIA).fillna("")
        df["basisOfRecord"] = df["type"].map(_BASE_POR_TIPO).fillna("")
        df["temp_occurrenceRemarks"] = df["EVIDENCIA"].map(_OBSERVACION_POR_EVIDENCIA).fillna("")
    else:
        df["type"] = None
        df["basisOfRecord"] = ""
        df["temp_occurrenceRemarks"] = None

    # NOTE(review): "OBSERVACIO_y" is the name pandas gives the right-hand
    # column when both joined inputs have "OBSERVACIO"; if only one of them
    # has it the column is not suffixed and is ignored here -- confirm.
    observaciones = df["OBSERVACIO_y"] if "OBSERVACIO_y" in df.columns else [None] * len(df)
    df["occurrenceRemarks"] = [
        _combinar_observaciones(evidencia, observacion)
        for evidencia, observacion in zip(df["temp_occurrenceRemarks"], observaciones)
    ]

    # NOTE(review): HORA is parsed as a numeric count of hours (unit="h");
    # text or time-typed cells would make to_timedelta raise -- confirm format.
    if "HORA" in df.columns:
        df["eventTime"] = [_formatear_hora(d) for d in pd.to_timedelta(df["HORA"], unit="h")]
    else:
        df["eventTime"] = None

    campos_clasificacion = ["ORDEN", "SUBORDEN", "FAMILIA", "GENERO"]
    for col in campos_clasificacion:
        if col not in df.columns:
            df[col] = None

    df["higherClassification"] = [
        " | ".join(str(valor) for valor in fila if not _es_vacio(valor))
        for fila in df[campos_clasificacion].itertuples(index=False, name=None)
    ]

    # Fall back to the most specific classification level when the species
    # name is blank.
    especies = df["ESPEC_NOMB"] if "ESPEC_NOMB" in df.columns else [None] * len(df)
    df["scientificName"] = [
        especie if not _es_vacio(especie) else clasificacion.split(" | ")[-1]
        for especie, clasificacion in zip(especies, df["higherClassification"])
    ]

    if "DIRE_GRUPO" in df.columns:
        # Normalise the code to text ("171.0" / 171 -> "171"); blanks and
        # non-numeric values become "".
        codigos = (
            pd.to_numeric(df["DIRE_GRUPO"], errors="coerce")
            .fillna(0)
            .astype(int)
            .astype(str)
            .replace({"0": ""})
        )
        df["DIRE_GRUPO"] = codigos
        df["temp_measurementValue"] = codigos.map(_DIRECCION_POR_CODIGO).fillna("")
        df["measurementValue"] = df["temp_measurementValue"]
        # NOTE(review): measurementType is filled on every row, including rows
        # whose measurementValue is blank, as before.
        df["measurementType"] = _TIPO_MEDICION
    else:
        df["temp_measurementValue"] = None
        df["measurementValue"] = None
        df["measurementType"] = None

    return df



# Main entry point for processing otrafauna_OFM
def procesar_otrafauna_OFM(ruta_gdb, capa_otrafauna_OFM, tabla_otrafauna_OFM, enlace_otrafauna_OFM, ruta_excel_otrafauna_OFM, archivo_entrada_otrafauna_OFM, archivo_salida_otrafauna_OFM, epsg_origen):
    """Build the final otrafauna_OFM Excel file from a geodatabase.

    Steps: open the geodatabase, read the point layer (with reprojected
    coordinates) and the related table, inner-join them, write the join to an
    intermediate Excel file, enrich that file with taxonomy data, compute the
    derived fields, and export a DataFrame with the columns of
    lista_atributos to the output file.

    Args:
        ruta_gdb: Path of the geodatabase.
        capa_otrafauna_OFM: Name of the point layer inside the geodatabase.
        tabla_otrafauna_OFM: Name of the related table inside the geodatabase.
        enlace_otrafauna_OFM: Column shared by layer and table, used as join key.
        ruta_excel_otrafauna_OFM: Path of the intermediate Excel file.
        archivo_entrada_otrafauna_OFM: Not used by this function; kept so the
            signature stays compatible with existing callers.
        archivo_salida_otrafauna_OFM: Path of the final Excel file.
        epsg_origen: CRS of the layer coordinates; also written to verbatimSRS.

    Returns:
        None in every case. Failures are logged and no exception propagates.
    """
    log = logging.getLogger(__name__)

    # Columns computed by procesar_campos_especificos that go to the output.
    campos_calculados = [
        "id",
        "verbatimLatitude",
        "verbatimLongitude",
        "verbatimCoordinates",
        "occurrenceID",
        "type",
        "basisOfRecord",
        "occurrenceRemarks",
        "eventTime",
        "higherClassification",
        "scientificName",
        "measurementValue",
        "measurementType",
    ]

    try:
        # Open the geodatabase.
        gdb = gdal.OpenEx(ruta_gdb, gdal.OF_VECTOR)
        if not gdb:
            raise RuntimeError(f"❌ No se pudo abrir la GDB en {ruta_gdb}")

        # GetLayerByName returns None for unknown names; fail with a clear
        # message instead of a TypeError while iterating.
        capa = gdb.GetLayerByName(capa_otrafauna_OFM)
        if capa is None:
            raise RuntimeError(f"❌ No se encontró la capa '{capa_otrafauna_OFM}' en {ruta_gdb}")
        tabla = gdb.GetLayerByName(tabla_otrafauna_OFM)
        if tabla is None:
            raise RuntimeError(f"❌ No se encontró la tabla '{tabla_otrafauna_OFM}' en {ruta_gdb}")

        # Coordinates + attributes of the layer, and attributes of the table.
        datos_capa = extraer_coordenadas(capa, epsg_origen)
        datos_tabla = [feature.items() for feature in tabla]

        resultado = realizar_join(datos_capa, datos_tabla, enlace_otrafauna_OFM)
        if resultado is None or resultado.empty:
            log.warning("El join de otrafauna_OFM no produjo registros; no se genera salida.")
            return

        # Intermediate Excel file: write the join, enrich it with the API
        # data, then read the enriched version back.
        exportar_excel(resultado, ruta_excel_otrafauna_OFM)
        agregar_datos_api_a_excel(ruta_excel_otrafauna_OFM)
        df_intermedio = pd.read_excel(ruta_excel_otrafauna_OFM)

        df_intermedio = procesar_campos_especificos(df_intermedio)

        # Final frame: one row per intermediate row, so scalar constants
        # broadcast even if no mapped column is present.
        df_final = pd.DataFrame(index=df_intermedio.index, columns=lista_atributos)

        for columna_intermedia, columna_final in mapeo.items():
            if columna_intermedia in df_intermedio.columns:
                df_final[columna_final] = df_intermedio[columna_intermedia]

        crs = pyproj.CRS.from_user_input(epsg_origen)
        if crs.is_geographic:
            coord_system = "Coordenadas geográficas"
        else:
            coord_system = "Coordenadas proyectadas"

        # Constant values for otrafauna_OFM.
        valores_constantes = {
            "occurrenceStatus": "present",
            "language": "es",
            "continent": "América del Sur",
            "country": "Colombia",
            "countryCode": "CO",
            "geodeticDatum": "WGS84",
            "verbatimSRS": epsg_origen,
            "verbatimCoordinateSystem": coord_system
        }
        for clave, valor in valores_constantes.items():
            df_final[clave] = valor

        # Computed fields. "scientificName" intentionally overrides the raw
        # ESPEC_NOMB copied through mapeo. A missing column is reported and
        # left empty instead of aborting the whole export.
        for campo in campos_calculados:
            if campo in df_intermedio.columns:
                df_final[campo] = df_intermedio[campo]
            else:
                log.warning("El campo calculado '%s' no está disponible; queda vacío.", campo)

        exportar_excel(df_final, archivo_salida_otrafauna_OFM)

    except Exception:
        # Top-level boundary of the pipeline: keep the "never raise" contract
        # but record what went wrong.
        log.exception("❌ Error al procesar otrafauna_OFM desde %s", ruta_gdb)
        return None