from shapely.geometry import shape
import pyproj
import pandas as pd
import requests
import re
from osgeo import gdal, ogr
from concurrent.futures import ThreadPoolExecutor
import time

from ..utils import config as cfg


# Maps source attribute names (keys) to the output column names (values) used
# when filling the final table. Key order matches the original definition.
#
# Mappings that were disabled in the original and remain disabled:
#   ap_nombre -> locality, diseno_monitoreo -> samplingProtocol,
#   id_indiv -> organismID, superfamilia_ -> superfamily,
#   familia_ -> family, genero_ -> genus, nom_res -> recordedBy
mapeo = {
    # Administrative location
    "municipio": "county",
    "departament": "stateProvince",
    "corregimiento_": "municipality",
    # Habitat and sampling event
    "ecosiste": "habitat",
    "clima_da": "eventRemarks",
    "id_transecto": "eventID",
    # Names and taxonomy (the last four keys are the columns added by
    # agregar_datos_api_a_excel)
    "nomb_cientif": "scientificName",
    "grupo_funcion": "vernacularName",
    "canonicalName": "acceptedNameUsage",
    "scientificNameAuthorship": "scientificNameAuthorship",
    "taxonomicStatus": "taxonomicStatus",
    "taxonRank": "taxonRank",
    "comentarios": "occurrenceRemarks",
    # Transformed coordinates written by extraer_coordenadas
    "latitud_1": "decimalLatitude",
    "longitud_1": "decimalLongitude",
    # Remaining per-record attributes
    "numero_muestras": "materialSampleID",
    "profundidad_": "verbatimDepth",
    "num_indiv": "individualCount",
    "id_sexo": "sex",
    "und_muestreo": "parentEventID",
    "presencia_agua": "waterBody",
    "pendiente_grad": "inclinationInDegrees",
    "tipo_registro": "type",
}



# Ordered list of every column the final output table is created with.
# The section comments below are only a reading aid; the order is significant
# because it defines the column order of the final DataFrame.
lista_atributos = [
    # Record-level
    "id",
    "type",
    "language",
    "institutionID",
    "institutionCode",
    "datasetName",
    "basisOfRecord",
    # Occurrence
    "occurrenceID",
    "recordNumber",
    "individualCount",
    "occurrenceStatus",
    "occurrenceRemarks",
    "lifeStage",
    "recordedBy",
    "organismRemarks",
    # Event and sampling
    "eventID",
    "materialSampleID",
    "organismID",
    "parentEventID",
    "fieldNumber",
    "eventDate",
    "year",
    "month",
    "day",
    "eventTime",
    "verbatimEventDate",
    "habitat",
    "samplingProtocol",
    "sampleSizeValue",
    "sampleSizeUnit",
    "eventRemarks",
    # Location
    "continent",
    "waterBody",
    "country",
    "countryCode",
    "stateProvince",
    "county",
    "municipality",
    "locality",
    "minimumElevationInMeters",
    "maximumElevationInMeters",
    "verbatimElevation",
    "inclinationInDegrees",
    "locationRemarks",
    "decimalLatitude",
    "decimalLongitude",
    "geodeticDatum",
    "verbatimCoordinates",
    "verbatimLatitude",
    "verbatimLongitude",
    "verbatimCoordinateSystem",
    "verbatimSRS",
    "footprintWKT",
    "footprintSRS",
    # Identification and taxonomy
    "verbatimIdentification",
    "sex",
    "identificationQualifier",
    "scientificName",
    "acceptedNameUsage",
    "higherClassification",
    "kingdom",
    "phylum",
    "class",
    "order",
    "superfamily",
    "family",
    "genus",
    "specificEpithet",
    "infraspecificEpithet",
    "taxonRank",
    "verbatimTaxonRank",
    "scientificNameAuthorship",
    "vernacularName",
    "vitality",
    "taxonomicStatus",
    "permitText",
]


def consultar_taxon_rank(nomb_cientif):
    """Look up a scientific name with the GBIF species-match endpoint.

    Args:
        nomb_cientif: Scientific name to match. Anything that is not a
            non-blank string (None, NaN coming from pandas, "") is treated
            as missing and is not sent to the API.

    Returns:
        A dict with the keys ``taxonRank``, ``canonicalName``,
        ``scientificNameAuthorship`` and ``taxonomicStatus``, or ``None`` when
        the name is missing, the response status is not 200, the payload is
        not a JSON object, or the request fails.
    """
    # A missing name cannot be matched; without this guard a NaN cell would be
    # sent to the API as the literal text "nan".
    if not isinstance(nomb_cientif, str) or not nomb_cientif.strip():
        return None

    url = "https://api.gbif.org/v1/species/match"
    params = {"name": nomb_cientif}

    try:
        # Fixed one-second pause before each request (kept from the original).
        time.sleep(1)
        # The timeout keeps a stalled connection from blocking a worker forever.
        response = requests.get(url, params=params, timeout=30)
        if response.status_code != 200:
            return None
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network errors and undecodable bodies are reported, then treated as
        # "no data" so one bad lookup does not abort the whole batch.
        print(f"⚠️ No se pudo consultar GBIF para '{nomb_cientif}': {e}")
        return None

    if not isinstance(data, dict):
        return None

    scientific_name = data.get("scientificName", "")
    canonical_name = data.get("canonicalName", "")

    # The authorship is what remains of the full scientific name once the
    # canonical name has been removed from it.
    if canonical_name and scientific_name and scientific_name != canonical_name:
        scientific_name_authorship = scientific_name.replace(canonical_name, "").strip() or None
    else:
        scientific_name_authorship = None

    return {
        "taxonRank": data.get("rank", None),
        "canonicalName": canonical_name,
        "scientificNameAuthorship": scientific_name_authorship,
        "taxonomicStatus": data.get("status", None),
    }

# Adds the GBIF lookup results to the existing Excel file.
def agregar_datos_api_a_excel(ruta_excel_flora_pnn):
    """Enrich the Excel file in place with taxonomy columns from GBIF.

    Reads the workbook, looks up each distinct value of ``nomb_cientif`` with
    ``consultar_taxon_rank`` and writes the columns ``taxonRank``,
    ``canonicalName``, ``scientificNameAuthorship`` and ``taxonomicStatus``
    back to the same path. Rows whose name is missing or could not be resolved
    get empty values in those columns.

    Args:
        ruta_excel_flora_pnn: Path of the Excel file to read and overwrite.

    Returns:
        None. Enrichment is best effort: any failure (including a missing
        ``nomb_cientif`` column) is printed and the file is left as it was.
    """
    columnas_api = (
        "taxonRank",
        "canonicalName",
        "scientificNameAuthorship",
        "taxonomicStatus",
    )

    try:
        df = pd.read_excel(ruta_excel_flora_pnn)

        if "nomb_cientif" not in df.columns:
            raise ValueError("❌ ERROR: La columna 'nomb_cientif' no está en el archivo Excel.")

        # Query each distinct name once; repeated species would otherwise
        # trigger one identical request per row.
        nombres = df["nomb_cientif"].dropna().unique().tolist()

        # Run the API lookups in parallel.
        with ThreadPoolExecutor(max_workers=50) as executor:
            respuestas = list(executor.map(consultar_taxon_rank, nombres))

        por_nombre = {
            nombre: respuesta
            for nombre, respuesta in zip(nombres, respuestas)
            if isinstance(respuesta, dict)
        }

        # Build every column explicitly so they exist even when no lookup
        # succeeded (an all-empty result used to raise a KeyError here).
        for columna in columnas_api:
            df[columna] = [
                por_nombre.get(nombre, {}).get(columna)
                for nombre in df["nomb_cientif"]
            ]

        df.to_excel(ruta_excel_flora_pnn, index=False)

    except Exception as e:
        # Report instead of failing silently; callers still get None.
        print("\n❌ ERROR en agregar_datos_api_a_excel:", str(e), "\n")
        return None

# Extracts attributes and transformed coordinates from a point layer.
def extraer_coordenadas(layer, epsg_origen):
    """Collect the attributes of every point feature plus its transformed coordinates.

    Args:
        layer: Iterable OGR layer whose features are read.
        epsg_origen: CRS of the layer geometries, in any form accepted by
            ``pyproj.Transformer.from_crs``.

    Returns:
        A list with one dict per point feature: the feature's attribute values
        plus ``latitud_1`` and ``longitud_1`` holding the coordinates
        transformed to ``cfg.EPSG_DESTINO``. Features without geometry or with
        a non-point geometry are skipped.
    """
    # always_xy=True fixes the axis order to (x, y) / (lon, lat) on both sides.
    # NOTE(review): naming the outputs lat/lon presumes cfg.EPSG_DESTINO is a
    # geographic CRS - confirm against the config module.
    transformer = pyproj.Transformer.from_crs(epsg_origen, cfg.EPSG_DESTINO, always_xy=True)

    resultados = []
    for feature in layer:
        geom = feature.GetGeometryRef()
        if geom is None:
            continue

        # Flatten the type so PointZ / PointM / PointZM count as points; a
        # strict comparison with wkbPoint silently drops those features.
        if ogr.GT_Flatten(geom.GetGeometryType()) != ogr.wkbPoint:
            continue

        lon, lat = transformer.transform(geom.GetX(), geom.GetY())

        atributos = feature.items()
        atributos["latitud_1"] = lat
        atributos["longitud_1"] = lon
        resultados.append(atributos)

    return resultados


# Writes a DataFrame to an Excel file.
def exportar_excel(dataframe, ruta_salida):
    """Write ``dataframe`` to ``ruta_salida`` without the index column.

    Args:
        dataframe: Object exposing ``to_excel(path, index=...)``.
        ruta_salida: Destination path of the Excel file.

    Returns:
        None. A failed export does not raise; the error is printed so that it
        is no longer silent.
    """
    try:
        dataframe.to_excel(ruta_salida, index=False)
    except Exception as e:
        # Keep the original no-raise contract, but make the failure visible.
        print(f"\n❌ ERROR al exportar a Excel '{ruta_salida}': {e}\n")
        return None


# Helpers and entry point that derive the computed fields of the intermediate table.
def _copiar_columna(df, origen, destino):
    """Copy ``df[origen]`` into ``df[destino]``; fill with None when ``origen`` is absent."""
    df[destino] = df[origen] if origen in df.columns else None


def _unir_textos(primero, segundo):
    """Join the non-missing values as "a, b"; return None when both are missing."""
    presentes = [f"{valor}" for valor in (primero, segundo) if pd.notna(valor)]
    return ", ".join(presentes) if presentes else None


def _construir_occurrence_id(territorial, numero_registro):
    """Build one occurrenceID, or None when ``territorial`` is missing."""
    if pd.isna(territorial):
        return None
    # Only ASCII letters and digits of the territorial name are kept.
    prefijo = re.sub(r'[^A-Za-z0-9]', '', str(territorial))
    return prefijo + ":Fauna ocupacion anfibios:" + str(numero_registro)


def _formatear_coordenadas(longitud, latitud):
    """Return "lon, lat" with decimal commas turned into points, or None if either is missing."""
    if pd.isna(longitud) or pd.isna(latitud):
        return None
    return str(longitud).replace(",", ".") + ", " + str(latitud).replace(",", ".")


def _extraer_hora(valor):
    """Return the time of day of ``valor`` as HH:MM:SS, or None when it cannot be parsed."""
    try:
        if isinstance(valor, pd.Timestamp):
            return valor.strftime("%H:%M:%S")
        if isinstance(valor, str) and any(c in valor for c in ["/", "-"]):
            # Text containing a date separator is parsed as a full datetime.
            return pd.to_datetime(valor, errors="coerce").strftime("%H:%M:%S")
        if isinstance(valor, str):
            return pd.to_datetime(valor, format="%H:%M:%S", errors="coerce").strftime("%H:%M:%S")
        return None
    except Exception:
        # NaT has no strftime; any unparseable value becomes None.
        return None


def procesar_campos_especificos(df):
    """Add the derived columns to ``df`` in dependency order and return it.

    The frame is modified in place. Each derived column is always created;
    when its source column is absent it is filled with None.

    Args:
        df: Intermediate DataFrame. ``None`` or an empty frame is returned
            unchanged, without the derived columns.

    Returns:
        The same DataFrame with these columns added: ``verbatimElevation``,
        ``verbatimLatitude``, ``verbatimEventDate``, ``eventDate``, ``year``,
        ``month``, ``day``, ``verbatimLongitude``, ``recordNumber``,
        ``occurrenceID``, ``id``, ``minimumElevationInMeters``,
        ``maximumElevationInMeters``, ``verbatimCoordinates``,
        ``verbatimIdentification``, ``locality``, ``recordedBy``,
        ``eventTime`` and ``measurementValue_N`` / ``measurementType_N``
        for N = 1..4.
    """
    if df is None or df.empty:
        return df

    # Verbatim copies of the source columns.
    _copiar_columna(df, "elevacion_m", "verbatimElevation")
    _copiar_columna(df, "latitud", "verbatimLatitude")
    _copiar_columna(df, "fecha_da", "verbatimEventDate")

    # Parse the date once; unparseable values become NaT and yield empty parts.
    fechas = pd.to_datetime(df["verbatimEventDate"], errors='coerce')
    df["eventDate"] = fechas.dt.strftime('%Y-%m-%d')
    df["year"] = fechas.dt.year
    df["month"] = fechas.dt.month
    df["day"] = fechas.dt.day

    _copiar_columna(df, "longitud", "verbatimLongitude")
    _copiar_columna(df, "object_id", "recordNumber")

    # occurrenceID = cleaned territorial name + fixed dataset tag + record number.
    # Rows without a territorial value get None instead of raising a TypeError.
    if "territorial" in df.columns:
        df["occurrenceID"] = [
            _construir_occurrence_id(territorial, numero)
            for territorial, numero in zip(df["territorial"], df["recordNumber"])
        ]
    else:
        df["occurrenceID"] = None

    df["id"] = df["occurrenceID"]

    # A single elevation value is used for both ends of the range.
    df["minimumElevationInMeters"] = df["verbatimElevation"]
    df["maximumElevationInMeters"] = df["verbatimElevation"]

    # "lon, lat" text; rows lacking either value get None rather than a
    # fabricated "None, None" / "nan, nan" string.
    df["verbatimCoordinates"] = [
        _formatear_coordenadas(longitud, latitud)
        for longitud, latitud in zip(df["verbatimLongitude"], df["verbatimLatitude"])
    ]

    _copiar_columna(df, "nomb_cientif", "verbatimIdentification")

    # locality = "ap_nombre, sector" using whichever parts are present.
    if "sector" in df.columns and "ap_nombre" in df.columns:
        df["locality"] = [
            _unir_textos(ap_nombre, sector)
            for ap_nombre, sector in zip(df["ap_nombre"], df["sector"])
        ]
    elif "locality" not in df.columns:
        # Guarantee the column exists so later lookups of "locality" cannot
        # fail; an already present locality column is left untouched.
        df["locality"] = None

    # recordedBy = "nom_colector, nom_res" using whichever parts are present.
    if "nom_res" in df.columns and "nom_colector" in df.columns:
        df["recordedBy"] = [
            _unir_textos(nom_colector, nom_res)
            for nom_colector, nom_res in zip(df["nom_colector"], df["nom_res"])
        ]
    else:
        df["recordedBy"] = None

    # eventTime as HH:MM:SS taken from hora_da.
    if "hora_da" in df.columns:
        df["eventTime"] = df["hora_da"].apply(_extraer_hora)
    else:
        df["eventTime"] = None

    # Measurement pairs: value copied from the source column, type is a fixed
    # description; both are None when the source column is absent.
    mediciones = (
        (1, "estrato_veg", "Estrato de la vegetación donde se registra el individuo"),
        (2, "ancho_qbr_m", "Este campo contiene la magnitud del ancho de la quebrada sobre la que se registra el dato. Si no hay dato el campo aparecerá como Nulo"),
        (3, "altura_veg_m", "Este campo contiene la magnitud de la altura de la vegetacion  sobre la que se registra el dato. Si no hay dato va a aperecer como Nulo"),
        (4, "presiones_iden", "Presiones identificadas en el sitio donde se registra el dato"),
    )
    for numero, columna, descripcion in mediciones:
        presente = columna in df.columns
        df[f"measurementValue_{numero}"] = df[columna] if presente else None
        df[f"measurementType_{numero}"] = descripcion if presente else None

    return df





# Main entry point: geodatabase layer -> intermediate Excel -> GBIF enrichment -> final Excel.
def procesar_fauna_ocupacion_anfibios_pnn(ruta_gdb, capa_flora_pnn,  ruta_excel_flora_pnn, archivo_entrada_flora_pnn, archivo_salida_anfibios_pnn, epsg_origen):
    """Build the final table from a point layer of a geodatabase.

    Steps: read the layer and transform its coordinates, blank out the
    "sin informacion" placeholders, write an intermediate Excel file, enrich
    it with the GBIF lookup, derive the computed fields, assemble the final
    table (mapped columns, constants, computed columns) and export it.

    Args:
        ruta_gdb: Path of the geodatabase to open.
        capa_flora_pnn: Name of the layer to read.
        ruta_excel_flora_pnn: Path of the intermediate Excel file (written,
            enriched in place, then read back).
        archivo_entrada_flora_pnn: Not used by this function; kept so the
            signature stays unchanged.
        archivo_salida_anfibios_pnn: Path of the final Excel file.
        epsg_origen: CRS of the layer; also written to ``verbatimSRS``.

    Returns:
        None in every case. Errors are caught, printed and not re-raised.

    Side effects:
        Sets the module-level global ``EPSG_Runtime`` to ``epsg_origen``.
    """
    global EPSG_Runtime

    try:
        # Open the geodatabase.
        gdb = gdal.OpenEx(ruta_gdb, gdal.OF_VECTOR)
        if not gdb:
            raise RuntimeError(f"❌ No se pudo abrir la GDB en {ruta_gdb}")

        EPSG_Runtime = epsg_origen

        # Fail with a clear message instead of "NoneType is not iterable"
        # when the layer name does not exist.
        capa = gdb.GetLayerByName(capa_flora_pnn)
        if capa is None:
            raise RuntimeError(f"❌ No se encontró la capa '{capa_flora_pnn}' en {ruta_gdb}")

        # Attributes plus transformed coordinates of every point feature.
        datos_capa = extraer_coordenadas(capa, epsg_origen)
        resultado = pd.DataFrame(datos_capa)

        if resultado is None or resultado.empty:
            return

        # Replace the "no information" placeholders with None.
        def _limpiar_sin_informacion(valor):
            if pd.notna(valor) and str(valor).strip().lower() in ["sin informacion", "sin información"]:
                return None
            return valor

        # DataFrame.applymap is deprecated in favour of DataFrame.map; check
        # the class (not the instance) so a column named "map" cannot shadow
        # the method.
        if hasattr(pd.DataFrame, "map"):
            resultado = resultado.map(_limpiar_sin_informacion)
        else:
            resultado = resultado.applymap(_limpiar_sin_informacion)

        # Write the intermediate Excel file.
        exportar_excel(resultado, ruta_excel_flora_pnn)

        # Enrich the intermediate file in place with the GBIF columns.
        agregar_datos_api_a_excel(ruta_excel_flora_pnn)

        # Read it back and derive the computed fields.
        df_intermedio = pd.read_excel(ruta_excel_flora_pnn)
        df_intermedio = procesar_campos_especificos(df_intermedio)

        # Final table starts with every column of lista_atributos.
        df_final = pd.DataFrame(columns=lista_atributos)

        # Copy the directly mapped columns that exist in the intermediate table.
        for columna_intermedia, columna_final in mapeo.items():
            if columna_intermedia in df_intermedio.columns:
                df_final[columna_final] = df_intermedio[columna_intermedia]

        crs = pyproj.CRS.from_user_input(epsg_origen)
        if crs.is_geographic:
            coord_system = "Coordenadas geográficas"
        else:
            coord_system = "Coordenadas proyectadas"

        # Constant values applied to every row.
        # NOTE(review): "occurrenceStatus": "present" was commented out in the
        # original - confirm whether it should be emitted.
        valores_constantes = {
            "language": "es",
            "continent": "América del Sur",
            "country": "Colombia",
            "countryCode": "CO",
            "geodeticDatum": "WGS84",
            "verbatimSRS": epsg_origen,
            "verbatimCoordinateSystem": coord_system
        }
        for clave, valor in valores_constantes.items():
            df_final[clave] = valor

        # Computed fields produced by procesar_campos_especificos.
        campos_calculados = [
            "recordNumber",
            "eventDate",
            "year",
            "month",
            "day",
            "verbatimEventDate",
            "verbatimElevation",
            "minimumElevationInMeters",
            "maximumElevationInMeters",
            "verbatimCoordinates",
            "verbatimIdentification",
            "occurrenceID",
            "id",
            "verbatimLatitude",
            "verbatimLongitude",
            "locality",
            "eventTime",
            "recordedBy",
        ]
        for campo in campos_calculados:
            # A computed column that was not produced is left empty instead
            # of aborting the whole export with a KeyError.
            if campo in df_intermedio.columns:
                df_final[campo] = df_intermedio[campo]
            else:
                df_final[campo] = None

        # Export the final table.
        exportar_excel(df_final, archivo_salida_anfibios_pnn)

    except Exception as e:
        print("\n❌ ERROR en procesar_fauna_ocupacion_anfibios_pnn:", str(e), "\n")
        return None