import os
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def download_openmeteo_data(lat, lon, start_date, end_date, feedback, timeout=30):
    """Download daily rainfall totals from the Open-Meteo archive API.

    Args:
        lat: Latitude in decimal degrees; must lie in [-90, 90].
        lon: Longitude in decimal degrees; must lie in [-180, 180].
        start_date: First day to request. Sent unchanged as the API's
            ``start_date`` parameter (presumably ``YYYY-MM-DD`` -- confirm
            against the API documentation).
        end_date: Last day to request, sent unchanged as ``end_date``.
        feedback: Object exposing ``pushInfo(str)`` and ``reportError(str)``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame indexed by ``Date`` with one float64 column
        ``Rainfall (mm)`` (missing values replaced by 0), or ``None`` when
        anything fails. Failures are reported through
        ``feedback.reportError`` and never raised to the caller.
    """
    try:
        # Reject out-of-range coordinates before spending a network request.
        if not (-90 <= float(lat) <= 90 and -180 <= float(lon) <= 180):
            raise ValueError(f"Coordinates out of range: lat={lat}, lon={lon}")

        # Let requests build and encode the query string.
        params = {
            "latitude": lat,
            "longitude": lon,
            "start_date": start_date,
            "end_date": end_date,
            "daily": "precipitation_sum",
            "timezone": "auto",
        }

        feedback.pushInfo("Fetching data from Open-Meteo API...")
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(
            "https://archive-api.open-meteo.com/v1/archive",
            params=params,
            timeout=timeout,
        )

        if response.status_code != 200:
            raise Exception(f"API request failed: {response.text}")

        data = response.json()

        # Both the timestamps and the values must be present to build the series.
        daily = data.get("daily") if isinstance(data, dict) else None
        if not daily or "time" not in daily or "precipitation_sum" not in daily:
            raise Exception("No rainfall data found in API response")

        df = pd.DataFrame({
            "Date": pd.to_datetime(daily["time"]),
            "Rainfall (mm)": daily["precipitation_sum"],
        }).set_index("Date")

        # None / non-numeric entries become NaN, then 0, stored as float64.
        df["Rainfall (mm)"] = (
            pd.to_numeric(df["Rainfall (mm)"], errors="coerce")
            .fillna(0)
            .astype(np.float64)
        )

        feedback.pushInfo(f"Downloaded {len(df)} days of rainfall data")
        feedback.pushInfo(f"Data range: {df['Rainfall (mm)'].min():.2f} to {df['Rainfall (mm)'].max():.2f} mm")

        return df

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        feedback.reportError(f"Open-Meteo download failed: {str(e)}")
        return None

def download_openmeteo_hourly_data(lat, lon, start_date, end_date, feedback, timeout=30):
    """Download hourly rainfall from the Open-Meteo archive API.

    Args:
        lat: Latitude in decimal degrees; must lie in [-90, 90].
        lon: Longitude in decimal degrees; must lie in [-180, 180].
        start_date: Start of the window as a ``'%Y-%m-%d %H:%M'`` string.
        end_date: End of the window (inclusive) in the same format.
        feedback: Object exposing ``pushInfo(str)`` and ``reportError(str)``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame indexed by ``DateTime`` with one float64 column
        ``Rainfall (mm)`` restricted to ``[start_date, end_date]`` (missing
        values replaced by 0), or ``None`` when anything fails. Failures are
        reported through ``feedback.reportError`` and never raised.
    """
    try:
        # Parse to datetimes so the result can be trimmed to hour precision.
        start_dt = datetime.strptime(start_date, '%Y-%m-%d %H:%M')
        end_dt = datetime.strptime(end_date, '%Y-%m-%d %H:%M')

        # Fail fast on requests that cannot succeed instead of hitting the network.
        if start_dt > end_dt:
            raise ValueError(f"start_date ({start_date}) is after end_date ({end_date})")
        if not (-90 <= float(lat) <= 90 and -180 <= float(lon) <= 180):
            raise ValueError(f"Coordinates out of range: lat={lat}, lon={lon}")

        # The request is sent with whole days only; the exact hours are cut out below.
        params = {
            "latitude": lat,
            "longitude": lon,
            "start_date": start_dt.strftime('%Y-%m-%d'),
            "end_date": end_dt.strftime('%Y-%m-%d'),
            "hourly": "precipitation",
            "timezone": "auto",
        }

        feedback.pushInfo("Fetching hourly data from Open-Meteo API...")
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(
            "https://archive-api.open-meteo.com/v1/archive",
            params=params,
            timeout=timeout,
        )

        if response.status_code != 200:
            raise Exception(f"API request failed: {response.text}")

        data = response.json()

        # Both the timestamps and the values must be present to build the series.
        hourly = data.get("hourly") if isinstance(data, dict) else None
        if not hourly or "time" not in hourly or "precipitation" not in hourly:
            raise Exception("No hourly rainfall data found in API response")

        df = pd.DataFrame({
            "DateTime": pd.to_datetime(hourly["time"]),
            "Rainfall (mm)": hourly["precipitation"],
        }).set_index("DateTime")

        # Trim to the requested hours; copy so the assignment below writes to
        # an independent frame rather than a slice of the full download.
        df = df.loc[start_dt:end_dt].copy()

        # None / non-numeric entries become NaN, then 0, stored as float64.
        df['Rainfall (mm)'] = (
            pd.to_numeric(df['Rainfall (mm)'], errors='coerce')
            .fillna(0)
            .astype(np.float64)
        )

        feedback.pushInfo(f"Downloaded {len(df)} hours of rainfall data")
        return df

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        feedback.reportError(f"Open-Meteo hourly download failed: {str(e)}")
        return None

def load_excel_rainfall_data(excel_file, date_column, rainfall_column, feedback):
    """Read a rainfall time series from an Excel workbook.

    Args:
        excel_file: Path (or anything ``pd.read_excel`` accepts) of the workbook.
        date_column: Name of the column holding the observation dates.
        rainfall_column: Name of the column holding the rainfall amounts.
        feedback: Object exposing ``pushInfo(str)`` and ``reportError(str)``.

    Returns:
        A DataFrame indexed by ``Date`` (sorted ascending) with one float64
        column ``Rainfall (mm)``, where non-numeric, missing and infinite
        values are replaced by 0. Returns ``None`` on any failure, which is
        reported through ``feedback.reportError``.
    """
    rain_col = 'Rainfall (mm)'
    try:
        sheet = pd.read_excel(excel_file)

        # Both user-selected columns must exist before anything is parsed.
        available = sheet.columns
        if date_column not in available:
            raise Exception(f"Date column '{date_column}' not found in Excel file")
        if rainfall_column not in available:
            raise Exception(f"Rainfall column '{rainfall_column}' not found in Excel file")

        # Parse dates strictly; coerce unparseable rainfall entries to NaN.
        parsed_dates = pd.to_datetime(sheet[date_column])
        amounts = pd.to_numeric(sheet[rainfall_column], errors='coerce')

        # Assemble the series keyed and ordered by date.
        series_df = (
            pd.DataFrame({'Date': parsed_dates, rain_col: amounts})
            .set_index('Date')
            .sort_index()
        )

        # Missing values count as no rain; then neutralise infinities as well.
        series_df[rain_col] = series_df[rain_col].fillna(0).astype(np.float64)
        series_df = series_df.replace([np.inf, -np.inf], 0)

        feedback.pushInfo(f"Loaded {len(series_df)} records from Excel file")
        feedback.pushInfo(f"Date range: {series_df.index.min()} to {series_df.index.max()}")
        feedback.pushInfo(f"Rainfall stats: Mean={series_df[rain_col].mean():.2f} mm, "
                          f"Max={series_df[rain_col].max():.2f} mm")

        return series_df

    except Exception as err:
        feedback.reportError(f"Excel data loading failed: {str(err)}")
        return None

def download_imd_data(lat, lon, start_year, end_year, file_dir, feedback):
    """Download gridded IMD rainfall via ``imdlib`` and extract one location.

    The yearwise files for ``start_year``..``end_year`` are downloaded into
    ``file_dir``. The grid cell closest to ``(lat, lon)`` that holds at least
    one valid value is then located, searching boxes of growing half-width
    (0, 0.1, 0.5 and 1.0 degrees) around the point.

    Args:
        lat: Latitude of the point of interest in decimal degrees.
        lon: Longitude of the point of interest in decimal degrees.
        start_year: First year to download.
        end_year: Last year to download (inclusive).
        file_dir: Directory for the IMD files; ``None`` selects
            ``~/imd_data``. The directory is created when missing.
        feedback: Object exposing ``pushInfo(str)`` and ``reportError(str)``.

    Returns:
        A DataFrame indexed by ``Date`` with one float64 column
        ``Rainfall (mm)`` (missing values replaced by 0), or ``None`` when
        anything fails. Failures are reported through
        ``feedback.reportError`` and never raised.
    """
    try:
        # Imported lazily so a missing imdlib is reported instead of breaking import of this module.
        import imdlib as imd

        if file_dir is None:
            file_dir = os.path.join(os.path.expanduser('~'), 'imd_data')

        os.makedirs(file_dir, exist_ok=True)
        feedback.pushInfo(f"Downloading IMD data for {start_year}-{end_year}...")

        # Download the yearwise files, then open them as an xarray dataset.
        imd.get_data('rain', start_year, end_year, fn_format='yearwise', file_dir=file_dir)
        data = imd.open_data('rain', start_year, end_year, 'yearwise', file_dir=file_dir)
        ds = data.get_xarray()

        # Handle missing values (-999 indicates missing data in IMD).
        ds = ds.where(ds['rain'] != -999.0)

        feedback.pushInfo(f"Extracting data for location: {lat:.4f}°N, {lon:.4f}°E")

        valid_point = None
        for search_radius in (0, 0.1, 0.5, 1.0):
            region = ds.sel(
                lat=slice(lat - search_radius, lat + search_radius),
                lon=slice(lon - search_radius, lon + search_radius),
            )
            # Drop coordinate labels that carry no valid value at all.
            region = region.where(region['rain'].notnull(), drop=True)
            if region['rain'].size == 0:
                continue

            # The remaining grid is still rectangular, so an individual cell
            # can be entirely empty; mark the cells that hold some data.
            has_data = (
                region['rain'].notnull().any(dim='time').transpose('lat', 'lon').values
            )
            if not has_data.any():
                continue

            # Squared distance from the target to every (lat, lon) cell. The
            # 2-D grid is required: combining the 1-D coordinate vectors
            # directly fails unless the region happens to be square.
            lat_grid, lon_grid = np.meshgrid(
                region.lat.values, region.lon.values, indexing='ij'
            )
            dist_sq = (lat_grid - lat) ** 2 + (lon_grid - lon) ** 2
            dist_sq = np.where(has_data, dist_sq, np.inf)

            lat_idx, lon_idx = np.unravel_index(np.argmin(dist_sq), dist_sq.shape)
            valid_point = region.isel(lat=int(lat_idx), lon=int(lon_idx))
            break

        if valid_point is None:
            raise ValueError("No valid IMD data found near the specified location")

        # Flatten the selected cell into a Date-indexed frame.
        rainfall_df = valid_point['rain'].to_dataframe().reset_index()
        rainfall_df = (
            rainfall_df[['time', 'rain']]
            .rename(columns={'time': 'Date', 'rain': 'Rainfall (mm)'})
            .set_index('Date')
        )
        # Copy so the column assignment below does not write into a slice.
        rainfall_df = rainfall_df.loc[f"{start_year}-01-01":f"{end_year}-12-31"].copy()

        # Validate data
        if rainfall_df['Rainfall (mm)'].isnull().all():
            raise ValueError("All IMD rainfall values are missing")

        # Remaining gaps count as no rain; store as float64.
        rainfall_df['Rainfall (mm)'] = (
            pd.to_numeric(rainfall_df['Rainfall (mm)'], errors='coerce')
            .fillna(0)
            .astype(np.float64)
        )

        feedback.pushInfo(f"Downloaded {len(rainfall_df)} days of IMD rainfall data")
        return rainfall_df

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        feedback.reportError(f"IMD data download failed: {str(e)}")
        return None

def load_chirps_data(folder, lat, lon, start_year, end_year, feedback):
    """Load a rainfall series for one point from CHIRPS NetCDF files.

    For every year in ``start_year``..``end_year`` the first ``.nc`` file
    (alphabetically) whose name contains the year is opened, and the
    ``precip`` values of the grid cell nearest to ``(lat, lon)`` are
    collected. Years without a file, without a ``precip`` variable, or that
    fail to load are skipped with a message.

    Args:
        folder: Directory containing the CHIRPS ``.nc`` files.
        lat: Latitude of the point of interest in decimal degrees.
        lon: Longitude of the point of interest in decimal degrees.
        start_year: First year to load.
        end_year: Last year to load (inclusive).
        feedback: Object exposing ``pushInfo(str)``, ``pushWarning(str)`` and
            ``reportError(str)``.

    Returns:
        A DataFrame indexed by ``Date`` with one float64 column
        ``Rainfall (mm)`` (NaN replaced by 0, negatives clipped to 0), or
        ``None`` when anything fails. Failures are reported through
        ``feedback.reportError`` and never raised.
    """
    try:
        # Imported lazily so a missing xarray is reported instead of breaking import of this module.
        import xarray as xr

        if not os.path.exists(folder):
            raise FileNotFoundError(f"CHIRPS folder not found: {folder}")

        files = sorted(f for f in os.listdir(folder) if f.endswith('.nc'))
        if not files:
            raise FileNotFoundError("No CHIRPS .nc files found")

        feedback.pushInfo(f"Found {len(files)} CHIRPS files")

        all_dates = []
        all_rain = []

        for year in range(start_year, end_year + 1):
            # Only the first matching file is read for each year.
            year_files = [f for f in files if str(year) in f]
            if not year_files:
                feedback.pushInfo(f"No CHIRPS file found for year {year}")
                continue

            file_path = os.path.join(folder, year_files[0])

            try:
                # The context manager closes the file even when extraction fails.
                with xr.open_dataset(file_path) as ds:
                    if 'precip' not in ds:
                        feedback.pushInfo(f"Skipping {file_path}: 'precip' variable not found")
                        continue

                    lat_values = ds.latitude.values
                    lon_values = ds.longitude.values
                    lon_min = lon_values.min()
                    lon_max = lon_values.max()

                    # Only a grid that actually extends past 180 is treated as
                    # 0-360; a grid confined to 0..180 is valid in either
                    # convention and must not wrap negative longitudes.
                    if lon_min >= 0 and 180 < lon_max <= 360:
                        use_lon = lon % 360
                    elif lon_min >= -180 and lon_max <= 180:
                        if -180 <= lon <= 180:
                            use_lon = lon
                        else:
                            # Caller supplied a 0-360 style value; fold it back.
                            use_lon = (lon + 180) % 360 - 180
                    else:
                        use_lon = lon
                        feedback.pushWarning("Unknown longitude convention in CHIRPS file. Using original longitude.")

                    # Nearest grid cell along each axis.
                    lat_idx = int(np.abs(lat_values - lat).argmin())
                    lon_idx = int(np.abs(lon_values - use_lon).argmin())

                    # Select by dimension name so the result does not depend
                    # on the order in which the file stores its dimensions.
                    precip_data = ds['precip'].isel(latitude=lat_idx, longitude=lon_idx).values
                    dates = ds['time'].values

                    all_dates.extend(dates)
                    all_rain.extend(precip_data)

                    # Log actual point used
                    actual_lat = lat_values[lat_idx]
                    actual_lon = lon_values[lon_idx]
                    feedback.pushInfo(f"Using CHIRPS point for {year}: lat={actual_lat}, lon={actual_lon}")
                    feedback.pushInfo(f"Loaded {year} data: {len(precip_data)} records")

            except Exception as e:
                # One bad year must not abort the others, but it is a problem
                # worth surfacing above info level.
                feedback.pushWarning(f"Error processing {year}: {str(e)}")

        if not all_dates:
            raise ValueError("No valid CHIRPS data loaded")

        rainfall_df = pd.DataFrame({
            'Date': pd.to_datetime(all_dates),
            'Rainfall (mm)': all_rain,
        }).set_index('Date')

        # Validate data
        if rainfall_df['Rainfall (mm)'].isnull().all():
            raise ValueError("All CHIRPS rainfall values are missing")

        # Sort first so label slicing is well defined even if files overlap or
        # arrive out of order; copy so the assignment below is not on a slice.
        rainfall_df = rainfall_df.sort_index().loc[f"{start_year}-01-01":f"{end_year}-12-31"].copy()

        # Fill NaN with 0, set negative values to 0, store as float64.
        rainfall_df['Rainfall (mm)'] = (
            pd.to_numeric(rainfall_df['Rainfall (mm)'], errors='coerce')
            .fillna(0)
            .clip(lower=0)
            .astype(np.float64)
        )

        feedback.pushInfo(f"Successfully loaded {len(rainfall_df)} CHIRPS records")
        return rainfall_df

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        feedback.reportError(f"CHIRPS data loading failed: {str(e)}")
        return None