#!/usr/bin/env python3
import json
import logging
import os
import sys
from concurrent.futures import ThreadPoolExecutor, TimeoutError

import click

from ndff.connector.connector import NdffConnector
from ndff.exceptions import NdffLibError

# Module-level name; the original author suspected the click testing framework needs it (unconfirmed).
name = 'ndff'

# urllib3 is noisy: only let ERROR and above through.
logging.getLogger('urllib3').setLevel(logging.ERROR)

# Logger of the 'ndff' library: WARNING by default, the cli() group lowers it for -vv / -vvv.
lib_log = logging.getLogger('ndff')
lib_log.setLevel(logging.WARNING)

# Logger for the messages of this CLI module itself.
log = logging.getLogger(__name__)


def show_environment_setting(ndff_global_settings_dir: str, ndff_user_settings_dir: str) -> None:
    """
    Print a hint on how to set the two mandatory environment variables of this script.

    The hint uses ``export`` on Linux and macOS and ``set`` on Windows/Cygwin.
    On any other platform nothing is printed.

    :param str ndff_global_settings_dir: string/path to show as value for NDFF_GLOBAL_SETTINGS_DIR
    :param str ndff_user_settings_dir: string/path to show as value for NDFF_USER_SETTINGS_DIR
    """
    platform = sys.platform
    if platform.startswith('linux') or platform == 'darwin':
        # Linux (including the legacy 'linux2' value) and macOS shells both use 'export'
        set_command = 'export'
    elif platform in ('win32', 'cygwin'):
        # Windows...
        set_command = 'set'
    else:
        # unknown platform: no hint available
        return

    click.echo(f'SET configs in the Environment, for easier cli calls:\n'
               f'{set_command} NDFF_GLOBAL_SETTINGS_DIR={ndff_global_settings_dir}\n'
               f'{set_command} NDFF_USER_SETTINGS_DIR={ndff_user_settings_dir}')


def get_env_param(param: str) -> str:
    """
    Return the value of environment variable ``param``.

    When the variable is missing or empty: print an error message plus a hint on
    how to set the settings variables, then terminate via ``sys.exit(1)``.

    :param str param: name of the environment variable to look up
    :return: the (non-empty) value of the variable
    """
    value = os.environ.get(param)
    if not value:
        # unset and empty are treated the same: tell the user and quit
        click.echo('Er zou een environment parameter "{}" moeten zijn.\nControleer a.u.b.\nQUITTING.'.format(param))
        show_environment_setting('<your global config dir here>', '<your user config dir here>')
        sys.exit(1)
    return value


@click.group(invoke_without_command=True)
@click.option('-v', '--verbose', count=True)  # https://click.palletsprojects.com/en/3.x/options/#counting  # verbosity
@click.option('--version', default=False, is_flag=True)  # version
def cli(version, verbose=0):
    """
    Dit is een Command line client (CLI) om op basis van 1 of 2 mappen met configuraties de NDFF te benaderen.

    Bij de juiste configuratie kan hiermee een (lokale) dataset worden:

     - bekeken (sample)

     - gevalideerd (validate)

     - opgestuurd (sent)

    Met -v wordt INFO getoond

    Met -vv wordt DEBUG-info van de CLI, en INFO van de NDFF-library getoond

    Met -vvv wordt DEBUG-info van de CLI, en DEBUG van de NDFF-library getoond

    Voorbeelden:

    ndff --version # toont versie van geïnstalleerde ndff module

    ndff -v sample # toont 1 record als de environment-variabelen goed staan

    """
    # NOTE: click uses the docstring above as the --help text, so it is user-facing output.
    #
    # Group callback, it:
    #  1. maps the number of -v flags to log levels of the CLI logger and the ndff library logger
    #  2. with --version: prints the installed 'ndff' version and returns it
    #  3. otherwise checks that both settings environment variables are set
    #     (get_env_param prints a hint and exits with status 1 when one is missing)
    if verbose == 1:
        log.setLevel(logging.INFO)
    elif verbose == 2:
        log.setLevel(logging.DEBUG)
        lib_log.setLevel(logging.INFO)
    elif verbose >= 3:
        # -vvv or more: most verbose setting (more than three flags used to fall back to WARNING)
        log.setLevel(logging.DEBUG)
        lib_log.setLevel(logging.DEBUG)
    # verbose == 0: keep the default (WARNING) levels

    if version:
        # Prefer the stdlib importlib.metadata; pkg_resources (setuptools) is deprecated
        # and only used as fallback on interpreters without importlib.metadata.
        try:
            from importlib import metadata as importlib_metadata
        except ImportError:
            importlib_metadata = None
        if importlib_metadata is not None:
            cli_version = importlib_metadata.version('ndff')
        else:
            import pkg_resources  # part of setuptools
            cli_version = pkg_resources.require("ndff")[0].version
        click.echo(f'ndff/cli version {cli_version}')
        return cli_version

    # Both lookups either return a non-empty string or terminate the process,
    # so reaching the log line below means both directories are known.
    ndff_global_settings_dir = get_env_param('NDFF_GLOBAL_SETTINGS_DIR')
    ndff_user_settings_dir = get_env_param('NDFF_USER_SETTINGS_DIR')
    log.info(f'NDFF CLI wordt opgebouwd met de volgende directories (UIT ENVIRONMENT):\n'
             f' {ndff_global_settings_dir}\n'
             f' {ndff_user_settings_dir}')


def get_connector() -> NdffConnector:
    """
    Build the NdffConnector for the settings directories named in the environment.

    Reads NDFF_GLOBAL_SETTINGS_DIR and NDFF_USER_SETTINGS_DIR (in that order) via
    get_env_param, which terminates the process when a variable is not set, and
    hands both to NdffConnector.create_from_directories.

    :return: NdffConnector
    """
    settings_dirs = [
        get_env_param(variable)
        for variable in ('NDFF_GLOBAL_SETTINGS_DIR', 'NDFF_USER_SETTINGS_DIR')
    ]
    return NdffConnector.create_from_directories(*settings_dirs)


@click.command()
@click.option('-n', '--sample-size', default=1, help='Number of samples to show')
def sample(sample_size=1):
    """
    Show a sample of the opened dataset (defined in data_source.csv).
    Defaults to showing 1 record, add -n INTEGER to show more

    If you have jq, to show a clean record, you can for example do:

        ndff sample -n 2 | jq .

    :param sample_size:
    :return:
    """
    # NOTE: click uses the docstring above as help text, so it is left untouched.
    # A sample size below 1 means: keep printing until the dataset is exhausted
    # (technically bounded by a very large number).
    limit = sample_size if sample_size >= 1 else 100000000000  # sorry
    try:
        connector = get_connector()
        for _ in range(limit):
            # echo instead of log: records must ALWAYS be shown; one JSON document per record
            click.echo(json.dumps(connector.next_record()))
    except FileNotFoundError as missing_file:
        log.error(missing_file)
        log.error(f'Huidige werk-directory is "{os.getcwd()}"')
    except StopIteration:
        # raised by next_record() when there are no more records
        log.info('End of dataset reached')
    except Exception as error:
        log.error(error)


@click.command()
@click.option('-n', '--size', default=-1, help='Number of records to validate')
def validate(size=-1):
    """
    Do a local(!) validation of current data record(s) (using the ndff library is_valid function).

    Either one record, or a set of records (default).

    Stopping when a record is NOT valid

    :param int size: Number of records to validate
    """
    # NOTE: click uses the docstring above as help text, so it is left untouched.
    # A size of 0 or less means: validate until the dataset is exhausted
    # (technically bounded by a very large number).
    limit = size if size > 0 else 100000000000  # sorry
    validated = 0
    try:
        connector = get_connector()
        while validated < limit:
            observation = connector.map_data_to_ndff_observation(connector.next_record())
            is_ok, reasons = observation.is_valid()
            if not is_ok:
                # first invalid record ends the run
                log.warning(f'NOT valid: {reasons}')
                break
            log.debug(f'Record {observation.identity} looks valid...')
            validated += 1
    except FileNotFoundError as missing_file:
        log.error(missing_file)
        log.error(f'Huidige werk-directory is "{os.getcwd()}"')
    except StopIteration:
        # raised by next_record() when there are no more records
        log.info('End of dataset reached')
    except Exception as error:
        log.error(error)


@click.command()
@click.option('-n', '--size', default=-1, help='Number of records to sent (default = -1 == ALL)')
@click.option('-o', '--overwrite', default=False, is_flag=True,
              help='Should we try to UPDATE the observation in case it is already at the NDFF')
def sent(size=-1, overwrite=False):
    """
    Given a dataset (from env params) sent one or more records to the NDFF api

    :param int size: Number of records to sent (default = -1 == ALL)
    :param boolean overwrite: Should we try to UPDATE the observation in case it is already at the NDFF (boolean flag)
    """
    # NOTE: click uses the docstring above as help text, so it is left untouched.
    # A size of 0 or less means: send until the dataset is exhausted
    # (technically bounded by a very large number).
    limit = size if size > 0 else 100000000000  # sorry
    count = 0
    try:
        connector = get_connector()
        while count < limit:
            record = connector.next_record()
            # counted as soon as it is read, so the end-of-dataset message reports records read
            count += 1
            observation = connector.map_data_to_ndff_observation(record)
            is_ok, reasons = observation.is_valid()
            if not is_ok:
                # an invalid record aborts the run (logged by the generic handler below)
                raise NdffLibError(reasons)

            result = connector.sent_observation_to_ndff(observation, overwrite=overwrite)
            status = str(result["http_status"])
            if status == "200":
                log.info(f'NDFF Resultaat: UPDATE van {result["ndff_uri"]} gelukt')
            elif status == "201":
                log.info(f'NDFF Resultaat: succesvol NIEUW aangemaakt {result["ndff_uri"]}')
            else:
                # any other status: log the full result and carry on with the next record
                log.error(result)

    except FileNotFoundError as missing_file:
        log.error(missing_file)
        log.error(f'Huidige werk-directory is "{os.getcwd()}"')
    except StopIteration:
        log.info(f'Einde van deze dataset bereikt, {count} records verwerkt...')
    except Exception as error:
        log.error(error)


@click.command()
@click.argument('identity', type=str)
def find(identity):
    """
    CLI command to find a record in the dataset whose identity is exactly the identity given as parameter.

    If found output both the record and the json version of the record.

    :param identity:
    :return:
    """
    # NOTE: click uses the docstring above as help text, so it is left untouched.
    # Walks the dataset record by record and stops at the first match. Running out of
    # records or a missing data file is reported via the log (as the sibling commands
    # do) instead of ending in an uncaught exception.
    try:
        nc = get_connector()
        while True:
            record = nc.next_record()
            observation = nc.map_data_to_ndff_observation(record)
            if observation.get('identity') == identity:
                click.echo('Record:')
                click.echo(json.dumps(record, indent=2))
                click.echo('NdffObservation:')
                click.echo(observation.to_ndff_observation_json())
                return
    except FileNotFoundError as file_not_found_exception:
        log.error(file_not_found_exception)
        log.error(f'Huidige werk-directory is "{os.getcwd()}"')
    except StopIteration:
        # next_record() signals the end of the dataset this way: nothing matched
        log.warning(f'End of dataset reached, no record with identity "{identity}" found')


# https://superfastpython.com/threadpoolexecutor-in-python/#Step_1_Create_the_Thread_Pool
@click.command()
@click.option('-n', '--size', default=-1, help='Number of records to sent (default = -1 == ALL)')
@click.option('-t', '--timeout', default=None, type=float,
              help='Timeout (seconds) for a Thread(!) to succeed (default = None: wait for ever). NOTE that in the API there is also a HTTPS(!)-timeout (60s)')
@click.option('-r', '--threads', default=5, help='Number of Threads to start (defaults = 5)')
@click.option('-o', '--overwrite', default=False, is_flag=True,
              help='Should we try to UPDATE the observation in case it is already at the NDFF')
def fast_sent(size=-1, overwrite=False, threads=5, timeout=None):
    """
    CLI command to try to send a dataset faster to the NDFF by using a thread pool (default size 5).

    Note that it easy to overflow the api...

    :param int size: Number of records to sent (default = -1 == ALL)
    :param boolean overwrite: flag to try to update the observation in case it is already at the NDFF
    :param int threads: size of thread pool, default 5
    :param int timeout: Timeout (seconds) for a Thread(!) to succeed (default = None: wait forever).
        NOTE that in the API there is also an HTTPS(!)-timeout (60s)
    :return:
    """
    # NOTE: click uses the docstring above as help text, so it is left untouched.
    #
    # --timeout is declared as float: without an explicit type click hands over the raw
    # string, which executor.map() cannot use as a timeout.
    #
    # The record limit is applied to the input with islice, so never more than the
    # requested number of records is handed to the thread pool.
    from itertools import islice  # local import: only needed by this command

    max_records_to_sent = 1000000
    if size > 0:
        max_records_to_sent = size
    count = 0
    try:
        nc = get_connector()

        def task(record):
            # Runs in a worker thread: validate one record, send it, retry once on HTTP 429.
            # Raising NdffLibError here surfaces in the result loop below and ends the run.
            observation = nc.map_data_to_ndff_observation(record)
            valid, reasons = observation.is_valid()
            if not valid:
                raise NdffLibError(f'Waarneming is niet geldig: {reasons}\n\nZorg eerst dat de hele set valide is')

            results = nc.sent_observation_to_ndff(observation, overwrite=overwrite)
            # resend on 'X-Rate-Limit-Limit' hit
            if str(results["http_status"]) == "429":
                # normal 'X-Rate-Limit-Limit' are being hit
                log.info(f'Hit X-Rate-Limit-Limit (429)... {observation.identity} / {results["ndff_uri"]}')
                results = nc.sent_observation_to_ndff(observation, overwrite=overwrite)
                if str(results["http_status"]) == "429":
                    log.error(f'SECOND TRY 2, ALSO ERR... {observation.identity} {results}')
                    api = nc.get_api()
                    search_uri = api.add_domain_key_option(f'{api.api_url}/{api.domain}/observations/')
                    # OK, we tried twice both failing let's stop:
                    raise NdffLibError(f'API limitatie: 2x geprobeerd te uploaden: {observation.identity} {search_uri}?identity={observation.identity}, waarschijnlijk NIET aangemaakt/aangepast:\n{results}')

            # 'count' is read from the enclosing scope; it is only an indication (hence the '~')
            if str(results["http_status"]) == "200":
                log.info(f'NDFF Resultaat: UPDATE van {observation.identity} / {results["ndff_uri"]} gelukt (~{count}) ')
            elif str(results["http_status"]) == "201":
                log.info(f'NDFF Resultaat: succesvol NIEUW {observation.identity} / {results["ndff_uri"]} aangemaakt (~{count}) ')
            else:
                # not sure what, but apparently no 200, 201 or 429 let's stop
                raise NdffLibError(f'Onduidelijke foutmelding ({results["http_status"]} {results})\n\nUpload verder afgebroken...')
        # END OF TASK

        # at most max_records_to_sent records are taken from the dataset
        records_to_sent = islice(nc.data_records, max_records_to_sent)
        with ThreadPoolExecutor(threads, thread_name_prefix='NDFF_CLI') as executor:
            # note the timeout !!
            for _ in executor.map(task, records_to_sent, timeout=timeout):
                count += 1  # indication(!), as addition is after (parallelized) action

    except FileNotFoundError as file_not_found_exception:
        log.error(file_not_found_exception)
        log.error(f'Huidige werk-directory is "{os.getcwd()}"')
    except StopIteration:
        log.info(f'Einde van deze dataset bereikt, {count} records verwerkt...')
    except TimeoutError:
        log.error(f'Thread TimeOut Error in een van de ( {threads} concurrent ) tasks, Timeout waarde: {timeout}')
    except Exception as e:
        # very short messages are not informative on their own: add the exception type
        if len(str(e)) > 3:
            log.error(f'FOUT in fast_sent: {e}')
        else:
            log.error(f'FOUT in fast_sent, error type = {type(e)}: {e}')


def main():
    """
    Entry point: register every sub-command on the click group, then run the group.
    """
    for command in (sample, validate, sent, fast_sent, find):
        cli.add_command(command)
    cli()


if __name__ == "__main__":
    # only when run as a script: exit with whatever main() returns
    raise SystemExit(main())

# export NDFF_GLOBAL_SETTINGS_DIR=/home/richard/git/ndff-connector/settings_user
# export NDFF_USER_SETTINGS_DIR=/home/richard/git/ndff-connector/tests/data/notatio_postgis/ndff_settings/
# cd /home/richard/git/ndff-connector/tests
# export NDFF_USER_SETTINGS_DIR=/home/richard/git/ndff-connector/tests/data/notatio_csv/ndff_settings/

# export NDFF_USER_SETTINGS_DIR=/home/richard/git/ndff-connector/tests/data/wnpro_csv/ndff_settings/

# this one has no Datasource? That is correct of course: it is a SHAPE !
# export NDFF_USER_SETTINGS_DIR=/home/richard/git/ndff-connector-plugin/ndff-connector-plugin/tests/data/wnpro_shp_alles/ndff_settings

# csv 5843 records
# export NDFF_GLOBAL_SETTINGS_DIR=/home/richard/git/ndff-connector/settings_user
# export NDFF_USER_SETTINGS_DIR=/home/richard/git/ndff-connector/tests/data/wnnl_csv_export/ndff_settings
# time ndff -v sent -o
# root INFO 2022-11-18 16:37:47,125 - Einde van deze dataset bereikt, 5843 records verwerkt...
# real    53m4.906s
# time python -m ndff -vv fast-sent -o -r 20
# real    12m18.532s
# time python -m ndff -vv fast-sent -o -r 10
# real    12m8.743s
# time python -m ndff -vv fast-sent -o -r 5
# real    12m43.511s

# dd 20221223
# export NDFF_GLOBAL_SETTINGS_DIR=/home/richard/git/ndff-connector/settings_user
# export NDFF_USER_SETTINGS_DIR=/home/richard/git/ndff-connector/tests/data/wnnl_csv_export/ndff_settings
# time python -m ndff -vv fast-sent -o -r 5
# real    28m58.023s

# dd 20230120
# real    28m59.445s

# postgis 100 records
# export NDFF_GLOBAL_SETTINGS_DIR=/home/richard/git/ndff-connector/settings_user
# export NDFF_USER_SETTINGS_DIR=/home/richard/git/ndff-connector-plugin/tests/data/notatio_postgis/ndff_settings
# time ndff -v sent -o
# also something like 22 sec to create all 100 as new
# then once more, this time everything gets updated
# time ndff -v sent -o
# 55s == sequential
# time python -m ndff -vv fast-sent -o -r 5
# 15s for all 100 with fast-sent using 5 workers
