import csv
import logging
import re
import tempfile
import time
from collections import deque
from datetime import datetime
from pathlib import Path
from typing import Union

import shapely.wkb
import shapely.wkt
from shapely.geometry import mapping

from ..api.ndffapi import (
    NdffApi,
)
from ..api.object import (
    NdffDataset,
    NdffObservation,
    NdffResult,
)
from ..datasource.base import DataSource
from ..exceptions import NdffLibError
from ..utils import (
    is_hex,
    is_numeric,
    is_uri,
)

log = logging.getLogger(__name__)


class NdffConnector:
    """
    A NdffConnector is the bridge between the NdffApi and Datasource
    The NdffAPI configuration is read from settings files
    The Datasource configuration is read from settings files
    It reads field-mappings (to map datasource fields to NdffObservation fields)
    and data-mappings (to map data VALUES to Ndff-uri's) from settings files

    The NdffConnector holds the actual NdffApi instance.
    Note that it is not yet constructed when all settings are read: its creation is deferred to the moment the user
    actually wants to use the API (via the get_api call). This makes it possible to change Api configs after
    construction of a NdffConnector.
    """

    # all settings (both main/global ones AND the project/datasource ones)
    # are written in this subdirectory
    NDFF_SETTINGS_DIR = 'ndff_settings'

    # global settings
    # file name of the settings file holding the settings to connect to the api
    NDFF_API_SETTINGS = 'ndff_api.csv'

    # datasource settings name
    # file name for a settings file containing the type and datasource settings
    NDFF_DATASOURCE_SETTINGS = 'data_source.csv'

    # per project/datasource type settings
    # file name of the field/column mappings (rows are expected to be triplets,
    # see create_from_directories)
    NDFF_OBSERVATION_FIELD_MAPPINGS = 'field_mappings.csv'

    # client settings
    # a file containing information for every NdffObservation field, for example
    # could be used to show a (translated) text instead of field name. It can also
    # contain the url a client should use to search for a value-uri, a more
    # descriptive text etc.
    NDFF_OBSERVATION_CLIENT_SETTINGS = 'client_settings.csv'

    # ndff log file
    # a file holding the identities of successfully validated AND sent NdffObservations
    # this file will be read (from settings dir) when connector is created:
    # a dict of 'identity':[identity, ndff_uri, timestamp]
    # this file can be written to disk again
    NDFF_LOG = 'ndff_log.csv'

    # EPSG codes used by NDFF api: 4326 are lat lon values, 28992 are 'Amersfoort Coordinaten'
    EPSG_4326 = 'EPSG:4326'
    EPSG_28992 = 'EPSG:28992'

    # strftime/strptime pattern for date-times without timezone, e.g. 2022-01-01T12:12:00
    DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'

    # Column indexes into a row of the client settings (client_settings.csv), where
    # every row is a list like:
    # field, text, url1, url2, description, ndff_fieldname
    # The get_field_text/get_field_url1/... methods use these indexes to pick a column.
    FIELD_COL_NAME = 0
    FIELD_COL_TEXT = 1
    FIELD_COL_URL1 = 2
    FIELD_COL_URL2 = 3
    FIELD_COL_DESCRIPTION = 4
    FIELD_COL_NDFF_NAME = 5
    # NOTE(review): a seventh column; its use is not visible in this part of the file
    FIELD_COL_CHANGE_DATA = 6

    # Dutch for "FIELD NOT AVAILABLE"
    WRONG_FIELD_MAPPING = "VELD NIET BESCHIKBAAR"

    # good enough tests for wkt
    # https://regex101.com/
    # Only UPPERCASE geometry names match, so "POINT (145918.0769337003 389302.11025674635)" like strings
    # are valid, optionally with a SRID=4326; in front (EWKT): SRID=4326;POINT (4.67979621887207 51.67397308349609)
    # The same pattern is repeated for MULTIPOINT and POLYGON.
    # TODO: QGIS expressions do NOT create POINT but Point (note caps): better to have a more forgiving regexp?
    valid_wkt_point_regex = re.compile(r"(SRID=\d+;)?POINT\s*\(.+\s.+\)", re.MULTILINE | re.UNICODE)
    valid_wkt_multipoint_regex = re.compile(r"(SRID=\d+;)?MULTIPOINT\s*\(.+\s.+\)", re.MULTILINE | re.UNICODE)
    valid_wkt_polygon_regex = re.compile(r"(SRID=\d+;)?POLYGON\s*\(.+\s.+\)", re.MULTILINE | re.UNICODE)

    # mapping to map an Observation field name to the api-codes names used in the api
    # (note that some fields share one api name: abundance_* and extra_info_*)
    mapping = {
        'taxon': 'taxa',
        'abundance_schema': 'abundancies',
        'abundance_value': 'abundancies',
        'determination_method': 'determinationmethods',
        'activity': 'activities',
        'biotope': 'biotopes',
        'lifestage': 'lifestages',
        'sex': 'sexes',
        'survey_method': 'surveymethods',
        'subject_type': 'subjecttypes',
        'extra_info_identity': 'extrainfo',
        'extra_info_value': 'extrainfo',
        'dwelling': 'dwellings',
    }

    # memory cache object
    # NOTE: defined on the class, so this one dict is shared by all NdffConnector instances
    has_related_codes_cache = {}

    @staticmethod
    def datetimestring_to_datetime(date_only_or_datetime_string: str) -> datetime:
        """
        Helper method to create a datetime.datetime instance from given string

        :param date_only_or_datetime_string:
        :return: datetime.datetime
        """
        if str == type(date_only_or_datetime_string) and len(date_only_or_datetime_string.strip()) < 11:
            dt = datetime.strptime(date_only_or_datetime_string, '%Y-%m-%d')
            dt = dt.replace(hour=12, minute=00, second=00)
        elif str == type(date_only_or_datetime_string) and len(date_only_or_datetime_string.strip()) < 20:
            # 2022-01-01T12:12:00 OR 2022-01-01T12:12:00 both are 'ISO-format' :-(
            dt = datetime.fromisoformat(date_only_or_datetime_string)
        else:
            raise ValueError(f'Trying to create a datetime from a date(time)string which does not to be OK: {date_only_or_datetime_string}')
        return dt

    @staticmethod
    def parse_wkt_wkb(observation: NdffObservation, location):
        """
        Helper method to handle shapely parsing of location (WKT, EWKT or WKB) and
        to set the result as (geojson-like) geometry on the observation.

        On success observation.location becomes {'geometry': <mapping of the geometry>}.
        If the location cannot be parsed, or is not valid, the problem is logged (debug)
        and observation.location is set to {'geometry': {}} so it can be fixed later.
        A MultiPoint is reduced to its first point.

        :param observation: the observation to set the location on
        :param location: WKT/EWKT string, hex encoded WKB string or WKB bytes
        """
        # can be WKB, WKT or EWKT
        # Only real strings can be EWKT; testing the type first makes that None or other
        # non-string values end up in the best-effort handling below instead of raising
        # a TypeError on the 'in' test.
        if isinstance(location, str) and not location.startswith('b') and 'SRID' in location:
            # EWKT
            # Shapely can ONLY load a WKT string (not EWKT)
            # so remove the SRID=28992; part from it in case of EWKT
            parts = location.split(';')
            if len(parts) > 1:
                location = parts[1]

        try:
            if isinstance(location, str) and is_hex(location):
                # WKB string(!) like you receive from postgis, reveals something like:
                # Point (145918.07693370030028746 389302.11025674635311589)
                shapely_geom = shapely.wkb.loads(location, hex=True)
            elif isinstance(location, bytes):
                # b'\x01\xe9\x03\x00\x00!G\x85\xad\xc3\x98\x17@\xbb\xabd\xa7\x07\xadI@\x00\x00\x00\x00\x00\x00\x00\x00'
                # real bytes array (e.g. from Geopackage from Input app)
                # NOTE(review): an earlier comment said Z is removed here because the api does
                # not seem to handle it, but nothing in this method drops the Z value
                # see also: https://ndff.zendesk.com/hc/nl/requests/41523
                shapely_geom = shapely.wkb.loads(location)
            else:
                # WKT
                shapely_geom = shapely.wkt.loads(location)

            if not (shapely_geom and shapely_geom.is_valid):
                raise ValueError(f"Non valid geometry: {location} ")

            # 'geom_type' is used instead of the geometryType() method, which is
            # deprecated in Shapely 2.x
            # NOTE: CURRENTLY MULTIPOINT IS NOT SUPPORTED BY NDFF, so we take only the first point IF data is multipoint
            if shapely_geom.geom_type == 'MultiPoint':
                log.warning('Location is MultiPoint (according to shapely) BUT we only take the first one!!!')
                shapely_geom = shapely_geom.geoms[0]

            if shapely_geom.geom_type in (
                    'MultiPoint', 'Point', 'Polygon', 'MultiPolygon', 'LineString', 'MultiLineString'):
                # mapping() returns the geojson-like dict, including the 'type' key
                observation.location = {'geometry': mapping(shapely_geom)}
        except Exception as e:
            # deliberate best-effort: a wrong location should not stop the processing
            log.debug(f'Error (shapely) creating a geometry from {location}: {e}')
            observation.location = {'geometry': {}} # it's fixable later...

    @staticmethod
    def is_valid_wkt(location_wkt_string: str) -> bool:
        """
        Method to check if given WKB(!) or WKT string could potentially return a valid shapely.geom
        Only returns True or False, it never raises.

        :param location_wkt_string: WKT string or hex encoded WKB string
        :return: bool if given wkt string seems to be a valid Geometry
        """
        try:
            if is_hex(location_wkt_string):
                # WKB !
                shapely_geom = shapely.wkb.loads(location_wkt_string, hex=True)
            else:
                # WKT
                shapely_geom = shapely.wkt.loads(location_wkt_string)
            return bool(shapely_geom.is_valid)
        except Exception:
            # Deliberate catch-all: this is a yes/no test, so whatever goes wrong during
            # parsing means 'not valid'.
            # There is no separate clause for shapely.errors.WKTReadingError (anymore): it was
            # already covered by this one, that name is deprecated in Shapely 2.x, and the
            # shapely.errors module is not imported explicitly by this file.
            return False

    @staticmethod
    def get_coordinate_list(geometry: dict) -> Union[list[int], list[float]]:
        """
        Get the flattened list of coordinates

        :param geometry:
        :return: list of int or float
        """
        # try to get all coords as a list from the geometry (different types are possible)!
        coordinate_list: list[int] = [0, ]  # to have at least one coordinate

        # ONLY the first set from a (multi) geometry is used to determine the crs
        if 'type' in geometry and (geometry['type'].upper() == 'POINT'):
            coordinate_list = geometry['coordinates']
        elif 'type' in geometry and (geometry['type'].upper() == 'MULTIPOINT'):
            # NOTE! MULTIPOINT CURRENTLY NOT SUPPORTED
            coordinate_list = geometry['coordinates'][0]
        elif 'type' in geometry and (geometry['type'].upper() in (
                'POLYGON', 'MULTIPOLYGON', 'LINESTRING', 'MULTILINESTRING')):
            # we are going to get the first (or only) polygon of this (multi)polygon
            # which is a tuple of coordinate pairs ( (x,y),(x,y),(x,y) )
            if type(geometry['coordinates'][0][0]) in (float, int):
                # normal linestring
                coordinates = geometry['coordinates']
            elif type(geometry['coordinates'][0][0][0]) in (float, int):
                # normal polygon
                coordinates = geometry['coordinates'][0]
            else:
                # multiple polygon OR a polygon with a hole
                coordinates = geometry['coordinates'][0][0]
            # zipping the tuple of coordinate pairs ( (x,y),(x,y),(x,y) ) will result in
            # two lists of (x,x,x...),(y,y,y...) which you can iterate over to collect them in one
            coordinates_set = zip(*coordinates)
            for pair in coordinates_set:
                coordinate_list = coordinate_list + list(pair)

        return coordinate_list

    @staticmethod
    def read_dict_from_csv(filename: Union[str, Path]) -> dict:
        """
        Utility method to read a csv into a dict, and using the first item of every row as key and the rest of the
        row (including the first item) as value(s).

        Settings files are deliberately csv files, so longer settings files, like the mapping files, could potentially
        being edited by hand by users.

        2 columns will result in key:value
        >2 columns will result in key:[key,value1,value2]

        IMPORTANT: so there should be no row with identical first items!

        A csv with lines like:
            item1,itemA,itemB
            item2,itemA,itemB
        results in:
        {
            'item1': ['item1','itemA','itemB'],
            'item2': ['item2','itemA','itemB']
        }
        A csv with lines like:
            item1,itemA
            item2,itemA
        result in:
        {
            'item1':'itemA'.
            'item2':'itemA'
        }


        :param: filename full (string) path to csv
        :return: dict
        """
        # https://stackoverflow.com/questions/3320406/how-to-check-if-a-path-is-absolute-path-or-relative-path-in-a-cross-platform-way
        dictionary = {}
        if (Path(filename)).is_file():
            with open(filename, mode='r', newline='', encoding='utf-8-sig') as f:
                # DictReader, returns a dictionary
                # csv_reader = csv.DictReader(filter(lambda rw: rw[0] != '#', f), delimiter=',', skipinitialspace=True)  # skipping commented lines
                # Normal reader returns one list per row, adding to it based on first column...
                # WHICH IN THIS CASE HAS TO BE UNIQUE !!
                csv_reader = csv.reader(filter(lambda rw: rw[0] != '#', f), delimiter=',', skipinitialspace=True)  # skipping commented lines
                for row in csv_reader:
                    if len(row) == 2:
                        # simple key value pairs
                        dictionary[row[0]] = row[1]
                    elif len(row) > 2:
                        # # rows with more than 2 columns: take the first one as key, others as list:
                        # # row "key,val1,val2" will result in {key:(val1,val2)}
                        # # thinking about using a DictReader here... but ...
                        # if len(row[0]) > 0:  # skipping 'comment rows', being ",,,,,"
                        #     dictionary[row[0]] = row[1:]

                        # rows with more than 2 columns: take the first one as key, full list as value:
                        # row "key,val1,val2" will result in {key:(key, val1,val2)}
                        # thinking about using a DictReader here... but ...
                        if len(row[0]) > 0:  # skipping 'comment rows', being ",,,,,"
                            dictionary[row[0]] = row[0:]
                    else:
                        log.debug(f'Skipping this row in {filename}: "{row}"')
        # else:
        #    pass
            # we deliberately try all files in different dirs...
            # this make is it easier to use this function (instead of checking if something is a file or not)
            # log.warning("Filename '{filename}' does not seem to be a file?")
        return dictionary

    @staticmethod
    def create_from_directories(global_config_dir: str, user_config_dir: str, from_within_qgis: bool = False):
        """
        Create a NdffConnector by 'merging' the user-settings/config OVER the
        main/global-settings/config.

        The idea is to let users have a global set of settings (with more general settings/lists), and
        more project or data oriented settings (sets) which are merged OVER the global ones: for every
        settings file first the global version is read, and then updated with the user version.

        Directories:
        - the global directory is used as is, unless it is not named 'ndff_settings' itself AND
          contains an 'ndff_settings' directory: then that subdirectory is used
        - the user directory is always (made to be) a directory named 'ndff_settings'; it is also
          handed to the connector as its ndff log dir

        The connector is to be used both with client apps like CLI or R, and in QGIS.
        The 'from_within_qgis' flag is there because ONLY if using the library OUTSIDE the QGIS plugin
        the datasource settings have to be loaded. In QGIS these settings are ignored (None is handed
        to the connector) because QGIS will get its data from the loaded layers/tables.

        :param str global_config_dir: directory with the main/global settings
        :param str user_config_dir: directory with the user/project settings
        :param bool from_within_qgis: optional flag if this connector is used from within QGIS
        :raise ValueError: if one of the (merged) field mappings does not have length 3
        :return: the NdffConnector instance
        """
        settings_dir_name = NdffConnector.NDFF_SETTINGS_DIR

        # the global dir has the OPTION(!) to be the parent of the actual settings dir
        global_config_path = Path(global_config_dir)
        nested_global_path = global_config_path / settings_dir_name
        if global_config_path.name != settings_dir_name and nested_global_path.is_dir():
            global_config_path = nested_global_path

        user_config_path = Path(user_config_dir)
        if user_config_path.name != settings_dir_name:
            user_config_path = user_config_path / settings_dir_name

        def read_merged(csv_name: str) -> dict:
            # read the global version of a settings file and put the user version OVER it
            merged = NdffConnector.read_dict_from_csv(Path(global_config_path) / csv_name)
            merged.update(NdffConnector.read_dict_from_csv(Path(user_config_path) / csv_name))
            return merged

        ndff_api_settings = read_merged(NdffConnector.NDFF_API_SETTINGS)
        log.debug(f'Using ndff_api_settings: {ndff_api_settings}')

        # ONLY if using the library OUTSIDE the QGIS plugin, load the datasource settings.
        # IF within QGIS, QGIS/plugin will load the features from the active layer
        datasource_settings = None
        if not from_within_qgis:
            datasource_settings = read_merged(NdffConnector.NDFF_DATASOURCE_SETTINGS)
            log.debug(datasource_settings)

        client_settings = read_merged(NdffConnector.NDFF_OBSERVATION_CLIENT_SETTINGS)
        log.debug(f'Client settings: {client_settings}')

        # load field_mappings and defaults
        # NOTE: the user/project settings should ALWAYS have a field_mappings.csv
        field_mappings = read_merged(NdffConnector.NDFF_OBSERVATION_FIELD_MAPPINGS)
        # extra check, to be sure we only have triplets for every mapping
        for key, value in field_mappings.items():
            if len(value) != 3:
                raise ValueError(f'Error in field mappings, ALL values should be triplets, but at least one: "{key}" is not a triplet (add a comma to make it one): "{value}"')

        # every field in the NdffObservation has a potential string->ndff-uri DATA mapping file
        # named after the field. E.g. for taxon this file is mappings_taxon.csv
        # A fresh observation is only created to get the proper fields.
        # NOT going to log the potentially giant data mappings
        data_mappings_for_fields = {
            field: read_merged(f'mappings_{field}.csv')
            for field in NdffObservation().fields()
        }

        # Create the actual connector instance:
        nc = NdffConnector(ndff_api_settings=ndff_api_settings, datasource_settings=datasource_settings, field_mappings=field_mappings, data_mappings=data_mappings_for_fields, client_settings=client_settings, ndff_log_dir=str(user_config_path))
        log.debug('Created NdffConnector (create_from_directories)')
        return nc

    def __init__(self, ndff_api_settings: dict = None, datasource_settings: dict = {}, field_mappings: dict = None, data_mappings: dict = None, client_settings: dict = None, ndff_log_dir: str = tempfile.gettempdir()):
        """
        Constructor, in which a NdffConnector instance is created from a set of setting dicts. In general
        the static method in which the settings are read from directories with settings files
        (create_from_directories) is used, instead of using this constructor directly.

        Besides storing the settings, a DataSource is created from the datasource settings and its
        records are requested (get_records), to be used by next_record.

        Note: the NdffApi instance creation is deferred to later (see get_api), to be able to change
        settings in between.

        :param ndff_api_settings: settings to connect to the api, defaults to a new empty dict
        :param datasource_settings: settings handed to DataSource.create_from_settings
        :param field_mappings: field mappings, defaults to a new empty dict
        :param data_mappings: data mappings per field, defaults to a new empty dict
        :param client_settings: client settings, defaults to a new empty dict
        :param ndff_log_dir: directory for the ndff log, defaults to the temp dir of the system
        """
        # Every instance gets its OWN empty dict when a settings dict is omitted (or None).
        # With a '{}' as default value, all instances created without that argument share
        # one and the same dict, so a change via one connector shows up in the others.
        self.ndff_api_settings = {} if ndff_api_settings is None else ndff_api_settings
        # NOTE(review): datasource_settings is deliberately left untouched: create_from_directories
        # hands over an explicit None (when used from within QGIS) and that None is forwarded to
        # the DataSource as is, so None cannot be used as 'argument omitted' marker here.
        self.datasource_settings = datasource_settings
        self.datasource = DataSource.create_from_settings(self.datasource_settings)
        self.data_records = self.datasource.get_records()
        self.field_mappings = {} if field_mappings is None else field_mappings
        self.data_mappings = {} if data_mappings is None else data_mappings
        self.client_settings = {} if client_settings is None else client_settings
        self.ndff_log_dir = ndff_log_dir
        # NOT going to create an api connection NOW, do it later when somebody requests for a live api connection
        # THIS ALSO gives us the possibility to add/change api settings first
        self._api = None

    def next_record(self):
        """
        Returns a record/map of key/value pairs of the next record of current datasource

        Note that datasource's data_records is always a (datasource specific) iterator!

        :return: a dict with key/value's of the next record
        """
        return next(self.data_records)

    def set_data_records(self, data_records_iterator: iter) -> None:
        """
        Normally the specific DataSources will provide the data_records iterator.

        But in case of QGIS or other clients, it is possible that they provide the
        data themselves. Giving the option to SET the data_records here you can
        still retrieve/use 'next_record' from this connector

        :param iter data_records_iterator: an iterator to be questioned for next_record
        """
        self.data_records = data_records_iterator

    def get_api(self, fresh_one=False) -> NdffApi:
        """
        This returns (or creates) an Api instance from this connector.
        If 'fresh_one' is True then always a NEW instance will be created.
        If 'fresh_one' is False (the default), then there will only be created a fresh Api instance if there is no one.
        This make is possible to 'invalidate' the api of a connector, for example
        after a ndff_api_settings change, by requesting 'get_api(True)' or 'get_api(fresh_one=True)'

        :param bool fresh_one: to request a fresh api instance or reuse existing one
        :return: NdffApi instance (created based on NdffConnectors settings)
        """
        if fresh_one:
            self._api = None
        if self._api is None:
            # lazy Api object creation
            # let api deliberately remove token set of THIS api because we ask for a fresh_one
            self._api = NdffApi(self.ndff_api_settings, fresh_one=fresh_one)
        return self._api

    def get_field_mapping(self, field: str) -> str:
        """
        From the FIELD mappings, get the value (mapped field name) of this field-parameter

        The field is always one of NDFF field names

        The connector is responsible for the bookkeeping of the mappings!

        :param str field: the field to look up
        :return str: the value  of the field in the mappings
        """
        if len(self.field_mappings.keys()) == 0:
            raise NdffLibError('This Connector does not have any FIELD mappings to GET (from file field_mappings.csv), this should not happen...')
        field_mapping = None
        if field in self.field_mappings:
            field_mapping = str(self.field_mappings[field][1]).strip()
            if field_mapping in ('', '-', 'None'):
                field_mapping = None
        return field_mapping

    def set_field_mapping(self, field: str, mapped_field_name) -> bool:
        """
        SET a FIELD mapping: for example in the data the field 'name' is mapped to the NDFFs 'taxon'

        The field is always one of NDFF field names

        :param str field: the field to map
        :param mapped_field_name: the value to map to
        :return:
        """
        if len(self.field_mappings.keys()) == 0:
            raise NdffLibError('This Connector does not have any FIELD mappings to SET (from file field_mappings.csv), this should not happen...')
        if field in self.field_mappings:
            # the value of the field mappings is an immutable tuple as (field, value, default)
            # create a list from it first to be able to edit a value
            self.field_mappings[field] = list(self.field_mappings[field])
            self.field_mappings[field][1] = str(mapped_field_name).strip()
            # AND remove a possible earlier set default value IF mapped_field_name not is None
            if str(mapped_field_name).strip() not in ['', None, 'None', '-']:
                self.field_mappings[field][2] = None
            self.field_mappings[field] = tuple(self.field_mappings[field])
            return True
        return False

    def get_field_default(self, field: str) -> str:
        """
        In the field_mappings.csv it is possible to define a column which holds the value for given field, BUT
        it is also possible (in the field_mappings.csv settings) to use the third column, which defines a default
        value to be used for this field.

        This method returns None if there is no default defined, or the default (uri) to be used for this field

        :param str field:
        :return: None if not defined, or the default value/uri to be used.
        """
        if len(self.field_mappings.keys()) == 0:
            raise NdffLibError('This Connector does not have ANY mappings (from file field_mappings.csv), this should not happen...')
        default = None
        if field in self.field_mappings:
            default = str(self.field_mappings[field][2]).strip()
            if default in ('', '-', 'None'):
                default = None
        return default

    def set_field_default(self, field: str, field_default) -> bool:
        """
        SET a default for a field, in the field_mappings settings which will probably being saved to the
        field_mappings.csv settings file

        :param field: the field for which to define a default for
        :param field_default: the uri or value to be used
        :raise: NdffLibError in case the user tries to define a default for a none existing field or other errors
        :return bool: True in case  the setting succeeded
        """
        if len(self.field_mappings.keys()) == 0:
            raise NdffLibError('This Connector does not have ANY mappings (from file field_mappings.csv), this should not happen...')
        if field in self.field_mappings:
            # the value of the field_mappings is an immutable tuple
            # first create a list from it, so we can edit it:
            self.field_mappings[field] = list(self.field_mappings[field])
            # set the second column to the chosen default value for this field
            self.field_mappings[field][2] = str(field_default).strip()
            # AND remove a possible earlier set mapped value IF field_default not is None
            if str(field_default).strip() not in ['', None, 'None', '-']:
                self.field_mappings[field][1] = None
            self.field_mappings[field] = tuple(self.field_mappings[field])
            return True
        else:
            raise NdffLibError(f'This Connector tries to set a default for field "{field}" but is NOT available in mapping definition (from file field_mappings.csv), this should not happen...')

    def get_field_text(self, field: str) -> str:
        """
        The client_settings.csv can be used by client applications to change the texts/descriptions/urls the user sees.

        # field, text, url1_woordenboek, url2_default, description, ndff_field

        This method gets the text from the settings based on the (NdffObservation) fieldname

        :param field:
        :return: text the client wants to see for this field
        """
        text = field
        if field in self.client_settings.keys():
            text = str(self.client_settings[field][NdffConnector.FIELD_COL_TEXT]).strip()
        return text

    def get_field_ndff_name(self, field: str) -> str:
        """
        The client_settings.csv can be used by client applications to change the texts/descriptions/urls the user sees.

        # field, text, url1_woordenboek, url2_default, description, ndff_field

        This method gets the NDFF name from the settings based on the (NdffObservation) fieldname
        (for example: period_start in Observation is PeriodStart at NDFF)

        :param field:
        :return: NDFF name
        """
        ndff_name = field
        if field in self.client_settings.keys():
            ndff_name = str(self.client_settings[field][NdffConnector.FIELD_COL_NDFF_NAME]).strip()
        return ndff_name

    def get_field_description(self, field: str) -> str:
        """
        The client_settings.csv can be used by client applications to change the texts/descriptions/urls the user sees.

        # field, text, url1_woordenboek, url2_default, description, ndff_field

        This method gets the (longer) description from the settings based on the (NdffObservation) fieldname

        :param field:
        :return: description text
        """
        description = field
        if field in self.client_settings.keys():
            description = str(self.client_settings[field][NdffConnector.FIELD_COL_DESCRIPTION]).strip()
        return description

    def get_field_url1(self, field: str) -> str:
        """
        The client_settings.csv can be used by client applications to change the texts/descriptions/urls the user sees.

        # field, text, url1_woordenboek, url2_default, description, ndff_field

        url1_woordenboek is the url for given field to go to the woordenboek to search for field values

        :param field:
        :return: the url1_woordenboek url
        """
        url1 = field
        if field in self.client_settings.keys():
            url1 = str(self.client_settings[field][NdffConnector.FIELD_COL_URL1]).strip()
        if url1 and str(url1).strip() in ('', '-', 'None'):
            url1 = ''
        return url1

    def get_field_url2(self, field: str) -> str:
        """
        The client_settings.csv can be used by client applications to change the texts/descriptions/urls the user sees.

        # field, text, url1_woordenboek, url2_default, description, ndff_field

        url2_default is for certain fields an url for a default value which is easy to be used.
        For example the 'unknown', 'exact_count' or 'uncertain', ndff uri
        This uri is to be shown in the data dialog and can be copied and pasted OR searched from that dialog

        :param field:
        :return: the url2_default url
        """
        url2 = field
        if field in self.client_settings.keys():
            url2 = str(self.client_settings[field][NdffConnector.FIELD_COL_URL2]).strip()
        if url2 and str(url2).strip() in ('', '-', 'None'):
            url2 = ''
        return url2

    def add_extra_info_field_mapping(self, new_identity: str, new_value: str) -> bool:
        """
        Add a pair of field mappings for one extra-info item of an observation.

        The extra_info of an observation comes from a list of key value pairs.
        The key/identity should always be an uri!

        The value of it could be a string/number OR uri, BUT always MAPPED from the data

        Extra info can NOT have a default value in the mappings.

        Because the extra-info fields are a list of key/value pairs, and we want to be able to save
        them in the field mappings, the keys are prepended with 'extra_info_identity_' and the values
        with 'extra_info_value_', followed by a number. For example:

        extra_info_identity_1, 'http://ndff-ecogrid.nl/codes/keys/observation/dwellings', None
        extra_info_value_1, 'dwelling', None
        extra_info_identity_2, 'http://ndff-ecogrid.nl/codes/keys/external/original_visit_id', None
        extra_info_value_2, 'visit_id', None

        In this way you can have several extra_info_field mappings. A new pair gets the number
        following the highest number currently in use by an 'extra_info_identity_' key.

        :param new_identity: the uri of the extra-info key
        :param new_value: the mapped value
        :return: True if success, False if new_identity is not an uri or if this connector
            does not have a (non-empty) dict of field mappings
        """
        if not is_uri(new_identity):
            log.debug(f'Trying to set an extra info field mapping with an identity which is not an uri: "{new_identity}"')
            return False
        if not (self.field_mappings and isinstance(self.field_mappings, dict)):
            return False

        # collect the numbers in use (the part after the 20 characters of 'extra_info_identity_');
        # the 0 makes sure the numbering starts at 1
        used_indexes = [0] + [int(key[20:]) for key in self.field_mappings if key.startswith('extra_info_identity_')]
        next_index = max(used_indexes) + 1

        identity_key = f'extra_info_identity_{next_index}'
        value_key = f'extra_info_value_{next_index}'
        self.field_mappings[identity_key] = (identity_key, new_identity, None)
        self.field_mappings[value_key] = (value_key, new_value, None)
        return True

    def delete_extra_info_field_mapping(self, field_map_key: str, field_map_value: str) -> bool:
        """
        Delete a field mapping in which the field is an extra-info field.

        Because the extra-info fields is a list of key value pairs, you have to use BOTH the field_map_key and the
        field_map_value: so a good set would be: (extra_info_identity_1, extra_info_value_1)

        :param field_map_key:
        :param field_map_value:
        :return: bool if success
        """
        if field_map_key in self.field_mappings and field_map_value in self.field_mappings:
            self.field_mappings.pop(field_map_key)
            self.field_mappings.pop(field_map_value)
            return True
        return False

    def change_extra_info_field_mapping(self, field_map_key: str, new_identity: str, field_map_value: str, new_value: str) -> bool:
        """
        Change some existing mapping, get it from the live field mappings instance
        based on the 'field_map_key' and 'field_map_value', which are the 'keys'
        used in the field_mappings dict, like: extra_info_identity_2 and extra_info_value_2

        :param field_map_key: key like: extra_info_value_2
        :param new_identity: value of the extra info (ndff URI !!!)
        :param field_map_value: key like: extra_info_value_2
        :param new_value: value of extra info (fieldname or value)
        :return: bool if success
        """
        try:
            self.set_field_mapping(field_map_key, new_identity)
            self.set_field_mapping(field_map_value, new_value)
            return True
        except NdffLibError:
            return False

    def change_extra_info_field_mapping_values(self, old_identity: str, old_value: str, new_identity: str, new_value: str) -> bool:
        """
        Change an extra-info pair, looked up by its current content instead of its names.

        The extra_info of an observation comes from a list of key value pairs.
        Because it is not known if a key of the extra info can be used multiple times, the pair is looked up
        in the current mappings using BOTH the key(-uri) AND the value(-mapping). The first pair that matches
        is replaced, but only if new_identity is an uri.

        Note: it is probably safer to use 'change_extra_info_field_mapping' as theoretically an old_identity
        can be reused in a set of extra info (deprecate this one?)

        :param old_identity: actual URI of this extra info
        :param old_value: actual Fieldname (or value) of this extra info
        :param new_identity: URI of new extra info
        :param new_value: new Fieldname (or value) of this extra info
        :return: True if success else False
        """
        mappings = self.field_mappings
        for key in mappings:
            if not key.startswith('extra_info_identity_'):
                continue
            # the value entry carries the same number as the identity entry
            value = f'extra_info_value_{int(key[20:])}'
            is_requested_pair = mappings[key][1] == old_identity and mappings[value][1] == old_value
            if not is_requested_pair:
                continue
            if not is_uri(new_identity):
                log.debug(f'Trying to change an extra info field mapping with an identity which is not an uri: "{new_identity}"')
                continue
            # only values of existing keys are replaced, so this is safe while iterating
            mappings[key] = (key, new_identity, None)
            mappings[value] = (value, new_value, None)
            return True
        log.error(f'Failing to change_extra_info_field_mapping_values: "{old_identity}":"{old_value}" to "{new_identity}":"{new_value}"')
        return False

    def get_data_mapping(self, field: str, field_key: str) -> Union[str, None]:
        """
        Data mappings are dictionaries (in memory AND/OR on disk/in database, depending
        on the type of datasource).

        In current implementation they are saved in the user_settings in a file
        called 'mappings_<field>.csv'

        Not sure yet if we should to lazy loading: as open the file when needed,
        OR early loading: open file and load in memory

        Although 'domain'-tables in databased typically have >2 columns, these
        will be exploded to a long list of key-value pairs...

        NOTE! since 20221202 ALL KEYS in the csv files will be lowercase!!
        Meaning also searching for a key should use .lower() before searching !!!

        Drawback of this implementation: you cannot have the same key for 2
        different uri's. So IF you have
        hop --> plant uri
        you cannot have
        hop -> bird uri
        in the same file...

        You can have the same uri (as value) several times in the mappings:
        for example the nl name, scientific name, an abbreviation, and maybe an
        often made misspelling ot the nl name etc...

        NOTE! the mapping key's for abundance_value (like Tansley code's) consist of BOTH the schema AND the value,
        E.g. http://ndff-ecogrid.nl/codes/scales/tansley:s

        :param field: the actual field (taxon, activity etc.)
        :param field_key: (the string which should map to a certain uri)
        :return: field_value: an uri for the field_key or None (that fits better in data dicts)
        """
        # try to find <field_key>.lower() in the mapping dict
        # if found return
        # else if not found return None
        # since nov 2022 we try to be case-insensitive, but sometimes people write or still have mappings with uppercase keys, so we check for both
        lower_case_key = str(field_key).lower()
        if field not in self.data_mappings:
            return None  # deliberately NOT returning False because this could show up in Observation maybe...
        if lower_case_key in self.data_mappings[field]:  # preferred, and most apparent (hopefully)
            # strip value from whitespace, else creates havoc kt-362
            return self.data_mappings[field][lower_case_key].strip()
        # extra check (backwards compatibility)
        if field_key in self.data_mappings[field]:
            # strip value from whitespace, else creates havoc kt-362
            return self.data_mappings[field][field_key].strip()
        return None  # deliberately NOT returning False because this could show up in Observation maybe...

    def get_dataset_types(self) -> deque:
        """
        Using api.get_dataset_types (only retrieving 1 page) retrieve ALL dataset types

        The connector will 'page' through all datasets until all retrieved.
        (thinking that there will be no users/domains with that many datasettypes????)

        Return an empty deque when the api fails.
        (deque is used because it is faster, better suited to pop left/right and more thread safe)

        :return: a deque of dataset_type objects like:
            {
                "_links": {
                    "self": {
                        "href": "https://accapi.ndff.nl/api/v2/domains/708/datasettypes/21532/"
                    }
                },
                "category": "inbox",
                "description": "inbox Test NGB",
                "identity": "http://ndff.nl/foldertypes/test_ngb/inbox"
            }

        """
        api = self.get_api()
        all_dataset_types = deque()
        result = api.get_dataset_types()  # one page at a time...
        if result['http_status'] != 200:
            log.debug(f'Error retrieving datasettypes for {api.user} domain {api.domain}...')
            return all_dataset_types
        obj = result['http_response']
        # using a deque, because that is thread safe (?) and faster to extend
        all_dataset_types.extend(obj['_embedded']['items'])
        while '_links' in obj and 'next' in obj['_links']:
            uri = obj['_links']['next']['href']
            result = api.get_data(ndff_uri=uri)
            assert result['http_status'] == 200
            response = result['http_response']
            obj = response
            all_dataset_types.extend(obj['_embedded']['items'])
        return all_dataset_types

    def search_observations(self, field=None, field_value=None, max_hits=250, page_size: int = 25) -> deque:
        """
        Using api.search_waarneming(field, field_value) (only retrieving 1 page at a time) to retrieve observations
        for current user and domain (given field and field_value)

        For example searching for one observation with given identity field:
        https://accapi.ndff.nl/api/v2/domains/708/observations/?identity=http://ecoreest.nl/test1/1

        Or all observations from given dataset:
        https://accapi.ndff.nl/api/v2/domains/708/observations/?dataset=http://ndff.nl/api-testngb/folders/1671554924123426753

        The connector will 'page' through all observations until all retrieved.

        Return an empty deque when the api fails or returns an empty set
        (deque is used because it is faster, better suited to pop left/right and more thread safe)

        :param: field the string or url to search in
        :param: field_value the observation fields (like extrainfo, taxa, etc.)
        :param: max_hits the number of hits (while paging) to stop
        :param: page_size number of results per 'page' from api (default to ndff default 25)
        :return: deque
        """
        # fetch ALL datasets for this user/domain
        api = self.get_api()
        all_observations = deque()
        result = api.search_waarneming(field, field_value, page_size)  # one page at a time...
        if result['http_status'] != 200:
            log.debug(f'Error retrieving datasets for {api.user} domain {api.domain}...')
            return all_observations
        obj = result['http_response']
        # using a deque, because that is thread safe (?) and faster to extend...
        all_observations.extend(obj['_embedded']['items'])
        while '_links' in obj and 'next' in obj['_links']:
            if len(all_observations) >= max_hits:
                log.debug(f'Max hits for a code search is set to {max_hits}, but is now: {len(all_observations)}, quitting with paging... ')
                break
            uri = obj['_links']['next']['href']
            result = api.get_data(ndff_uri=uri)
            assert result['http_status'] == 200
            response = result['http_response']
            obj = response
            all_observations.extend(obj['_embedded']['items'])
        return all_observations

    def search_codes(self, search_text: str, search_type: str = 'codes', search_field: str = None, max_hits=250, page_size: int = 25) -> deque:
        """
        Search code uses the filter or search endpoint of the codes list of the API

        The search_type is the codes type to search for
        (the api endpoints, like abundancies, taxa, extrainfo etc., see https://accapi.ndff.nl/codes/v2/)

        When a 'search_field' is given, that field is used to search in.
        Possible fields: description, identity, indexvalue, name, rank, language, speciesgroup
        For example search in the name field of taxa (NOTE: name searches in both Dutch and Scientific name!!)
        https://accapi.ndff.nl/codes/v2/taxa/?name=pipi&ordering=-indexvalue
        or
        https://accapi.ndff.nl/codes/v2/taxa/?name=paardenbloem&ordering=-indexvalue

        If the 'search_field' is None, do not search over the field endpoint, but over the more general 'search' endpoint
        Either search for search-text using the 'search' endpoint
        https://accapi.ndff.nl/codes/v2/extrainfo/?search=location_id

        When 'search_text' is an uri, search for an object via the identity uri like:
        https://accapi.ndff.nl/codes/v2/extrainfo/?identity=[IDENTITYURI]&ordering=-indexvalue

        Note that it is possible that the result contains a 'next line', meaning we can page over the results.
        It is the responsibility of the connector to do this!
         {
            "_links": {
                "next": {
                    "href": "https://accapi.ndff.nl/api/v2/domains/168/datasets/?page=5"
                },
                "previous": {
                    "href": "https://accapi.ndff.nl/api/v2/domains/168/datasets/?page=3"
                },
                "self": {
                    "href": "https://accapi.ndff.nl/api/v2/domains/168/datasets/?page=4"
                }
            },
            "count": 103,
            "page_size": 25,
        ...
        }

        :param: search_text the string or url to search for
        :param: search_type: the observation fields (like extrainfo, taxa, etc.)
        :param: search_field name or description
        :param: max_hits the number of hits (while paging) to stop
        :param: page_size number of results per 'page' from api (default to ndff default 25)
        :return: deque
        """
        api = self.get_api()
        # using a deque, because that is thread safe (?) and faster to extend...
        all_codes = deque()
        # get the field => search type mapping (eg a 'taxon' is to be searched in 'taxa')
        if search_type in self.mapping.keys():
            search_type = self.mapping[search_type]
        result = api.search_codes(search_text, search_type, search_field, page_size=page_size)  # one page (of size 'page_size') at a time...
        if result['http_status'] != 200:
            log.debug(f'Error searching codes: {search_text} {search_type} {search_field}...')
            return all_codes
        obj = result['http_response']
        all_codes.extend(obj['_embedded']['items'])
        while '_links' in obj and 'next' in obj['_links']:
            if len(all_codes) >= max_hits:
                log.debug(f'Max hits for a code search is set to {max_hits}, but is now: {len(all_codes)}, quitting with paging... ')
                break
            uri = obj['_links']['next']['href']
            result = api.search_codes(next_link=uri)
            assert result['http_status'] == 200
            response = result['http_response']
            obj = response
            all_codes.extend(obj['_embedded']['items'])
        return all_codes

    def identity_has_related_codes(self, identity_uri=None) -> bool:
        """
        Tell if the given identity_uri has related codes.

        The identity URI is first resolved to a NDFF URL (by searching the codes with search type
        'abundance_value'), and THEN the api is asked if that NDFF URL has related codes.
        Answers from the api are remembered in a small local cache (has_related_codes_cache).

        # NOTE: API searches using the NDFF URL, but connector searches/pages using the abundancy_schema URI

        :param: identity_uri
        :return: bool, False also when identity_uri is not an uri or could not be resolved to a NDFF URL
        """
        if not is_uri(identity_uri):
            log.debug(f'Error has related codes using {identity_uri} (NOT an uri!)...')
            return False
        # answered before? then use the local cache
        cache = self.has_related_codes_cache
        if identity_uri in cache:
            return cache[identity_uri]
        # currently we know that this is an abundancy_schema IDENTITY uri, but we should also be able to handle an
        # abundancy_schema NDFF URL, like https://accapi.ndff.nl/codes/v2/abundancies/179580026/
        found_codes = self.search_codes(search_text=identity_uri, search_type='abundance_value')
        if not found_codes:
            log.debug(f'No NDFF-URL found when searching for "related codes" found for {identity_uri}')
            return False
        # the first hit is taken as THE code for this identity
        ndff_url = found_codes[0]['_links']['self']['href']
        answer = self.get_api().has_related_codes(ndff_url)
        cache[identity_uri] = answer
        return answer

    def get_related_codes(self, identity_uri=None, search_text=None) -> deque:
        """
        Fetch ALL related codes of given identity_uri

        The identity URI is first resolved to a NDFF URL (by searching the codes with search type
        'abundance_value'), after that the related codes of that NDFF URL are retrieved page by page,
        following the 'next' link of every page.

        Return an empty deque when identity_uri is not an uri, when it cannot be resolved to a NDFF URL
        or when the first api call fails. When no search_text is given and the local cache
        (has_related_codes_cache) already knows that there are no related codes, an empty deque is
        returned without calling the api.
        (deque is used because it is faster, better suited to pop left/right and more thread safe)

        # NOTE: API searches using the NDFF URL, but connector searches/pages using the abundancy_schema URI

        :param: identity_uri
        :param: search_text optional text, handed over to api.get_related_codes with every page request
        :return: deque
        :raises AssertionError: when a follow-up page does not return http status 200
        """
        # using a deque, because that is thread safe (?) and faster to extend...
        all_related_codes = deque()
        if not is_uri(identity_uri):
            log.debug(f'Error retrieving related codes using {identity_uri} (NOT an uri!)...')
            return all_related_codes
        # check cache:
        if search_text is None and identity_uri in self.has_related_codes_cache and not self.has_related_codes_cache[identity_uri]:
            return all_related_codes

        api = self.get_api()
        # currently we know that this is an abundancy_schema IDENTITY uri, but we should also be able to handle an
        # abundancy_schema NDFF URL, like https://accapi.ndff.nl/codes/v2/abundancies/179580026/
        codes = self.search_codes(search_text=identity_uri, search_type='abundance_value')
        if len(codes) == 0:
            log.debug(f'No NDFF-URL found when searching for "related codes" found for {identity_uri}')
            return all_related_codes

        # the first hit is taken as THE code for this identity
        ndff_uri = codes[0]['_links']['self']['href']

        result = api.get_related_codes(ndff_uri, search_text=search_text)  # one page at a time...
        if result['http_status'] != 200:
            log.debug(f'Error retrieving related codes using {ndff_uri} for {identity_uri}')
            return all_related_codes
        page = result['http_response']
        all_related_codes.extend(page['_embedded']['items'])
        while '_links' in page and 'next' in page['_links']:
            ndff_uri = page['_links']['next']['href']
            result = api.get_related_codes(ndff_uri, search_text=search_text)  # one page at a time...
            if result['http_status'] != 200:
                # explicit check instead of an 'assert' statement: asserts are removed when python runs
                # with -O; the exception type is kept so callers see the same failure as before
                raise AssertionError(f'Error retrieving next page of related codes from {ndff_uri}: http_status {result["http_status"]}')
            page = result['http_response']
            all_related_codes.extend(page['_embedded']['items'])
        return all_related_codes

    def get_protocols(self) -> deque:
        """
        Using api.get_data() (only retrieving 1 page) to retrieve ALL(!) datasets
        for current user and domain (as defined by the api instance).

        The connector will 'page' through all datasets until all retrieved.
        (thinking that there will be no users/domains with that many datasets????)

        Return an empty deque when the api fails.
        (deque is used because it is faster, better suited to pop left/right and more thread safe)

        Return a deque of dataset objects like when successful:

        {
            "_links": {
                "self": {
                    "href": "https://accapi.ndff.nl/api/v2/domains/708/protocols/130/"
                }
            },
            "description": "12.205 Monitoring Beoordeling Natuurkwaliteit EHS - N2000 (SNL-2014)",
            "identity": "http://ndff-ecogrid.nl/codes/protocols/12.205"
        }

        :return: deque
        """
        # fetch ALL protocols for this user/domain
        # using a deque, because that is thread safe (?) and faster to extend...
        all_protocols = deque()
        api = self.get_api()
        result = api.get_protocols()  # one page at a time...
        if result['http_status'] != 200:
            log.debug(f'Error retrieving protocols for {api.user} domain {api.domain}...')
            return all_protocols
        obj = result['http_response']
        all_protocols.extend(obj['_embedded']['items'])
        while '_links' in obj and 'next' in obj['_links']:
            uri = obj['_links']['next']['href']
            result = api.get_protocols(ndff_uri=uri)
            assert result['http_status'] == 200
            response = result['http_response']
            obj = response
            all_protocols.extend(obj['_embedded']['items'])
        return all_protocols

    def get_datasets(self) -> deque:
        """
        Using api.get_data() (only retrieving 1 page) to retrieve ALL(!) datasets
        for current user and domain (as defined by the api instance).

        The connector will 'page' through all datasets until all retrieved.
        (thinking that there will be no users/domains with that many datasets????)

        Return an empty deque when the api fails.
        (deque is used because it is faster, better suited to pop left/right and more thread safe)

        Return a deque of dataset objects like when successful:
            {
                "_links": {
                    "self": {
                        "href": "https://accapi.ndff.nl/api/v2/domains/708/datasets/2920720/"
                    }
                },
                "datasetType": "http://ndff.nl/foldertypes/test_ngb/startmap",
                "description": "Test NGB",
                "duration": null,
                "extrainfo": [],
                "identity": "http://ndff.nl/folders/2920720",
                "involved": [
                    {
                        "involvementType": "http://ndff-ecogrid.nl/codes/involvementtypes/folder_admin",
                        "person": "http://telmee.nl/contacts/persons/1311835"
                    },
                    {
                        "involvementType": "http://ndff-ecogrid.nl/codes/involvementtypes/observation_admin",
                        "person": "http://telmee.nl/contacts/persons/1311833"
                    }
                ],
                "location": null,
                "locationCoverage": null,
                "parent": "http://ndff.nl/folders/2920719",
                "periodStart": null,
                "periodStop": null,
                "protocol": null
            }

        :return: deque
        """
        # fetch ALL datasets for this user/domain
        api = self.get_api()
        # using a deque, because that is thread safe (?) and faster to extend...
        all_datasets = deque()
        result = api.get_datasets(page_size=250)  # one page at a time, but some larger pages (250)
        if result['http_status'] != 200:
            log.debug(f'Error retrieving datasets for {api.user} domain {api.domain}...')
            return all_datasets
        obj = result['http_response']
        all_datasets.extend(obj['_embedded']['items'])
        while '_links' in obj and 'next' in obj['_links']:
            uri = obj['_links']['next']['href']
            result = api.get_data(ndff_uri=uri)
            assert result['http_status'] == 200
            response = result['http_response']
            obj = response
            all_datasets.extend(obj['_embedded']['items'])
        return all_datasets

    def create_dataset(self, dataset: NdffDataset):
        """
        Let the connector create a new dataset (map) for current user/domain, as child dir of parent_dataset

        :return: NdffResult
        """
        if not dataset.is_valid():
            log.error(f'Dataset {dataset} is NOT valid, POSTING anyway!')
        return self.get_api().post_dataset(dataset.to_ndff_dataset_json(), dataset.identity)

    def search_dataset(self, dataset_identity_uri: str) -> Union[NdffDataset, None]:
        """
        Search for a Dataset using a valid dataset_identity_uri

        When the api returns several datasets, the first one is used.

        :param str dataset_identity_uri: the uri of the dataset
        :return: a NDFFDataset (build from the json of the result) or None if nothing found, if the api
                 did not answer with http status 200 or if dataset_identity_uri is not an uri
        """
        if not is_uri(dataset_identity_uri):
            log.error(f'Searching a Dataset with an uri which seems not an uri: "{dataset_identity_uri}"')
            return None
        result = self.get_api().search_dataset(dataset_identity_uri)
        if result['http_status'] != 200:
            return None
        datasets = result['http_response']['_embedded']['items']
        if not datasets:
            # a successful search without hits: 'nothing found' (indexing the empty list would raise an IndexError)
            return None
        return NdffDataset.from_dataset_json_to_dataset(datasets[0])

    def set_data_mapping(self, field: str, field_key: str, field_value: str) -> bool:
        """
        Set a data mapping (field_key => field_value) for the given field in the in-memory data mappings
        of this connector.

        NOTE! since 20221202 ALL KEYS in the csv files will be lowered case!!
        Meaning the field_key is stored lowercased, and also searching for a key should use .lower()
        before searching !!!

        An already existing mapping for the (lowercased) field_key is overwritten, after a warning in the log.

        :param field: the actual field (taxon, activity etc)
        :param field_key: (the string which should map to a certain uri), should have a length >= 1
        :param field_value: an uri for the field_key
        :return: bool True if setting succeeded
                else: False (field_key is None or empty, or field_value is None or not an uri)
        """
        # the field_key should be an actual string of length >= 1
        if field_key is None or len(field_key) == 0:
            log.error(f'Error: trying to set a data mapping for "{field}" with a field_key "{field_key}" (None or length < 1)')
            return False
        # the field_value should be an actual uri
        if not (field_value is not None and is_uri(field_value)):
            log.error(f'Error: trying to set a data mapping for "{field}" with a field_value "{field_value}" (None or non-uri)')
            return False

        key = field_key.lower()
        mappings_of_field = self.data_mappings[field]
        if key in mappings_of_field:
            log.warning(f'"{key}" already in mappings for {field}, with value {self.data_mappings[field][key]} OVERWRITING IT !!!')
        mappings_of_field[key] = field_value
        return True

    def map_data_to_ndff_observation(self, data_record: dict) -> NdffObservation:
        """
        Very import method to map a (data dict) record to a NdffObservation.

        We are taking into account here all (runtime) settings of the user/connector.
        So we check if fieldnames are mapped or not, same for data values etc.

        Special care for the location because that one can have several sources.

        This method
        :param dict data_record:
        :return:
        """
        observation = NdffObservation()
        # creating a location during the ride...
        # the fields should be filled with actual data, so floats for x, y and buffer and wkt for location
        location_data = {
            'location': None,
            'location_x': None,
            'location_y': None,
            'location_buffer': None
        }
        # TODO reduce complexity to divide into
        # - do field mappings
        # - do default values
        # - do mapped data, or just abundance

        # go over all FIELD MAPPINGS (which contain both the observation
        # fields AND some extra location related fields, and see if it is in the data)
        for field in self.field_mappings.keys():
            # OK, user SET a field mapping, now check IF this is actually a valid one:
            if field.startswith('extra_info_value'):
                pass  # handled at identity
            elif field.startswith('extra_info_identity'):
                counter = int(field.replace('extra_info_identity_', ''))
                key = self.get_field_mapping(f'extra_info_identity_{counter}')
                field = self.get_field_mapping(f'extra_info_value_{counter}')
                if field in data_record:
                    value = data_record[field]
                    (valid, error_list) = observation.set_extra_info(key, value)
                    #if not valid:
                    #    print(error_list)
                    #    log.error(error_list[0])
                else:
                    log.error(f'Trying to find a (mapped) field "{field}" in the data.... is it a genuine fieldname?')
            elif self.get_field_mapping(field) in data_record:
                if field in location_data:
                    # handled differently: collect and use later to create location
                    location_data[field] = data_record[self.get_field_mapping(field)]
                else:  # there can be a (faulty) mapping, which is NOT a field in the data
                    observation.set(field, data_record[self.get_field_mapping(field)])
            # else:
                # not sure what to do here, set a message so clients can check for that, OR raise an Exception, so mappings will be forced to fit...
                # raise ValueError(f'User set the field mapping of field "{field}" to "{self.get_field_mapping(field)}" but that field is NOT available in the data: {data_record}')
                # log.error(f'User set the field mapping of field "{field}" to "{self.get_field_mapping(field)}" but that field is NOT available in the data')
                # observation.set(field, self.WRONG_FIELD_MAPPING)

        # look for defined DEFAULT(!) values:
        # set the observation field to default IF it is either None (?OR it was missing?)
        for field in self.field_mappings.keys():
            # location_buffer is a special case: should not be available in the NdffObservation,
            # BUT is needed for a valid geometry, so should be either a mapped value (OR a default)
            # BUT we are only using the default value if it is not set by data from the record yet:
            if field == 'location_buffer' and location_data['location_buffer'] is None:
                location_data[field] = self.get_field_default(field)
            elif field in location_data:
                pass
            elif observation.get(field) is None and self.get_field_default(field):
                observation.set(field, self.get_field_default(field))
            else:
                pass

        # fixing the location now, either it is filled with a dict or wkt (that is there IS a 'location' field)
        # OR location is still None, but x_y_location is filled during this ride...
        # NDFF Connector is responsible for creating a NDFF geometry from the record data
        # can be a WKT field, can be from an x,y, can go wrong...
        # log.error(f'Before create_geom: location_data = {location_data}')
        if self.create_geom(observation, location_data):
            log.debug('Successfully created a location object')
        else:
            log.debug('NOT created a valid location object (yet, now worries)')

        # now go over MAPPED DATA mappings for every observation field, to set the values (MAPPED to 'NDFF uri') in the Observation
        log.debug("Starting to map data to uri's in Observation")
        for field in observation.fields():
            # check IF there are DATA mappings for this field...
            if field in self.data_mappings.keys():
                # if the value in current observation DATA/FIELD is in the data_mappings (as key)
                # (they map from a term/abbreviation/name/id to an uri):
                mapping_key = observation.get(field)
                # SPECIAL CASE: the mapping keys for abundance_value contains both abundance_schema AND abundance_value
                # e.g. http://ndff-ecogrid.nl/codes/scales/tansley:s
                if field == 'abundance_value':
                    # we have to find the abundance_schema URI, can be non mapped OR via mapping
                    abundance_schema = observation.get('abundance_schema')
                    abundance_schema_field = 'abundance_schema'
                    if self.get_field_mapping('abundance_schema'):
                        abundance_schema_field = self.get_field_mapping('abundance_schema')
                    abundance_value_field = 'abundance_value'
                    if self.get_field_mapping('abundance_value'):
                        abundance_value_field = self.get_field_mapping('abundance_value')
                    if abundance_schema_field in data_record and self.get_data_mapping('abundance_schema', data_record[abundance_schema_field]):
                        abundance_schema = self.get_data_mapping('abundance_schema', data_record[abundance_schema_field])  # observation.get('abundance_schema') nope: you cannot 100% be sure this  mapping is already resolved...
                    abundance_value = None
                    if abundance_value_field in data_record and data_record[abundance_value_field]:  # self.get_data_mapping('abundance_value', data_record[abundance_value_field]):
                        # for the data mapping key of the abundance_value, we use:
                        # - the NDFF URI for the abundance_schema: 'http://ndff-ecogrid.nl/codes/scales/tansley'
                        # - the DATA value for the abundance_value: 's'
                        # to create as mapping_kye: 'http://ndff-ecogrid.nl/codes/scales/tansley:s'
                        abundance_value = data_record[abundance_value_field]  # self.get_data_mapping('abundance_value', data_record[abundance_value_field])  # observation.get('abundance_schema') nope: you cannot 100% be sure this  mapping is already resolved...
                    if abundance_value:
                        mapping_key = f'{abundance_schema}:{abundance_value}'
                if (isinstance(mapping_key, str) or isinstance(mapping_key, int)) and self.get_data_mapping(field, mapping_key) is not None:  # observation.get(field) in self.data_mappings[field]:
                    # 'overwrite' the value with the 'mapped' value
                    observation.set(field, self.get_data_mapping(field, str(mapping_key)))

        # fix/check period
        # NOPE: this was wrong, this always created a full iso datetime string, while partial strings also are OK for NDFF
        # result = NdffConnector.create_fix_period(observation)
        # if result[0]:
        #     log.debug('Successfully created a period for the observation')
        # else:
        #     log.debug('NOT created a period for the observation: {result[1]}')
        log.debug('Successfully created the Observation from the data')
        return observation

    def sent_observation_to_ndff(self, observation: NdffObservation, overwrite=False, test_modus=False) -> NdffResult:
        """
        This will either POST or PUT the observation to the NDFF.

        It is actually a wrapper around the api calls (via NdffApi), but it gives user/clients a choice to overwrite
        an NdffObservation (via the overwrite parameter).
        There is also a test_modus where you will always get a unique identity uri

        :param: observation
        :param: overwrite
        :param: test_modus test_modus=True will always generate a unique uri, so never clashes of POST's
        :return: NdffResult
        """
        log_lines = []
        crs = self.get_crs(observation)
        api = self.get_api()
        identity = None
        # When test_modus True: will always generate a unique uri, so never clashes of POST's.
        # This is handy for testing purposes when you want to POST one observation several times
        # without needing to test if an observation is already uploaded/exists
        if test_modus:
            # ALWAYS unique:
            identity = f'http://test.ndff.nl/test/test/{time.time_ns()}'
            # another option is to ADD fixed number to original (mainly for debugging purposes):
            # identity = observation.get('identity') + '/20220527-2'
            # time.sleep(5) # seconds (testing)

        log.debug(f'Sending/POST to NDFF, user: {api.user},domain: {api.domain}, crs: {crs}, client-id: {api.client_id}\nidentity: {identity}\n{observation.to_ndff_observation_json()}')
        result = api.post_waarneming(observation.to_ndff_observation_json(observation_identity=identity), identity, crs)
        log_lines.append(result.as_tuple())
        # For now: we do not ask, IF overwrite==True: upon an identity conflict (409) we just overwrite...
        if f'{result["http_status"]}' == '409':
            if overwrite:
                if result['ndff_uri'] and len(result['ndff_uri']) > 7:  # older NDFF api versions returned :// as ndff_uri
                    ndff_uri = result['ndff_uri']
                    log.debug(f'Sending/PUT to NDFF, user: {api.user}, domain: {api.domain}, crs: {crs}, client-id: {api.client_id}\nidentity: {identity}\n{observation.to_ndff_observation_json()}')
                    result = api.put_waarneming(observation.to_ndff_observation_json(), ndff_uri, identity, crs)
                    log_lines.append(result.as_tuple())
                else:
                    # failed to get the ndff_uri from the 409, second try:we do a GET/search request to find out the ndff uri
                    # (we should not come here anymore, since NDFF is now returning the ndff_uri in the 409 response!)
                    log.info('ASSERT: we should not be here: NDFF is now returning the ndff_uri in the response...')
                    search_result = api.search_waarneming('identity', observation.get('identity'))
                    if search_result['http_status'] == 200:
                        # if you searched for an observation and the count in the response >= 1
                        # then the result contains the observations 'embedded', so grab the ndff_uri from it
                        # tricky, but working for now...
                        ndff_uri = search_result['http_response']['_embedded']['items'][0]['_links']['self']['href']
                        # get identity from observation
                        identity = observation.get('identity')
                        search_result['object_id'] = identity
                        search_result['ndff_uri'] = ndff_uri
                        # log the successful search result here (after adding identity and ndff_uri)
                        log_lines.append(search_result.as_tuple())
                        log.debug(f'Sending/PUT to NDFF, user: {api.user}, client-id {api.client_id},\ndomain: {api.domain}, crs: {crs} \n{identity}\n{observation.to_ndff_observation_json()}')
                        result = api.put_waarneming(observation.to_ndff_observation_json(), ndff_uri, identity, crs)
                        log_lines.append(result.as_tuple())
                    else:
                        log.debug(f'Nothing found when searching for: {observation.get("identity")}')
                        # log the failed search
                        log_lines.append(search_result.as_tuple())
            else:
                # throw exception, custom ndff_status ??
                # for now: we just return the 409 ndff_result
                log.debug(f'{observation.get("identity")} NDFF api returned 409 (resource exists), but we have "overwrite" on False...')

        self.write_to_ndff_log(log_lines)
        return result

    def write_to_ndff_log(self, log_lines: list):
        """
        Write a list of log liness to the ndff log ('ndff_log.csv' probably in the ndff_settings dir)
        :param log_lines:
        """
        with open(Path(self.ndff_log_dir) / self.NDFF_LOG, mode='a', encoding='UTF8',
                  newline='') as f:  # using 'with open', then file is explicitly closed
            writer = csv.writer(f)
            writer.writerows(log_lines)
            f.flush()

    # Commented this because this created a full iso datetime string always...
    # @staticmethod
    # def create_fix_period(observation):
    #     # period_start and period_stop
    #     if observation.period_start is None and observation.period_stop is None:
    #         # mmm, apparently nothing is set... non-valid record ?
    #         return False, False
    #     try:
    #         # only a period_start is set: add one minute to it and make that period_stop
    #         if observation.period_start and observation.period_stop is None:
    #             start = NdffConnector.datetimestring_to_datetime(observation.period_start)
    #             stop = start + timedelta(minutes=1)
    #         # only a period_end is set: subtract one minute to it and make that period_start
    #         elif observation.period_stop and observation.period_start is None:
    #             stop = NdffConnector.datetimestring_to_datetime(observation.period_stop)
    #             # subtract one minute
    #             start = stop - timedelta(minutes=1)
    #         else:
    #             start = NdffConnector.datetimestring_to_datetime(observation.period_start)
    #             stop = NdffConnector.datetimestring_to_datetime(observation.period_stop)
    #         # Ok, we should have a valid start AND end now...:
    #         observation.period_start = start.isoformat()
    #         observation.period_stop = stop.isoformat()
    #     except ValueError as v:
    #         return False, v
    #     return True, True

    @staticmethod
    def create_geom(observation: NdffObservation, location_data):
        """
        Internal method which in the map_data_to_observation method tries to create a valid Geometry.

        The 'location_data' parameter here is actually a json, build in map_data_to_observation, based on the
        mappings in the record, from plugin can be incomplete:

        location_data = {
            'location': None,
            'location_x': None,
            'location_y': None,
            'location_buffer': None
        }

        We to create a valid GEOMETRY from this, where 'location' can be WKT, WKB, json etc etc
        IF available we also insert the 'buffer' here.

        Note that we are NOT creating the valid location json here!

        Only the geometry part of the full location json below:
        "location": {
            "buffer": 5,
            "geometry": {
                "type": "Point",
                "coordinates": [
                    408241,
                    78648
                    ]
                }
            }

        :returns: True in case the creation succeeded, False in case of a failure
        """
        # log.debug(f"Create geom: {type(location_data['location'])} {location_data['location']}")
        if 'location' not in location_data:
            # not even a location field (yet), silently return False
            return False
        elif (location_data['location'] is None and location_data['location_y'] is None
              and location_data['location_x'] is None and location_data['location_buffer'] is None):
            # no location value (yet), silently return False
            return False
        elif isinstance(location_data['location'], dict):
            # ok apparently the data is already a dict...
            # this dict should have a buffer already, but if not, see if it was in the data
            observation.location = location_data['location']
        else:
            # going to create the geometry dict here ourselves
            # check if we have an x/y or a wkt geom or a wkb
            if ('location_x' in location_data and location_data['location_x'] is not None
                    and 'location_y' in location_data and location_data['location_y'] is not None):
                # ok we have an x and y column here...
                observation.location = {'geometry': {}}
                observation.location['geometry']['type'] = 'Point'
                try:
                    observation.location['geometry']['coordinates'] = [float(location_data['location_x']),
                                                                       float(location_data['location_y'])]
                except Exception as e:
                    log.error(
                        f'Error creating a geometry from "{location_data["location_x"]}" and "{location_data["location_y"]}": {e}')
                    observation.location['geometry'] = {}
                    # NOT returning False because it's fixable later...
            elif location_data['location'] is not None:
                NdffConnector.parse_wkt_wkb(observation, location_data['location'])
            else:
                log.debug('(Yet) unknown geometry type in your wkt...')
                return False

        if 'location_buffer' in location_data and is_numeric(location_data['location_buffer']):  # is_numeric tests both for a number and a value
            observation.location['buffer'] = location_data['location_buffer']

        if observation.location and 'geometry' in observation.location and 'coordinates' in observation.location['geometry']:
            # ok we have a geometry with coordinates....
            # ?? fix the crs needed for the api to set the crs header
            # self._set_crs(observation.location['geometry']['coordinates'])
            # all well, return True
            return True

        return False

    @staticmethod
    def get_crs(observation: NdffObservation) -> str:
        """
        Inspect the coordinates of the 'location' of given NdffObservation, and determine if we actually have lat/lon
        coordinates (EPSG:4326) or Amersfoort coordinates (EPSG:28992).
        Rule is: both coordinates < 180 => 4326
        Coordinates < 640000 AND > 0 => 28992
        Else throw exceptions

        "location": {
            "buffer": 5,
            "geometry": {
                "type": "Point",
                "coordinates": [
                    408241,
                    78648
                    ]
                }
            }

        :param NdffObservation observation: observation to inspect the location from
        ;raise: ValueError in case coordinates seem to be outside valid options
        :return: string either 'EPSG:28992' or 'EPSG:4326'
        """
        # TODO wrong geometry might pass as 4326
        crs = NdffConnector.EPSG_4326
        location = observation.location
        if location is not None and isinstance(location, dict):
            if ('geometry' in location and location['geometry'] and 'coordinates' in location['geometry']
                    and 'type' in location['geometry']):
                geometry = location['geometry']
                coordinate_list = NdffConnector.get_coordinate_list(geometry)

                # inspect the coordinates and determine the crs
                if float(max(coordinate_list)) < 180:
                    crs = NdffConnector.EPSG_4326
                elif float(max(coordinate_list)) < 640000 and float(min(coordinate_list)) >= 0:
                    crs = NdffConnector.EPSG_28992
                else:
                    raise ValueError("Coordinates outside of EPSG:4326 or EPSG:28992 range")
        return crs

    def next_ndff_observation(self) -> NdffObservation:
        """
        Method to fetch the next record as NdffObservation.

        Mostly usefull for CLI or client use.

        :return: NdffObservation
        """

        record = self.next_record()
        return self.map_data_to_ndff_observation(record)

    def __repr__(self) -> str:
        """
        String representation of a NdffConnector for debugging purposes: all data mappings are shown, but only the first
        10 field mappings
        :return: str
        """
        head = f"""Connector: datasource type: {type(self.datasource)}
ndff_api_settings: {self.ndff_api_settings}
field_mappings: {self.field_mappings}
data_mappings:"""
        data_mappings_s = ''
        for field, field_dict in self.data_mappings.items():
            data_mappings_s += f'{field} (first 10, if available):\n'
            i = 0
            for key, value in field_dict.items():
                data_mappings_s += f"'{key}': '{value}' \n"
                i += 1
                if i > 9:
                    break
        return head + data_mappings_s

    def save_to_directory(self, ndff_config_directory: str, with_datasource=False):
        """
        Write current (all) configuration settings to a directory 'ndff_settings' IN the
        ndff_config_directory given as first parameter

        :param: ndff_config_directory directory to use to create a dir 'ndff_settings'
        and write all params in it
        :param: with_datasource if this is written from within the QGIS plugin, we can optionally
        write a data_source.csv (only for Postgres and CSV datasources)
        :return:
        """
        try:
            # write all needed files to this ndff_config_directory
            path = Path(ndff_config_directory)
            # check IF there is already NdffConnector.NDFF_SETTINGS_DIR ending
            if path.name != NdffConnector.NDFF_SETTINGS_DIR:
                ndff_config_directory = path / NdffConnector.NDFF_SETTINGS_DIR
            if not ndff_config_directory.is_dir():
                ndff_config_directory.mkdir(parents=True)
            timestamp = datetime.now().replace(microsecond=0).isoformat()
            if with_datasource:
                # QGIS did not use the connector datasource, IF we want to write a datasource settings
                self.write_datasource_settings(ndff_config_directory, timestamp)
            self.write_field_mappings(ndff_config_directory, timestamp)
            self.write_data_mappings(ndff_config_directory, timestamp)
        except Exception as e:
            log.error(f'Error saving these NDFF Settings to this directory: {ndff_config_directory}\n{e}')
            return False
        return True

    def write_datasource_settings(self, ndff_config_directory: str, timestamp: str):
        """
        Write the datasource_settings to disk as csv

        :param ndff_config_directory:
        :param timestamp:
        :return:
        """
        with open(Path(ndff_config_directory) / self.NDFF_DATASOURCE_SETTINGS, mode='w+', encoding='UTF8', newline='') as f:  # using 'with open', then file is explicitly closed
            writer = csv.writer(f)
            writer.writerow(['#'])
            writer.writerow([f'# This Datasource settings file is written on {timestamp} using the NDFF-Connector'])
            writer.writerow(['#'])
            if self.datasource_settings:
                for key, value in self.datasource_settings.items():
                    writer.writerow([key, value])
            else:
                log.debug(f'write_datasource_settings has no settings to write: {self.datasource_settings}')

    def write_field_mappings(self, ndff_config_directory: str, timestamp: str):
        """
        Write the field_mappings_settings to disk as csv

        :param ndff_config_directory:
        :param timestamp:
        :return:
        """
        with open(Path(ndff_config_directory) / self.NDFF_OBSERVATION_FIELD_MAPPINGS, 'w+', encoding='UTF8', newline='') as f:  # using 'with open', then file is explicitly closed
            writer = csv.writer(f)
            writer.writerow(['#'])
            writer.writerow([f'# this Field mapping file is written on {timestamp} using the NDFF-Connector'])
            writer.writerow(['#'])
            # for key, value in self.field_mappings.items():
            #    writer.writerow(value)
            # splitting the extra_info key/values from the rest
            normals = dict(filter(lambda elem: 'extra_info_' not in elem[0], self.field_mappings.items()))
            for key, value in normals.items():
                writer.writerow(value)
            extra_infos = dict(filter(lambda elem: 'extra_info_' in elem[0], self.field_mappings.items()))
            # we are going to rewrite the index/numbers of the extra info's independent of current values
            index = 1  # base 1
            for key in extra_infos:
                if 'extra_info_identity_' in key:
                    counter = int(key.replace('extra_info_identity_', ''))
                    if f'extra_info_identity_{counter}' in extra_infos:
                        key_row = list(extra_infos[f'extra_info_identity_{counter}'])
                        value_row = list(extra_infos[f'extra_info_value_{counter}'])
                        # recounting/mapping:
                        key_row[0] = key_row[0].replace(str(counter), str(index))
                        value_row[0] = value_row[0].replace(str(counter), str(index))
                        writer.writerow(key_row)
                        writer.writerow(value_row)
                        index += 1
                    else:
                        raise ValueError(f'extra_info_identity_{counter} does not have a corresponding extra_info_value_{counter}.. in the field mappings:\n{self.field_mappings}')
            # print(extra_infos)

    def write_data_mappings(self, ndff_config_directory: str, timestamp: str):
        """
        Write the data mappings to disk as a csv

        NOTE! since 20221202 ALL KEYS in the csv files will be lowercase!!
        Meaning also searching for a key should use .lower() before searching !!!

        The data mappings are written PER FIELD as a csv with name mappings_<fieldname>.csv
        so for example for taxon: mappings_taxon.csv
        """
        for field in self.data_mappings.keys():
            # BUT only if there ARE actually mappings
            if len(self.data_mappings[field]) > 0:
                with open(Path(ndff_config_directory) / f'mappings_{field}.csv', 'w+', encoding='UTF8', newline='') as f:
                    writer = csv.writer(f)
                    writer.writerow(['#'])
                    writer.writerow([f'# this Data mapping file for {field} is written {timestamp} using the NDFF-Connector'])
                    writer.writerow(['#'])
                    for key, value in (self.data_mappings[field]).items():
                        # 20221202 make sure that in (new) csv files ALL keyw will be lowercase
                        writer.writerow([key.lower(), value])
