from ..api import (
    Api,
    NdffObservation,
    NdffResult,
    NdffDataset,
)
from ..datasource.base import DataSource
from ..utils import (
    is_uri,
    is_hex,
)
from pathlib import Path
import shapely.wkt
import shapely.wkb
from shapely.geometry import mapping
from datetime import datetime
import csv
import re
import tempfile
from typing import Union
from collections import deque
import logging
log = logging.getLogger(__name__)


class NdffConnector:
    """
    A NdffConnector is the bridge between the NDFF Api and a DataSource.

    The Api configuration and the DataSource configuration are read from
    settings (csv) files. The connector also reads field-mappings (to map
    datasource fields to NdffObservation fields) and data-mappings (to map
    data VALUES to NDFF uri's) from settings files.

    The class-level names below are the settings file names, column indexes
    and lookup tables shared by all connectors.
    """

    # name of the subdirectory in which all settings (both the main/global ones
    # AND the project/datasource ones) are written
    NDFF_SETTINGS_DIR = 'ndff_settings'

    # global settings
    # file name of the settings file holding the settings to connect to the api
    NDFF_API_SETTINGS = 'ndff_api.csv'

    # datasource settings
    # file name of the settings file containing the type and datasource settings
    NDFF_DATASOURCE_SETTINGS = 'data_source.csv'

    # per project/datasource type settings
    # file name of the settings file holding the field/column mappings
    NDFF_OBSERVATION_FIELD_MAPPINGS = 'field_mappings.csv'

    # client settings
    # a file containing information for every NdffObservation field, for example
    # a (translated) text to show instead of the field name. It can also contain
    # the url a client should use to search for a value-uri, a more descriptive
    # text etc. See the FIELD_COL_* indexes below for the column layout.
    NDFF_OBSERVATION_CLIENT_SETTINGS = 'client_settings.csv'

    # ndff log file
    # a file holding the identities of successfully validated AND sent NdffObservations,
    # as a dict of 'identity':[identity, ndff_uri, timestamp]
    # NOTE(review): the original notes say this file is read (from the settings dir)
    # when the connector is created and can be written to disk again; the code
    # doing that is not part of the code shown here - confirm
    NDFF_LOG = 'ndff_log.csv'

    # identifiers of coordinate reference systems
    # (presumably WGS84 lon/lat and the Dutch RD New grid - TODO confirm where these are used)
    EPSG_4326 = 'EPSG:4326'
    EPSG_28992 = 'EPSG:28992'

    # strftime/strptime pattern for datetimes without timezone, e.g. 2022-01-01T12:12:00
    DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'

    # column indexes into one row (list) of the client settings:
    # name, text, url1, url2, description, ndff_fieldname, change_data
    # (index 0 is the field name itself, because rows with more than two columns
    # are stored including their key by read_dict_from_csv)
    FIELD_COL_NAME = 0
    FIELD_COL_TEXT = 1
    FIELD_COL_URL1 = 2
    FIELD_COL_URL2 = 3
    FIELD_COL_DESCRIPTION = 4
    FIELD_COL_NDFF_NAME = 5
    FIELD_COL_CHANGE_DATA = 6

    # marker text (Dutch for "FIELD NOT AVAILABLE")
    # NOTE(review): presumably used to flag a mapping to a non-existing field - confirm against callers
    WRONG_FIELD_MAPPING = "VELD NIET BESCHIKBAAR"

    # good enough tests for wkt
    # https://regex101.com/
    # only UPPERCASE geometry names match, like "POINT (145918.0769337003 389302.11025674635)"
    # optionally with a SRID=4326; in front (EWKT): SRID=4326;POINT (4.67979621887207 51.67397308349609)
    # TODO: QGIS expressions do NOT create POINT but Point (note caps): better to have a more forgiving regexp?
    valid_wkt_point_regex = re.compile(r"(SRID=\d+;)?POINT\s*\(.+\s.+\)", re.MULTILINE | re.UNICODE)
    valid_wkt_multipoint_regex = re.compile(r"(SRID=\d+;)?MULTIPOINT\s*\(.+\s.+\)", re.MULTILINE | re.UNICODE)
    valid_wkt_polygon_regex = re.compile(r"(SRID=\d+;)?POLYGON\s*\(.+\s.+\)", re.MULTILINE | re.UNICODE)

    # maps an Observation field name to the codes name (endpoint) used in the api,
    # search_codes() uses it to translate a field name into the type to search in
    mapping = {
        'taxon': 'taxa',
        'abundance_schema': 'abundancies',
        'abundance_value': 'abundancies',
        'determination_method': 'determinationmethods',
        'activity': 'activities',
        'biotope': 'biotopes',
        'lifestage': 'lifestages',
        'sex': 'sexes',
        'survey_method': 'surveymethods',
        'subject_type': 'subjecttypes',
        'extra_info_identity': 'extrainfo',
        'extra_info_value': 'extrainfo',
        'dwelling': 'dwellings',
    }

    # NOTE: a class-level dict, so ONE cache object shared by all connector instances
    # (what is cached in it is not visible in this part of the file)
    has_related_codes_cache = {}

    @staticmethod
    def datetimestring_to_datetime(date_only_or_datetime_string):
        if str == type(date_only_or_datetime_string) and len(date_only_or_datetime_string.strip()) < 11:
            dt = datetime.strptime(date_only_or_datetime_string, '%Y-%m-%d')
            dt = dt.replace(hour=12, minute=00, second=00)
        elif str == type(date_only_or_datetime_string) and len(date_only_or_datetime_string.strip()) < 20:
            # 2022-01-01T12:12:00 OR 2022-01-01T12:12:00 both are 'ISO-format' :-(
            dt = datetime.fromisoformat(date_only_or_datetime_string)
        else:
            raise ValueError(f'Trying to create a datetime from a date(time)string which does not to be OK: {date_only_or_datetime_string}')
        return dt
    #
    # @staticmethod
    # def is_valid_wkt_point(location_string: str) -> bool:
    #     # 'POINT (145918.0769337003 389302.11025674635)'
    #     if NdffConnector.valid_wkt_point_regex.match(location_string.upper()):
    #         return True
    #     return False
    #
    # @staticmethod
    # def is_valid_wkt_multipoint(location_string: str) -> bool:
    #     # NOTE: NOT SUPPORTED BY NDFF
    #     if NdffConnector.valid_wkt_multipoint_regex.match(location_string.upper()):
    #         return True
    #     return False
    #
    # @staticmethod
    # def is_valid_wkt_polygon(location_string: str) -> bool:
    #     # POLYGON ((4.41042 51.86444, 4.41042 51.86444, 4.410441 51.86443, 4.41044 51.86444, 4.41042 51.86444))
    #     if NdffConnector.valid_wkt_polygon_regex.match(location_string.upper()):
    #         return True
    #     return False

    @staticmethod
    def is_valid_wkt(location_wkt_string: str) -> bool:
        """
        Check if the given string can be loaded as a valid geometry.

        A string for which is_hex() is true is loaded as (hex encoded) WKB,
        every other string is loaded as WKT.

        :param location_wkt_string: the WKT (or hex WKB) string to check
        :return: True if shapely could load the string AND reports the geometry
            as valid, False in all other cases (including every exception
            raised while loading)
        """
        try:
            if not is_hex(location_wkt_string):
                # WKT
                geometry = shapely.wkt.loads(location_wkt_string)
            else:
                # WKB !
                geometry = shapely.wkb.loads(location_wkt_string, hex=True)
            return True if geometry.is_valid else False
        except Exception:
            # deliberately best-effort: anything that cannot be loaded is 'not valid'
            return False

    @staticmethod
    def read_dict_from_csv(filename: Union[str, Path]) -> dict:
        """
        Utility method to read a csv into a dict

        :param: filename full (string) path to csv
        :return: dict
        """
        # TODO: handle absolute and relative paths Note: test/check well for linux/windows paths!! see:
        # https://stackoverflow.com/questions/3320406/how-to-check-if-a-path-is-absolute-path-or-relative-path-in-a-cross-platform-way
        dictionary = {}
        if (Path(filename)).is_file():
            with open(filename, mode='r', newline='', encoding='utf-8-sig') as f:
                # DictReader, returns a dictionary
                # csv_reader = csv.DictReader(filter(lambda rw: rw[0] != '#', f), delimiter=',', skipinitialspace=True)  # skipping commented lines
                # Normal reader returns one list per row, adding to it based on first column...
                # WHICH IN THIS CASE HAS TO BE UNIQUE !!
                csv_reader = csv.reader(filter(lambda rw: rw[0] != '#', f), delimiter=',', skipinitialspace=True)  # skipping commented lines
                for row in csv_reader:
                    if len(row) == 2:
                        # simple key value pairs
                        dictionary[row[0]] = row[1]
                    elif len(row) > 2:
                        # # rows with more than 2 columns: take the first one as key, others as list:
                        # # row "key,val1,val2" will result in {key:(val1,val2)}
                        # # thinking about using a DictReader here... but ...
                        # if len(row[0]) > 0:  # skipping 'comment rows', being ",,,,,"
                        #     dictionary[row[0]] = row[1:]

                        # rows with more than 2 columns: take the first one as key, full list as value:
                        # row "key,val1,val2" will result in {key:(key, val1,val2)}
                        # thinking about using a DictReader here... but ...
                        if len(row[0]) > 0:  # skipping 'comment rows', being ",,,,,"
                            dictionary[row[0]] = row[0:]
                    else:
                        log.debug(f'Skipping this row in {filename}: "{row}"')
        else:
            pass
            # we deliberately try all files in different dirs...
            # this make is it easier to use this function (instead of checking if something is a file or not)
            #log.warning("Filename '{filename}' does not seem to be a file?")
        return dictionary

    @staticmethod
    def create_from_directories(global_config_dir: str, user_config_dir: str, from_within_qgis=False):
        """
        Create a NdffConnector from two settings directories, by 'merging' every
        user/project settings file OVER the main/global settings file with the
        same name (the user value wins when both define the same key).

        :param global_config_dir: directory holding the main/global settings;
            when it contains a DataSource.NDFF_SETTINGS_DIR subdirectory, that
            subdirectory is used instead
        :param user_config_dir: directory holding the user/project settings; if
            it does not end on NdffConnector.NDFF_SETTINGS_DIR, that
            subdirectory of it is used
        :param from_within_qgis: if True the datasource settings are NOT read
            (None is handed to the connector instead)
        :return: the newly created NdffConnector
        :raises ValueError: if a field mapping does not consist of 3 values
        """
        # the global dir has the OPTION(!) to end on DataSource.NDFF_SETTINGS_DIR:
        # only when it does not, AND such a subdirectory exists IN it, that one is taken
        # NOTE(review): the global dir is tested against DataSource.NDFF_SETTINGS_DIR, the
        # user dir against NdffConnector.NDFF_SETTINGS_DIR - confirm both hold the same name
        global_dir = Path(global_config_dir)
        if global_dir.name != DataSource.NDFF_SETTINGS_DIR and (global_dir / DataSource.NDFF_SETTINGS_DIR).is_dir():
            global_dir = global_dir / DataSource.NDFF_SETTINGS_DIR
        user_dir = Path(user_config_dir)
        if user_dir.name != NdffConnector.NDFF_SETTINGS_DIR:
            user_dir = user_dir / NdffConnector.NDFF_SETTINGS_DIR

        def merged_settings(csv_name):
            # read the global file first, then lay the user file over it
            combined = NdffConnector.read_dict_from_csv(Path(global_dir) / csv_name)
            combined.update(NdffConnector.read_dict_from_csv(Path(user_dir) / csv_name))
            return combined

        api_settings = merged_settings(NdffConnector.NDFF_API_SETTINGS)
        log.debug(f'Using ndff_api_settings: {api_settings}')

        # ONLY if using the library OUTSIDE the QGIS plugin, load the datasource settings.
        # IF within QGIS, QGIS/plugin will load the features from the active layer
        source_settings = None
        if not from_within_qgis:
            source_settings = merged_settings(NdffConnector.NDFF_DATASOURCE_SETTINGS)
            log.debug(source_settings)

        settings_for_client = merged_settings(NdffConnector.NDFF_OBSERVATION_CLIENT_SETTINGS)
        log.debug(f'Client settings: {settings_for_client}')

        # field mappings and defaults
        # NOTE: the user/project settings should ALWAYS have a field_mappings.csv
        mappings_of_fields = merged_settings(NdffConnector.NDFF_OBSERVATION_FIELD_MAPPINGS)
        # extra check, to be sure we only have triplets for every mapping
        for key, value in mappings_of_fields.items():
            if len(value) != 3:
                raise ValueError(f'Error in field mappings, ALL values should be triplets, but at least one: "{key}" is not a triplet (add a comma to make it one): "{value}"')

        # every field of a NdffObservation has a potential string->ndff-uri DATA
        # mapping file, named after the field: mappings_<field>.csv
        # (NOT going to log these potentially giant data mappings)
        mappings_of_data = {
            field: merged_settings(f'mappings_{field}.csv')
            for field in NdffObservation().fields()
        }

        # Create the actual connector instance:
        connector = NdffConnector(
            ndff_api_settings=api_settings,
            datasource_settings=source_settings,
            field_mappings=mappings_of_fields,
            data_mappings=mappings_of_data,
            client_settings=settings_for_client,
            ndff_log_dir=user_dir,
        )
        log.debug('Created NdffConnector (create_from_directories)')
        return connector

    def __init__(self, ndff_api_settings: Union[dict, None] = None, datasource_settings: dict = {}, field_mappings: Union[dict, None] = None, data_mappings: Union[dict, None] = None, client_settings: Union[dict, None] = None, ndff_log_dir: str = tempfile.gettempdir()):
        """
        Create a connector, its DataSource (from the datasource settings) and
        the iterator over the records of that DataSource.

        Every connector gets its OWN dict for settings/mappings which are not
        given: a dict as default argument would be one object shared by (and
        modifiable via) all connectors created without that argument.

        :param ndff_api_settings: settings to connect to the api
        :param datasource_settings: settings handed to DataSource.create_from_settings,
            None is handed over as is
        :param field_mappings: dict of field -> (field, mapped field, default)
        :param data_mappings: dict of field -> dict of data value -> ndff uri
        :param client_settings: dict of field -> row with client information
        :param ndff_log_dir: directory of the ndff log, defaults to the temp dir
        """
        self.ndff_api_settings = {} if ndff_api_settings is None else ndff_api_settings
        # an explicit None has to stay distinguishable from 'not given' here
        # (create_from_directories hands over None when running within QGIS),
        # so the default stays an empty dict in the signature, but that shared
        # default object itself is never stored or handed out
        if type(datasource_settings) is dict and not datasource_settings:
            datasource_settings = {}
        self.datasource_settings = datasource_settings
        self.datasource = DataSource.create_from_settings(self.datasource_settings)
        self.data_records = self.datasource.get_records()
        self.field_mappings = {} if field_mappings is None else field_mappings
        self.data_mappings = {} if data_mappings is None else data_mappings
        self.client_settings = {} if client_settings is None else client_settings
        self.ndff_log_dir = ndff_log_dir
        # NOT going to create an api connection NOW, do it later when somebody requests for a live api connection
        self._api = None

    def next_record(self):
        """
        Take the next record from the data records iterator of this connector.

        :return: the next record (a dict with the key/value pairs of that record)
        :raises StopIteration: when there are no more records
        """
        record = next(self.data_records)
        return record

    def set_data_records(self, data_records_iterator: iter):
        """
        Replace the iterator from which 'next_record' takes its records.

        Normally the specific DataSource provides the data records, but clients
        (like QGIS) can have their own data. By handing over an iterator of
        their records here, they can still retrieve those via 'next_record'.

        :param data_records_iterator: iterator returning one record at a time
        """
        self.data_records = data_records_iterator

    def get_api(self, fresh_one=False):
        """
        Return the Api instance of this connector, creating it when needed.

        The Api is created lazily: only when there is none yet, OR when a
        fresh one is requested. Requesting a fresh one makes it possible to
        'invalidate' the api of a connector, for example after a change of the
        ndff_api_settings, via 'get_api(True)' or 'get_api(fresh_one=True)'.

        :param fresh_one: if True drop the current Api instance (and the saved
            tokens) and create a NEW one
        :return: the Api instance
        """
        if fresh_one:
            self._api = None
            # and also clean up potentially saved tokens of the old one
            Api.remove_saved_tokens()
        current = self._api
        if current is None:
            # lazy Api object creation
            current = Api(self.ndff_api_settings)
            self._api = current
        return current

    def get_field_mapping(self, field: str) -> str:
        """
        Get the mapping (the name of the mapped datasource field) of a field.
        The connector is responsible for the bookkeeping of the mappings.

        :param field: name of the observation field
        :return: the (stripped) second value of the mapping of this field, or
            None if the field is unknown or that value is '', '-' or 'None'
        :raises Exception: if this connector does not have any field mappings
        """
        if not self.field_mappings.keys():
            raise Exception('This Connector does not have ANY mappings (from file field_mappings.csv), this should not happen...')
        if field not in self.field_mappings:
            return None
        mapped_name = str(self.field_mappings[field][1]).strip()
        # an empty value, a dash or a stringified None all mean: not mapped
        return None if mapped_name in ('', '-', 'None') else mapped_name

    def set_field_mapping(self, field: str, mapped_field_name) -> bool:
        """
        Set the mapping (the name of the mapped datasource field) of a field.

        :param field: name of the observation field
        :param mapped_field_name: the new second value of the mapping of this field
        :return: True if the mapping is changed, False if the field is unknown
        :raises Exception: if this connector does not have any field mappings
        """
        if not self.field_mappings.keys():
            raise Exception('This Connector does not have ANY mappings (from file field_mappings.csv), this should not happen...')
        if field not in self.field_mappings:
            return False
        # a mapping can be an immutable tuple (field, value, default):
        # edit a list copy of it and store the result as tuple again
        entry = list(self.field_mappings[field])
        entry[1] = mapped_field_name
        self.field_mappings[field] = tuple(entry)
        return True

    def get_field_default(self, field: str) -> Union[str, None]:
        """
        Get the default value of a field.

        In line with get_field_mapping, an empty default results in None too
        (instead of in an empty string).

        :param field: name of the observation field
        :return: the (stripped) third value of the mapping of this field, or
            None if the field is unknown or that value is '', '-' or 'None'
        :raises Exception: if this connector does not have any field mappings
        """
        if not self.field_mappings:
            raise Exception('This Connector does not have ANY mappings (from file field_mappings.csv), this should not happen...')
        if field not in self.field_mappings:
            return None
        default = str(self.field_mappings[field][2]).strip()
        # an empty value, a dash or a stringified None all mean: no default
        if default in ('', '-', 'None'):
            return None
        return default

    def set_field_default(self, field: str, field_default) -> bool:
        """
        Set the default value of a field.

        Note: after this call the mapping of the field is stored as a list.

        :param field: name of the observation field
        :param field_default: the new third value of the mapping of this field
        :return: True if the default is changed
        :raises Exception: if this connector does not have any field mappings,
            or if the field is not available in the field mappings
        """
        if not self.field_mappings.keys():
            raise Exception('This Connector does not have ANY mappings (from file field_mappings.csv), this should not happen...')
        if field not in self.field_mappings:
            raise Exception(f'This Connector tries to set a default for field "{field}" but is NOT available in mapping definition (from file field_mappings.csv), this should not happen...')
        # a mapping can be an immutable tuple: replace it by a list first,
        # to be able to edit the default in it
        editable_entry = list(self.field_mappings[field])
        self.field_mappings[field] = editable_entry
        editable_entry[2] = field_default
        return True

    def get_field_text(self, field: str) -> str:
        """
        Get the text of a field from the client settings.

        :param field: name of the observation field
        :return: the FIELD_COL_TEXT column of the client settings of this
            field, or the field name itself if there are no settings for it
        """
        if field not in self.client_settings.keys():
            return field
        return self.client_settings[field][NdffConnector.FIELD_COL_TEXT]

    def get_field_ndff_name(self, field: str) -> str:
        """
        Get the ndff name of a field from the client settings.

        :param field: name of the observation field
        :return: the FIELD_COL_NDFF_NAME column of the client settings of this
            field, or the field name itself if there are no settings for it
        """
        if field not in self.client_settings.keys():
            return field
        return self.client_settings[field][NdffConnector.FIELD_COL_NDFF_NAME]

    def get_field_description(self, field: str) -> str:
        """
        Get the description of a field from the client settings.

        :param field: name of the observation field
        :return: the FIELD_COL_DESCRIPTION column of the client settings of this
            field, or the field name itself if there are no settings for it
        """
        if field not in self.client_settings.keys():
            return field
        return self.client_settings[field][NdffConnector.FIELD_COL_DESCRIPTION]

    def get_field_url1(self, field: str) -> str:
        """
        Get the first url of a field from the client settings.

        :param field: name of the observation field
        :return: the FIELD_COL_URL1 column of the client settings of this field
            (or the field name itself if there are no settings for it), where
            a blank value, a '-' or a 'None' is returned as empty string
        """
        if field in self.client_settings.keys():
            candidate = self.client_settings[field][NdffConnector.FIELD_COL_URL1]
        else:
            candidate = field
        is_placeholder = bool(candidate) and str(candidate).strip() in ('', '-', 'None')
        return '' if is_placeholder else candidate

    def get_field_url2(self, field: str) -> str:
        """
        Get the second url of a field from the client settings.

        :param field: name of the observation field
        :return: the FIELD_COL_URL2 column of the client settings of this field
            (or the field name itself if there are no settings for it), where
            a blank value, a '-' or a 'None' is returned as empty string
        """
        if field in self.client_settings.keys():
            candidate = self.client_settings[field][NdffConnector.FIELD_COL_URL2]
        else:
            candidate = field
        is_placeholder = bool(candidate) and str(candidate).strip() in ('', '-', 'None')
        return '' if is_placeholder else candidate

    def get_field_change_type(self, field: str) -> str:
        """
        Get the change type of a field from the client settings.

        :param field: name of the observation field
        :return: the FIELD_COL_CHANGE_DATA column of the client settings of this
            field, or the field name itself if there are no settings for it
        """
        if field not in self.client_settings.keys():
            return field
        return self.client_settings[field][NdffConnector.FIELD_COL_CHANGE_DATA]

    def add_extra_info_field_mapping(self, new_identity: str, new_value: str) -> bool:
        """
        Add a new pair of extra info mappings to the field mappings.

        The extra_info of an observation comes from a list of key value pairs.
        The key/identity should always be an uri. The value of it could be a
        string/number OR uri, BUT is always MAPPED from the data.

        The new pair is stored as 'extra_info_identity_<n>' and
        'extra_info_value_<n>', where <n> is one higher than the highest
        index in use.

        :param new_identity: uri of the extra info
        :param new_value: field name (or value) of the extra info
        :return: True if success, False if new_identity is not an uri or if
            the field mappings are empty (or not a dict)
        """
        if not is_uri(new_identity):
            log.debug(f'Trying to set an extra info field mapping with an identity which is not an uri: "{new_identity}"')
            return False
        if not (self.field_mappings and isinstance(self.field_mappings, dict)):
            return False
        # the index/runner is the part of the key after 'extra_info_identity_' (20 characters)
        indexes_in_use = [
            int(name[20:])
            for name in self.field_mappings
            if name.startswith('extra_info_identity_')
        ]
        next_index = max([0] + indexes_in_use) + 1
        identity_key = f'extra_info_identity_{next_index}'
        value_key = f'extra_info_value_{next_index}'
        self.field_mappings[identity_key] = (identity_key, new_identity, None)
        self.field_mappings[value_key] = (value_key, new_value, None)
        return True

    def delete_extra_info_field_mapping(self, field_map_key: str, field_map_value: str) -> bool:
        """
        Remove a pair of extra info mappings from the field mappings.

        :param field_map_key: key in the field mappings, like: extra_info_identity_2
        :param field_map_value: key in the field mappings, like: extra_info_value_2
        :return: True if both are removed, False (nothing removed) if one of
            them is not available in the field mappings
        """
        mappings = self.field_mappings
        if field_map_key not in mappings or field_map_value not in mappings:
            return False
        mappings.pop(field_map_key)
        mappings.pop(field_map_value)
        return True

    def change_extra_info_field_mapping(self, field_map_key: str, new_identity: str, field_map_value: str, new_value: str) -> bool:
        """
        Change an existing pair of extra info mappings, found in the live field
        mappings via 'field_map_key' and 'field_map_value', which are the 'keys'
        used in the field_mappings dict, like: extra_info_identity_2 and
        extra_info_value_2

        Nothing is changed unless BOTH keys are available, so a pair is never
        left half changed.

        :param field_map_key: key like: extra_info_identity_2
        :param new_identity: new identity of the extra info (ndff URI !!!)
        :param field_map_value: key like: extra_info_value_2
        :param new_value: new value of the extra info (fieldname or value)
        :return: True if both mappings are changed, False if one of the keys
            is not available in the field mappings
        :raises Exception: if this connector does not have any field mappings
        """
        if len(self.field_mappings.keys()) == 0:
            raise Exception('This Connector does not have ANY mappings (from file field_mappings.csv), this should not happen...')
        if field_map_key not in self.field_mappings or field_map_value not in self.field_mappings:
            return False
        identity_changed = self.set_field_mapping(field_map_key, new_identity)
        value_changed = self.set_field_mapping(field_map_value, new_value)
        return identity_changed and value_changed

    def change_extra_info_field_mapping_values(self, old_identity: str, old_value: str, new_identity: str, new_value: str) -> bool:
        """
        Change a pair of extra info mappings, found via their current CONTENT.

        The extra_info of an observation comes from a list of key value pairs.
        Because it is not known if a key of the extra info can be used multiple
        times, a pair is looked up in the current mappings on BOTH its
        key(-uri) AND its value(-mapping). The first pair matching both is
        changed.

        Note: it is probably safer to use 'change_extra_info_field_mapping' as
        theoretically an old_identity can be reused in a set of extra info
        (deprecate this one?)

        :param old_identity: actual URI of this extra info
        :param old_value: actual fieldname (or value) of this extra info
        :param new_identity: URI of the new extra info
        :param new_value: new fieldname (or value) of this extra info
        :return: True if success, else (no matching pair, or new_identity is
            not an uri) False
        """
        for identity_key in self.field_mappings:
            if not identity_key.startswith('extra_info_identity_'):
                continue
            # the value mapping has the same index/runner as the identity mapping
            value_key = f'extra_info_value_{int(identity_key[20:])}'
            if not (self.field_mappings[identity_key][1] == old_identity and self.field_mappings[value_key][1] == old_value):
                continue
            # found the pair, but only replace it if the new identity is an uri too
            if not is_uri(new_identity):
                log.debug(f'Trying to change an extra info field mapping with an identity which is not an uri: "{new_identity}"')
                continue
            self.field_mappings[identity_key] = (identity_key, new_identity, None)
            self.field_mappings[value_key] = (value_key, new_value, None)
            return True
        log.error(f'Failing to change_extra_info_field_mapping_values: "{old_identity}":"{old_value}" to "{new_identity}":"{new_value}"')
        return False

    def get_data_mapping(self, field: str, field_key: str) -> Union[str, None]:
        """
        Look up the uri to which a data value (field_key) of a field is mapped.

        Data mappings are dictionaries, one per field. In the current
        implementation they come from files in the settings called
        'mappings_<field>.csv'.

        NOTE! since 20221202 ALL KEYS in the csv files should be lowercase, so
        the lowercase version of field_key is searched for first. Because
        sometimes people write or still have mappings with uppercase keys,
        field_key is searched for as is after that (backwards compatibility).

        Drawback of this implementation: you cannot have the same key for 2
        different uri's in the mappings of one field. So IF you have
        hop --> plant uri
        you cannot have
        hop --> bird uri
        in the same file...
        You CAN have the same uri (as value) several times in the mappings: for
        example for the nl name, the scientific name, an abbreviation, and
        maybe an often made misspelling of the nl name etc...

        NOTE! the mapping keys for abundance_value (like Tansley codes) consist
        of BOTH the schema AND the value,
        E.g. http://ndff-ecogrid.nl/codes/scales/tansley:s

        :param field: the actual field (taxon, activity etc)
        :param field_key: the string which should map to a certain uri
        :return: the uri for the field_key, or None if there is no mapping for
            it (deliberately NOT False, as the result can end up in an
            Observation)
        :raises KeyError: if there are no data mappings for the field at all
        """
        # preferred (and hopefully most apparent) candidate first
        for candidate in (str(field_key).lower(), field_key):
            if candidate in self.data_mappings[field]:
                return self.data_mappings[field][candidate]
        return None

    def get_dataset_types(self) -> deque:
        """
        Retrieve ALL dataset types, using api.get_dataset_types (which only
        retrieves 1 page).

        The connector will 'page' through all dataset types (following the
        'next' links in the responses) until all are retrieved.
        (thinking that there will be no users/domains with that many datasettypes????)

        An empty deque is returned when the request for the first page fails.
        (deque is used because it is faster, better suited to pop left/right and more thread safe)

        :return: a deque of dataset_type objects like:
            {
                "_links": {
                    "self": {
                        "href": "https://accapi.ndff.nl/api/v2/domains/708/datasettypes/21532/"
                    }
                },
                "category": "inbox",
                "description": "inbox Test NGB",
                "identity": "http://ndff.nl/foldertypes/test_ngb/inbox"
            }
        :raises AssertionError: when the request for a next page does not
            return a http status 200
        """
        api = self.get_api()
        all_dataset_types = deque()
        result = api.get_dataset_types()  # one page at a time...
        if result['http_status'] != 200:
            log.debug(f'Error retrieving datasettypes for {api.user} domain {api.domain}...')
            return all_dataset_types
        page = result['http_response']
        all_dataset_types.extend(page['_embedded']['items'])
        while '_links' in page and 'next' in page['_links']:
            next_uri = page['_links']['next']['href']
            # next pages are requested by handing the full uri of the page to get_datasets
            result = api.get_datasets(ndff_uri=next_uri)
            status = result['http_status']
            if status != 200:
                # explicit check instead of an assert statement (which is skipped when
                # running python with -O), raising the same exception type as before
                raise AssertionError(f'Retrieving next page of datasettypes ({next_uri}) failed with http status {status}')
            page = result['http_response']
            all_dataset_types.extend(page['_embedded']['items'])
        return all_dataset_types

    def search_observations(self, field=None, field_value=None, max_hits=500) -> deque:
        """
        Retrieve observations for the current user and domain, using
        api.search_waarneming(field, field_value) (which only retrieves 1 page
        at a time).

        For example searching for one observation with given identity field:
        https://accapi.ndff.nl/api/v2/domains/708/observations/?identity=http://ecoreest.nl/test1/1

        Or all observations from given dataset:
        https://accapi.ndff.nl/api/v2/domains/708/observations/?dataset=http://ndff.nl/api-testngb/folders/1671554924123426753

        The connector will 'page' through the observations (following the
        'next' links in the responses) until all are retrieved, or until
        max_hits is reached. Only whole pages are added, so the result can
        hold more than max_hits observations.

        An empty deque is returned when the request for the first page fails
        or returns an empty set.
        (deque is used because it is faster, better suited to pop left/right and more thread safe)

        :param field: the observation field to search in (like identity or dataset)
        :param field_value: the string or url to search for in that field
        :param max_hits: the number of hits at which to stop paging
        :return: deque of observations
        :raises AssertionError: when the request for a next page does not
            return a http status 200
        """
        api = self.get_api()
        all_observations = deque()
        result = api.search_waarneming(field, field_value)  # one page at a time...
        if result['http_status'] != 200:
            log.debug(f'Error retrieving observations for {api.user} domain {api.domain}...')
            return all_observations
        page = result['http_response']
        all_observations.extend(page['_embedded']['items'])
        while '_links' in page and 'next' in page['_links']:
            if len(all_observations) >= max_hits:
                log.debug(f'Max hits for an observation search is set to {max_hits}, but is now: {len(all_observations)}, quitting with paging... ')
                break
            next_uri = page['_links']['next']['href']
            # next pages are requested by handing the full uri of the page to get_datasets
            result = api.get_datasets(ndff_uri=next_uri)
            status = result['http_status']
            if status != 200:
                # explicit check instead of an assert statement (which is skipped when
                # running python with -O), raising the same exception type as before
                raise AssertionError(f'Retrieving next page of observations ({next_uri}) failed with http status {status}')
            page = result['http_response']
            all_observations.extend(page['_embedded']['items'])
        return all_observations

    def search_codes(self, search_text: str, search_type: str = 'codes', search_field: str = None, max_hits=250, page_size: int = 25) -> deque:
        """
        Search the codes lists of the API (filter or search endpoint) and page
        through the results.

        The search_type is the codes type to search for
        (the api endpoints, like abundancies, taxa, extrainfo etc, see https://accapi.ndff.nl/codes/v2/)
        When search_type is a key of self.mapping (eg an observation field like
        'taxon'), it is first translated to the mapped codes type (eg 'taxa').

        When a 'search_field' is given, that field is used to search in.
        Possible fields: description, identity, indexvalue, name, rank, language, speciesgroup
        For example search in the name field of taxa (NOTE: name searches in both Dutch and Scientific name!!)
        https://accapi.ndff.nl/codes/v2/taxa/?name=pipi&ordering=-indexvalue
        or
        https://accapi.ndff.nl/codes/v2/taxa/?name=paardenbloem&ordering=-indexvalue

        If the 'search_field' is None, do not search over the field endpoint, but over the more general 'search' endpoint
        https://accapi.ndff.nl/codes/v2/extrainfo/?search=location_id

        When 'search_text' is an uri, search for an object via the identity uri like:
        https://accapi.ndff.nl/codes/v2/extrainfo/?identity=[IDENTITYURI]&ordering=-indexvalue

        Note that it is possible that the result contains a 'next' link, meaning we can page over the results.
        It is the responsibility of the connector to do this!
         {
            "_links": {
                "next": {
                    "href": "https://accapi.ndff.nl/api/v2/domains/168/datasets/?page=5"
                },
                "previous": {
                    "href": "https://accapi.ndff.nl/api/v2/domains/168/datasets/?page=3"
                },
                "self": {
                    "href": "https://accapi.ndff.nl/api/v2/domains/168/datasets/?page=4"
                }
            },
            "count": 103,
            "page_size": 25,
        ...
        }

        Paging stops when there is no 'next' link anymore, when max_hits items
        (or more) are collected, or when a following page cannot be retrieved.
        In the last case the codes collected so far are returned.
        An empty deque is returned when the first request fails.

        :param: search_text the string or url to search for
        :param: search_type: the observation fields (like extrainfo, taxa, etc)
        :param: search_field name or description
        :param: max_hits the number of hits (while paging) to stop; checked before
                every next page, so the result can hold more than max_hits items
        :param: page_size number of results per 'page' from api (default to ndff default 25)
        :return: deque
        """
        api = self.get_api()
        # the items of all pages are collected in one deque
        all_codes = deque()
        # get the field => search type mapping (eg a 'taxon' is to be searched in 'taxa')
        if search_type in self.mapping.keys():
            search_type = self.mapping[search_type]
        result = api.search_codes(search_text, search_type, search_field, page_size=page_size)  # one page (of size 'page_size') at a time...
        if result['http_status'] != 200:
            log.debug(f'Error searching codes: {search_text} {search_type} {search_field}...')
            return all_codes
        obj = result['http_response']
        all_codes.extend(obj['_embedded']['items'])
        while '_links' in obj and 'next' in obj['_links']:
            if len(all_codes) >= max_hits:
                log.debug(f'Max hits for a code search is set to {max_hits}, but is now: {len(all_codes)}, quitting with paging... ')
                break
            uri = obj['_links']['next']['href']
            result = api.search_codes(next_link=uri)
            if result['http_status'] != 200:
                # NOT using an assert here: asserts are removed when running with -O,
                # and an exception would throw away the pages we already have
                log.warning(f'Error retrieving next page of codes from {uri} (http status {result["http_status"]}), returning the {len(all_codes)} codes found so far')
                break
            obj = result['http_response']
            all_codes.extend(obj['_embedded']['items'])
        return all_codes

    def identity_has_related_codes(self, identity_uri=None) -> bool:
        """
        Tell whether the code with the given identity uri has 'related codes'.

        The identity uri is first resolved to a NDFF URL (by searching the
        'abundance_value' codes for it), and THEN the api is asked if that
        NDFF URL has related codes. Answers from the api are remembered in
        self.has_related_codes_cache (keyed on the identity uri).

        # NOTE: API searches using the NDFF URL, but connector searches/pages using the abundancy_schema URI

        :param: identity_uri
        :return: bool, False when identity_uri is not an uri or could not be resolved
        """
        if not is_uri(identity_uri):
            log.debug(f'Error has related codes using {identity_uri} (NOT an uri!)...')
            return False

        # an earlier answer for this identity wins
        cache = self.has_related_codes_cache
        if identity_uri in cache:
            return cache[identity_uri]

        # TODO FIRST check if the ndff_uri is a (resolvable) NDFF-uri, or a (NON resolvable) NDFF-identity
        # currently we know that this is a abundancy_schema IDENTITY uri, but we should also be able to handle a abundancy_schema NDFF URL, like https://accapi.ndff.nl/codes/v2/abundancies/179580026/
        found = self.search_codes(search_text=identity_uri, search_type='abundance_value')
        if len(found) < 1:
            log.debug(f'No NDFF-URL found when searching for "related codes" found for {identity_uri}')
            return False

        # the first hit holds the resolvable NDFF URL of the code
        resolved_url = found[0]['_links']['self']['href']
        answer = self.get_api().has_related_codes(resolved_url)
        # only answers of the api are cached, a failed lookup is not
        cache[identity_uri] = answer
        return answer

    def get_related_codes(self, identity_uri=None) -> deque:
        """
        Fetch ALL related codes of given identity_uri

        The identity uri is first resolved to a NDFF URL (by searching the
        'abundance_value' codes for it), then the related codes of that NDFF URL
        are retrieved page by page.

        Return an empty deque when identity_uri is not an uri, when the cache
        (self.has_related_codes_cache) already tells there are no related codes,
        when the identity cannot be resolved or when the first api request fails.
        When a following page cannot be retrieved, paging stops and the related
        codes collected so far are returned.

        # NOTE: API searches using the NDFF URL, but connector searches/pages using the abundancy_schema URI

        :param: identity_uri
        :return: deque
        """
        # the items of all pages are collected in one deque
        all_related_codes = deque()
        if not is_uri(identity_uri):
            log.debug(f'Error retrieving related codes using {identity_uri} (NOT an uri!)...')
            return all_related_codes
        # check cache: only a cached False is a reason to stop here
        if identity_uri in self.has_related_codes_cache and not self.has_related_codes_cache[identity_uri]:
            return all_related_codes

        api = self.get_api()
        # TODO FIRST check if the ndff_uri is a (resolvable) NDFF-uri, or a (NON resolvable) NDFF-identity
        # currently we know that this is a abundancy_schema IDENTITY uri, but we should also be able to handle a abundancy_schema NDFF URL, like https://accapi.ndff.nl/codes/v2/abundancies/179580026/
        codes = self.search_codes(search_text=identity_uri, search_type='abundance_value')
        if len(codes) == 0:
            log.debug(f'No NDFF-URL found when searching for "related codes" found for {identity_uri}')
            return all_related_codes

        ndff_uri = codes[0]['_links']['self']['href']

        result = api.get_related_codes(ndff_uri)  # one page at a time...
        if result['http_status'] != 200:
            log.debug(f'Error retrieving related codes using {ndff_uri} for {identity_uri}')
            return all_related_codes
        obj = result['http_response']
        all_related_codes.extend(obj['_embedded']['items'])
        while '_links' in obj and 'next' in obj['_links']:
            ndff_uri = obj['_links']['next']['href']
            result = api.get_related_codes(ndff_uri)  # one page at a time...
            if result['http_status'] != 200:
                # NOT using an assert here: asserts are removed when running with -O,
                # and an exception would throw away the pages we already have
                log.warning(f'Error retrieving next page of related codes from {ndff_uri} (http status {result["http_status"]}), returning the {len(all_related_codes)} related codes found so far')
                break
            obj = result['http_response']
            all_related_codes.extend(obj['_embedded']['items'])
        return all_related_codes

    def get_protocols(self) -> deque:
        """
        Using api.get_protocols() (only retrieving 1 page) to retrieve ALL(!) protocols
        as returned by the api instance of this connector.

        The connector will 'page' through all protocols (following the 'next'
        links in the responses) until all are retrieved.

        Return an empty deque when the first api request fails.
        When a following page cannot be retrieved, paging stops and the
        protocols collected so far are returned.

        Return a deque of protocol objects like when successful:

        {
            "_links": {
                "self": {
                    "href": "https://accapi.ndff.nl/api/v2/domains/708/protocols/130/"
                }
            },
            "description": "12.205 Monitoring Beoordeling Natuurkwaliteit EHS - N2000 (SNL-2014)",
            "identity": "http://ndff-ecogrid.nl/codes/protocols/12.205"
        }

        :return: deque
        """
        # the items of all pages are collected in one deque
        all_protocols = deque()
        api = self.get_api()
        result = api.get_protocols()  # one page at a time...
        if result['http_status'] != 200:
            log.debug(f'Error retrieving protocols for {api.user} domain {api.domain}...')
            return all_protocols
        obj = result['http_response']
        all_protocols.extend(obj['_embedded']['items'])
        while '_links' in obj and 'next' in obj['_links']:
            uri = obj['_links']['next']['href']
            result = api.get_protocols(ndff_uri=uri)
            if result['http_status'] != 200:
                # NOT using an assert here: asserts are removed when running with -O,
                # and an exception would throw away the pages we already have
                log.warning(f'Error retrieving next page of protocols from {uri} (http status {result["http_status"]}), returning the {len(all_protocols)} protocols found so far')
                break
            obj = result['http_response']
            all_protocols.extend(obj['_embedded']['items'])
        return all_protocols

    def get_datasets(self) -> deque:
        """
        Using api.get_datasets() (only retrieving 1 page) to retrieve ALL(!) datasets
        for current user and domain (as defined by the api instance).

        The connector will 'page' through all datasets (following the 'next'
        links in the responses) until all are retrieved.
        (thinking that there will be no users/domains with that many datasets????)

        Return an empty deque when the first api request fails.
        When a following page cannot be retrieved, paging stops and the
        datasets collected so far are returned.

        Return a deque of dataset objects like when successful:
            {
                "_links": {
                    "self": {
                        "href": "https://accapi.ndff.nl/api/v2/domains/708/datasets/2920720/"
                    }
                },
                "datasetType": "http://ndff.nl/foldertypes/test_ngb/startmap",
                "description": "Test NGB",
                "duration": null,
                "extrainfo": [],
                "identity": "http://ndff.nl/folders/2920720",
                "involved": [
                    {
                        "involvementType": "http://ndff-ecogrid.nl/codes/involvementtypes/folder_admin",
                        "person": "http://telmee.nl/contacts/persons/1311835"
                    },
                    {
                        "involvementType": "http://ndff-ecogrid.nl/codes/involvementtypes/observation_admin",
                        "person": "http://telmee.nl/contacts/persons/1311833"
                    }
                ],
                "location": null,
                "locationCoverage": null,
                "parent": "http://ndff.nl/folders/2920719",
                "periodStart": null,
                "periodStop": null,
                "protocol": null
            }

        :return: deque
        """
        # fetch ALL datasets for this user/domain
        api = self.get_api()
        # the items of all pages are collected in one deque
        all_datasets = deque()
        result = api.get_datasets()  # one page at a time...
        if result['http_status'] != 200:
            log.debug(f'Error retrieving datasets for {api.user} domain {api.domain}...')
            return all_datasets
        obj = result['http_response']
        all_datasets.extend(obj['_embedded']['items'])
        while '_links' in obj and 'next' in obj['_links']:
            uri = obj['_links']['next']['href']
            result = api.get_datasets(ndff_uri=uri)
            if result['http_status'] != 200:
                # NOT using an assert here: asserts are removed when running with -O,
                # and an exception would throw away the pages we already have
                log.warning(f'Error retrieving next page of datasets from {uri} (http status {result["http_status"]}), returning the {len(all_datasets)} datasets found so far')
                break
            obj = result['http_response']
            all_datasets.extend(obj['_embedded']['items'])
        return all_datasets

    def create_dataset(self, dataset: NdffDataset):
        """
        Let the connector create a new dataset (map) for current user/domain, as child dir of parent_dataset

        Return: NdffResult
        """
        if not dataset.is_valid():
            log.error(f'Dataset {dataset} is NOT valid, POSTING anyway!')
        return self.get_api().post_dataset(dataset.to_ndff_dataset_json(), dataset.identity)

    def search_dataset(self, dataset_identity_uri: str) -> Union[NdffDataset, None]:
        """
        Search for a Dataset using a valid dataset_identity_uri

        Return: a NDFFDataset (build from the json of the result)
        """
        if not is_uri(dataset_identity_uri):
            log.error(f'Searching a Dataset with an uri which seems not an uri: "{dataset_identity_uri}"')
            return None
        result = self.get_api().search_dataset(dataset_identity_uri)
        if result['http_status'] == 200:
            response = result['http_response']
            datasets = response['_embedded']['items']
            return NdffDataset.from_dataset_json_to_dataset(datasets[0])
        return None

    def set_data_mapping(self, field: str, field_key: str, field_value: str) -> bool:
        """
        Setting a data mapping (a data value => uri pair) for given field.

        This method only updates the in-memory dict self.data_mappings[field],
        it does not write anything to csv/disk/db itself.
        An already existing mapping for the same key is overwritten (with a
        warning in the log).

        NOTE! since 20221202 ALL KEYS in the csv files will be lowered case!!
        Meaning also searching for a key should use .lower() before searching !!!

        :param field: the actual field (taxon, activity etc)
        :param field_key: (the string which should map to a certain uri)
        :param field_value: an uri for the field_key
        :return: bool True if the mapping was set,
                else: False (empty field_key, field_value not an uri, or a field
                for which self.data_mappings holds no mappings dict)
        """
        # check if field_key is actually a string of length >= 1
        # check if field_value is actually an uri
        if field_key is None or len(field_key) < 1:
            log.error(f'Error: trying to set a data mapping for "{field}" with a field_key "{field_key}" (None or length < 1)')
            return False
        if field_value is None or not is_uri(field_value):
            log.error(f'Error: trying to set a data mapping for "{field}" with a field_value "{field_value}" (None or non-uri)')
            return False

        # an unknown field is reported via the return value (as documented),
        # instead of via a KeyError
        try:
            field_mappings = self.data_mappings[field]
        except KeyError:
            log.error(f'Error: trying to set a data mapping for "{field}", but there are no data mappings for that field')
            return False

        # check if field_key is already in this dict, if not add it with
        # the corresponding field_value
        key = field_key.lower()
        if key in field_mappings:
            log.warning(f'"{key}" already in mappings for {field}, with value {field_mappings[key]} OVERWRITING IT !!!')
        field_mappings[key] = field_value
        return True

    def map_data_to_ndff_observation(self, data_record: dict) -> NdffObservation:
        """
        Create a NdffObservation from one record of data, using the field
        mappings, field defaults and data mappings of this connector.

        The steps, in this order:
        1) for every field in self.field_mappings: copy the value of the mapped
           data column to the observation. The 'extra_info_identity_<n>' /
           'extra_info_value_<n>' pairs are set as extra info, and the location
           related fields (location, location_x, location_y, location_buffer)
           are only collected, to create a geometry from later on
        2) for fields that are still None in the observation: use the field
           default (if there is one). For location_buffer the default is only
           used when the record did not provide a value
        3) try to create the location/geometry of the observation (create_geom)
        4) for every observation field that has data mappings: replace the
           value with the mapped value (normally an NDFF uri) if there is a
           mapping for that value

        :param: data_record dict with the data of one record, keyed by the
                field/column names of the datasource
        :return: the NdffObservation (NOT validated here)
        """
        observation = NdffObservation()
        # creating a location during the ride...
        # the fields should be filled with actual data, so floats for x, y and buffer and wkt for location
        location_data = {
            'location': None,
            'location_x': None,
            'location_y': None,
            'location_buffer': None
        }
        # go over all FIELD MAPPINGS (which contain both the observation
        # fields AND some extra location related fields, and see if it is in the data)
        for field in self.field_mappings.keys():
            # OK, user SET a field mapping, now check IF this is actually a valid one:
            if field.startswith('extra_info_value'):
                pass  # handled at identity
            elif field.startswith('extra_info_identity'):
                # extra info comes in pairs with the same number:
                # extra_info_identity_<n> (mapped to the key) and
                # extra_info_value_<n> (mapped to the data column holding the value)
                counter = int(field.replace('extra_info_identity_', ''))
                key = self.get_field_mapping(f'extra_info_identity_{counter}')
                if not is_uri(key):
                    # NOTE: only logged, the (non uri) key is still used below
                    log.error(f'Trying to use a non uri key as extra info key: {key}')
                if self.get_field_mapping(f'extra_info_value_{counter}') in data_record:
                    value = data_record[self.get_field_mapping(f'extra_info_value_{counter}')]
                    observation.set_extra_info(key, value)
                else:
                    # NOTE: this re-uses the loop variable 'field', only for the log message
                    field = self.get_field_mapping(f'extra_info_value_{counter}')
                    log.error(f'Trying to find a (mapped) field "{field}" in the data.... is it a genuine fieldname?')
            elif self.get_field_mapping(field) in data_record:
                if field in location_data:
                    # handled differently: collect and use later to create location
                    location_data[field] = data_record[self.get_field_mapping(field)]
                else:  # a normal observation field: copy the value from the data
                    observation.set(field, data_record[self.get_field_mapping(field)])
            # a field mapping to a column which is NOT in the data is silently skipped for now:
            #else:
                # TODO: not sure what to do here, set a message so clients can check for that, OR raise an Exception, so mappings will be forced to fit...
                # raise ValueError(f'User set the field mapping of field "{field}" to "{self.get_field_mapping(field)}" but that field is NOT available in the data: {data_record}')
                #log.error(f'User set the field mapping of field "{field}" to "{self.get_field_mapping(field)}" but that field is NOT available in the data')
                #observation.set(field, self.WRONG_FIELD_MAPPING)

        # look for defined DEFAULT(!) values:
        # set the observation field to default IF it is either None (?OR it was missing?)
        for field in self.field_mappings.keys():
            # location_buffer is a special case: should not be available in the NdffObservation,
            # BUT is needed for a valid geometry, so should be either a mapped value (OR a default)
            # BUT we are only using the default value if it is not set by data from the record yet:
            if field == 'location_buffer' and location_data['location_buffer'] is None:
                location_data[field] = self.get_field_default(field)
            elif field in location_data:
                # the other location fields have no use for a default
                pass
            elif observation.get(field) is None and self.get_field_default(field):
                observation.set(field, self.get_field_default(field))
            else:
                # TODO log or raise exception?
                pass

        # fixing the location now, either it is filled with a dict or wkt (that is there IS a 'location' field)
        # OR location is still None, but x_y_location is filled during this ride...
        # NDFF Connector is responsible for creating a NDFF geometry from the record data
        # can be a WKT field, can be from an x,y, can go wrong...
        # log.error(f'Before create_geom: location_data = {location_data}')
        if self.create_geom(observation, location_data):
            log.debug('Successfully created a location object')
        else:
            log.debug('NOT created a valid location object (yet, now worries)')

        # now go over MAPPED DATA mappings for every observation field, to set the values (MAPPED to NDFF uri') in the Observation
        # NOTE: the two adjacent string literals below are concatenated by Python, so the logged text is "uris"
        log.debug('Starting to map data to uri''s in Observation')
        for field in observation.fields():
            # check IF there are DATA mappings for this field...
            if field in self.data_mappings.keys():
                # if the value in current observation DATA/FIELD is in the data_mappings (as key)
                # (they map from a term/abbreviation/name/id to an uri):
                mapping_key = observation.get(field)
                # SPECIAL CASE: the mapping keys for abundance_value contains both abundance_schema AND abundance_value
                # e.g. http://ndff-ecogrid.nl/codes/scales/tansley:s
                if field == 'abundance_value':
                    # we have to find the abundance_schema URI, can be non mapped OR via mapping
                    abundance_schema = observation.get('abundance_schema')
                    # the names of the data columns: the mapped column name, or else the field name itself
                    abundance_schema_field = 'abundance_schema'
                    if self.get_field_mapping('abundance_schema'):
                        abundance_schema_field = self.get_field_mapping('abundance_schema')
                    abundance_value_field = 'abundance_value'
                    if self.get_field_mapping('abundance_value'):
                        abundance_value_field = self.get_field_mapping('abundance_value')
                    if abundance_schema_field in data_record and self.get_data_mapping('abundance_schema', data_record[abundance_schema_field]):
                        abundance_schema = self.get_data_mapping('abundance_schema', data_record[abundance_schema_field])  # observation.get('abundance_schema') nope: you cannot 100% be sure this  mapping is already resolved...
                    abundance_value = None
                    if abundance_value_field in data_record and data_record[abundance_value_field]:  # self.get_data_mapping('abundance_value', data_record[abundance_value_field]):
                        # for the data mapping key of the abundance_value, we use:
                        # - the NDFF URI for the abundance_schema: 'http://ndff-ecogrid.nl/codes/scales/tansley'
                        # - the DATA value for the abundance_value: 's'
                        # to create as mapping_key: 'http://ndff-ecogrid.nl/codes/scales/tansley:s'
                        abundance_value = data_record[abundance_value_field]  # self.get_data_mapping('abundance_value', data_record[abundance_value_field])  # observation.get('abundance_schema') nope: you cannot 100% be sure this  mapping is already resolved...
                    if abundance_value:
                        mapping_key = f'{abundance_schema}:{abundance_value}'
                # NOTE(review): the lookup in this check gets mapping_key as is (can be an int),
                # while the lookup on the next line uses str(mapping_key) - presumably
                # get_data_mapping handles both the same way, verify
                if (isinstance(mapping_key, str) or isinstance(mapping_key, int)) and self.get_data_mapping(field, mapping_key) is not None:  # observation.get(field) in self.data_mappings[field]:
                    # 'overwrite' the value with the 'mapped' value
                    # TODO: remove # observation.set(field, self.data_mappings[field][observation.get(field)])
                    observation.set(field, self.get_data_mapping(field, str(mapping_key)))

        # fix/check period
        # NOPE: this was wrong, this always created a full iso datetime string, while partial strings also are OK for NDFF
        #result = NdffConnector.create_fix_period(observation)
        # if result[0]:
        #     log.debug('Successfully created a period for the observation')
        # else:
        #     log.debug('NOT created a period for the observation: {result[1]}')
        log.debug('Returning actual Observation')
        return observation

    def sent_observation_to_ndff(self, observation: NdffObservation, overwrite=False, test_modus=False) -> NdffResult:
        """
        This will either POST or PUT the observation to the NDFF

        :param: observation
        :param: overwrite
        :param: test_modus test_modus=True will always generate a unique uri, so never clashes of POST's
        :return: NdffResult
        """
        log_lines = []
        crs = self.get_crs(observation)
        api = self.get_api()
        identity = None
        # test_modus will always generate a unique uri, so never clashes of POST's
        # handy for testing purposes when you want to POST one observation several times
        # without needing to test if already uploaded
        if test_modus:
            # ALWAYS unique:
            #identity = f'http://zuidt.nl/test/test/{time.time_ns()}'
            # ADD fixed number to original identity
            identity = observation.get('identity') + '/20220527-2'
            # time.sleep(5) # seconds (testing)

        log.debug(f'Sending/POST to NDFF, user: {api.user}, client-id {api.client_id},\ndomain: {api.domain}, crs: {crs}\n{observation.to_ndff_observation_json()}')
        result = api.post_waarneming(observation.to_ndff_observation_json(), identity, crs)
        log_lines.append(result.as_tuple())
        # for now: we do not ask, IF overwrite==True, upon an identity conflict (409) we just overwrite...
        if f'{result["http_status"]}' == '409':
            if overwrite:
                if result['ndff_uri'] and len(result['ndff_uri']) > 7:  # earlier api returned :// as ndff_uri
                    ndff_uri = result['ndff_uri']
                    log.debug(f'Sending/PUT to NDFF, user: {api.user}, client-id {api.client_id},\ndomain: {api.domain}, crs: {crs} \n{identity}\n{observation.to_ndff_observation_json()}')
                    result = api.put_waarneming(observation.to_ndff_observation_json(), ndff_uri, identity, crs)
                    log_lines.append(result.as_tuple())
                else:
                    # failed to get the ndff_uri from the 409, second try:we do a GET/search request to find out the ndff uri
                    # (we should not come here anymore, since NDFF is now returning the ndff_uri in the 409 response!)
                    log.info('ASSERT: we should not be here: NDFF is now returning the ndff_uri in the response...')
                    search_result = api.search_waarneming('identity', observation.get('identity'))
                    if search_result['http_status'] == 200:
                        # if you searched for an observation and the count in the response >= 1
                        # then the result contains the observations 'embedded', so grab the ndff_uri from it
                        # tricky, but working for now...
                        ndff_uri = search_result['http_response']['_embedded']['items'][0]['_links']['self']['href']
                        # get identity from observation
                        identity = observation.get('identity')
                        search_result['object_id'] = identity
                        search_result['ndff_uri'] = ndff_uri
                        # log the successful search result here (after adding identity and ndff_uri)
                        log_lines.append(search_result.as_tuple())
                        log.debug(f'Sending/PUT to NDFF, user: {api.user}, client-id {api.client_id},\ndomain: {api.domain}, crs: {crs} \n{identity}\n{observation.to_ndff_observation_json()}')
                        result = api.put_waarneming(observation.to_ndff_observation_json(), ndff_uri, identity, crs)
                        log_lines.append(result.as_tuple())
                    else:
                        log.debug(f'Nothing found when searching for: {observation.get("identity")}')
                        # log the failed search
                        log_lines.append(search_result.as_tuple())
            else:
                # TODO: throw exception, custom ndff_status ??
                # for now: we just return the 409 ndff_result
                log.error(f'{observation.get("identity")} NDFF api returned 409 (resource exists), but we have "overwrite" on False...')

        self.write_to_ndff_log(log_lines)
        return result

    def write_to_ndff_log(self, log_lines: list):
        """
        Append the given log lines as csv rows to the NDFF log file
        (the file self.NDFF_LOG in the directory self.ndff_log_dir).

        :param: log_lines list of rows, every row being a sequence of values
        """
        log_file = Path(self.ndff_log_dir) / self.NDFF_LOG
        # newline='' because the csv writer takes care of the line endings itself
        # (the file is closed when leaving the 'with')
        with log_file.open(mode='a', encoding='UTF8', newline='') as handle:
            csv.writer(handle).writerows(log_lines)
            handle.flush()

    # Removed this because this created a full iso datetime string always...
    # @staticmethod
    # def create_fix_period(observation):
    #     # period_start and period_stop
    #     if observation.period_start is None and observation.period_stop is None:
    #         # mmm, apparently nothing is set... non-valid record ?
    #         return False, False
    #     try:
    #         # only a period_start is set: add one minute to it and make that period_stop
    #         if observation.period_start and observation.period_stop is None:
    #             start = NdffConnector.datetimestring_to_datetime(observation.period_start)
    #             stop = start + timedelta(minutes=1)
    #         # only a period_end is set: subtract one minute to it and make that period_start
    #         elif observation.period_stop and observation.period_start is None:
    #             stop = NdffConnector.datetimestring_to_datetime(observation.period_stop)
    #             # subtract one minute
    #             start = stop - timedelta(minutes=1)
    #         else:
    #             start = NdffConnector.datetimestring_to_datetime(observation.period_start)
    #             stop = NdffConnector.datetimestring_to_datetime(observation.period_stop)
    #         # Ok, we should have a valid start AND end now...:
    #         observation.period_start = start.isoformat()
    #         observation.period_stop = stop.isoformat()
    #     except ValueError as v:
    #         return False, v
    #     return True, True

    @staticmethod
    def create_geom(observation, location_data):
        """
        Try to create a valid GEOMETRY for 'observation' parameter, from the 'location_data' parameter
        Note that we are NOT creating the valid location json here!

        Only the geometry part of the full location json below:
        "location": {
            "buffer": 5,
            "geometry": {
                "type": "Point",
                "coordinates": [
                    408241,
                    78648
                    ]
                }
            }

        Also note that

        :returns: True in case the creation succeeded, False in case of a failure
        """
        #log.debug(f"Create geom: {type(location_data['location'])} {location_data['location']}")
        if 'location' not in location_data:
            # not even a location field (yet), silently return False
            return False
        elif location_data['location'] is None and location_data['location_y'] is None and location_data['location_x'] is None and location_data['location_buffer'] is None:
            # no location value (yet), silently return False
            return False
        elif isinstance(location_data['location'], dict):
            # ok apparently the data is already a dict...
            # this dict should have a buffer already, but if not, see if it was in the data
            observation.location = location_data['location']
        else:
            # going to create the geometry dict here ourselves
            # check if we have an x/y or a wkt geom or a wkb
            could_be_wbk = False
            if str(location_data['location']).startswith('b'):
                could_be_wbk = True
            if could_be_wbk or isinstance(location_data['location'], str):
                # can be WKB, WKT or EWKT
                #log.info(f"WKT....{location_data['location']}")
                if not could_be_wbk and 'SRID' in location_data['location']:
                    # EWKT
                    # Shapely can ONLY load a WKT string (not EWKT)
                    # so remove the SRID=28992; part from it in case of EWKT
                    location_data['location'] = location_data['location'].split(';')[1]
                try:
                    if isinstance(location_data['location'], str) and is_hex(location_data['location']):
                        # WKB string(!) like you receive from postgis:
                        #
                        # reveals
                        # Point (145918.07693370030028746 389302.11025674635311589)
                        shapely_geom = shapely.wkb.loads(location_data['location'], hex=True)
                    elif isinstance(location_data['location'], bytes):
                        # b'\x01\xe9\x03\x00\x00!G\x85\xad\xc3\x98\x17@\xbb\xabd\xa7\x07\xadI@\x00\x00\x00\x00\x00\x00\x00\x00'
                        # real bytes array (e.g. from Geopackage from Input app)
                        # currently Z is removed, as api does not seem to handle this... 
                        # see also: https://ndff.zendesk.com/hc/nl/requests/41523
                        shapely_geom = shapely.wkb.loads(location_data['location'])
                    else:
                        # WKT
                        shapely_geom = shapely.wkt.loads(location_data['location'])
                        
                    if shapely_geom and shapely_geom.is_valid:
                        # NOTE: CURRENTLY MULTIPOINT IS NOT SUPPORTED BY NDFF, so we take only the first point IF data is multipoint
                        if shapely_geom.geometryType() == 'MultiPoint':
                            log.warning(f'Location is MultiPoint (according to shapely) BUT we only take the first one!!!')
                            shapely_geom = shapely_geom.geoms[0]
                        if shapely_geom.geometryType() in ('MultiPoint', 'Point', 'Polygon', 'MultiPolygon', 'LineString', 'MultiLineString'):
                            observation.location = {'geometry': {}}
                            observation.location['geometry']['type'] = shapely_geom.geometryType()
                            mapped = mapping(shapely_geom)
                            observation.location['geometry'] = mapped
                    else:
                        raise ValueError(f"Non valid geometry: {location_data['location']} ")
                except Exception as e:
                    log.debug(f'Error (shapely) creating a geometry from {location_data["location"]}: {e}')
                    observation.location['geometry'] = {}
                    #return False
            elif location_data['location_x'] is not None and location_data['location_y'] is not None:
                # ok we have an x and y column here...
                observation.location = {'geometry': {}}
                observation.location['geometry']['type'] = 'Point'
                try:
                    observation.location['geometry']['coordinates'] = [float(location_data['location_x']), float(location_data['location_y'])]
                except Exception as e:
                    log.error(f'Error creating a geometry from "{location_data["location_x"]}" and "{location_data["location_y"]}": {e}')
                    observation.location['geometry'] = {}
                    #return False
            else:
                log.debug('(Yet) unknown geometry type in your wkt...')
                return False

        if 'location_buffer' in location_data and location_data['location_buffer']:
            observation.location['buffer'] = location_data['location_buffer']

        if observation.location and 'geometry' in observation.location and 'coordinates' in observation.location['geometry']:
            # ok we have a geometry with coordinates....
            # TODO fix the crs needed for the api to set the crs header
            #self._set_crs(observation.location['geometry']['coordinates'])
            # all well, return True
            return True

        return False

    # TODO: move this to ndff-object or observation?
    def get_crs(self, observation: NdffObservation):
        # "location": {
        #     "buffer": 5,
        #     "geometry": {
        #         "type": "Point",
        #         "coordinates": [
        #             408241,
        #             78648
        #             ]
        #         }
        #     }
        crs = self.EPSG_4326
        location = observation.location
        if location is not None and isinstance(location, dict):
            if 'geometry' in location and location['geometry'] and 'coordinates' in location['geometry']:
                coordinate_list = [0, ]  # to have at least one coordinate
                # we are going to subtract ONLY the first set of polygons from this (multi) geometry to determine the crs
                if 'type' in location['geometry'] and (location['geometry']['type'].upper() == 'POINT'):
                    coordinate_list = location['geometry']['coordinates']
                elif 'type' in location['geometry'] and (location['geometry']['type'].upper() == 'MULTIPOINT'):
                    # NOTE! MULTIPOINT CURRENTLY NOT SUPPORTED
                    coordinate_list = location['geometry']['coordinates'][0]
                elif 'type' in location['geometry'] and (location['geometry']['type'].upper() in ('POLYGON', 'MULTIPOLYGON', 'LINESTRING', 'MULTILINESTRING')):
                    # we are going to get the first (or only) polygon of this (multi)polygon
                    # which is a tuple of coordinate pairs ( (x,y),(x,y),(x,y) )
                    if type(location['geometry']['coordinates'][0][0]) in (float, int):
                        # normal linestring
                        coordinates = location['geometry']['coordinates']
                    elif type(location['geometry']['coordinates'][0][0][0]) in (float, int):
                        # normal polygon
                        coordinates = location['geometry']['coordinates'][0]
                    else:
                        # multiple polygon OR a polygon with a hole
                        coordinates = location['geometry']['coordinates'][0][0]
                    # zipping the tuple of coordinate pairs ( (x,y),(x,y),(x,y) ) will result in
                    # two lists of (x,x,x...),(y,y,y...) which you can iterate over to collect them in one
                    coordinates_set = zip(*coordinates)
                    for pair in coordinates_set:
                        coordinate_list = coordinate_list + list(pair)
                if float(max(coordinate_list)) < 180:
                    crs = self.EPSG_4326
                elif float(max(coordinate_list)) < 640000 and float(min(coordinate_list)) >= 0:
                    crs = self.EPSG_28992
                else:
                    raise ValueError("Coordinates outside of EPSG:4326 or EPSG:28992 range")
        return crs

    def next_ndff_observation(self):
        record = self.next_record()
        return self.map_data_to_ndff_observation(record)

    def __repr__(self):
        head = f"""Connector: datasource type: {type(self.datasource)}
ndff_api_settings: {self.ndff_api_settings}
field_mappings: {self.field_mappings}
data_mappings:"""
        data_mappings_s = ''
        for field, field_dict in self.data_mappings.items():
            data_mappings_s += f'{field} (first 10, if available):\n'
            i = 0
            for key, value in field_dict.items():
                data_mappings_s += f"'{key}': '{value}' \n"
                i += 1
                if i > 9:
                    break
        return head + data_mappings_s

    def save_to_directory(self, ndff_config_directory: str, with_datasource=False):
        """
        Write current configuration to a directory 'ndff_settings' IN the
        ndff_config_directory given as first parameter. If the given directory
        itself is already named 'ndff_settings', the files are written in it
        directly. The directory is created if it does not exist yet.

        :param: ndff_config_directory directory to use to create a dir 'ndff_settings'
        and write all params in it
        :param: with_datasource if this is written from within the QGIS plugin, we can optionally
        write a data_source.csv (only for Postgres and CSV datasources)
        :return: True if all files are written, False if anything failed (the
        error is logged)
        """
        # only used for the log message in case creating the Path itself fails
        settings_dir = ndff_config_directory
        try:
            # always work with a Path: the given directory can be a plain string,
            # which has no is_dir() or mkdir()
            settings_dir = Path(ndff_config_directory)
            # check IF there is already Datasource.NDFF_SETTINGS_DIR ending
            if settings_dir.name != DataSource.NDFF_SETTINGS_DIR:
                settings_dir = settings_dir / DataSource.NDFF_SETTINGS_DIR
            settings_dir.mkdir(parents=True, exist_ok=True)
            timestamp = datetime.now().replace(microsecond=0).isoformat()
            if with_datasource:
                # QGIS did not use the connector datasource, IF we want to write a datasource settings
                self.write_datasource_settings(settings_dir, timestamp)
            self.write_field_mappings(settings_dir, timestamp)
            self.write_data_mappings(settings_dir, timestamp)
        except Exception as e:
            # deliberately broad: saving is best-effort, the caller only gets a True/False
            log.error(f'Error saving these NDFF Settings to this directory: {settings_dir}\n{e}')
            return False
        return True

    def write_datasource_settings(self, ndff_config_directory: str, timestamp: str):
        with open(Path(ndff_config_directory) / self.NDFF_DATASOURCE_SETTINGS, mode='w+', encoding='UTF8', newline='') as f:  # using 'with open', then file is explicitly closed
            writer = csv.writer(f)
            writer.writerow(['#'])
            writer.writerow([f'# This Datasource settings file is written on {timestamp} using the NDFF-Connector'])
            writer.writerow(['#'])
            if self.datasource_settings:
                for key, value in self.datasource_settings.items():
                    writer.writerow([key, value])
            else:
                log.debug(f'write_datasource_settings has no settings to write: {self.datasource_settings}')

    def write_field_mappings(self, ndff_config_directory: str, timestamp: str):
        with open(Path(ndff_config_directory) / self.NDFF_OBSERVATION_FIELD_MAPPINGS, 'w+', encoding='UTF8', newline='') as f:  # using 'with open', then file is explicitly closed
            writer = csv.writer(f)
            writer.writerow(['#'])
            writer.writerow([f'# this Field mapping file is written on {timestamp} using the NDFF-Connector'])
            writer.writerow(['#'])
            #for key, value in self.field_mappings.items():
            #    writer.writerow(value)
            # splitting the extra_info key/values from the rest (and TODO also the involved_persons)
            normals = dict(filter(lambda elem: 'extra_info_' not in elem[0], self.field_mappings.items()))
            for key, value in normals.items():
                writer.writerow(value)
            extra_infos = dict(filter(lambda elem: 'extra_info_' in elem[0], self.field_mappings.items()))
            # we are going to rewrite the index/numbers of the extra info's independent of current values
            index = 1  # base 1
            for key in extra_infos:
                if 'extra_info_identity_' in key:
                    counter = int(key.replace('extra_info_identity_', ''))
                    if f'extra_info_identity_{counter}' in extra_infos:
                        key_row = list(extra_infos[f'extra_info_identity_{counter}'])
                        value_row = list(extra_infos[f'extra_info_value_{counter}'])
                        # recounting/mapping:
                        key_row[0] = key_row[0].replace(str(counter), str(index))
                        value_row[0] = value_row[0].replace(str(counter), str(index))
                        writer.writerow(key_row)
                        writer.writerow(value_row)
                        index += 1
                    else:
                        raise ValueError(f'extra_info_identity_{counter} does not have a corresponding extra_info_value_{counter}.. in the field mappings:\n{self.field_mappings}')
            #print(extra_infos)

    def write_data_mappings(self, ndff_config_directory: str, timestamp: str):
        """
        A datasource has a member 'data_mappings' which is a dict of dicts where
        the first one has the field name as key , and a string -> uri mapping as value

        {
         'taxon':             {'Leucojum aestivum': 'http://ndff-ecogrid.nl/taxonomy/taxa/leucojumaestivum', ...},
         'abundance_schema':  {'Exact aantal': 'http://ndff-ecogrid.nl/codes/scales/exact_count', ...},
        }

        NOTE! since 20221202 ALL KEYS in the csv files will be lowercase!!
        Meaning also searching for a key should use .lower() before searching !!!

        The data mappings are written PER FIELD as a csv with name mappings_<fieldname>.csv
        so for example for taxon: mappings_taxon.csv
        """
        for field in self.data_mappings.keys():
            # BUT only if there ARE actually mappings
            if len(self.data_mappings[field]) > 0:
                with open(Path(ndff_config_directory) / f'mappings_{field}.csv', 'w+', encoding='UTF8', newline='') as f:
                    writer = csv.writer(f)
                    writer.writerow(['#'])
                    writer.writerow([f'# this Data mapping file for {field} is written {timestamp} using the NDFF-Connector'])
                    writer.writerow(['#'])
                    for key, value in self.data_mappings[field].items():
                        # 20221202 make sure that in (new) csv files ALL keyw will be lowercase
                        writer.writerow([key.lower(), value])
