import os
from pathlib import Path
from lxml import etree
import boto3
import pandas as pd
from domain.helpers.db_secret_string_manager import build_connection_string
from utilities.dcmgeometrysdk.dcmgeom2postgis.dna2dynameasurebulk import MapDNA2DynadjustMeasureBulk
from utilities.dcmgeometrysdk.dna.dnarunner import DNARunner
from utilities.dcmgeometrysdk.dna.dnaconvertfromxml import convert
from utilities.dcmgeometrysdk.dna.msr_parser import ReadMSRFiles
from utilities.dcmgeometrysdk.geometryfunctions.otherfunctions import chunker
from utilities.dcmgeometrysdk.utilities.pandas_postgis_helpers import read_sql_inmem_uncompressed, \
    psql_insert_copy, read_sql_tmpfile
from multiprocess import Pool
from multiprocessing import cpu_count

def process_xml(fpath, host_dir=None, mnt_dir=None, docker=False):
    """Stamp the DynaML schema on every XML file in a directory and convert them.

    Each ``.xml`` file directly inside ``fpath`` is parsed, has its root
    ``noNamespaceSchemaLocation`` attribute set to the DynaML schema path and
    is written back in place. Files whose *file name* starts with ``msr`` or
    ``stn`` are collected and handed to ``convert`` together with a
    ``DNARunner`` configured for the same directory.

    Args:
        fpath: Directory containing the DynaML ``.xml`` files.
        host_dir: Forwarded to ``DNARunner`` as ``host_directory``.
        mnt_dir: Forwarded to ``DNARunner`` as ``mount_dir``.
        docker: Forwarded to ``DNARunner`` as ``docker``.

    Returns:
        The ``(nstns, nmsrs)`` pair returned by ``convert``.

    Raises:
        FileNotFoundError: If ``fpath`` contains no ``.xml`` files.
    """
    schema_path = '/app/DynAdjust/sampleData/DynaML.xsd'
    schema_attr = '{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation'

    directory = str(fpath)
    stns = []
    msrs = []
    name = None
    # Sorted so the file that ends up defining ``name`` does not depend on
    # the platform's directory listing order.
    for entry in sorted(os.listdir(directory)):
        if not entry.endswith('.xml'):
            continue
        xml_item = os.path.join(directory, entry)

        # NOTE(review): ``name`` is the full path minus ".xml" of the last XML
        # file seen, as in the original expression -- confirm whether
        # DNARunner expects a bare file stem instead.
        name = xml_item[:-4]

        tree = etree.parse(xml_item)
        tree.getroot().attrib[schema_attr] = schema_path
        tree.write(xml_item, xml_declaration=True, encoding='UTF-8')

        # Classify on the file name; the joined path never starts with
        # 'msr' / 'stn' once the directory has been prepended.
        if entry.startswith('msr'):
            msrs.append(xml_item)
        elif entry.startswith('stn'):
            stns.append(xml_item)

    if name is None:
        raise FileNotFoundError(f'No .xml files found in {directory}')

    runner = DNARunner(dna_dir='/opt/dynadjust', output_dir=directory,
                       mount_dir=mnt_dir, host_directory=host_dir, filename=name, docker=docker,
                       export_dna_files=True)
    nstns, nmsrs = convert(stns, msrs, runner)
    return nstns, nmsrs

def get_events(event):
    """Load the existing dynadjustmeasure rows for one event's supply schema.

    Args:
        event: Indexable of ``(schema_name, event_id, local)``. When ``local``
            is exactly ``True`` the 'work-prod' profile and local port 9090
            are used for the connection; otherwise the default session is.

    Returns:
        ``(lga_code, event_id, lookup)`` where ``lga_code`` is the second
        '_'-separated token of ``schema_name`` and ``lookup`` maps each row
        ``id`` to a dict of its remaining columns.
    """
    use_local = event[2]
    schema_name = event[0]
    event_id = event[1]

    # Only the session profile and the tunnel arguments differ between modes.
    if use_local is True:
        session_kwargs = {'profile_name': 'work-prod'}
        engine_kwargs = {'local': True, 'local_port': 9090, 'return_engine': True}
    else:
        session_kwargs = {}
        engine_kwargs = {'return_engine': True}
    session1 = boto3.Session(**session_kwargs)
    supply_engine = build_connection_string(session1, 'dcdb_supply_user', **engine_kwargs)

    read_existing = f"select d.id, d.planid, d.plannumber from {schema_name}.dynadjustmeasure d"
    lga_code = schema_name.split('_')[1]

    print('Reading supply', schema_name)
    existing = read_sql_tmpfile(read_existing, supply_engine)
    lookup = existing.set_index('id').to_dict(orient='index')
    print('Finised Reading supply', schema_name)

    return (lga_code, event_id, lookup)

def process_file(x):
    """Parse one work item's MSR files and map them to a measure DataFrame.

    Args:
        x: 4-tuple ``(msrs, zone, event, lookup)``; ``zone`` must be
            convertible with ``int``.

    Returns:
        The ``df`` attribute of the ``MapDNA2DynadjustMeasureBulk`` built from
        the parsed files.
    """
    msr_paths, zone, event_id, db_lookup = x
    zone_id = int(zone)

    reader = ReadMSRFiles(msr_paths, zone_id)
    reader.process_file()

    mapper = MapDNA2DynadjustMeasureBulk(reader, zone_id=zone_id, event_id=event_id,
                                         db_lookup=db_lookup)
    return mapper.df

def process_directory(events, directory):
    """Build the per-file work items for every event that has MSR files.

    Sub-directories of ``directory`` are matched to events by comparing the
    first three characters of the sub-directory name with each event's LGA
    code. Every ``.msr`` file directly inside a matching sub-directory yields
    one work item per event with that LGA code.

    Args:
        events: Iterable of ``(lga_code, event_id, lookup)`` tuples.
        directory: Top-level directory whose sub-directories hold the
            ``.msr`` files.

    Returns:
        List of ``([msr_path], sub_directory_name, event_id, lookup)`` tuples.
        Events whose LGA code has no matching ``.msr`` file contribute nothing
        (previously they raised ``TypeError``).
    """
    wanted_lgas = {event[0] for event in events}

    msr_by_lga = {}
    # Listings are sorted so the output order is reproducible across platforms.
    for item in sorted(os.listdir(directory)):
        lga = item[:3]
        if lga not in wanted_lgas:
            continue
        sub_path = Path(directory, item)
        if not sub_path.is_dir():
            continue
        for fname in sorted(os.listdir(str(sub_path))):
            if fname.endswith('.msr'):
                msr_by_lga.setdefault(lga, []).append(
                    (item, os.path.join(str(sub_path), fname)))

    all_items = []
    for lga, event_id, lookup in events:
        # An event may legitimately have no directory on disk; skip it.
        for zone, msr in msr_by_lga.get(lga, ()):
            all_items.append(([msr], zone, event_id, lookup))
    return all_items

def _worker_count(n_tasks):
    """Return a pool size between 1 and ``cpu_count() - 1``, capped at ``n_tasks``."""
    return max(1, min(n_tasks, cpu_count() - 1))


def main(local=True, top_dir='/Users/jamesleversha/Downloads/final_msr_files'):
    """Load the MSR files for all matching events into adjustment.dynadjustmeasure.

    Steps: read the candidate events from ``dcm_event.event_history``, fetch
    each event's existing supply rows in parallel (``get_events``), pair the
    events with the ``.msr`` files under ``top_dir`` (``process_directory``),
    parse the files in parallel (``process_file``) and bulk-append the
    concatenated result to the ``adjustment.dynadjustmeasure`` table.

    Args:
        local: When exactly ``True``, connect with the 'work-prod' profile
            through local port 5438; otherwise use the default session.
        top_dir: Directory whose sub-directories contain the ``.msr`` files.

    Returns:
        None. Returns early, without writing, when there are no events or no
        ``.msr`` files to process.
    """
    if local is True:
        session = boto3.Session(profile_name='work-prod')
        event_engine = build_connection_string(session, 'dcdb_event_user', local=True, local_port=5438,
                                               return_engine=True)
        out_engine = build_connection_string(session, 'dcdb_adjustment_user', local=True, local_port=5438,
                                             return_engine=True)
    else:
        session = boto3.Session()
        event_engine = build_connection_string(session, 'dcdb_event_user', return_engine=True)
        out_engine = build_connection_string(session, 'dcdb_adjustment_user', return_engine=True)

    sql = """select e.id, e.description from dcm_event.event_history e where e.description like  'dcm_%%_%%_%%'
        and e.creator = 'SV'"""

    event_df = pd.read_sql(sql, event_engine)
    event_details = [(i.description, i.id, local) for i in event_df.itertuples()]
    if not event_details:
        # Pool(processes=0) raises ValueError, so stop before building one.
        print('no events found')
        return

    # The context manager tears the workers down even if a task raises.
    with Pool(processes=_worker_count(len(event_details))) as pool:
        events = pool.map(get_events, event_details)

    all_items = process_directory(events, top_dir)
    if not all_items:
        # pd.concat([]) raises ValueError; nothing to load in that case.
        print('no msr files found')
        return

    with Pool(processes=_worker_count(len(all_items))) as pool:
        item_dfs = pool.map(process_file, all_items)
    print(len(item_dfs))
    concat = pd.concat(item_dfs)

    print('starting load', concat.shape)
    concat.to_sql('dynadjustmeasure', out_engine, schema='adjustment', if_exists='append', method=psql_insert_copy,
                  index=False, chunksize=200000)
    print('finished load')



# Script entry point: run the load with main()'s default arguments
# (local=True) when this file is executed directly.
if __name__ == '__main__':
    main()
