import traceback
import sys
import datetime
import pathlib
import os
import importlib
# If module fig is not found, we guess that it is in ./modules (relative to
# the current working directory), which is then added to sys.path.
#
# Alternative without this importlib stuff:
# before running Python, run the command
#   export PYTHONPATH=${PYTHONPATH}:/mnt/c/DATA/4TURD/python-progs/modules
# or whatever your path is (the example is from the author's laptop).

# `import importlib` alone does not guarantee that the `util` submodule is
# loaded; import it explicitly so that find_spec is always available.
import importlib.util

if importlib.util.find_spec('fig') is None:
    ab_path_modules = os.path.join(os.getcwd(), 'modules')
    print(f'inserting {ab_path_modules} into path')
    sys.path.append(ab_path_modules)
import fig.datacite as DX
import fig.utils as EF

#------ settings
# The three file names below are joined onto the directory of this script further down.
log = 'doi-reg-log.txt'    #logfile for errors (opened in append mode)
out = 'doi-reg.json'       #file for output of metadata (receives the dump of itemVersions)
upd = 'doi-reg-update.txt' #file holding update date+time (read as "since" at start-up)

do_met = True              #register metadata? (False: entries are passed on without DataCite XML)
do_doi = True              #register doi url? (False: the url is passed on as None)
maxVersions = 0            #max latest versions for update (0 = inf, since [-0:] keeps everything)
query = ''                 #optional query (overrules date since)
                           #e.g. ':doi:10.4121/14074091'
doFilter = False           #filter items with filterBool(item)?
                           #NOTE: if query or doFilter is set, the update date+time is not rewritten

def filterBool(item):      #adapt this filter to your needs
    """keep every item (default filter, nothing is rejected)"""
    # The docstring above is printed as the 'filter :' line of the run header
    # when doFilter is on, so keep it a short one-line description of the rule.
    return True

#------ don't touch the part below
def filterItem(item):
    """Return item if it passes the configured filter, otherwise None.

    With doFilter switched off every item is returned unchanged. With
    doFilter switched on, the item is returned only when filterBool(item)
    is truthy.

    An exception raised by filterBool rejects the item (best effort): it is
    reported on stderr and None is returned. KeyboardInterrupt and
    SystemExit are not intercepted, so the run can still be aborted.
    """
    try:
        if not doFilter or filterBool(item):
            return item
    except Exception as err:
        # A broken user filter must not abort the whole run, but it should
        # not go unnoticed either.
        print(f'filter failed, item skipped: {err!r}', file=sys.stderr)
    return None

# All working files (log, output, update stamp) are located relative to the
# directory that holds this script.
BASE_DIR = pathlib.Path(__file__).parent
log_path, out_path, upd_path = (BASE_DIR / name for name in (log, out, upd))

# Start time of this run. `now` is a naive local datetime (it is written as the
# header of the log section); `nowStr` is the same instant expressed in UTC.
now = datetime.datetime.now()
# A naive datetime has no UTC offset of its own (utcoffset() returns None), so
# ask the system for the real local offset instead of assuming CEST (+02:00).
# astimezone() on a naive datetime interprets it as system local time.
offset = now.astimezone().utcoffset()
nowStr = (now - offset).isoformat()[:19] + 'Z' #Figshare only accepts whole seconds and Z

# The stamp of the previous run is always read, even when a query overrides it.
with open(upd_path, 'r') as stamp_file:
    since = stamp_file.read().strip()

# Run header: which selection is in effect.
if query:
    print(f'query  : {query}')
if doFilter:
    print(f'filter : {filterBool.__doc__}')
if not query:
    print(f'from   : {since}')

# Collect the candidate items per type, either by search query or by
# publication date.
Types = ('collection', 'article')
itemsByType = {}
for T in Types:
    if query:
        itemsByType[T] = EF.jpostAll(f'/{T}s/search', {'search_for': query})
    else:
        itemsByType[T] = EF.jgetAll(f'/{T}s', {'published_since': since})

# Total number of candidate items over all types.
print(sum(map(len, itemsByType.values())))

# Main pass: resolve the versions of every item, build the DataCite metadata
# and finally register everything that was collected.
itemVersions = []  # one (pid, resolved) tuple per successfully processed item
meta = []          # (doi, pid, url or None, xml or None) tuples for DX.registerDois
with open(log_path, 'a') as logf:
    logf.write(f'\n{now}')  # each run starts its log section with the start time
    for Type, items in itemsByType.items():
        base = f'{EF.HOST}/{Type}s/_/'
        for num, item in enumerate(items):
            # Bind pid before the try block so that the error handler can
            # always report it, even if reading item['id'] itself fails
            # (otherwise it would be unbound or left over from the last item).
            pid = None
            try:
                pid = item['id']
                print(f'\n#{num+1} ------------------------ {pid}')
                # maxVersions == 0 gives the slice [-0:], i.e. all versions
                versionDescrs = EF.jget(f'/{Type}s/{pid}/versions')[-maxVersions:]
                # the version number is the last path segment of the url
                versions = [int(v['url'].split('/')[-1]) for v in versionDescrs]
                # fetch every version and keep those accepted by the filter
                artVs = {}
                for v in versions:
                    compact = filterItem(EF.getCompactItem(pid, Type, v))
                    if compact:
                        artVs[v] = compact
                dois = [(v, art['doi']) for v, art in artVs.items()]
                resolved = DX.resolveVersions(f'{base}{pid}', dois)
                itemVersions.append((pid, resolved))
                for v, doi, url in resolved:
                    print(v, doi, url)
                    url_do = url if do_doi else None
                    if do_met:
                        # only DCX and valid are used from the returned tuple
                        DCX, _v, _doi, _url, valid = DX.dataciteFromArticle(
                            pid, artVs[v], Type=Type, version=v, url=url, doi=doi
                            )
                        data = DX.xml_str(DCX)
                        ok, message = valid
                        if ok:
                            meta.append((doi, pid, url_do, data))
                        else:
                            text = f'ERROR xml not valid\n{message}\n{data}'
                            print(text)
                            logf.write(f'\n{v} {doi}\n{text}')
                    else:
                        meta.append((doi, pid, url_do, None))
            except Exception:
                # One failing item must not stop the run: report it on screen
                # and in the log, then continue with the next item.
                # KeyboardInterrupt and SystemExit are deliberately not caught,
                # so the script can still be aborted.
                text = f'\nERROR {pid}'
                print(text)
                logf.write(f'{text}\n')
                traceback.print_exc()
                traceback.print_exception(*sys.exc_info(), file=logf)
    EF.jdump(out_path, itemVersions)

    print('\nDATACITE')
    DX.registerDois(meta, logf=logf)

# Only a plain run (no query, no filter) moves the stored update date+time
# forward to the start time of this run.
if not query and not doFilter:
    upd_path.write_text(nowStr)

