from lxml import etree
import json
import fig.utils as EF
import fig.auth
import requests
import re

# XML namespaces of the generated document: the default (prefix-less)
# namespace is DataCite kernel-4, 'xsi' is XML Schema instance.
NS = {
    None: 'http://datacite.org/schema/kernel-4',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
}
# Clark-notation ('{uri}') prefixes for building qualified tag/attribute names.
D = '{' + NS[None] + '}'    # DataCite namespace
I = '{' + NS['xsi'] + '}'   # xsi namespace
# When true, landing-page urls are built from EF.HOST and the item id
# instead of being taken from the item's 'url_public_html'.
doShortUrls = True
# XML declaration put in front of every serialized document.
xmlHeader = '<?xml version="1.0" encoding="UTF-8"?>\n'
# Location of the DataCite schema, used for xsi:schemaLocation and validation.
schemaLoc = 'http://schema.datacite.org/meta/kernel-4.4/metadata.xsd'

def xml_str(tree, pretty_print=True):
    '''
    Serialize an lxml element or tree to a string, preceded by xmlHeader.
    tree: lxml element or element tree
    pretty_print: passed on to lxml (indent the output)
    returns: str starting with the XML declaration in xmlHeader
    '''
    # etree.tounicode() is deprecated in lxml; tostring() with
    # encoding='unicode' is its documented replacement and also returns str.
    body = etree.tostring(tree, encoding='unicode', pretty_print=pretty_print)
    return xmlHeader + body

def val(doc, schema):
    '''
    Validate doc against schema.
    doc: document to validate
    schema: object offering validate(doc) and assertValid(doc)
    returns: tuple (result of schema.validate, error message);
             the message is '' when the document is valid
    '''
    valid = schema.validate(doc)
    if valid:
        return valid, ''
    # validate() only yields a flag; assertValid() raises an exception
    # whose text describes the problem.
    try:
        schema.assertValid(doc)
    except Exception as exc:
        return valid, str(exc)
    return valid, ''

def child(el, name, text=None, attrib=None):
    '''
    Append a sub-element in the DataCite namespace to el.
    el: parent element
    name: local tag name (the namespace prefix D is added here)
    text: element text, converted with str formatting; falsy values
          (None, '', 0) leave the element without text
    attrib: optional mapping of attribute names to values
    returns: the new sub-element
    '''
    ch = etree.SubElement(el, D+name, nsmap=NS)
    # attrib defaults to None rather than a shared mutable {}.
    for key, value in (attrib or {}).items():
        ch.set(key, value)
    if text:
        ch.text = f'{text}'
    return ch

def childIf(el, name, source, sourceName, attrib=None):
    '''
    Append a sub-element to el only if source has the key sourceName.
    el: parent element
    name: local tag name of the new element
    source: mapping the text is taken from
    sourceName: key in source; its value must be a string (it is stripped)
    attrib: optional mapping of attribute names to values
    returns: None
    '''
    if sourceName in source:
        # Always hand child() a dict, so this does not depend on how
        # child() treats a missing attrib.
        child(el, name, source[sourceName].strip(), attrib or {})

def repairURL(url, version):
    '''
    Make the presence of a trailing version segment in url agree with version.
    A last path segment shorter than 4 characters is taken to be a version.
    url: url, with or without trailing '/<version>'
    version: version to append, or a falsy value for an unversioned url
    returns: url without the version segment if version is falsy,
             url with a version segment otherwise (an existing segment
             is kept as it is)
    '''
    head, _, tail = url.rpartition('/')
    if len(tail) < 4:
        # url already ends in a version segment
        return url if version else head
    return f'{url}/{version}' if version else url

def repairDOI(doi, version):
    '''
    Make the presence of a '.v<number>' suffix in doi agree with version.
    doi: doi, with or without trailing '.v<number>'
    version: version to append, or a falsy value for an unversioned doi
    returns: doi without the suffix if version is falsy,
             doi with a suffix otherwise (an existing suffix is kept
             as it is)
    '''
    # Only a trailing '.v<digits>' is a version suffix; a '.v' elsewhere
    # in the doi (e.g. '10.1234/my.vault') is part of the identifier.
    suffix = re.search(r'\.v\d+$', doi)
    if suffix:
        return doi if version else doi[:suffix.start()]
    return f'{doi}.v{version}' if version else doi

def decimal_coord(txt, axis, digits=4):
    '''
    Converts txt to string with decimal coordinates or None if invalid.
    Accepts input like "42.597" or "5º 38’ 18.5’’ E"; the degree sign may
    be º or °, and seconds may be marked with two single quotes, ", ” or ″.
    axis is 'N' (latitude, limited to ±90) or 'E' (longitude, limited
    to ±180); S and W hemispheres are returned as negative N and E.
    digits is the number of decimals in the result.
    A hemisphere letter that does not belong to axis makes the input invalid.
    '''
    pat = (
        r"^(-?)(\d+)[º°]\s*"        # optional minus sign, degrees
        r"(\d+)[’'′]\s*"            # minutes
        r"(\d+(?:\.\d*)?)"          # seconds, any number of decimals
        r"(?:[’']{2}|[”″\"])\s*"    # seconds mark
        r"([NESW]?)$"               # optional hemisphere letter
    )
    txt = txt.strip()
    deg = None
    ax = None
    try:
        deg = float(txt)
        ax = ''
    except ValueError:
        match = re.search(pat, txt)
        if match:
            minus, d, m, s, ax = match.groups()
            deg = int(d) + int(m)/60 + float(s)/3600
            # The sign applies to the whole angle: -5º 30' is -5.5, not
            # -5 + 0.5, so it is applied after summing the parts.
            if minus:
                deg = -deg
            if ax in ('S', 'W'):
                deg = -deg
                ax = 'N' if ax == 'S' else 'E'
    if ax in (axis, ''):
        arc_rel = deg/90 if axis == 'N' else deg/180
        # NaN fails this comparison as well, so it is rejected too.
        if abs(arc_rel) <= 1.:
            return f'{deg:.{digits}f}'
    return None

def dataciteXML(pid, Type='article', version=None, url=None, doi=None):
    '''
    Fetch an item and build its DataCite metadata.
    pid: item id
    Type: article|collection
    version: version to fetch; None for the default of EF.getItem
    url, doi: passed on to dataciteFromArticle, where provided values
              overrule derived ones
    returns: the result tuple of dataciteFromArticle
    '''
    article = EF.compactItem(EF.getItem(pid, Type=Type, version=version))
    # url and doi were previously accepted but silently dropped here.
    return dataciteFromArticle(pid, article, Type=Type, version=version,
                               url=url, doi=doi)

def dataciteFromArticle(pid, article, Type='article', version=None, validate=True, url=None, doi=None):
    '''
    Build a DataCite kernel-4 metadata tree for one article or collection.

    pid: item id; used for the short landing-page url and in warnings
    article: EF.compactItem() of detailed article or collection from API.
    Type: article|collection
    version: used when deriving doi and url; the returned version is
             always article['version']
    url, doi: provided values overrule derived ones
    validate: do validation against Schema? The schema at schemaLoc is
              only loaded when this is true.
    returns: tuple (Datacite xml tree,
                    derived version,
                    derived doi,
                    derived url,
                    validation result in tuple (ok?, message))
             Without validation, ok tells whether article['version']
             equals the version argument.
    raises: KeyError when article lacks a field that is read
            unconditionally ('version', 'published_date', 'authors',
            'title', 'description', custom field 'Publisher', and 'doi'
            if no doi is given).
    '''
    schemaDecl = f'{NS[None]} {schemaLoc}'
    # Loading the schema means fetching and parsing schemaLoc; skip that
    # when no validation is requested.
    schema = etree.XMLSchema(etree.parse(schemaLoc)) if validate else None
    subjectScheme = 'Australian and New Zealand Standard Research Classification (ANZSRC), 2008'
    orcidAtt = {'nameIdentifierScheme': 'https://orcid.org/'}
    # custom is indexed by field name below, so the fallback is a dict.
    custom = article['custom_fields'] if 'custom_fields' in article else {}
    ver = article['version']
    if not doi:
        doi = repairDOI(article['doi'], version)
    if not url:
        if doShortUrls or Type == 'collection':
            url = f'{EF.HOST}/{Type}s/_/{pid}'
            if version:
                url += f'/{version}'
            elif ':' in doi:
                # doi containing ':' (presumably the uuid style, cf.
                # resolveVersions): the url always carries the item's version
                url += f'/{ver}'
        else:
            url = repairURL(article['url_public_html'], version)
    pub = article['published_date'][:10]  # first 10 characters, presumably YYYY-MM-DD

    root = etree.Element(D+'resource', nsmap=NS)
    root.set(I+'schemaLocation', schemaDecl)

    #01 identifier
    child(root, 'identifier', doi, {'identifierType':'DOI'})

    #02 creators
    creatorsE = child(root, 'creators')
    for author in article['authors']:
        creatorE = child(creatorsE, 'creator')
        nameE = child(creatorE, 'creatorName', author['full_name'])
        orcid = None
        if 'id' in author:
            # author with an id: look up the profile for name parts and orcid
            profile = EF.jget(f'/account/authors/{author["id"]}')
            prof = EF.compact(profile)
            childIf(creatorE, 'givenName', prof, 'first_name')
            childIf(creatorE, 'familyName', prof, 'last_name')
            if 'orcid_id' in prof:
                orcid = profile['orcid_id']
        # fall back to the orcid given with the author entry itself
        if 'orcid_id' in author and not orcid:
            orcid = author['orcid_id']
        if orcid:
            child(creatorE, 'nameIdentifier', orcid, orcidAtt)
            nameE.set('nameType', 'Personal')

    #03 titles
    titlesE = child(root, 'titles')
    child(titlesE, 'title', article['title'])

    #04 publisher
    child(root, 'publisher', custom['Publisher'])

    #05 publicationYear
    child(root, 'publicationYear', pub[:4])

    #06 resourceType
    fT = 'defined_type_name'
    rtype = article[fT].capitalize() if fT in article else 'Collection'
    # every type other than these three is reported as 'Text'
    if rtype not in ('Dataset','Software','Collection'):
        rtype = 'Text'
    child(root, 'resourceType', rtype, {'resourceTypeGeneral': rtype})

    #07 subjects: categories, then tags, then time coverage
    subjE = child(root, 'subjects')
    fCat = 'categories'
    if fCat in article:
        for cat in article[fCat]:
            att = {'subjectScheme': subjectScheme}
            cid = cat['id']
            if cid in EF.aus_codes:
                att['classificationCode'] = EF.aus_codes[cid]
            else:
                print(f'WARNING {pid}: no classification code for {cid}')
            child(subjE, 'subject', cat['title'], att)
    else:
        print(f'WARNING: no categories in {pid}/{version}')
    kwT = 'tags'
    if kwT in article:
        for t in article[kwT]:
            child(subjE, 'subject', t)
    timeT = 'Time coverage'
    if timeT in custom:
        child(subjE, 'subject', f'Time: {custom[timeT]}')

    #08 contributors
    conT = 'Contributors'
    orgT = 'Organizations'
    cAtt = {'contributorType': 'Other'}
    hasContribs = conT in custom
    hasOrgs = orgT in custom
    if hasContribs or hasOrgs:
        contribsE = child(root, 'contributors')
        cT = 'contributor'
        cNT = 'contributorName'
        nT = 'nameType'
        nTO = 'Organizational'
        nTP = 'Personal'
        if hasContribs:
            # one contributor per ';\n'-separated entry,
            # each optionally ending in ' [orcid:<id>]'
            contribs = custom[conT].split(';\n')
            for contrib in contribs:
                contribE = child(contribsE, cT, attrib=cAtt)
                parts = contrib.split(' [orcid:', 1)
                name = parts[0]
                # [:-1] drops the last character (the closing ']')
                orcid = parts[1][:-1] if parts[1:] else None
                isPers = EF.isPerson(contrib)
                nameType = nTP if isPers else nTO
                child(contribE, cNT, name, attrib={nT:nameType})
                if orcid and isPers:
                    child(contribE, 'nameIdentifier', orcid, orcidAtt)
        if hasOrgs:
            nameAtt = {nT: nTO}
            # organizations are separated by ';' or newline; skip empty entries
            orgs = [x for x in re.split(r'\s*[;\n]\s*', custom[orgT]) if x != '']
            for name in orgs:
                contribE = child(contribsE, cT, attrib=cAtt)
                child(contribE, cNT, name, attrib=nameAtt)

    #09 dates
    datesE = child(root, 'dates')
    child(datesE, 'date', pub, {'dateType': 'Issued'})

    #10 language
    fT = 'Language'
    if fT in custom:
        # takes the first element; assumes the field is a list -- TODO confirm
        child(root, 'language', custom[fT][0])

    #11 relatedIdentifiers
    doiT = 'resource_doi'
    refT = 'references'
    hasDoi = doiT in article
    hasRef = refT in article
    if hasDoi or hasRef:
        relT = 'relatedIdentifier'
        relTT = 'relationType'
        relIT = 'relatedIdentifierType'
        relsE = child(root, 'relatedIdentifiers')
        if hasDoi:
            child(relsE, relT, article[doiT], {relIT:'DOI', relTT:'IsSupplementTo'})
        if hasRef:
            for ref in article[refT]:
                child(relsE, relT, ref, {relIT:'URL', relTT:'References'})

    #12 formats
    fT = 'Format'
    if fT in custom:
        fE = child(root, 'formats')
        child(fE, 'format', custom[fT])

    #13 version
    child(root, 'version', ver)

    #14 rightsList
    # license ids whose rights uri is overridden instead of taken from the item
    own = {98: 'https://doi.org/10.4121/resource:terms_of_use'}
    fT = 'license'
    if fT in article:
        lic = article[fT]
        lid = lic['value']
        uri = own[lid] if lid in own else lic['url']
        rListE = child(root, 'rightsList')
        child(rListE, 'rights', lic['name'], {'rightsURI': uri})

    #15 descriptions
    descriptionsE = child(root, 'descriptions')
    child(descriptionsE, 'description', article['description'],
          {'descriptionType': 'Abstract'})

    #16 geoLocations
    geoT = 'Geolocation'
    lonT = 'Geolocation Longitude'
    latT = 'Geolocation Latitude'
    hasGeo = geoT in custom
    hasPoint = lonT in custom and latT in custom
    if hasPoint:
        lon = decimal_coord(custom[lonT], 'E')
        lat = decimal_coord(custom[latT], 'N')
        # a point needs both coordinates to be valid
        hasPoint = lon and lat
    if hasGeo or hasPoint:
        geosE = child(root, 'geoLocations')
        geoE = child(geosE, 'geoLocation')
        if hasGeo:
            child(geoE, 'geoLocationPlace', custom[geoT])
        if hasPoint:
            pointE = child(geoE, 'geoLocationPoint')
            child(pointE, 'pointLongitude', lon)
            child(pointE, 'pointLatitude', lat)

    #17 fundingReferences
    fT = 'funding_list'
    if fT in article:
        fundsE = child(root, 'fundingReferences')
        for fund in article[fT]:
            fundE = child(fundsE, 'fundingReference')
            nameT = 'funder_name'
            fName = fund[nameT] if nameT in fund else 'unknown'
            child(fundE, 'funderName', fName)
            childIf(fundE, 'awardNumber', fund, 'grant_code')
            childIf(fundE, 'awardTitle', fund, 'title')

    if validate:
        return root, ver, doi, url, val(root, schema)
    else:
        # without schema validation only the version is checked
        ok = ver == version
        message = '' if ok else f'wrong version {ver} vs {version}'
        return root, ver, doi, url, (ok, message)

def resolveVersions(url, data):
    '''
    Turn the (version, doi) pairs of the versions API endpoint into the
    (version, doi, url) combinations to register.
    url : base url (without version)
    data: list of tuples (version, doi)
          derived from versions API endpoint
    returns: list of tuples (version, doi, url),
             doi/url versioned as needed for doi registrations
    A doi containing ':' is treated as uuid style; everything before the
    first '.v' of a doi is its base.
    '''
    last = len(data) - 1
    result = []
    for pos, (v, doi_raw) in enumerate(data):
        doi, marker, _ = doi_raw.partition('.v')
        url_v = f'{url}/{v}'
        versioned = (v, f'{doi}.v{v}', url_v)
        is_latest = pos == last

        if is_latest and ':' not in doi:
            # latest version, plain doi: the versioned pair plus the
            # unversioned doi pointing at the unversioned url
            result.append(versioned)
            result.append((v, doi, url))
            continue

        if not is_latest and doi == data[pos + 1][1].partition('.v')[0]:
            # older version sharing its base doi with the next entry
            result.append(versioned)
            continue

        # Left over: latest version with uuid doi, or a non-latest entry
        # whose base doi differs from the next one (only occurs for uuid).
        if marker:
            result.append(versioned)
        result.append((v, doi, url_v))
    return result

def registerDois(meta, logf=None):
    '''
    Do doi registrations
    meta  : list of tuples (doi, pid, url, data), data: xml-string
            url or data may be None to skip registration of doi or metadata
    logf: logfile object; responses that are not ok are written to it
    Progress and the status of every request are printed.
    '''
    dcite = 'https://mds.datacite.org/'
    dcite_met = dcite + 'metadata'
    dcite_doi = dcite + 'doi'
    hdrs_met = {'Content-Type':'application/xml;charset=UTF-8'}
    hdrs_doi = {'Content-Type':'text/plain;charset=UTF-8'}

    def report(text, resp):
        # Print the status line; failed requests also go to the logfile.
        print(text)
        if logf and not resp.ok:
            logf.write(f'\n{text}')

    with requests.Session() as session:
        session.auth = fig.auth.Datacite
        n = len(meta)
        for i, (doi, pid, url, data) in enumerate(meta):
            print(f'{f" {i} done, {n-i} to go ":-^30} {doi}')
            # metadata first, then the doi/url pair
            if data:
                resp = session.post(dcite_met, data=data.encode('utf-8'), headers=hdrs_met)
                report(f'{pid} metadata {resp.status_code} {resp.reason}', resp)
            if url:
                data_doi = f'doi={doi}\nurl={url}'
                # Encode explicitly: the header declares UTF-8, and a str
                # body would be encoded by the http layer with its own default.
                resp = session.post(dcite_doi, data=data_doi.encode('utf-8'), headers=hdrs_doi)
                report(f'{pid}      url {resp.status_code} {resp.reason}', resp)
        print(f'{f" {n} done, ready ":-^30}')

def registerSingleDoi (pid, doi, version=None, Type='article'):
    '''
    Build, validate and register the metadata and url of a single doi.
    pid: item id
    doi: doi to register
    version: item version; adds '/<version>' to the registered url
    Type: article|collection
    Nothing is registered if the metadata does not validate; the
    validation message and the xml are printed instead.
    '''
    url = f'{EF.HOST}/{Type}s/_/{pid}'
    if version:
        url += f'/{version}'
    item = EF.getCompactItem(pid, Type, version)
    DCX, _ver, _doi, _url, valid = dataciteFromArticle(
        pid, item, Type=Type, version=version, url=url, doi=doi
        )
    ok, message = valid
    # Serialize before branching: the error report below prints the xml
    # too (it used to reference 'data' before assignment).
    data = xml_str(DCX)
    if ok:
        registerDois([(doi, pid, url, data)])
    else:
        print(f'ERROR xml not valid\n{message}\n{data}')

if __name__ == '__main__':
    # Manual check: fetch one item, print it, build its DataCite xml
    # (with schema validation) and print the outcome.
    # Other ids used for specific cases:
    #   13200398 geo deg,min,sec
    #   12764789 geobox
    #   12764762 pure
    #   5065445  collection
    #   12718787 Unicode in spatial
    pid = 16887658
    Type = 'article'
    print(pid)
    article = EF.getCompactItem(pid, Type=Type)
    EF.jprint(article)

    print('\nwithout version')
    result = dataciteFromArticle(pid, article, Type=Type)
    DC, version, doi, url, valid = result
    print(f'    {doi}\n{url}\n{valid}')
    print(xml_str(DC))

    # Variant with explicit version and without schema validation:
    #print(f'\nwith version [{version}]')
    #DC, ver, doi, url, valid = dataciteFromArticle(pid, article, version=version, Type=Type, validate=False)
    #print(f'    {doi}\n{url}\n{valid}')
    #print(xml_str(DC))
    
