import diskcache # dependencies
from getcache2 import get as GET
import logging, lzma, random, requests, sys, time
from tqdm.contrib.concurrent import thread_map as tqdmmap

class Cuirass:
    """Small HTTP client for the endpoints of a Cuirass-style server.

    Every request goes through ``GET`` (``getcache2.get``), whose result is
    used like a ``requests`` response (``.json()``, ``.text``, ``.content``).
    NOTE(review): ``GET`` is presumably a caching wrapper -- confirm.
    """

    def __init__(self, url = "https://ci.guix.gnu.org/"):
        """Remember the base URL; path segments are appended to it verbatim."""
        self.url = url

    def get(self, *path, **params):
        """Fetch ``url + '/'.join(path) + '?' + query`` and return the response.

        ``params`` whose value is ``None`` are left out of the query string.
        HTTP 500 and 504 errors are logged and retried indefinitely, waiting
        between attempts (1s, doubling, capped at 60s) so a struggling server
        is not hammered.  Any other ``requests.exceptions.HTTPError`` is
        re-raised.
        """
        # NOTE: values are not URL-encoded, and the '?' is appended even when
        # there are no parameters.  The URL string is kept exactly as it was
        # so that anything keyed on it (e.g. a cache inside GET) still matches.
        query = '&'.join([f'{k}={v}' for k,v in params.items() if v is not None])
        url = self.url + '/'.join([str(item) for item in path]) + '?' + query
        delay = 1.0
        while True:
            try:
                return GET(url)
            except requests.exceptions.HTTPError as e:
                if e.response.status_code not in [500,504]:
                    raise
                logging.warning(e)
                time.sleep(delay)
                delay = min(delay * 2, 60.0)

    def json(self, *path, **params):
        """Return the response body parsed as JSON."""
        return self.get(*path, **params).json()

    def text(self, *path, **params):
        """Return the response body as text."""
        return self.get(*path, **params).text

    def binary(self, *path, **params):
        """Return the raw response body as bytes."""
        return self.get(*path, **params).content

    def lzip(self, *path, **params):
        """Return the response body after ``lzma.decompress``."""
        data = self.binary(*path, **params)
        return lzma.decompress(data)

    def specifications(self):
        """Return the JSON served at ``jobsets``."""
        return self.json('jobsets')

    def evaluations(self, nr, spec = None):
        """Return the JSON of ``api/evaluations`` for ``nr`` and optional ``spec``."""
        return self.json('api','evaluations',nr=nr,spec=spec)

    def evaluation(self, id):
        """Return the JSON of ``api/evaluation`` for evaluation ``id``."""
        return self.json('api','evaluation',id=id)

    def evaluationlog(self, id):
        """Return the text of ``eval/<id>/log/raw``."""
        return self.text('eval', id, 'log', 'raw')

    def channels(self, id):
        """Return the JSON of ``eval/<id>/channels.json``."""
        return self.json('eval', id, 'channels.json')

    def jobs(self, evaluation, *names):
        """Return the JSON of ``api/jobs`` for an evaluation.

        When ``names`` are given they are sent comma-joined as ``names=``;
        otherwise the parameter is omitted.
        """
        return self.json('api','jobs',evaluation=evaluation,names=','.join(names) if names else None)

    def build(self, bid):
        """Return the JSON of ``build/<bid>``."""
        return self.json('build', bid)

    def buildlog(self, bid):
        """Return the text of ``build/<bid>/log/raw``, or ``None`` on HTTP 404."""
        try:
            return self.text('build', bid, 'log', 'raw')
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 404:
                return None
            raise

    def details(self, bid):
        """Return the HTML text of ``build/<bid>/details``."""
        return self.text('build', bid, 'details')

    def dependencies(self, bid):
        """Scrape the dependency table of a build's details page.

        Returns a dict mapping the link text of each dependency to its build
        id (``int``), or ``{}`` when the Dependencies cell is empty.

        Raises ``ValueError`` when the page does not have the expected
        markup (marker row missing or repeated, no known terminator).
        """
        marker = '<tr><th>Dependencies</th><td class="dependencies">'
        _, dephunk = self.details(bid).split(marker)
        if dephunk.startswith(('—</td></tr>', '</td></tr>')):
            return {}
        # The list ends either with a <button> or directly with the end of
        # the table cell.  A missing separator makes the two-name unpacking
        # fail with ValueError, which selects the second form.
        try:
            dephunk, _ = dephunk.split('</a><br /></div><button', 1)
        except ValueError:
            dephunk, _ = dephunk.split('</a><br /></div></td></tr>', 1)
        deps = {}
        for depchunk in dephunk.split('</a><br /></div><div'):
            link = depchunk.split('<a href="/build/', 1)[1]
            depbid, name = link.split('/details">')
            deps[name] = int(depbid)
        return deps

    def download(self, id:int):
        """Return the raw bytes of ``download/<id>``."""
        return self.binary('download', id)

    def output(self, oid):
        """Return the JSON of ``output/<oid>``; a ``/gnu/store/`` prefix is dropped."""
        oid = oid.removeprefix('/gnu/store/')
        return self.json('output', oid)

    def outputlog(self, oid):
        """Return the text of ``log/<oid>``; a ``/gnu/store/`` prefix is dropped."""
        oid = oid.removeprefix('/gnu/store/')
        return self.text('log', oid)

    def narinfo(self, oid, raw=False):
        """Fetch ``<hash>.narinfo`` for a store item.

        ``oid`` may carry the ``/gnu/store/`` prefix; only the part before the
        first ``-`` is used in the request.  With ``raw=True`` the text is
        returned unchanged.  Otherwise a list of ``[key, value]`` pairs is
        returned, one per line, split on the first ``': '``; a line without
        that separator becomes ``[line, None]``.
        """
        oid, *_ = oid.removeprefix('/gnu/store/').split('-')
        narinfo = self.text(oid + '.narinfo')
        if raw:
            return narinfo
        return [
            line.split(': ', 1) if ': ' in line else [line,None]
            for line in narinfo.strip('\n').split('\n')
        ]

    def nar(self, url):
        """Download and decompress ``nar/lzip/<last path segment of url>``."""
        fn = url.split('/')[-1]
        return self.lzip('nar', 'lzip', fn)

# Script setup: logging, the two server clients, and lookup tables built from
# the first server's specifications and evaluations.
logging.basicConfig(level=logging.INFO)
c = Cuirass('https://ci.guix.gnu.org/')
b = Cuirass('https://bordeaux.guix.gnu.org/')  # only queried for narinfos/nars below

specifications = c.specifications()

# Specification name -> specification record.
specifications_by_name = dict(
    (spec['name'], spec) for spec in specifications
)

# Evaluation id -> evaluation record merged over its specification record
# (evaluation fields win on key clashes).  Up to 2048 evaluations are
# requested per specification.
evaluations_by_id = dict(
    (e['id'], spec | e)
    for spec in specifications
    for e in c.evaluations(2048, spec['name'])
)
#def evaluation(eid):
#    if eid in evaluations_by_id:
#        return evaluations_by_id[eid]
#    else:
#        e = c.evaluation(eid)
#        return {**specifications_by_name[e['specification']], **e}

def commits_for_build(bid, *eids):
    """Return the checkout commits of evaluations whose jobs include build ``bid``.

    The evaluation recorded on the build itself is added to ``eids``; every
    evaluation id between the smallest and largest of those (inclusive) that
    is present in ``evaluations_by_id`` is then examined.  For each
    specification only the first evaluation (lowest id) containing the build
    is kept.

    Returns the commits of the kept evaluations ordered by their
    ``checkouttime``.  Each kept evaluation is expected to have exactly one
    checkout (asserted).

    NOTE(review): the original body referenced an undefined ``evals`` list and
    stopped in ``pdb``; collecting the matching evaluations is the presumed
    intent -- confirm.
    """
    build = c.build(bid)
    eids = list(eids) + [build['evaluation']]
    # Specifications for which no evaluation containing the build was found yet.
    missing_specs = {spec['name'] for spec in specifications}
    evals = []
    for eid in range(min(eids), max(eids)+1):
        e = evaluations_by_id.get(eid)
        if e is None:
            continue
        spec = e['specification']
        if spec not in missing_specs:
            continue
        if any(job['build'] == bid for job in c.jobs(eid)):
            missing_specs.remove(spec)
            evals.append(e)

    evals.sort(key = lambda ev: ev['checkouttime'])
    for ev in evals:
        assert len(ev['checkouts']) == 1
    return [ev['checkouts'][0]['commit'] for ev in evals]

# Commit hashes of interest.  Further down, an evaluation counts as relevant
# when one of its checkouts has a commit listed here.
commits = [
        '2d4ed08662714ea46cfe0b41ca195d1ef845fd1b', # v1.5.0rc1
        'd339785a0fbd8f13930082a4fa7a73b6685630fd', # version-1.5.0 after v1.5.0rc1
        '0d3f82123b8645196a8c37139ad02d3570d75cd6', # master after v1.5.0rc1
        '001cd00bcd123cd1eab6ebd1b85afd7b0aa994fa', # latest images build prior to v1.5.0
        '82f286efb989f625da5975bc829ba08ed5d14cfa', # earliest release build after v1.5.0rc1
        'e642300195dfc7c83283db7e103fabf2c6ac65a7', # version-1.5.0 prior to v1.5.0
        'd58da8a56b75af73a7b5466f7ef8a81679713aa3', # version-1.5.0 after v1.5.0
        '0f8d5a4760bb4a4d056113e4fd861938b15bd171', # master after v1.5.0
        'b989e0138e1684df4d043af813a96fba73dd8c8c', # next images build after v1.5.0
]

# Hand-collected evaluation ids, grouped by the comment above each run.
# The list is extended at runtime with evaluations found via `commits`, and
# every id is later asserted to have a checkout at one of those commits.
eids = [
    # last images
    2116056,2116057,2116062,2116066,2116068,2116075,
    2116077,2116079,2116080,2116093,2116094,2116095,
    2116092,2116100,2116101,2116103,2116104,2116105,
    2116106,2116108,2116109,2116250,2116871,

    # v1.5.0rc1 version-1.5.0
    2116258,2116356,

    # v1.5.0rc1 master release
    2116711,2116712,2116718,2116719,2116720,2116721,
    2116725,2116732,2116733,2116736,2116744,2116746,
    2116749,

    # v1.5.0 version-1.5.0
    2125477,2125479,2125531,2125534,2125556,
        
    # v1.5.0 master
    # NOTE(review): 2125786 appears twice on the second line below --
    # possibly a typo for a neighbouring id; confirm.
    2125568,2125748,2125749,2125750,2125753,2125754,
    2125763,2125786,2125786,2125789,2125797,

    # v1.5.0 next-master/images
    2126751,2126752,2126758,2126759,2126767,2126769,
    2126772,2126798,2126810,2126838,2126844,2126850,
    2126889,2126901,2126902,2126917,2126918,2126922,
    2126925,2126928,2126927,2126930,
]

#missing_bids = [
#        3398282,2125531,3398255,
#]

# Add every known evaluation that checked out one of the tracked commits but
# is not in the hand-written list yet, warning about each addition.
for e in evaluations_by_id.values():
    for checkout in e['checkouts']:
        if checkout['commit'] not in commits or e['id'] in eids:
            continue
        logging.warning(f'Missing eval {e["id"]} from {e["specification"]} {checkout["commit"][:7]}')
        eids.append(e['id'])

# Sanity check: each listed evaluation must have a checkout at a tracked commit.
for eid in eids:
    assert any(
        co['commit'] in commits
        for co in c.evaluation(eid)['checkouts']
    )
    #for job in c.jobs(eid):
    #    assert job['build'] not in missing_bids


# Crawl.  Starting from every build of the selected evaluations, follow build
# dependencies; for each build queue its derivation and output paths as store
# items.  Each store item is then looked up on both servers (narinfo), its
# references and deriver are queued, and derivations are downloaded and parsed
# so that their inputs are queued as well.  The output path of a derivation
# whose first output carries a hash is printed once.
seen = set()        # build ids (int) and store item basenames (str) already handled
seen_srcs = set()   # output paths already printed
bids = [job['build'] for eid in eids for job in c.jobs(eid)]
sitms = []          # pending store items (full paths or basenames)

#random.shuffle(bids)

backoff = 1./16     # seconds to wait after an HTTP 429; doubles up to 64s
while bids or sitms:
    try:
        # Pull builds off the front of the queue until one yields store items.
        while bids and not sitms:
            bid = bids.pop(0)
            if bid in seen:
                continue
            seen.add(bid)
            #commits_for_build(bid, *eids)
            bids.extend(c.dependencies(bid).values())
            build = c.build(bid)
            sitms.append(build['derivation'])
            sitms.extend([out['path'] for out in build['buildoutputs'].values()])
            backoff = 1./16  # a request went through: reset the wait
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 404:
            # Unknown build: leave it marked as seen and move on.
            pass
        elif e.response.status_code == 429:
            # Rate limited: put the build back at the front and wait before
            # retrying instead of immediately hitting the server again.
            bids.insert(0, bid)
            seen.remove(bid)
            logging.warning('HTTP 429 for build %s; sleeping %ss', bid, backoff)
            time.sleep(backoff)
            backoff = min(backoff * 2, 64)
        else:
            raise
    except requests.exceptions.ContentDecodingError as e:
        # Retry the same build after a corrupted response body.
        bids.insert(0, bid)
        seen.remove(bid)
        logging.warning(e)
    sitms = [sitm for sitm in sitms if sitm not in seen]
    while sitms:
        sitm = sitms.pop().split('/')[-1]
        if sitm in seen:
            continue
        seen.add(sitm)
        # Collect the narinfo from every server that has one, remembering
        # which server it came from.
        narinfos = []
        for substs in [c,b]:
            try:
                narinfos.append((substs, dict(substs.narinfo(sitm))))
            except requests.exceptions.HTTPError as e:
                if e.response.status_code == 404:
                    continue
                else:
                    raise
        for substs, narinfo in narinfos:
            if sitm.endswith('.drv'):
                # Download the nar from the server that advertised it; the
                # URL in a narinfo is only meaningful for that server.
                drv = substs.nar(narinfo['URL'])
                # Cut the `Derive(...)` argument list out of the nar bytes.
                drv = drv.split(b'Derive(')[1]
                drv, *_ = drv.split(b')\0')
                drv, *_ = drv.split(b')\x01')
                # SECURITY NOTE(review): eval() runs text downloaded from the
                # server.  The argument list looks like plain literals, so
                # ast.literal_eval would presumably parse it without executing
                # code -- left unchanged pending confirmation.
                drv = eval(drv.decode())
                out, inps, builds, *_ = drv
                if out[0][3]:
                    if out[0][1] not in seen_srcs:
                        print(out[0][1])
                        seen_srcs.add(out[0][1])
                sitms.extend([inp[0] for inp in inps])
                sitms.extend(builds)
            # Tolerate a narinfo without a References field, as is already
            # done for Deriver.
            refs = narinfo.get('References')
            if refs:
                sitms.extend(refs.split(' '))
            if narinfo.get('Deriver'):
                sitms.append(narinfo['Deriver'])
