import diskcache
import logging, requests, time

logger = logging.getLogger(__name__)
# Single shared HTTP session used for every request issued by this module.
session = requests.Session()
# Earliest wall-clock time (time.time() seconds) at which get_noraise will
# send its next '/build/' request.
schedule = time.time()
# Spacing in seconds that get_noraise adds between '/build/' requests;
# starts at zero and is increased whenever such a request gets HTTP 429.
timeperrequest = 0

# On-disk cache stored in the 'cache' directory with eviction turned off;
# get_noraise memoizes its return values here.
cache = diskcache.Cache('cache', eviction_policy='none')
@cache.memoize()
def get_noraise(url):
    """Fetch ``url`` through the shared session, tolerating HTTP 404.

    Return values are memoized in the module-level ``cache``.  URLs that
    contain ``'/build/'`` are paced: the call sleeps until the module-level
    ``schedule`` time and then pushes ``schedule`` forward by
    ``timeperrequest`` seconds.

    Args:
        url: The URL to request with GET.

    Returns:
        The response object.  Status 200 and 404 are returned as-is; any
        other status is returned only if ``raise_for_status()`` accepts it.

    Raises:
        Whatever ``response.raise_for_status()`` raises for a status other
        than 200 or 404.  When that status is 429 on a ``'/build/'`` URL,
        ``timeperrequest`` and ``schedule`` are increased before raising.
    """
    global schedule, timeperrequest
    logger.info(url)

    paced = '/build/' in url
    if paced:
        slot = time.time()
        if schedule > slot:
            # Too early: wait for the reserved slot and treat it as "now".
            time.sleep(schedule - slot)
            slot = schedule
        schedule = slot + timeperrequest

    response = session.get(url)
    status = response.status_code
    if status == 200 or status == 404:
        return response

    if paced and status == 429:
        # Rate limited: widen the spacing by at least 10 ms, or up to the
        # time this request took beyond the current spacing.
        elapsed = time.time() - slot
        timeperrequest = max(timeperrequest + 0.01, elapsed - timeperrequest)
        schedule = slot + timeperrequest

    response.raise_for_status()
    return response

def get(url):
    """Fetch ``url`` via ``get_noraise`` and raise on any HTTP error status.

    Unlike ``get_noraise``, a 404 response is not returned to the caller:
    ``raise_for_status()`` is always applied before returning.

    Args:
        url: The URL to request.

    Returns:
        The response object once ``raise_for_status()`` has accepted it.
    """
    reply = get_noraise(url)
    reply.raise_for_status()
    return reply

def parse_fixed_output_path(derivation):
    """Return the first output's path from serialized derivation bytes.

    The text after the first ``Derive(`` marker, cut at the first ``)``
    that is followed by a NUL or 0x01 byte, is parsed as a tuple of Python
    literals.  Its first member is treated as the list of outputs, and each
    output as a sequence whose index 1 is the path and whose index 3 is
    presumably the fixed-output hash (TODO confirm against the derivation
    format).

    Args:
        derivation: Decompressed bytes containing a ``Derive(...)`` record.

    Returns:
        The path of the first output when its index-3 field is truthy,
        otherwise ``None``.

    Raises:
        IndexError: If ``derivation`` contains no ``Derive(`` marker.
        ValueError, SyntaxError: If the record is not made of plain literals.
    """
    import ast  # local import: only this helper needs it

    body = derivation.split(b'Derive(')[1]
    body, *_ = body.split(b')\0')
    body, *_ = body.split(b')\x01')
    # SECURITY: these bytes were downloaded from the network, so they are
    # parsed with ast.literal_eval instead of eval; literal_eval accepts the
    # same nested lists/tuples/strings but never executes code.
    outputs, _inputs, _builds, *_ = ast.literal_eval(body.decode())
    first = outputs[0]
    return first[1] if first[3] else None


if __name__ == '__main__':
    import lzma
    import tqdm

    # Walk every cached entry and print, once each, the output path of every
    # cached derivation whose first output has a truthy index-3 field.
    seen = set()
    for key in tqdm.tqdm(cache):
        # Keys are indexed like tuples below; element 1 is the requested URL.
        url = key[1]
        # Test the URL itself: "'/log/' in key" would only compare '/log/'
        # against whole tuple elements and could never match a URL.
        if '/log/' not in url and url.startswith('https://ci.guix.gnu.org/build'):
            continue
        if url.endswith('narinfo?'):
            continue
        if url.endswith('.drv?'):
            path = parse_fixed_output_path(lzma.decompress(cache[key].content))
            if path is not None and path not in seen:
                print(path)
                seen.add(path)
            continue
        # Any other cached entry is left untouched.
