"""List every file stored in an ArDrive drive together with its on-chain location.

Starting from the drive entity identified by ``DRIVE_ID``, the script walks the
folder tree through Arweave GraphQL tag queries and, for every file entity,
prints the bundle transaction, the byte offset and the size of the file's data.
"""
import concurrent.futures
import json
import sys

import tqdm

import ar

peer = ar.Peer('https://arweave.net')

# Selection set shared by every page of a tag query.
_TRANSACTION_SELECTION = (
    '{edges{cursor node{id tags{name value}bundledIn{id}}}pageInfo{hasNextPage}}'
)


def _graphql_string(text):
    """Return *text* as a quoted, escaped GraphQL string literal.

    JSON string escaping (quotes, backslashes, control characters) is also
    valid inside a GraphQL string, so a value containing ``"`` can no longer
    break out of the literal and alter the query.
    """
    return json.dumps(text, ensure_ascii=False)


def tag_query(tags):
    """Yield every transaction node whose tags match all of *tags*.

    Args:
        tags: Mapping of tag name to the single value that tag must have.

    Yields:
        The ``node`` dict of each matching edge (``id``, ``tags`` and
        ``bundledIn``), following the result cursor page after page for as
        long as the gateway reports ``hasNextPage``.
    """
    tag_filter = ','.join(
        '{name:' + _graphql_string(name) + ',values:[' + _graphql_string(value) + ']}'
        for name, value in tags.items()
    )
    cursor = None
    while True:
        after = '' if cursor is None else ',after:' + _graphql_string(cursor)
        query = (
            'query{transactions(tags:[' + tag_filter + ']' + after + ')'
            + _TRANSACTION_SELECTION + '}'
        )
        page = peer.graphql(query)['data']['transactions']
        edges = page['edges']
        yield from (edge['node'] for edge in edges)
        # An empty page offers no cursor to continue from, so stop there even
        # if the gateway still claims that more pages exist.
        if not edges or not page['pageInfo']['hasNextPage']:
            return
        cursor = edges[-1]['cursor']


def process_file(file_tx):
    """Locate the data of one file entity inside its bundle transaction.

    Args:
        file_tx: Transaction node of a file entity, as yielded by
            :func:`tag_query`; only its ``id`` is read.

    Returns:
        A dict describing the file: its name, the block and bundle transaction
        holding it, the data item id, the offset and size of the data within
        the bundle transaction, and a raw download URL.

    Raises:
        TypeError: If the gateway reports no enclosing bundle for the data
            item (``bundledIn`` is null).
        KeyError: If the file metadata lacks ``name`` or ``dataTxId``.
    """
    file_data = json.loads(peer.data(file_tx['id']))
    name = file_data['name']
    ditemid = file_data['dataTxId']

    query = (
        'query { transaction(id: ' + _graphql_string(ditemid)
        + ') { bundledIn { id } } }'
    )
    txid = peer.graphql(query)['data']['transaction']['bundledIn']['id']
    tx_status = peer.tx_status(txid)

    # The bundle header is used to find where the data item sits in the bundle.
    # NOTE(review): the streams returned by gateway_stream are never closed
    # here - confirm whether the ar library expects an explicit close().
    bundle_stream = peer.gateway_stream(txid)
    bundle = ar.ANS104BundleHeader.fromstream(bundle_stream)
    header_start, data_end = bundle.get_range(ditemid)

    # Parsing the data item header advances the stream; the position reached
    # afterwards is the header length, which is all that is needed from it.
    ditem_stream = peer.gateway_stream(txid, range=[header_start, data_end])
    ar.ANS104DataItemHeader.fromstream(ditem_stream, ditemid)
    data_start = header_start + ditem_stream.tell()

    return dict(
        Filename=name,
        ArweaveBlockHeight=tx_status['block_height'],
        ArweaveBlockHash=tx_status['block_indep_hash'],
        BundleTXID=txid,
        DataitemID=ditemid,
        OffsetInTX=data_start,
        Size=data_end - data_start,
        URL='https://arweave.net/raw/' + ditemid,
    )


DRIVE_ID = 'f9573109-76f0-4825-a62a-2e0d56c0e9a8'

# Stack of folder ids still to visit, seeded with the drive's root folder(s).
folders = []
for drive_tx in tag_query({'Drive-Id': DRIVE_ID, 'Entity-Type': 'drive'}):
    drive_data = json.loads(peer.data(drive_tx['id']))
    folders.append(drive_data['rootFolderId'])

while folders:
    folder = folders.pop()
    files = []
    file_txs = list(tag_query({'Parent-Folder-Id': folder, 'Entity-Type': 'file'}))

    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        with tqdm.tqdm(total=len(file_txs)) as pbar:
            pending = {
                executor.submit(process_file, file_tx): file_tx
                for file_tx in file_txs
            }
            for future in concurrent.futures.as_completed(pending):
                try:
                    files.append(future.result())
                except Exception as exc:
                    # A failing file is still skipped, as before, but the
                    # failure is now reported instead of vanishing with the
                    # unconsumed result of executor.map().
                    tqdm.tqdm.write(
                        'skipping file entity ' + str(pending[future]['id'])
                        + ': ' + repr(exc),
                        file=sys.stderr,
                    )
                else:
                    pbar.update()

    files.sort(key=lambda file: file['Filename'])
    for file in files:
        for key, val in file.items():
            print(key + ': ' + str(val))
        print()

    # Queue the sub-folders so the whole tree below the root gets visited.
    for folder_tx in tag_query({'Parent-Folder-Id': folder, 'Entity-Type': 'folder'}):
        tags = {tag['name']: tag['value'] for tag in folder_tx['tags']}
        folders.append(tags['Folder-Id'])