import base64
import json
import re
import time
from pathlib import Path
from typing import Dict

import requests


def make_cached_request(url: str, headers: Dict[str, str]) -> str:
    """Fetch a URL as text, caching the response on disk for 30 minutes."""
    # Encode the URL and headers into a filename to use as a cache key.
    # urlsafe_b64encode avoids "/" characters that would break the path.
    cache_filename = Path("/tmp/as_stats") / base64.urlsafe_b64encode(
        url.encode('utf-8') + str(headers).encode('utf-8')).decode('utf-8')
    print(f"Using cache file: {cache_filename}")

    # Re-fetch if the cache file doesn't exist or is more than 30 minutes old
    if not cache_filename.exists() or (cache_filename.stat().st_mtime + 1800) < time.time():
        print(f"Making request to {url}")
        
        # Make the request and write it to the cache file
        response = requests.get(url, headers=headers).text
        cache_filename.parent.mkdir(parents=True, exist_ok=True)
        cache_filename.write_text(response)

    # Return the contents of the cache file
    return cache_filename.read_text()


data = make_cached_request("https://bgp.tools/tags/perso.csv",
                           headers={"User-Agent": "ewpratten.com ASN statistics script"})
as_search = re.compile(r"AS(\d+)")
asns = [int(x) for x in as_search.findall(data)]
print(f"Found {len(asns)} personal ASNs in the DFZ")

# Download the full BGP table in JSONL format
bgp_table = make_cached_request("https://bgp.tools/table.jsonl",
                                headers={"User-Agent": "ewpratten.com ASN statistics script"})
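
# Group each announced prefix under its origin ASN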
routes = {}
for line in bgp_table.splitlines():
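    # Each line is a JSON object with (at least) "ASN" and "CIDR" keys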
    bgp_data = json.loads(line)
    routes.setdefault(bgp_data["ASN"], []).append(bgp_data["CIDR"])

# For each ASN, get some additional data
dataset = []
for asn in asns: