1
2022-09-18 15:10:31 -04:00

46 lines
1.6 KiB
Python

import json
from pathlib import Path
import time
from typing import Dict
import requests
import re
import base64
def make_cached_request(url: str, headers: Dict[str, str], ttl_seconds: int = 1800) -> str:
    """Fetch *url* with *headers*, caching the response body on disk.

    The response text is cached under /tmp/as_stats, keyed by a base64
    encoding of the url+headers, and reused until it is older than
    *ttl_seconds* (default 30 minutes).

    :param url: URL to GET.
    :param headers: HTTP headers to send (also part of the cache key).
    :param ttl_seconds: Cache lifetime in seconds before re-fetching.
    :return: The (possibly cached) response body as text.
    """
    # Encode the url and headers into a filename to use as a cache.
    # urlsafe_b64encode is required here: the standard base64 alphabet
    # contains '/', which would be interpreted as a path separator and
    # break the cache path for many url/header combinations.
    cache_filename = Path("/tmp/as_stats") / base64.urlsafe_b64encode(
        url.encode('utf-8') + str(headers).encode('utf-8')).decode('utf-8')
    print(f"Using cache file: {cache_filename}")
    # Re-fetch if the cache file is missing or older than the TTL
    if not cache_filename.exists() or (cache_filename.stat().st_mtime + ttl_seconds) < time.time():
        print(f"Making request to {url}")
        # Make the request and write it to the cache file.
        # An explicit timeout prevents the script from hanging forever
        # if the server stops responding.
        response = requests.get(url, headers=headers, timeout=30).text
        cache_filename.parent.mkdir(parents=True, exist_ok=True)
        cache_filename.write_text(response)
    # Return the contents of the cache file
    return cache_filename.read_text()
# Pull bgp.tools' list of personal-use ASNs (via the on-disk cache) and
# extract every AS number mentioned in the CSV.
data = make_cached_request("https://bgp.tools/tags/perso.csv",
                           headers={"User-Agent": "ewpratten.com ASN statistics script"})
as_search = re.compile(r"AS(\d+)")
asns = list(map(int, as_search.findall(data)))
print(f"Found {len(asns)} personal ASNs in the DFZ")
# Download the full BGP table in JSONL format (one route entry per line)
bgp_table = make_cached_request("https://bgp.tools/table.jsonl", headers={"User-Agent": "ewpratten.com ASN statistics script"})
# Group announced prefixes by their origin ASN
routes = {}
for raw_line in bgp_table.splitlines():
    entry = json.loads(raw_line)
    origin, prefix = entry["ASN"], entry["CIDR"]
    routes.setdefault(origin, []).append(prefix)
# For each ASN, get some additional data
dataset = []  # accumulator for per-ASN records, filled by the loop below
for asn in asns: