46 lines
1.6 KiB
Python
46 lines
1.6 KiB
Python
import json
|
|
from pathlib import Path
|
|
import time
|
|
from typing import Dict
|
|
import requests
|
|
import re
|
|
import base64
|
|
|
|
|
|
def make_cached_request(url: str, headers: Dict[str, str]) -> str:
    """Fetch *url* with *headers*, caching the response body on disk.

    The response text is cached under ``/tmp/as_stats`` and reused for up
    to 30 minutes, keyed on the URL plus the header dict's repr.

    Args:
        url: The URL to fetch.
        headers: HTTP headers to send with the request (also part of the
            cache key, so different headers get different cache entries).

    Returns:
        The response body as text, possibly served from the cache.

    Raises:
        requests.HTTPError: if a fresh request returns a 4xx/5xx status.
    """
    # Encode the url and headers into a filename to use as a cache key.
    # urlsafe_b64encode is required here: the standard base64 alphabet
    # contains '/', which Path would treat as a directory separator and
    # silently break the cache path.
    cache_filename = Path("/tmp/as_stats") / base64.urlsafe_b64encode(
        url.encode('utf-8') + str(headers).encode('utf-8')).decode('utf-8')
    print(f"Using cache file: {cache_filename}")

    # Refresh when the cache file doesn't exist or is older than 30 minutes.
    if not cache_filename.exists() or (cache_filename.stat().st_mtime + 1800) < time.time():
        print(f"Making request to {url}")

        # Make the request and write it to the cache file. The timeout
        # keeps a stalled server from hanging the script forever, and
        # raise_for_status() prevents caching an HTML error page as data.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        cache_filename.parent.mkdir(parents=True, exist_ok=True)
        cache_filename.write_text(response.text)

    # Return the contents of the cache file
    return cache_filename.read_text()
|
|
|
|
|
|
# Pull the list of personal-use tagged networks from bgp.tools.
data = make_cached_request(
    "https://bgp.tools/tags/perso.csv",
    headers={"User-Agent": "ewpratten.com ASN statistics script"},
)

# Extract every "AS<number>" token from the CSV as a plain integer.
as_search = re.compile(r"AS(\d+)")
asns = [int(match) for match in as_search.findall(data)]
print(f"Found {len(asns)} personal ASNs in the DFZ")
|
|
|
|
# Download the full BGP table in JSONL format
bgp_table = make_cached_request(
    "https://bgp.tools/table.jsonl",
    headers={"User-Agent": "ewpratten.com ASN statistics script"},
)

# Group announced prefixes by the ASN that originates them.
routes = {}
for entry in map(json.loads, bgp_table.splitlines()):
    routes.setdefault(entry["ASN"], []).append(entry["CIDR"])
|
|
|
|
# For each ASN, get some additional data
|
|
dataset = []
|
|
for asn in asns:
|
|
|