1
ewconfig/scripts/rbn-download-range
2024-11-21 13:12:52 -05:00

75 lines
2.1 KiB
Python
Executable File

#! /usr/bin/env python3
import argparse
import sys
import io
import logging
import requests
import zipfile
from datetime import datetime, timedelta
from pathlib import Path
# Module-level logger named after this module; its level and output format
# are set up by the logging.basicConfig() call inside main().
logger = logging.getLogger(__name__)
def main() -> int:
    """Download and extract a date range of Reverse Beacon Network archives.

    Command-line arguments (parsed from ``sys.argv``):
        output_dir: Directory the archives are extracted into; created if
            it does not exist.
        start_date: First day to fetch, ``YYYY-MM-DD`` (inclusive).
        end_date: Last day to fetch, ``YYYY-MM-DD`` (inclusive).
        -v / --verbose: Log at DEBUG level instead of INFO.

    Days for which ``<output_dir>/<YYYYMMDD>.csv`` already exists are
    skipped. A day whose download fails, or whose payload is not a valid
    zip file, is logged as an error and skipped so that the remaining days
    are still processed.

    Returns:
        Process exit status; always 0 once argument parsing has succeeded.
    """
    # Seconds to wait on the server (connect, and between received bytes)
    # before abandoning a request; without it a stalled connection would
    # block the script forever.
    request_timeout = 60

    def parse_date(text: str) -> datetime:
        """Parse a YYYY-MM-DD argument, reporting bad input via argparse."""
        try:
            return datetime.strptime(text, "%Y-%m-%d")
        except ValueError:
            raise argparse.ArgumentTypeError(
                f"invalid date {text!r}, expected YYYY-MM-DD"
            ) from None

    # Handle program arguments
    ap = argparse.ArgumentParser(
        prog="rbn-download-range",
        description="Downloads a range of Reverse Beacon Network archives",
    )
    ap.add_argument("output_dir", help="Output directory", type=Path)
    ap.add_argument(
        "start_date",
        help="Start date (inclusive)",
        type=parse_date,
    )
    ap.add_argument(
        "end_date",
        help="End date (inclusive)",
        type=parse_date,
    )
    ap.add_argument(
        "-v", "--verbose", help="Enable verbose logging", action="store_true"
    )
    args = ap.parse_args()

    # Configure logging
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(levelname)s: %(message)s",
    )

    # A reversed range yields zero iterations below; say so instead of
    # exiting silently.
    if args.end_date < args.start_date:
        logger.warning(
            "End date %s is before start date %s; nothing to download",
            args.end_date,
            args.start_date,
        )

    # Ensure the output directory exists
    args.output_dir.mkdir(parents=True, exist_ok=True)

    # Loop through the dates (both endpoints inclusive, hence the +1)
    day_count = (args.end_date - args.start_date).days + 1
    for offset in range(day_count):
        day = args.start_date + timedelta(days=offset)

        # If a file with this date already exists, skip it
        if (args.output_dir / f"{day:%Y%m%d}.csv").exists():
            logger.info("Archive for %s already exists, skipping", day)
            continue

        logger.info("Downloading archive for: %s", day)

        # Attempt to fetch the archive
        try:
            response = requests.get(
                f"https://data.reversebeacon.net/rbn_history/{day:%Y%m%d}.zip",
                timeout=request_timeout,
            )
            response.raise_for_status()
        except requests.RequestException as e:
            logger.error("Failed to download archive for %s: %s", day, e)
            continue

        # Extract the archive; a truncated or non-zip body must not abort
        # the rest of the range.
        try:
            with zipfile.ZipFile(io.BytesIO(response.content)) as z:
                z.extractall(args.output_dir)
        except zipfile.BadZipFile as e:
            logger.error("Failed to extract archive for %s: %s", day, e)
            continue

    return 0
# Script entry point: run main() and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())