diff --git a/install-linux.sh b/install-linux.sh index 25919ff..62d3b50 100644 --- a/install-linux.sh +++ b/install-linux.sh @@ -119,6 +119,9 @@ ln -nsf $EWCONFIG_ROOT/configs/systemd/scripts ~/.config/systemd/scripts # GitLab CLI ln -sf $EWCONFIG_ROOT/configs/glab-cli/aliases.yml ~/.config/glab-cli/aliases.yml +# RSS stuff +ln -nsf $EWCONFIG_ROOT/configs/ewp-rss-feeds ~/.config/ewp-rss-feeds + # Minecraft global configs ln -nsf $EWCONFIG_ROOT/configs/minecraft ~/.config/minecraft if [ -d ~/.var/app/org.prismlauncher.PrismLauncher ]; then diff --git a/scripts/ewp-send-article-digest b/scripts/ewp-send-article-digest new file mode 100755 index 0000000..9983964 --- /dev/null +++ b/scripts/ewp-send-article-digest @@ -0,0 +1,134 @@ +#! /usr/bin/env python3 +import argparse +import sys +import logging +import smtplib +import json +import requests +import feedparser +import referencing +import ast +import re +from datetime import datetime +from pathlib import Path +from time import mktime + +logger = logging.getLogger(__name__) + +FILTER_FN_RE = re.compile(r"^([^(]+)(?:(\([^\)]+\)))?") + +def filter_lwn_hide_paid_articles(article: dict) -> bool: + return "[$]" not in article["entry"]["title"] + +def filter_discord_category(article: dict, category: str) -> bool: + # return category in article["metadata"]["categories"] + return True + +def main() -> int: + # Handle program arguments + ap = argparse.ArgumentParser( + prog="ewp-send-article-digest", + description="Generates and emails a digest of new articles", + ) + ap.add_argument("targets", help="Email addresses to send the digest to", nargs="+") + ap.add_argument( + "--since", + help="Only fetch articles since this time", + ) + ap.add_argument("--fetch-first-n", help="Fetch the first N articles", type=int) + ap.add_argument( + "-v", "--verbose", help="Enable verbose logging", action="store_true" + ) + args = ap.parse_args() + + # Configure logging + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(levelname)s: %(message)s", + ) + + # Read the subscriptions list + subscriptions_file = json.loads( + Path("~/.config/ewp-rss-feeds/subscriptions.json").expanduser().read_text() + ) + subscriptions = subscriptions_file["subscriptions"] + logger.info(f"Found {len(subscriptions)} subscriptions") + + # Fetch each feed + feeds = [] + for subscription in subscriptions: + # If we have a limit, stop fetching feeds + if args.fetch_first_n and len(feeds) >= args.fetch_first_n: + logger.info(f"Reached fetch limit of {args.fetch_first_n}") + break + + # Make a request to get the feed + logger.info(f"Fetching feed for: {subscription['name']}") + try: + response = requests.get(subscription["url"], timeout=3) + except requests.exceptions.ConnectTimeout: + logger.warning(f"Timed out fetching feed for: {subscription['name']}") + continue + + # If the response fails, we can warn and skip + if not response.ok: + logger.warning(f"Failed to fetch feed for: {subscription['name']}") + continue + + # Parse the feed + feed = feedparser.parse(response.text) + feeds.append({"metadata": subscription, "feed": feed}) + logger.info(f"Fetched {len(feeds)}/{len(subscriptions)} feeds") + + # Figure out the actual time to filter articles by + since = 0 if not args.since else datetime.fromisoformat(args.since).timestamp() + + # Filter articles by date + logger.info(f"Filtering articles since: {since}") + articles = [] + for feed in feeds: + for entry in feed["feed"]["entries"]: + if entry["published_parsed"]: + if mktime(entry["published_parsed"]) > since: + articles.append({"metadata": feed["metadata"], "entry": entry}) + else: + logger.warning(f"Entry has no published date: {entry['title']}") + logger.info(f"Found {len(articles)} articles") + + # Handle special filters + logger.info("Applying special filters") + for article in articles: + article["send"] = True + filters = article["metadata"].get("filters", []) + for filter_info in filters: + # Parse the filter into a name and possible literal arguments + match = FILTER_FN_RE.match(filter_info) + if not match: + logger.error(f"Failed to parse filter: {filter_info}") + sys.exit(1) + filter_name, filter_args = match.groups() + filter_name = filter_name.lower().replace("-", "_").replace("::", "_") + if filter_args: + filter_args = ast.literal_eval(filter_args) + else: + filter_args = () + + # Get the filter function + filter_fn = getattr(sys.modules[__name__], f"filter_{filter_name}") + if not filter_fn: + logger.error(f"Failed to find filter: {filter_name}") + sys.exit(1) + + # Apply the filter + article["send"] = filter_fn(article, *filter_args) + # Prune + articles = [article for article in articles if article["send"]] + logger.info(f"Filtered to {len(articles)} articles") + + + + return 0 + + +if __name__ == "__main__": + sys.exit(main())