1

Working on feed script

This commit is contained in:
Evan Pratten 2024-05-09 10:56:21 -04:00
parent 78cf8ea6ee
commit ab5c9edd69
2 changed files with 137 additions and 0 deletions

View File

@ -119,6 +119,9 @@ ln -nsf $EWCONFIG_ROOT/configs/systemd/scripts ~/.config/systemd/scripts
# GitLab CLI
ln -sf $EWCONFIG_ROOT/configs/glab-cli/aliases.yml ~/.config/glab-cli/aliases.yml
# RSS stuff
ln -nsf $EWCONFIG_ROOT/configs/ewp-rss-feeds ~/.config/ewp-rss-feeds
# Minecraft global configs
ln -nsf $EWCONFIG_ROOT/configs/minecraft ~/.config/minecraft
if [ -d ~/.var/app/org.prismlauncher.PrismLauncher ]; then

134
scripts/ewp-send-article-digest Executable file
View File

@ -0,0 +1,134 @@
#! /usr/bin/env python3
import argparse
import sys
import logging
import smtplib
import json
import requests
import feedparser
import referencing
import ast
import re
from datetime import datetime
from pathlib import Path
from time import mktime
# Module-level logger; handlers and level are configured in main() via logging.basicConfig.
logger = logging.getLogger(__name__)
# Splits a filter spec from the subscriptions file into two groups:
#   1. the filter name: everything before the first "("
#   2. an optional parenthesised argument list (at least one character between
#      the parentheses), e.g. the `("news")` in `some-filter("news")`
FILTER_FN_RE = re.compile(r"^([^(]+)(?:(\([^\)]+\)))?")
def filter_lwn_hide_paid_articles(article: dict) -> bool:
    """Return True when the article should be kept, False when it is paid-only.

    An article is treated as paid when its entry title contains the literal
    marker "[$]".

    Args:
        article: Mapping with an "entry" mapping that may carry a "title".

    Returns:
        False if the title contains "[$]", True otherwise. Entries with a
        missing or empty title are kept, since they carry no paid marker.
    """
    # Not every feed entry is guaranteed to have a title; treat a missing or
    # None title as an empty string instead of raising.
    title = article["entry"].get("title") or ""
    return "[$]" not in title
def filter_discord_category(article: dict, category: str) -> bool:
    """Category filter stub: currently keeps every article.

    Neither ``article`` nor ``category`` is consulted yet, so the result is
    always True.

    Args:
        article: The article being filtered (unused for now).
        category: The category name supplied in the filter spec (unused for now).

    Returns:
        Always True.
    """
    # Category matching is disabled for the moment. The check it is meant to
    # perform once enabled:
    #     return category in article["metadata"]["categories"]
    keep_article = True
    return keep_article
def _fetch_feeds(subscriptions: list, limit) -> list:
    """Download and parse each subscribed feed, skipping any that fail.

    Args:
        subscriptions: Subscription mappings, each with "name" and "url" keys.
        limit: Stop once this many feeds were fetched; falsy means no limit.

    Returns:
        A list of ``{"metadata": subscription, "feed": parsed_feed}`` dicts.
    """
    feeds = []
    for subscription in subscriptions:
        # If we have a limit, stop fetching feeds
        if limit and len(feeds) >= limit:
            logger.info(f"Reached fetch limit of {limit}")
            break

        # Make a request to get the feed
        logger.info(f"Fetching feed for: {subscription['name']}")
        try:
            response = requests.get(subscription["url"], timeout=3)
        except requests.exceptions.Timeout:
            # Covers both connect and read timeouts
            logger.warning(f"Timed out fetching feed for: {subscription['name']}")
            continue
        except requests.exceptions.RequestException as err:
            # One unreachable feed (DNS failure, refused connection, ...) must
            # not abort the whole digest
            logger.warning(
                f"Failed to fetch feed for: {subscription['name']} ({err})"
            )
            continue

        # If the response fails, we can warn and skip
        if not response.ok:
            logger.warning(f"Failed to fetch feed for: {subscription['name']}")
            continue

        # Parse the feed
        feeds.append(
            {"metadata": subscription, "feed": feedparser.parse(response.text)}
        )
    return feeds


def _collect_articles(feeds: list, since: float) -> list:
    """Return the entries of all feeds that were published after ``since``.

    Args:
        feeds: Output of ``_fetch_feeds``.
        since: Cut-off as seconds since the Unix epoch.

    Returns:
        A list of ``{"metadata": subscription, "entry": entry}`` dicts.
    """
    # feedparser reports parsed dates as UTC struct_time values, so they must
    # be converted with timegm (UTC) rather than mktime (local time).
    from calendar import timegm

    articles = []
    for feed in feeds:
        for entry in feed["feed"]["entries"]:
            # .get(): entries without a date simply lack this key
            published = entry.get("published_parsed")
            if not published:
                logger.warning(
                    f"Entry has no published date: {entry.get('title', '<untitled>')}"
                )
                continue
            if timegm(published) > since:
                articles.append({"metadata": feed["metadata"], "entry": entry})
    return articles


def _resolve_filter(filter_info: str) -> tuple:
    """Turn a filter spec such as ``name("arg")`` into ``(function, args)``.

    The name is lower-cased, "-" and "::" become "_", and the result is looked
    up as ``filter_<name>`` in this module. Logs an error and exits with
    status 1 if the spec cannot be parsed or no such filter exists.
    """
    # Parse the filter into a name and possible literal arguments
    match = FILTER_FN_RE.match(filter_info)
    if not match:
        logger.error(f"Failed to parse filter: {filter_info}")
        sys.exit(1)
    filter_name, raw_args = match.groups()
    filter_name = (
        filter_name.strip().lower().replace("-", "_").replace("::", "_")
    )

    filter_args = ()
    if raw_args:
        try:
            filter_args = ast.literal_eval(raw_args)
        except (ValueError, SyntaxError, TypeError):
            logger.error(f"Failed to parse filter: {filter_info}")
            sys.exit(1)
        # `("x")` evaluates to the bare value "x", not a 1-tuple; wrap it so
        # it is passed as a single argument instead of being splatted
        # character by character.
        if not isinstance(filter_args, tuple):
            filter_args = (filter_args,)

    # Get the filter function (default of None so a missing filter reaches the
    # error message below instead of raising AttributeError)
    filter_fn = getattr(sys.modules[__name__], f"filter_{filter_name}", None)
    if not callable(filter_fn):
        logger.error(f"Failed to find filter: {filter_name}")
        sys.exit(1)
    return filter_fn, filter_args


def main() -> int:
    """Fetch subscribed feeds and select the articles for the digest.

    Reads the subscription list from
    ``~/.config/ewp-rss-feeds/subscriptions.json``, downloads each feed, keeps
    entries published after ``--since`` and applies each subscription's
    configured filters.

    Returns:
        0 on success. Exits with status 1 if a filter spec is invalid.
    """
    # Handle program arguments
    ap = argparse.ArgumentParser(
        prog="ewp-send-article-digest",
        description="Generates and emails a digest of new articles",
    )
    ap.add_argument("targets", help="Email addresses to send the digest to", nargs="+")
    ap.add_argument(
        "--since",
        help="Only fetch articles since this time",
    )
    ap.add_argument("--fetch-first-n", help="Fetch the first N articles", type=int)
    ap.add_argument(
        "-v", "--verbose", help="Enable verbose logging", action="store_true"
    )
    args = ap.parse_args()

    # Configure logging
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(levelname)s: %(message)s",
    )

    # Read the subscriptions list
    subscriptions_file = json.loads(
        Path("~/.config/ewp-rss-feeds/subscriptions.json").expanduser().read_text()
    )
    subscriptions = subscriptions_file["subscriptions"]
    logger.info(f"Found {len(subscriptions)} subscriptions")

    # Fetch each feed
    feeds = _fetch_feeds(subscriptions, args.fetch_first_n)
    logger.info(f"Fetched {len(feeds)}/{len(subscriptions)} feeds")

    # Figure out the actual time to filter articles by
    since = 0 if not args.since else datetime.fromisoformat(args.since).timestamp()

    # Filter articles by date
    logger.info(f"Filtering articles since: {since}")
    articles = _collect_articles(feeds, since)
    logger.info(f"Found {len(articles)} articles")

    # Handle special filters
    logger.info("Applying special filters")
    for article in articles:
        article["send"] = True
        # Resolve every filter up front so a bad spec is always reported,
        # even if an earlier filter already rejects the article.
        resolved = [
            _resolve_filter(filter_info)
            for filter_info in article["metadata"].get("filters", [])
        ]
        # An article is sent only if *every* filter accepts it
        article["send"] = all(
            filter_fn(article, *filter_args) for filter_fn, filter_args in resolved
        )

    # Prune
    articles = [article for article in articles if article["send"]]
    logger.info(f"Filtered to {len(articles)} articles")

    return 0
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())