1
ewpratten.com/scripts/find_external_assets.py
Evan Pratten 62ba268ffe Revert "clean up existing unused files"
This reverts commit ee4b001300e24509dc6978771002a49225fd76ef.
2024-11-14 12:45:29 -05:00

37 lines
1.1 KiB
Python

import re
from pathlib import Path
REPO_ROOT = Path(__file__).parent.parent
# Find all MD and HTML files
md_files = list(REPO_ROOT.rglob("*.md"))
html_files = list(REPO_ROOT.rglob("*.html"))
# Ignore any files in the `public` directory
md_files = [f for f in md_files if "public" not in f.parts]
html_files = [f for f in html_files if "public" not in f.parts]
# Result storage
external_assets = set()
# Find Markdown images
for file in md_files:
body = file.read_text()
for match in re.finditer(r"!\[.*?\]\((.*?)\)", body):
link = match.group(1)
if link.startswith("http"):
external_assets.add((file, link))
# Search HTML
for file in html_files:
body = file.read_text()
for match in re.finditer(r'src="(.*?)"', body):
link = match.group(1)
if link.startswith("http"):
external_assets.add((file, link))
# Print all external assets
for file_path, link in external_assets:
# Strip the prefix off the file path
file_path = file_path.relative_to(REPO_ROOT)
print(f"{file_path}:\t{link}")