# File-viewer header captured during extraction (not part of the script): Python, 37 lines, 1.1 KiB.
"""Report externally hosted assets referenced from the repository's pages.

Scans every Markdown and HTML file under the repository root (skipping
anything inside a ``public`` directory) and prints one line per
``(file, link)`` pair whose link is an absolute ``http://`` or ``https://``
URL.  Run it as a script; importing the module performs no scanning.
"""
import re
from pathlib import Path

# This script is expected to live one directory below the repository root.
# ``resolve()`` keeps the result correct even when ``__file__`` is relative.
REPO_ROOT = Path(__file__).resolve().parent.parent

# Name of the directory whose contents are ignored.
IGNORED_DIR = "public"

# Markdown image syntax ``![alt](target)``; group 1 is the target.
MD_IMAGE_RE = re.compile(r"!\[.*?\]\((.*?)\)")
# HTML ``src="target"`` attribute; group 1 is the target.
HTML_SRC_RE = re.compile(r'src="(.*?)"')

# (glob pattern, link-extracting regex) for each file type that is scanned.
SCANNERS = (
    ("*.md", MD_IMAGE_RE),
    ("*.html", HTML_SRC_RE),
)

# A link counts as external only when it starts with a real URL scheme; a
# bare "http" prefix would also match local files such as "http-status.png".
EXTERNAL_PREFIXES = ("http://", "https://")


def find_external_assets(root: Path = REPO_ROOT) -> set:
    """Collect external asset references found under *root*.

    Args:
        root: Directory to scan recursively.

    Returns:
        A set of ``(file, link)`` tuples, where ``file`` is the path of the
        Markdown/HTML file (as yielded by ``root.rglob``) and ``link`` is the
        external URL it references.

    Raises:
        UnicodeDecodeError: If a scanned file is not valid UTF-8.
    """
    found = set()
    for glob_pattern, link_re in SCANNERS:
        for file in root.rglob(glob_pattern):
            # Only the part of the path inside the repo decides whether the
            # file is ignored; a parent directory of the checkout that
            # happens to be called "public" must not exclude everything.
            if IGNORED_DIR in file.relative_to(root).parts:
                continue
            # Skip directories or dangling symlinks that match the glob.
            if not file.is_file():
                continue
            # Explicit encoding: do not depend on the platform's locale.
            body = file.read_text(encoding="utf-8")
            for match in link_re.finditer(body):
                link = match.group(1)
                if link.startswith(EXTERNAL_PREFIXES):
                    found.add((file, link))
    return found


def main() -> None:
    """Print every external asset, one per line, in a stable sorted order."""
    for file_path, link in sorted(find_external_assets(REPO_ROOT)):
        # Strip the prefix off the file path
        file_path = file_path.relative_to(REPO_ROOT)
        print(f"{file_path}:\t{link}")


if __name__ == "__main__":
    main()