Converting Blog from Frog to MkDocs
I wrote in 2020 about converting my blog from WordPress to Racket's Frog. After five years, I decided to switch to a different static site generator. This was essentially the result of nerd sniping myself :) I'm getting ready for this year's Advent of Code, so I'll probably be making a blog post or two in advance of the festivities. So, naturally, I might as well completely change my blogging infrastructure first! ;)
The Frog maintainer has written:
TL;DR: This project is in low-maintenance mode. ... Eventually I felt even that was too complicated, and my own blog should simply be a Makefile driving a few pieces of code inherited from Frog. In other words, I no longer use Frog for my own blog.
Given the "low maintenance mode", lack of features, and the fact that I've switched from Racket to Python as my primary programming language, I felt that Material for MkDocs was a reasonable choice.
The main reason I chose Frog originally was simply that it was implemented in Racket. This time around, I was open to various implementation languages, and considered options such as Hugo, but I've wanted to get familiar with MkDocs for creating documentation, and I'm already familiar with Jinja2 templating, so I tried it out, and it was super easy to set up.
I like the look of the basic "material" theme, and the addition of a search facility is very nice. Although I haven't tried Hugo, I expect it's significantly faster than MkDocs; however, I make new posts infrequently enough that speed is really a non-issue. Tweaking the default configuration a little was very easy, so I'm satisfied with the switch.
Now, onward to Advent of Code preparation!
Automated Conversion
I used the following Python script to make a few modifications to each post to make them compatible with MkDocs:
import os
import re
from collections.abc import Iterator
from pathlib import Path

from attrs import frozen
@frozen(kw_only=True, slots=True, order=True, eq=True, repr=True)
class BlogPost:
    """Immutable record of one parsed post: its metadata plus body lines.

    Fields must be supplied by keyword (``kw_only=True``). Field order is
    significant because ``order=True`` generates comparison methods.
    """

    # Title text as returned by parse_title() (may already be double-quoted).
    title: str
    # Timestamp text captured from the post's "Date:" header line.
    date_str: str
    # Capitalised category names derived from the "Tags:" header line.
    categories: list[str]
    # Body lines of the post, i.e. everything after the metadata header.
    lines: list[str]
def parse_blog_post(filepath: Path, content: str) -> BlogPost:
    """Split a post's raw text into parsed metadata and body lines.

    The first three lines must be the ``Title:``, ``Date:`` and ``Tags:``
    header lines, in that order. The fourth line is skipped (presumably the
    blank separator after the header -- TODO confirm against the source
    posts) and everything from the fifth line on becomes the body.

    Parameters:
        filepath: Path of the post; used only in error messages.
        content: Full text of the post.

    Returns:
        A BlogPost holding the parsed title, date, categories and body lines.

    Raises:
        RuntimeError: If the post has fewer than three lines, or if any of
            the three header lines fails to parse.
    """
    lines = content.splitlines()
    # Fail with the same exception type and file context as the per-field
    # parsers instead of leaking a bare IndexError for short/empty files.
    if len(lines) < 3:
        raise RuntimeError(
            f'Failed to parse header for {filepath}: '
            f'expected at least 3 metadata lines, found {len(lines)}'
        )
    title_line, date_line, tags_line = lines[:3]
    return BlogPost(
        title=parse_title(filepath, title_line),
        date_str=parse_date(filepath, date_line),
        categories=parse_categories(filepath, tags_line),
        lines=lines[4:],
    )
def parse_categories(filepath: Path, line: str) -> list[str]:
    """Parse a ``Tags:`` header line into a list of capitalised categories.

    Parameters:
        filepath: Path of the post; used only in the error message.
        line: The header line, e.g. ``Tags: racket, advent of code``.
            Surrounding whitespace is ignored.

    Returns:
        One capitalised name per comma-separated tag, in their original order.

    Raises:
        RuntimeError: If the line is not a ``Tags:`` line made up solely of
            lowercase letters, commas, spaces and ``+``.
    """
    m = re.search(r'^Tags: ([a-z, +]+)$', line.strip())
    if not m:
        raise RuntimeError(f'Failed to parse categories for {filepath}')
    # Split on the comma alone and trim each piece, so "a,b" and "a,  b"
    # parse the same as "a, b"; drop empty pieces left by stray commas.
    tags = (tag.strip() for tag in m.group(1).split(','))
    return [tag.capitalize() for tag in tags if tag]
def parse_date(filepath: Path, line: str) -> str:
    """Return the timestamp text from a post's ``Date:`` header line.

    Parameters:
        filepath: Path of the post; used only in the error message.
        line: The header line; surrounding whitespace is ignored.

    Returns:
        The text after ``Date: ``, unchanged.

    Raises:
        RuntimeError: If the line is not ``Date: `` followed only by digits,
            ``-``, ``:`` and ``T``.
    """
    found = re.search(r'^Date: ([-0-9T:]+)$', line.strip())
    if found is None:
        raise RuntimeError(f'Failed to parse date for {filepath}')
    (timestamp,) = found.groups()
    return timestamp
def parse_title(filepath: Path, line: str) -> str:
    """Return the normalised title from a post's ``Title:`` header line.

    After extraction the title is rewritten: `` - `` becomes ``-``, every
    ``.`` becomes ``-``, and `` &`` becomes ``,``. A title that still
    contains a ``:`` is wrapped in double quotes (presumably so it stays a
    valid scalar in the generated front matter -- TODO confirm).

    Parameters:
        filepath: Path of the post; used only in the error message.
        line: The header line; surrounding whitespace is ignored.

    Raises:
        RuntimeError: If the line is not a ``Title:`` line built from the
            permitted character set.
    """
    header = line.strip()
    found = re.search(r"^Title: ([-A-Za-z0-9',.: &?]+)$", header)
    if found is None:
        raise RuntimeError(f'Failed to parse title for {filepath}')

    cleaned = found.group(1)
    # Applied in this exact order; later substitutions see earlier results.
    for old, new in ((' - ', '-'), ('.', '-'), (' &', ',')):
        cleaned = cleaned.replace(old, new)

    return f'"{cleaned}"' if ':' in cleaned else cleaned
def transform_content(content: str, filepath: Path) -> str:
    """Rewrite one post as front matter followed by its link-adjusted body.

    The post is parsed with parse_blog_post(); a ``---``-delimited header
    carrying the title, creation date and categories is emitted, then a
    blank line, then every body line passed through transform_url().

    Parameters:
        content: Original file content as string
        filepath: Path object of the current file (forwarded to the parsing
            and URL helpers)

    Returns:
        Transformed content as string, lines joined with ``\\n`` and no
        trailing newline.
    """
    post = parse_blog_post(filepath, content)
    front_matter = [
        '---',
        f'title: {post.title}',
        'date:',
        f' created: {post.date_str}',
        'categories:',
        *(f' - {name}' for name in post.categories),
        '---',
        '',
    ]
    body = (transform_url(filepath, text) for text in post.lines)
    return '\n'.join([*front_matter, *body])
def transform_url(filepath: Path, line: str) -> str:
    """Rewrite old-style post links on one line to the new relative layout.

    A link target of the form ``(/YYYY/MM/DD/name.html)`` becomes
    ``(../YYYY/YYYY-MM-DD-name.md)``. Every such link on the line is
    rewritten in a single pass; lines without one are returned unchanged.

    Parameters:
        filepath: Path of the post being converted. Kept for interface
            compatibility; it is no longer needed because lines with
            several links are now handled instead of rejected.
        line: One line of the post body.

    Returns:
        The line with all matching link targets rewritten.
    """
    pat = r'\((/(\d\d\d\d)/(\d\d)/(\d\d)/([^/]+))\)'

    def rewrite(m: re.Match[str]) -> str:
        # Rebuild the parenthesised target from the captured date parts.
        year, month, day, name = m.group(2, 3, 4, 5)
        # Plain replace (not suffix-only) so "name.html#anchor" keeps its anchor.
        target = name.replace('.html', '.md')
        return f'(../{year}/{year}-{month}-{day}-{target})'

    return re.sub(pat, rewrite, line)
def valid_paths(input_root: str):
"""
Used during development/testing to limit the conversion to a subset of posts.
"""
for file_path in input_root.rglob("*.md"):
relative_path: Path = file_path.relative_to(input_root)
yield (file_path, relative_path)
def main(input_root: str | Path, output_root: str | Path):
"""
Traverse input_root, transform .md files, and write to identical structure in output_root.
"""
input_root = Path(input_root).resolve()
output_root = Path(output_root).resolve()
if not input_root.exists():
raise FileNotFoundError(f"Input directory not found: {input_root}")
if not output_root.exists():
raise FileNotFoundError(f"Ouput directory not found: {output_root}")
# Walk through all directories and files
for file_path, relative_path in valid_paths(input_root):
output_path = output_root / relative_path
output_path.parent.mkdir(parents=True, exist_ok=True)
content = file_path.read_text(encoding="utf-8")
new_content = transform_content(content, file_path)
output_path.write_text(new_content, encoding="utf-8")
print(f"Transformed: {relative_path}")
if __name__ == "__main__":
    # Safety stop: running this file as a script always raises, so the
    # conversion cannot be started by accident (presumably because the
    # one-off migration has already been done -- confirm before removing).
    raise RuntimeError('Do not convert - we are in production!')
    # Original invocation, left disabled for reference. To run a conversion,
    # remove the raise above and fill in the two directories.
    # INPUT_DIR = Path("...")
    # OUTPUT_DIR = Path("...")
    # main(INPUT_DIR, OUTPUT_DIR)
    # print(f"\nAll done! Transformed files saved to: {OUTPUT_DIR.resolve()}")