Converting Blog from Frog to MkDocs

I wrote in 2020 about converting my blog from WordPress to Racket's Frog. After five years, I decided to switch to a different static site generator. This was essentially the result of nerd sniping myself :) I'm getting ready for this year's Advent of Code, so I'll probably be making a blog post or two in advance of the festivities. So, naturally, I might as well completely change my blogging infrastructure first! ;)

The Frog maintainer has written:

TL;DR: This project is in low-maintenance mode. ... Eventually I felt even that was too complicated, and my own blog should simply be a Makefile driving a few pieces of code inherited from Frog. In other words, I no longer use Frog for my own blog.

Given the "low-maintenance mode", the lack of features, and the fact that I've switched from Racket to Python as my primary programming language, I felt that Material for MkDocs was a reasonable choice.

The main reason I chose Frog originally was simply that it was implemented in Racket. This time around, I was open to various implementation languages, and considered options such as Hugo, but I've wanted to get familiar with MkDocs for creating documentation, and I'm already familiar with Jinja2 templating, so I tried it out, and it was super easy to set up.

I like the look of the basic "material" theme, and the addition of a search facility is very nice. Although I haven't tried Hugo, I expect it's significantly faster than MkDocs; however, I make new posts infrequently enough that speed is really a non-issue. Tweaking the default configuration a little was very easy, so I'm satisfied with the switch.

Now, onward to Advent of Code preparation!

Automated Conversion

I used the following Python script to make a few modifications to each post to make them compatible with MkDocs:

from attrs import frozen
from pathlib import Path
import os
import re


@frozen(order=True, eq=True, repr=True, slots=True, kw_only=True)
class BlogPost:
    """Parsed form of one source post: header metadata plus body lines."""

    # Title as returned by parse_title (already wrapped in double quotes when
    # it contains a colon).
    title: str
    # Date text exactly as captured by parse_date; kept as a string and never
    # converted to a datetime object.
    date_str: str
    # Category names built by parse_categories from the post's "Tags:" line.
    categories: list[str]
    # Body of the post, one entry per line; parse_blog_post fills this with
    # everything from the fifth line of the file onward.
    lines: list[str]


def parse_blog_post(filepath: Path, content: str) -> BlogPost:
    """Split the text of a source post into header metadata and body lines.

    The first three lines must be the ``Title:``, ``Date:`` and ``Tags:``
    headers, in that order. The fourth line is discarded
    (NOTE(review): presumably the blank separator after the header - confirm)
    and every following line becomes the body.

    Parameters:
        filepath: Path of the post; used only to build error messages.
        content: Full text of the post.

    Returns:
        A BlogPost holding the parsed title, date, categories and body lines.

    Raises:
        RuntimeError: If the post has fewer than three lines, or if any of
            the three header lines fails to parse.
    """
    lines = content.splitlines()

    # Fail with the file name instead of a bare IndexError when the header
    # is incomplete, matching how the individual header parsers report errors.
    if len(lines) < 3:
        raise RuntimeError(f'Failed to parse header for {filepath}')

    title = parse_title(filepath, lines[0])
    date_str = parse_date(filepath, lines[1])
    categories = parse_categories(filepath, lines[2])

    # lines[3] is intentionally skipped; the body starts at index 4.
    return BlogPost(title=title, date_str=date_str, categories=categories, lines=lines[4:])


def parse_categories(filepath: Path, line: str) -> list[str]:
    """Turn a post's ``Tags:`` header line into a list of category names.

    Parameters:
        filepath: Path of the post; used only to build the error message.
        line: The header line, e.g. ``Tags: racket, c++``. Surrounding
            whitespace is ignored.

    Returns:
        The comma-separated tags, each stripped of surrounding whitespace and
        passed through ``str.capitalize``. Empty entries (for example from a
        trailing comma) are dropped.

    Raises:
        RuntimeError: If the line is not a ``Tags:`` header made up of
            lowercase letters, commas, spaces and ``+``.
    """
    m = re.search(r'^Tags: ([a-z, +]+)$', line.strip())

    if not m:
        raise RuntimeError(f'Failed to parse categories for {filepath}')

    # Split on the bare comma and strip each piece so that "a,b" and "a,  b"
    # are handled the same way as the canonical "a, b".
    tags = (piece.strip() for piece in m.group(1).split(','))
    return [tag.capitalize() for tag in tags if tag]


def parse_date(filepath: Path, line: str) -> str:
    """Extract the date text from a post's ``Date:`` header line.

    Parameters:
        filepath: Path of the post; used only to build the error message.
        line: The header line; surrounding whitespace is ignored.

    Returns:
        The text after ``Date: ``, unchanged (digits, ``-``, ``T`` and ``:``).

    Raises:
        RuntimeError: If the line is not a well-formed ``Date:`` header.
    """
    if found := re.match(r'^Date: ([-0-9T:]+)$', line.strip()):
        return found.group(1)

    raise RuntimeError(f'Failed to parse date for {filepath}')


def parse_title(filepath: Path, line: str) -> str:
    """Extract and normalise the title from a post's ``Title:`` header line.

    After extraction, `` - `` becomes ``-``, every ``.`` becomes ``-`` and
    `` &`` becomes ``,`` (applied in that order). A title that still contains
    a colon is wrapped in double quotes
    (NOTE(review): presumably so the YAML front matter stays valid - confirm).

    Parameters:
        filepath: Path of the post; used only to build the error message.
        line: The header line; surrounding whitespace is ignored.

    Returns:
        The normalised title, double-quoted when it contains ``:``.

    Raises:
        RuntimeError: If the line is not a well-formed ``Title:`` header.
    """
    found = re.match(r"^Title: ([-A-Za-z0-9',.: &?]+)$", line.strip())

    if found is None:
        raise RuntimeError(f'Failed to parse title for {filepath}')

    cleaned = found.group(1)
    # Order matters: each substitution sees the result of the previous one.
    for old, new in ((' - ', '-'), ('.', '-'), (' &', ',')):
        cleaned = cleaned.replace(old, new)

    return f'"{cleaned}"' if ':' in cleaned else cleaned


def transform_content(content: str, filepath: Path) -> str:
    """
    Convert the text of one source post into its MkDocs form.

    The header lines are replaced by a ``---`` delimited front matter block
    carrying the title, creation date and categories, followed by an empty
    line and the post body with each line passed through transform_url.

    Parameters:
        content: Original file content as string
        filepath: Path object of the current file (used in error messages)

    Returns:
        Transformed content as string, lines joined with a newline character
    """
    post = parse_blog_post(filepath, content)

    front_matter = [
        '---',
        f'title: {post.title}',
        'date:',
        f'  created: {post.date_str}',
        'categories:',
        *(f'    - {cat}' for cat in post.categories),
        '---',
        '',
    ]
    body = [transform_url(filepath, line) for line in post.lines]

    return '\n'.join(front_matter + body)


def transform_url(filepath: Path, line: str) -> str:
    """Rewrite dated site-absolute links on one line to relative ``.md`` links.

    A link target of the form ``(/YYYY/MM/DD/name.html)`` becomes
    ``(../YYYY/YYYY-MM-DD-name.md)``. Every such target on the line is
    rewritten; lines without one are returned unchanged.

    Parameters:
        filepath: Path of the post being converted. Not needed by the
            rewrite itself; kept so existing callers keep working.
        line: One line of the post body.

    Returns:
        The line with all matching link targets rewritten.
    """
    # The final component excludes ')' as well as '/', so a match can never
    # run past the closing parenthesis of its own link into later text.
    pat = r'\(/(\d\d\d\d)/(\d\d)/(\d\d)/([^/)]+)\)'

    def rewrite(m: re.Match) -> str:
        year, month, day = m.group(1), m.group(2), m.group(3)
        file = m.group(4).replace('.html', '.md')
        return f'(../{year}/{year}-{month}-{day}-{file})'

    # re.sub handles any number of links per line, each independently.
    return re.sub(pat, rewrite, line)


def valid_paths(input_root: str):
    """
    Used during development/testing to limit the conversion to a subset of posts.
    """
    for file_path in input_root.rglob("*.md"):
        relative_path: Path = file_path.relative_to(input_root)
        yield (file_path, relative_path)


def main(input_root: str | Path, output_root: str | Path):
    """
    Traverse input_root, transform .md files, and write to identical structure in output_root.
    """
    input_root = Path(input_root).resolve()
    output_root = Path(output_root).resolve()

    if not input_root.exists():
        raise FileNotFoundError(f"Input directory not found: {input_root}")

    if not output_root.exists():
        raise FileNotFoundError(f"Ouput directory not found: {output_root}")

    # Walk through all directories and files
    for file_path, relative_path in valid_paths(input_root):
        output_path = output_root / relative_path
        output_path.parent.mkdir(parents=True, exist_ok=True)
        content = file_path.read_text(encoding="utf-8")
        new_content = transform_content(content, file_path)

        output_path.write_text(new_content, encoding="utf-8")
        print(f"Transformed: {relative_path}")


if __name__ == "__main__":
    # Deliberate safety latch: running this file as a script fails immediately
    # so the conversion cannot be re-run by accident.
    # NOTE(review): presumably the converted site is already live - confirm
    # before removing this guard.
    raise RuntimeError('Do not convert - we are in production!')
    # Unreachable while the raise above is in place; kept as the invocation
    # template (fill in the two directories and delete the raise to run).
    # INPUT_DIR = Path("...")
    # OUTPUT_DIR = Path("...")
    # main(INPUT_DIR, OUTPUT_DIR)
    # print(f"\nAll done! Transformed files saved to: {OUTPUT_DIR.resolve()}")