diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8b79f0f..610e40a 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -7,7 +7,7 @@ on:
jobs:
build-en:
- name: Build (English)
+ name: Build (English mdBook)
runs-on: ubuntu-22.04
steps:
@@ -17,30 +17,28 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: '3.10'
- cache: 'pip'
- - name: Install pandoc
+ - name: Install Rust toolchain
run: |
- wget -q https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-amd64.deb
- sudo dpkg -i pandoc-2.19.2-1-amd64.deb
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal
+ echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
- - name: Install d2lbook
+ - name: Install mdBook
+ run: cargo install mdbook --locked
+
+ - name: Run mdBook regression tests
run: |
- git clone https://github.com/openmlsys/d2l-book.git
- cd d2l-book
- # Fix Python 3.10+ incompatibility: bibtex<2.0.0 depends on oset which
- # uses collections.MutableSet removed in Python 3.10.
- sed -i "s/'sphinxcontrib-bibtex<2.0.0'/'sphinxcontrib-bibtex>=2.5.0'/" setup.py
- python3 -m pip install .
+ python3 -m unittest discover -s tests -p 'test_prepare_mdbook.py'
+ python3 -m unittest discover -s tests -p 'test_prepare_mdbook_zh.py'
+ python3 -m unittest discover -s tests -p 'test_assemble_docs_publish_tree.py'
+ python3 -m unittest discover -s tests -p 'test_ensure_book_resources.py'
+ python3 -m unittest discover -s tests -p 'test_mdbook_mathjax.py'
- - name: Install Python dependencies
- run: python3 -m pip install -r requirements.txt
-
- - name: Build English HTML
- run: bash build_html.sh
+ - name: Build English HTML with mdBook
+ run: bash build_mdbook.sh
build-zh:
- name: Build (Chinese)
+ name: Build (Chinese mdBook)
runs-on: ubuntu-22.04
steps:
@@ -50,25 +48,17 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: '3.10'
- cache: 'pip'
-
- - name: Install pandoc
+
+ - name: Install Rust toolchain
run: |
- wget -q https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-amd64.deb
- sudo dpkg -i pandoc-2.19.2-1-amd64.deb
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal
+ echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
- - name: Install d2lbook
- run: |
- git clone https://github.com/openmlsys/d2l-book.git
- cd d2l-book
- sed -i "s/'sphinxcontrib-bibtex<2.0.0'/'sphinxcontrib-bibtex>=2.5.0'/" setup.py
- python3 -m pip install .
+ - name: Install mdBook
+ run: cargo install mdbook --locked
- - name: Install Python dependencies
- run: python3 -m pip install -r requirements.txt
-
- - name: Build Chinese HTML
- run: bash build_html_zh.sh
+ - name: Build Chinese HTML with mdBook
+ run: bash build_mdbook_zh.sh
build:
name: build
diff --git a/.github/workflows/update_docs.yml b/.github/workflows/update_docs.yml
index ac9694a..7f19a6b 100644
--- a/.github/workflows/update_docs.yml
+++ b/.github/workflows/update_docs.yml
@@ -16,30 +16,28 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: '3.10'
- cache: 'pip'
- - name: Install pandoc
+ - name: Install Rust toolchain
run: |
- wget -q https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-amd64.deb
- sudo dpkg -i pandoc-2.19.2-1-amd64.deb
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal
+ echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
- - name: Install d2lbook
+ - name: Install mdBook
+ run: cargo install mdbook --locked
+
+ - name: Run mdBook regression tests
run: |
- git clone https://github.com/openmlsys/d2l-book.git
- cd d2l-book
- # Fix Python 3.10+ incompatibility: bibtex<2.0.0 depends on oset which
- # uses collections.MutableSet removed in Python 3.10.
- sed -i "s/'sphinxcontrib-bibtex<2.0.0'/'sphinxcontrib-bibtex>=2.5.0'/" setup.py
- python3 -m pip install .
+ python3 -m unittest discover -s tests -p 'test_prepare_mdbook.py'
+ python3 -m unittest discover -s tests -p 'test_prepare_mdbook_zh.py'
+ python3 -m unittest discover -s tests -p 'test_assemble_docs_publish_tree.py'
+ python3 -m unittest discover -s tests -p 'test_ensure_book_resources.py'
+ python3 -m unittest discover -s tests -p 'test_mdbook_mathjax.py'
- - name: Install Python dependencies
- run: python3 -m pip install -r requirements.txt sphinx-mathjax-offline
+ - name: Build English HTML with mdBook
+ run: bash build_mdbook.sh
- - name: Build English HTML
- run: bash build_html.sh
-
- - name: Build Chinese HTML
- run: bash build_html_zh.sh
+ - name: Build Chinese HTML with mdBook
+ run: bash build_mdbook_zh.sh
- name: Deploy to GitHub Pages
env:
@@ -47,12 +45,11 @@ jobs:
run: |
git clone https://x-access-token:${DEPLOY_TOKEN}@github.com/openmlsys/openmlsys.github.io.git
- # English → root (default language)
- cp -r en_chapters/_build/html/* openmlsys.github.io/docs/
-
- # Chinese → /cn/ subdirectory
- mkdir -p openmlsys.github.io/docs/cn
- cp -r zh_chapters/_build/html/* openmlsys.github.io/docs/cn/
+ python3 tools/assemble_docs_publish_tree.py \
+ --destination-root openmlsys.github.io \
+ --docs-subdir docs \
+ --en-source .mdbook/book \
+ --zh-source .mdbook-zh/book
cd openmlsys.github.io
git config user.name "github-actions[bot]"
diff --git a/.gitignore b/.gitignore
index fa65c61..2f39fab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ test*.md
run.sh
.idea
env
+.mdbook/
.mdbook-zh/
.mdbook-zh-test/
task_plan.md
@@ -19,7 +20,6 @@ findings.md
progress.md
d2l-book/
docs/
-tests/
en_chapters/img
en_chapters/references
en_chapters/static
diff --git a/book.toml b/book.toml
index a9fa90f..6a5ddc9 100644
--- a/book.toml
+++ b/book.toml
@@ -1,15 +1,15 @@
[book]
authors = ["OpenMLSys Contributors"]
-language = "zh-CN"
-src = "zh_chapters"
-title = "机器学习系统:设计和实现"
+language = "en"
+src = "en_chapters"
+title = "Machine Learning Systems: Design and Implementation"
[build]
-build-dir = ".mdbook-zh/book"
+build-dir = ".mdbook/book"
create-missing = false
-[preprocessor.openmlsys-zh]
-command = "python3 tools/mdbook_zh_preprocessor.py"
+[preprocessor.openmlsys]
+command = "python3 tools/mdbook_preprocessor.py"
[output.html]
git-repository-url = "https://github.com/openmlsys/openmlsys-zh"
diff --git a/books/zh/book.toml b/books/zh/book.toml
new file mode 100644
index 0000000..532f20a
--- /dev/null
+++ b/books/zh/book.toml
@@ -0,0 +1,18 @@
+# mdBook configuration for the Chinese edition; build_mdbook_zh.sh runs `mdbook build` on this directory (books/zh).
+[book]
+authors = ["OpenMLSys Contributors"]
+language = "zh-CN"
+# Chapter sources are not stored next to this file; the path climbs two levels to reach zh_chapters/.
+src = "../../zh_chapters"
+title = "机器学习系统:设计和实现"
+
+[build]
+# Output goes to .mdbook-zh/book, two levels above this file.
+build-dir = "../../.mdbook-zh/book"
+create-missing = false
+
+[preprocessor.openmlsys-zh]
+# The preprocessor script is addressed with the same two-level climb as src and build-dir.
+command = "python3 ../../tools/mdbook_zh_preprocessor.py"
+
+[output.html]
+git-repository-url = "https://github.com/openmlsys/openmlsys-zh"
+mathjax-support = true
+preferred-dark-theme = "navy"
+# Stylesheet shipped alongside this file in books/zh/theme/.
+additional-css = ["theme/dark-mode-images.css"]
diff --git a/books/zh/theme/dark-mode-images.css b/books/zh/theme/dark-mode-images.css
new file mode 100644
index 0000000..d0b209a
--- /dev/null
+++ b/books/zh/theme/dark-mode-images.css
@@ -0,0 +1,6 @@
+/* Give PNG/JPG/JPEG/GIF images a white background while <html> carries the `light` class. */
+/* NOTE(review): the file is named dark-mode-images.css, yet every selector targets html.light; confirm whether the dark theme classes were intended instead. */
+html.light img[src$=".png"],
+html.light img[src$=".jpg"],
+html.light img[src$=".jpeg"],
+html.light img[src$=".gif"] {
+ background-color: #fff;
+}
diff --git a/books/zh/theme/head.hbs b/books/zh/theme/head.hbs
new file mode 100644
index 0000000..793a459
--- /dev/null
+++ b/books/zh/theme/head.hbs
@@ -0,0 +1,10 @@
+
diff --git a/build_html.sh b/build_html.sh
index 6e47429..904791c 100644
--- a/build_html.sh
+++ b/build_html.sh
@@ -10,15 +10,7 @@ set -e
ROOT="$(cd "$(dirname "$0")" && pwd)"
# ── Create resource symlinks ──────────────────────────────────────────────────
-for target in img references static mlsys.bib; do
- link="$ROOT/en_chapters/$target"
- rel_target="../$target"
- if [ -e "$link" ] && [ ! -L "$link" ]; then
- echo "Refusing to replace non-symlink path: $link" >&2
- exit 1
- fi
- ln -sfn "$rel_target" "$link"
-done
+python3 "$ROOT/tools/ensure_book_resources.py" --chapter-dir "$ROOT/en_chapters"
# ── Build ─────────────────────────────────────────────────────────────────────
cd "$ROOT/en_chapters"
diff --git a/build_html_zh.sh b/build_html_zh.sh
index 2949005..ccefd76 100755
--- a/build_html_zh.sh
+++ b/build_html_zh.sh
@@ -10,15 +10,7 @@ set -e
ROOT="$(cd "$(dirname "$0")" && pwd)"
# ── Create resource symlinks ──────────────────────────────────────────────────
-for target in img references static mlsys.bib; do
- link="$ROOT/zh_chapters/$target"
- rel_target="../$target"
- if [ -e "$link" ] && [ ! -L "$link" ]; then
- echo "Refusing to replace non-symlink path: $link" >&2
- exit 1
- fi
- ln -sfn "$rel_target" "$link"
-done
+python3 "$ROOT/tools/ensure_book_resources.py" --chapter-dir "$ROOT/zh_chapters"
# ── Build ─────────────────────────────────────────────────────────────────────
cd "$ROOT/zh_chapters"
diff --git a/build_mdbook.sh b/build_mdbook.sh
new file mode 100644
index 0000000..f814264
--- /dev/null
+++ b/build_mdbook.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# Build the English mdBook: link shared resources into en_chapters/, regenerate
+# en_chapters/SUMMARY.md, then run `mdbook build` on the repository root.
+set -euo pipefail
+
+# Absolute path of the directory that contains this script.
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# Prefer python3, fall back to python; the trailing "|| true" keeps `set -e` from
+# aborting here so the explicit check below can print a readable error instead.
+PYTHON_BIN="$(command -v python3 || command -v python || true)"
+
+if [[ -z "${PYTHON_BIN}" ]]; then
+ echo "Python is required to prepare the mdBook staging tree." >&2
+ exit 1
+fi
+
+# Fail early with an install hint when the mdbook binary is not on PATH.
+if ! command -v mdbook >/dev/null 2>&1; then
+ echo "mdbook is not installed. Install it first, for example with: cargo install mdbook" >&2
+ exit 1
+fi
+
+# Step 1: make the shared resources reachable from en_chapters/.
+"${PYTHON_BIN}" "${ROOT}/tools/ensure_book_resources.py" --chapter-dir "${ROOT}/en_chapters"
+# Step 2: write SUMMARY.md; pages consisting only of a "[TODO: src = zh_chapters/...]"
+# marker are passed as placeholders to be skipped.
+"${PYTHON_BIN}" "${ROOT}/tools/prepare_mdbook.py" \
+ --source "${ROOT}/en_chapters" \
+ --summary-output "${ROOT}/en_chapters/SUMMARY.md" \
+ --placeholder-prefix "[TODO: src = zh_chapters/"
+
+# Step 3: build using the book.toml located at the repository root.
+mdbook build "${ROOT}"
diff --git a/build_mdbook_zh.sh b/build_mdbook_zh.sh
index 6028915..fff91c4 100755
--- a/build_mdbook_zh.sh
+++ b/build_mdbook_zh.sh
@@ -14,22 +14,12 @@ if ! command -v mdbook >/dev/null 2>&1; then
exit 1
fi
-# ── Create resource symlinks ──────────────────────────────────────────────────
-# Resources (img/, references/, static/, mlsys.bib) live at the repo root and
-# are symlinked into zh_chapters/ so mdbook can find them at relative paths.
-for target in img references static mlsys.bib; do
- link="${ROOT}/zh_chapters/${target}"
- rel_target="../${target}"
- if [[ -e "${link}" ]] && [[ ! -L "${link}" ]]; then
- echo "Refusing to replace non-symlink path: ${link}" >&2
- exit 1
- fi
- ln -sfn "${rel_target}" "${link}"
-done
+# ── Create resource links ─────────────────────────────────────────────────────
+"${PYTHON_BIN}" "${ROOT}/tools/ensure_book_resources.py" --chapter-dir "${ROOT}/zh_chapters"
# ── Build ─────────────────────────────────────────────────────────────────────
"${PYTHON_BIN}" "${ROOT}/tools/prepare_mdbook_zh.py" \
--source "${ROOT}/zh_chapters" \
--summary-output "${ROOT}/zh_chapters/SUMMARY.md"
-mdbook build "${ROOT}"
+mdbook build "${ROOT}/books/zh"
diff --git a/en_chapters/SUMMARY.md b/en_chapters/SUMMARY.md
new file mode 100644
index 0000000..9436c5d
--- /dev/null
+++ b/en_chapters/SUMMARY.md
@@ -0,0 +1,3 @@
+# Summary
+
+[Machine Learning Systems: Design and Implementation](index.md)
diff --git a/tests/test_assemble_docs_publish_tree.py b/tests/test_assemble_docs_publish_tree.py
new file mode 100644
index 0000000..04e9bf6
--- /dev/null
+++ b/tests/test_assemble_docs_publish_tree.py
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+import tempfile
+import unittest
+from pathlib import Path
+
+from tools.assemble_docs_publish_tree import assemble_publish_tree
+
+
+class AssembleDocsPublishTreeTests(unittest.TestCase):
+ """Checks where assemble_publish_tree places the English and Chinese HTML output."""
+
+ def test_assemble_publish_tree_uses_legacy_docs_layout(self) -> None:
+ """English files land in <dest>/docs and Chinese files in <dest>/docs/cn."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ root = Path(tmpdir)
+ pages_repo = root / "pages"
+ en_source = root / "en-html"
+ zh_source = root / "zh-html"
+
+ pages_repo.mkdir()
+ en_source.mkdir()
+ zh_source.mkdir()
+
+ (en_source / "index.html").write_text("english home", encoding="utf-8")
+ (en_source / "guide.html").write_text("english guide", encoding="utf-8")
+ (zh_source / "index.html").write_text("chinese home", encoding="utf-8")
+ (zh_source / "searchindex.js").write_text("zh search", encoding="utf-8")
+
+ assemble_publish_tree(
+ destination_root=pages_repo,
+ docs_subdir="docs",
+ en_source=en_source,
+ zh_source=zh_source,
+ )
+
+ self.assertEqual(
+ (pages_repo / "docs" / "index.html").read_text(encoding="utf-8"),
+ "english home",
+ )
+ self.assertEqual(
+ (pages_repo / "docs" / "guide.html").read_text(encoding="utf-8"),
+ "english guide",
+ )
+ self.assertEqual(
+ (pages_repo / "docs" / "cn" / "index.html").read_text(encoding="utf-8"),
+ "chinese home",
+ )
+ self.assertEqual(
+ (pages_repo / "docs" / "cn" / "searchindex.js").read_text(encoding="utf-8"),
+ "zh search",
+ )
+
+ def test_assemble_publish_tree_replaces_stale_docs_content(self) -> None:
+ """Files already present under docs/ and docs/cn/ are gone after assembly."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ root = Path(tmpdir)
+ pages_repo = root / "pages"
+ en_source = root / "en-html"
+ zh_source = root / "zh-html"
+
+ # Seed the destination with leftovers from an earlier publish.
+ (pages_repo / "docs" / "cn").mkdir(parents=True)
+ (pages_repo / "docs" / "old.html").write_text("stale en", encoding="utf-8")
+ (pages_repo / "docs" / "cn" / "old.html").write_text("stale zh", encoding="utf-8")
+
+ en_source.mkdir()
+ zh_source.mkdir()
+ (en_source / "index.html").write_text("fresh en", encoding="utf-8")
+ (zh_source / "index.html").write_text("fresh zh", encoding="utf-8")
+
+ assemble_publish_tree(
+ destination_root=pages_repo,
+ docs_subdir="docs",
+ en_source=en_source,
+ zh_source=zh_source,
+ )
+
+ self.assertFalse((pages_repo / "docs" / "old.html").exists())
+ self.assertFalse((pages_repo / "docs" / "cn" / "old.html").exists())
+ self.assertEqual(
+ (pages_repo / "docs" / "index.html").read_text(encoding="utf-8"),
+ "fresh en",
+ )
+ self.assertEqual(
+ (pages_repo / "docs" / "cn" / "index.html").read_text(encoding="utf-8"),
+ "fresh zh",
+ )
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_ensure_book_resources.py b/tests/test_ensure_book_resources.py
new file mode 100644
index 0000000..98304e3
--- /dev/null
+++ b/tests/test_ensure_book_resources.py
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+import tempfile
+import unittest
+from pathlib import Path
+
+from tools.ensure_book_resources import ensure_resource_views
+
+
+class EnsureBookResourcesTests(unittest.TestCase):
+ """Checks how ensure_resource_views exposes shared resources inside a chapter directory."""
+
+ def test_ensure_resource_views_creates_missing_symlinks(self) -> None:
+ """img, references, static and mlsys.bib become symlinks resolving to the root copies."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ root = Path(tmpdir)
+ chapter_dir = root / "en_chapters"
+ chapter_dir.mkdir()
+
+ for directory in ("img", "references", "static"):
+ (root / directory).mkdir()
+ (root / "mlsys.bib").write_text("bib", encoding="utf-8")
+
+ ensure_resource_views(chapter_dir, root)
+
+ for name in ("img", "references", "static", "mlsys.bib"):
+ path = chapter_dir / name
+ self.assertTrue(path.is_symlink(), f"{name} should be a symlink")
+ self.assertEqual(path.resolve(), (root / name).resolve())
+
+ def test_ensure_resource_views_keeps_existing_non_symlink_paths(self) -> None:
+ """Real files and directories already in the chapter directory are left untouched."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ root = Path(tmpdir)
+ chapter_dir = root / "en_chapters"
+ chapter_dir.mkdir()
+
+ for directory in ("img", "references", "static"):
+ (root / directory).mkdir()
+ (root / "mlsys.bib").write_text("root bib", encoding="utf-8")
+
+ # Local, non-symlink copies that must survive the call.
+ local_bib = chapter_dir / "mlsys.bib"
+ local_bib.write_text("local bib", encoding="utf-8")
+ local_static = chapter_dir / "static"
+ local_static.mkdir()
+ (local_static / "frontpage.html").write_text("local static", encoding="utf-8")
+
+ ensure_resource_views(chapter_dir, root)
+
+ self.assertFalse(local_bib.is_symlink())
+ self.assertEqual(local_bib.read_text(encoding="utf-8"), "local bib")
+ self.assertFalse(local_static.is_symlink())
+ self.assertTrue((local_static / "frontpage.html").exists())
+ # Resources without a local copy are still linked.
+ self.assertTrue((chapter_dir / "img").is_symlink())
+ self.assertTrue((chapter_dir / "references").is_symlink())
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_prepare_mdbook.py b/tests/test_prepare_mdbook.py
new file mode 100644
index 0000000..bfce92e
--- /dev/null
+++ b/tests/test_prepare_mdbook.py
@@ -0,0 +1,114 @@
+from __future__ import annotations
+
+import tempfile
+import unittest
+from pathlib import Path
+
+from tools.prepare_mdbook import build_title_cache, rewrite_markdown, write_summary
+
+
+class PrepareMdBookTests(unittest.TestCase):
+ """Regression tests for SUMMARY generation and page rewriting in tools.prepare_mdbook."""
+
+ def test_write_summary_skips_placeholder_pages(self) -> None:
+ """Placeholder chapters appear neither in SUMMARY.md nor in the rewritten index TOC."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ root = Path(tmpdir)
+ source = root / "en_chapters"
+ source.mkdir()
+
+ (source / "index.md").write_text(
+ """Machine Learning Systems
+========================
+
+```toc
+:maxdepth: 2
+
+chapter_preface/index
+chapter_introduction/index
+```
+
+```toc
+:maxdepth: 1
+
+appendix/index
+```
+""",
+ encoding="utf-8",
+ )
+
+ # This chapter holds only a placeholder marker and should be skipped.
+ chapter_preface = source / "chapter_preface"
+ chapter_preface.mkdir()
+ (chapter_preface / "index.md").write_text(
+ "[TODO: src = zh_chapters/chapter_preface/index.md]\n",
+ encoding="utf-8",
+ )
+
+ chapter_intro = source / "chapter_introduction"
+ chapter_intro.mkdir()
+ (chapter_intro / "index.md").write_text("# Introduction\n", encoding="utf-8")
+
+ appendix = source / "appendix"
+ appendix.mkdir()
+ (appendix / "index.md").write_text("# Appendix\n", encoding="utf-8")
+
+ summary_path = write_summary(
+ source,
+ placeholder_prefix="[TODO: src = zh_chapters/",
+ )
+ summary = summary_path.read_text(encoding="utf-8")
+
+ self.assertEqual(
+ summary,
+ """# Summary
+
+[Machine Learning Systems](index.md)
+[Introduction](chapter_introduction/index.md)
+[Appendix](appendix/index.md)
+""",
+ )
+
+ title_cache = build_title_cache(
+ source,
+ placeholder_prefix="[TODO: src = zh_chapters/",
+ )
+ rewritten = rewrite_markdown(
+ (source / "index.md").read_text(encoding="utf-8"),
+ (source / "index.md").resolve(),
+ title_cache,
+ )
+
+ self.assertIn("- [Introduction](chapter_introduction/index.md)", rewritten)
+ self.assertIn("- [Appendix](appendix/index.md)", rewritten)
+ self.assertNotIn("chapter_preface/index.md", rewritten)
+
+ def test_rewrite_markdown_uses_configured_bibliography_title(self) -> None:
+ """The bibliography heading uses the bibliography_title argument, not the Chinese heading."""
+ with tempfile.TemporaryDirectory() as tmpdir:
+ root = Path(tmpdir)
+ page = root / "chapter.md"
+ page.write_text(
+ """# Introduction
+
+Reference :cite:`smith2024`.
+""",
+ encoding="utf-8",
+ )
+
+ rewritten = rewrite_markdown(
+ page.read_text(encoding="utf-8"),
+ page.resolve(),
+ {page.resolve(): "Introduction"},
+ bib_db={
+ "smith2024": {
+ "author": "Smith, Alice and Doe, Bob",
+ "title": "Systems Paper",
+ "year": "2024",
+ "journal": "ML Systems Journal",
+ }
+ },
+ bibliography_title="References",
+ )
+
+ self.assertIn("## References", rewritten)
+ self.assertNotIn("## 参考文献", rewritten)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_prepare_mdbook_zh.py b/tests/test_prepare_mdbook_zh.py
new file mode 100644
index 0000000..7182438
--- /dev/null
+++ b/tests/test_prepare_mdbook_zh.py
@@ -0,0 +1,227 @@
+from __future__ import annotations
+
+import tempfile
+import unittest
+from pathlib import Path
+
+from tools.prepare_mdbook_zh import extract_title, rewrite_markdown, write_summary
+
+
+class PrepareMdBookZhTests(unittest.TestCase):
+ def test_extract_title_supports_atx_and_setext_headings(self) -> None:
+ self.assertEqual(extract_title("# 导论\n"), "导论")
+ self.assertEqual(extract_title("前言文字\n\n## 机器学习应用\n"), "机器学习应用")
+ self.assertEqual(extract_title("机器学习系统:设计和实现\n=========================\n"), "机器学习系统:设计和实现")
+
+ def test_write_summary_generates_nested_navigation(self) -> None:
+ with tempfile.TemporaryDirectory() as tmpdir:
+ root = Path(tmpdir)
+ source = root / "zh_chapters"
+ source.mkdir()
+
+ (source / "index.md").write_text(
+ """机器学习系统:设计和实现
+=========================
+
+```eval_rst
+.. raw:: html
+ :file: frontpage.html
+```
+
+```toc
+:maxdepth: 2
+
+[前言](chapter_preface/index)
+
+# 基础篇
+chapter_introduction/index
+
+# 附录
+[机器学习基础附录](appendix_machine_learning_introduction/index)
+```
+""",
+ encoding="utf-8",
+ )
+
+ chapter_preface = source / "chapter_preface"
+ chapter_preface.mkdir()
+ (chapter_preface / "index.md").write_text("# 前言\n", encoding="utf-8")
+ static_dir = source / "static"
+ static_dir.mkdir()
+ (static_dir / "frontpage.html").write_text(
+ "
', rewritten)
+ self.assertIn('", re.IGNORECASE | re.DOTALL)
+DEFAULT_BIBLIOGRAPHY_TITLE = "References"
+FRONTPAGE_LAYOUT_CSS = """
+
+""".strip()
+
+
+@dataclass(frozen=True)
+class TocItem:
+ """One parsed line of a ``toc`` fenced block: either a part heading or a chapter reference."""
+
+ # "part" for heading lines, "chapter" for link or bare-path lines (as set by parse_toc_entries).
+ kind: str
+ # Display text; empty for bare-path chapters, whose title is looked up later.
+ label: str
+ # Path exactly as written in the TOC block; left as None for "part" items.
+ target: str | None = None
+
+
+def is_placeholder_markdown(markdown: str, placeholder_prefix: str | None = None) -> bool:
+    """Report whether ``markdown`` consists of a single placeholder marker.
+
+    A page counts as a placeholder when, after trimming surrounding
+    whitespace, it begins with ``placeholder_prefix`` and ends with ``]``.
+    Without a prefix (``None`` or empty) nothing is treated as a placeholder.
+    """
+    if placeholder_prefix:
+        trimmed = markdown.strip()
+        return trimmed.startswith(placeholder_prefix) and trimmed.endswith("]")
+    return False
+
+
+def extract_title(markdown: str, fallback: str = "Untitled") -> str:
+    """Return the first heading found in ``markdown``.
+
+    Lines are scanned top to bottom, skipping blank ones.  A line starting
+    with ``#`` yields its text with the leading ``#`` characters removed (if
+    any text remains); otherwise a line is a title when the line after it is a
+    non-empty run of ``=`` and/or ``-`` characters.  ``fallback`` is returned
+    when no heading is found.
+    """
+    rows = markdown.splitlines()
+    total = len(rows)
+
+    for position, raw in enumerate(rows):
+        text = raw.strip()
+        if text == "":
+            continue
+
+        # ATX style: "# Title", "## Title", ...
+        if text.startswith("#"):
+            atx_title = text.lstrip("#").strip()
+            if atx_title:
+                return atx_title
+
+        # Setext style: the current line is underlined by the next one.
+        if position + 1 >= total:
+            continue
+        rule = rows[position + 1].strip()
+        if rule and all(char in "=-" for char in rule):
+            return text
+
+    return fallback
+
+
+def parse_toc_entries(block_lines: list[str]) -> list[TocItem]:
+    """Convert the body lines of one ``toc`` fence into ``TocItem`` objects.
+
+    Blank lines and option lines (those starting with ``:``) are ignored.
+    A line matching ``TOC_PART_RE`` becomes a "part" item, a line matching
+    ``TOC_LINK_RE`` becomes a labelled "chapter" item, and any other line is
+    taken verbatim as the target of an unlabelled "chapter" item.
+    """
+    items: list[TocItem] = []
+    for raw in block_lines:
+        text = raw.strip()
+        if text == "" or text.startswith(":"):
+            continue
+
+        as_part = TOC_PART_RE.match(text)
+        # Part headings take precedence; only then is the link form tried.
+        as_link = None if as_part else TOC_LINK_RE.match(text)
+
+        if as_part:
+            item = TocItem(kind="part", label=as_part.group(1).strip())
+        elif as_link:
+            item = TocItem(
+                kind="chapter",
+                label=as_link.group(1).strip(),
+                target=as_link.group(2).strip(),
+            )
+        else:
+            item = TocItem(kind="chapter", label="", target=text)
+        items.append(item)
+    return items
+
+
+def parse_toc_blocks(markdown: str) -> list[list[TocItem]]:
+    """Collect the parsed entries of every ``toc`` fenced block in ``markdown``.
+
+    A block opens on a line equal (after stripping) to three backticks
+    followed by ``TOC_FENCE`` and closes on a bare three-backtick line; a
+    block left open runs to the end of the text.  One list of items is
+    returned per block, in document order.
+    """
+    opener = f"```{TOC_FENCE}"
+    blocks: list[list[TocItem]] = []
+
+    # A single shared iterator lets the inner loop consume the fence body
+    # (and its closing line) so the outer loop resumes right after it.
+    rows = iter(markdown.splitlines())
+    for row in rows:
+        if row.strip() != opener:
+            continue
+        body: list[str] = []
+        for inner in rows:
+            if inner.strip() == "```":
+                break
+            body.append(inner)
+        blocks.append(parse_toc_entries(body))
+
+    return blocks
+
+
+def resolve_toc_target(current_file: Path, entry: str) -> Path:
+    """Resolve a TOC entry to the Markdown file it refers to.
+
+    ``entry`` is interpreted relative to the directory of ``current_file``;
+    ``.md`` is appended when the entry does not already end with it.
+
+    Raises:
+        FileNotFoundError: if the resolved file does not exist.
+    """
+    filename = entry if entry.endswith(".md") else f"{entry}.md"
+    resolved = (current_file.parent / filename).resolve()
+    if resolved.exists():
+        return resolved
+    raise FileNotFoundError(f"TOC entry '{entry}' from '{current_file}' does not exist")
+
+
+def relative_link(from_file: Path, target_file: Path) -> str:
+    """Return the POSIX-style path to ``target_file`` relative to the directory of ``from_file``."""
+    start_dir = from_file.parent
+    hop = os.path.relpath(target_file, start=start_dir)
+    return Path(hop).as_posix()
+
+
+def _strip_latex_escapes_outside_math(line: str) -> str:
+    """Apply ``LATEX_ESCAPE_RE`` un-escaping to the parts of ``line`` outside ``$...$``.
+
+    The line is split on ``$``; the even-numbered pieces (those before the
+    first ``$`` and after each closing ``$``) have every match replaced by its
+    first capture group, while the odd-numbered pieces are left untouched.
+    """
+    segments = line.split("$")
+    segments[::2] = [LATEX_ESCAPE_RE.sub(r"\1", outside) for outside in segments[::2]]
+    return "$".join(segments)
+
+
+def normalize_directives(markdown: str) -> str:
+    """Strip directive syntax from ``markdown`` and tidy its blank lines.
+
+    Steps, in order: remove every match of ``OPTION_LINE_RE``; replace each
+    ``NUMREF_RE`` and then each ``EQREF_RE`` match with its first capture group
+    wrapped in backticks; right-strip each line and un-escape LaTeX escapes
+    outside math; collapse runs of blank lines to one; drop trailing blank
+    lines.  The result always ends with exactly one newline.
+    """
+    text = OPTION_LINE_RE.sub("", markdown)
+    for role_pattern in (NUMREF_RE, EQREF_RE):
+        text = role_pattern.sub(lambda found: f"`{found.group(1)}`", text)
+
+    kept: list[str] = []
+    for raw in text.splitlines():
+        row = _strip_latex_escapes_outside_math(raw.rstrip())
+        # Skip a blank line when the line kept just before it is blank too.
+        if row == "" and kept and kept[-1] == "":
+            continue
+        kept.append(row)
+
+    while kept and kept[-1] == "":
+        del kept[-1]
+
+    return "\n".join(kept) + "\n"
+
+
+def clean_bibtex(value: str) -> str:
+    """Reduce a BibTeX field value to plain text.
+
+    Accent commands (a backslash, one of the accent marks in the patterns
+    below, and a word character), with or without enclosing braces, are
+    replaced by the bare character; all remaining braces are removed and the
+    result is stripped of surrounding whitespace.
+    """
+    # The braced form must run first so its braces go away with the accent.
+    for accent_pattern in (r"\{\\[`'^\"~=.](\w)\}", r"\\[`'^\"~=.](\w)"):
+        value = re.sub(accent_pattern, r"\1", value)
+    return value.translate({ord("{"): None, ord("}"): None}).strip()
+
+
+def _parse_bib_fields(body: str) -> dict[str, str]:
+    """Parse the ``name = value`` pairs found in the body of one BibTeX entry.
+
+    Field names are lower-cased.  A value may be brace-delimited (nested
+    braces are kept as written), double-quoted, or a bare token that ends at
+    a comma, whitespace or ``}``.  Values are stripped of surrounding
+    whitespace; a later field with the same name replaces an earlier one.
+
+    Malformed input is tolerated: parsing stops quietly when the text ends
+    before ``=`` or before a value, and a brace-delimited value with no closing
+    brace keeps everything up to the end of ``body``.
+    """
+    fields: dict[str, str] = {}
+    size = len(body)
+    i = 0
+    while i < size:
+        # Skip whitespace and the commas that separate fields.
+        while i < size and body[i] in " \t\n\r,":
+            i += 1
+        if i >= size:
+            break
+
+        # Field name: everything up to "=" or whitespace.
+        start = i
+        while i < size and body[i] not in "= \t\n\r":
+            i += 1
+        name = body[start:i].strip().lower()
+
+        # Move to the "=" sign, then past it and any following whitespace.
+        while i < size and body[i] != "=":
+            i += 1
+        if i >= size:
+            break
+        i += 1
+        while i < size and body[i] in " \t\n\r":
+            i += 1
+        if i >= size:
+            break
+
+        if body[i] == "{":
+            depth = 1
+            i += 1
+            vstart = i
+            while i < size and depth > 0:
+                if body[i] == "{":
+                    depth += 1
+                elif body[i] == "}":
+                    depth -= 1
+                i += 1
+            # When the closing brace was found, i sits one past it, so the
+            # value ends at i - 1.  If the text ran out first there is no brace
+            # to drop, and slicing to i - 1 would cut off a real character.
+            value = body[vstart : i - 1] if depth == 0 else body[vstart:i]
+        elif body[i] == '"':
+            i += 1
+            vstart = i
+            while i < size and body[i] != '"':
+                i += 1
+            value = body[vstart:i]
+            i += 1  # step over the closing quote
+        else:
+            vstart = i
+            while i < size and body[i] not in ", \t\n\r}":
+                i += 1
+            value = body[vstart:i]
+
+        if name:
+            fields[name] = value.strip()
+    return fields
+
+
+def parse_bib(bib_path: Path) -> dict[str, dict[str, str]]:
+    """Read a BibTeX file and return its entries keyed by citation key.
+
+    Every match of ``BIB_ENTRY_RE`` starts an entry whose body runs to the
+    brace that balances the entry's opening brace.  Each entry maps field
+    names to raw values (see ``_parse_bib_fields``) plus ``"_type"``, the
+    lower-cased entry type.  If a key occurs more than once the last entry
+    wins.  The file is read as UTF-8.
+    """
+    text = bib_path.read_text(encoding="utf-8")
+    size = len(text)
+    entries: dict[str, dict[str, str]] = {}
+    for match in BIB_ENTRY_RE.finditer(text):
+        key = match.group(2).strip()
+        start = match.end()
+        depth = 1
+        pos = start
+        while pos < size and depth > 0:
+            if text[pos] == "{":
+                depth += 1
+            elif text[pos] == "}":
+                depth -= 1
+            pos += 1
+        # pos is one past the closing brace only when that brace exists; for
+        # an entry cut off by end-of-file keep the whole remainder instead of
+        # dropping its last character.
+        end = pos - 1 if depth == 0 else pos
+        fields = _parse_bib_fields(text[start:end])
+        fields["_type"] = match.group(1).lower()
+        entries[key] = fields
+    return entries
+
+
+def _render_bibliography(
+ cited_keys: list[str],
+ bib_db: dict[str, dict[str, str]],
+ bibliography_title: str,
+) -> list[str]:
+ lines: list[str] = ["---", "", f"## {bibliography_title}", "", "
"]
+ for key in cited_keys:
+ entry = bib_db.get(key)
+ if not entry:
+ lines.append(f'- {key}. ↩
')
+ continue
+ author = clean_bibtex(entry.get("author", ""))
+ title = clean_bibtex(entry.get("title", ""))
+ year = entry.get("year", "")
+ venue = clean_bibtex(entry.get("journal", "") or entry.get("booktitle", ""))
+ parts: list[str] = []
+ if author:
+ parts.append(author)
+ if title:
+ parts.append(f"{title}")
+ if venue:
+ parts.append(venue)
+ if year:
+ parts.append(year)
+ text = ". ".join(parts) + "." if parts else f"{key}."
+ lines.append(f'- {text} ↩
')
+ lines.append("
")
+ return lines
+
+
+def process_citations(
+ markdown: str,
+ bib_db: dict[str, dict[str, str]],
+ bibliography_title: str = DEFAULT_BIBLIOGRAPHY_TITLE,
+) -> str:
+ cited_keys: list[str] = []
+
+ def _replace_cite(match: re.Match[str]) -> str:
+ keys = [k.strip() for k in match.group(1).split(",")]
+ for key in keys:
+ if key not in cited_keys:
+ cited_keys.append(key)
+ if not bib_db:
+ return "[" + ", ".join(keys) + "]"
+ nums: list[str] = []
+ for key in keys:
+ idx = cited_keys.index(key) + 1
+ nums.append(f'
[{idx}]')
+ return "".join(nums)
+
+ processed = CITE_RE.sub(_replace_cite, markdown)
+ if cited_keys and bib_db:
+ bib_lines = _render_bibliography(cited_keys, bib_db, bibliography_title)
+ processed = processed.rstrip("\n") + "\n\n" + "\n".join(bib_lines) + "\n"
+ return processed
+
+
+def resolve_raw_html_file(current_file: Path, filename: str) -> Path:
+    """Locate the HTML file named by a raw-HTML include.
+
+    Candidates are tried in this order and the first existing one is returned:
+    next to ``current_file``; in a ``static`` directory next to
+    ``current_file``; in the ``static`` directory one level above the directory
+    holding this module.
+
+    Raises:
+        FileNotFoundError: if none of the candidates exists.
+    """
+    page_dir = current_file.parent
+    candidates = (
+        (page_dir / filename).resolve(),
+        (page_dir / "static" / filename).resolve(),
+        Path(__file__).resolve().parent.parent / "static" / filename,
+    )
+    for candidate in candidates:
+        if candidate.exists():
+            return candidate
+
+    raise FileNotFoundError(f"Raw HTML include '{filename}' from '{current_file}' does not exist")
+
+
+def rewrite_frontpage_assets(html: str) -> str:
+    """Adapt included front-page HTML for embedding in a Markdown page.
+
+    Image references under ``./_images/`` and ``_images/`` are pointed at
+    ``static/image/``, every match of ``HEAD_TAG_RE`` is removed, and every
+    match of ``STYLE_BLOCK_RE`` is rewritten by ``_minify_style_block``.
+    """
+    # "./_images/" is handled first so that it does not leave a stray "./".
+    relocated = html.replace("./_images/", "static/image/").replace("_images/", "static/image/")
+    without_head = HEAD_TAG_RE.sub("", relocated)
+    return STYLE_BLOCK_RE.sub(_minify_style_block, without_head)
+
+
+def _minify_style_block(match: re.Match[str]) -> str:
+ content = match.group(1)
+ parts = [line.strip() for line in content.splitlines() if line.strip()]
+ return f""
+
+
+def wrap_frontpage_html(html: str) -> str:
+ return "\n".join([FRONTPAGE_LAYOUT_CSS, '
', html.strip(), "
"])
+
+
+def inline_raw_html(block_lines: list[str], current_file: Path) -> str | None:
+    """Return the HTML that a ``.. raw:: html`` directive with ``:file:`` pulls in.
+
+    ``None`` is returned when the first non-blank line of the block is not
+    ``.. raw:: html`` or when no later line matches ``RAW_HTML_FILE_RE``.
+    Otherwise the named file is located, read as UTF-8 and passed through
+    ``rewrite_frontpage_assets``; a file called ``frontpage.html`` is
+    additionally wrapped by ``wrap_frontpage_html``.
+    """
+    directive = [line.strip() for line in block_lines if line.strip()]
+    if not directive or directive[0] != ".. raw:: html":
+        return None
+
+    # First option line that names a file, if any.
+    attempts = (RAW_HTML_FILE_RE.match(option) for option in directive[1:])
+    hit = next((found for found in attempts if found), None)
+    if hit is None:
+        return None
+    filename = hit.group(1)
+
+    source = resolve_raw_html_file(current_file, filename)
+    html = rewrite_frontpage_assets(source.read_text(encoding="utf-8")).strip()
+    if Path(filename).name != "frontpage.html":
+        return html
+    return wrap_frontpage_html(html)
+
+
+def chapter_label(item: TocItem, target: Path, title_cache: dict[Path, str]) -> str:
+    """Return the item's own label, or the cached title of ``target`` when the label is empty."""
+    if item.label:
+        return item.label
+    return title_cache[target]
+
+
+def render_toc_list(entries: list[TocItem], current_file: Path, title_cache: dict[Path, str]) -> list[str]:
+    """Render TOC items as Markdown bullet lines.
+
+    A "part" item becomes a plain bullet, and chapters after it are indented
+    one level.  A chapter becomes a link bullet whose href is relative to
+    ``current_file``; chapters without a target, or whose resolved file is not
+    in ``title_cache``, are left out.
+
+    Raises:
+        FileNotFoundError: propagated from ``resolve_toc_target`` when a
+            chapter target does not exist.
+    """
+    bullets: list[str] = []
+    depth = 0
+    for item in entries:
+        if item.kind == "part":
+            bullets.append(f"- {item.label}")
+            depth = 1
+        elif item.target is not None:
+            resolved = resolve_toc_target(current_file, item.target)
+            if resolved in title_cache:
+                text = chapter_label(item, resolved, title_cache)
+                href = relative_link(current_file, resolved)
+                # NOTE(review): the indent unit reads as a single space in this view;
+                # confirm it matches the nesting width the Markdown renderer expects.
+                bullets.append(f"{' ' * depth}- [{text}]({href})")
+    return bullets
+
+
+def rewrite_markdown(
+ markdown: str,
+ current_file: Path,
+ title_cache: dict[Path, str],
+ bib_db: dict[str, dict[str, str]] | None = None,
+ bibliography_title: str = DEFAULT_BIBLIOGRAPHY_TITLE,
+) -> str:
+ """Rewrite one source page into Markdown without ``toc`` / ``eval_rst`` fences.
+
+ ``toc`` fences are replaced by the bullet list from ``render_toc_list``;
+ ``eval_rst`` fences are replaced by the HTML from ``inline_raw_html`` when it
+ returns any. The fence lines themselves are never copied to the output. The
+ result is then passed through ``normalize_directives`` and ``process_citations``
+ (with an empty database when ``bib_db`` is None).
+ """
+ output: list[str] = []
+ lines = markdown.splitlines()
+ index = 0
+
+ while index < len(lines):
+ stripped = lines[index].strip()
+ if stripped in (f"```{TOC_FENCE}", f"```{EVAL_RST_FENCE}"):
+ # Everything after the three backticks names the fence type.
+ fence = stripped[3:]
+ index += 1
+ block_lines: list[str] = []
+ # Gather the fence body up to the closing ``` (or the end of the text).
+ while index < len(lines) and lines[index].strip() != "```":
+ block_lines.append(lines[index])
+ index += 1
+
+ if fence == TOC_FENCE:
+ entries = parse_toc_entries(block_lines)
+ if entries:
+ # Keep a blank line between preceding text and the generated list.
+ if output and output[-1] != "":
+ output.append("")
+ rendered = render_toc_list(entries, current_file, title_cache)
+ output.extend(rendered)
+ if rendered and output and output[-1] != "":
+ output.append("")
+ elif fence == EVAL_RST_FENCE:
+ raw_html = inline_raw_html(block_lines, current_file)
+ if raw_html:
+ if output and output[-1] != "":
+ output.append("")
+ output.extend(raw_html.splitlines())
+ if output and output[-1] != "":
+ output.append("")
+ # Step over the closing fence line.
+ index += 1
+ continue
+
+ output.append(lines[index])
+ index += 1
+
+ # Drop trailing blank lines before normalization.
+ while output and output[-1] == "":
+ output.pop()
+
+ result = normalize_directives("\n".join(output) + "\n")
+ result = process_citations(result, bib_db or {}, bibliography_title=bibliography_title)
+ return result
+
+
+def build_title_cache(
+    source_dir: Path,
+    placeholder_prefix: str | None = None,
+) -> dict[Path, str]:
+    """Map every publishable Markdown file under ``source_dir`` to its title.
+
+    Files are visited recursively in sorted order and keyed by resolved path.
+    Files named ``SUMMARY.md``, files with ``_build`` among their path
+    components, and placeholder pages (see ``is_placeholder_markdown``) are
+    left out.  A file without a heading is titled with its stem.
+    """
+    titles: dict[Path, str] = {}
+    for page in sorted(source_dir.rglob("*.md")):
+        excluded = page.name == "SUMMARY.md" or "_build" in page.parts
+        if excluded:
+            continue
+        content = page.read_text(encoding="utf-8")
+        if not is_placeholder_markdown(content, placeholder_prefix):
+            titles[page.resolve()] = extract_title(content, fallback=page.stem)
+    return titles
+
+
+def build_summary(source_dir: Path, title_cache: dict[Path, str]) -> str:
+ """Build the text of SUMMARY.md from the ``toc`` blocks of ``source_dir/index.md``.
+
+ The root index is listed first. Chapters that come before the first part
+ heading are written as plain links; chapters after a part heading are written
+ as list items, followed recursively by the chapters named in their own ``toc``
+ blocks. Each file is listed at most once, and files missing from
+ ``title_cache`` are skipped.
+ """
+ root_index = (source_dir / "index.md").resolve()
+ root_markdown = root_index.read_text(encoding="utf-8")
+
+ lines = ["# Summary", "", f"[{title_cache[root_index]}](index.md)"]
+ # Resolved paths already written, so no page is listed twice.
+ seen: set[Path] = {root_index}
+
+ # Writes a list item for ``target`` and recurses into the page's own toc blocks.
+ def append_entry(target: Path, indent: int, label: str | None = None) -> None:
+ target = target.resolve()
+ if target in seen or target not in title_cache:
+ return
+ seen.add(target)
+ rel = target.relative_to(source_dir.resolve()).as_posix()
+ title = label or title_cache[target]
+ lines.append(f"{' ' * indent}- [{title}]({rel})")
+
+ child_markdown = target.read_text(encoding="utf-8")
+ for block in parse_toc_blocks(child_markdown):
+ for entry in block:
+ if entry.kind != "chapter" or entry.target is None:
+ continue
+ append_entry(resolve_toc_target(target, entry.target), indent + 1, entry.label or None)
+
+ # Writes a plain link (no list marker, no recursion) for chapters preceding any part heading.
+ def append_prefix_chapter(target: Path, label: str | None = None) -> None:
+ target = target.resolve()
+ if target in seen or target not in title_cache:
+ return
+ seen.add(target)
+ rel = target.relative_to(source_dir.resolve()).as_posix()
+ title = label or title_cache[target]
+ lines.append(f"[{title}]({rel})")
+
+ # Becomes True at the first part heading; later chapters are written as list items.
+ numbered_started = False
+ for block in parse_toc_blocks(root_markdown):
+ for entry in block:
+ if entry.kind == "part":
+ if lines and lines[-1] != "":
+ lines.append("")
+ lines.append(f"# {entry.label}")
+ lines.append("")
+ numbered_started = True
+ continue
+
+ if entry.target is None:
+ continue
+
+ target = resolve_toc_target(root_index, entry.target)
+ if numbered_started:
+ append_entry(target, 0, entry.label or None)
+ else:
+ append_prefix_chapter(target, entry.label or None)
+
+ return "\n".join(lines) + "\n"
+
+
+def write_summary(
+    source_dir: Path,
+    summary_path: Path | None = None,
+    placeholder_prefix: str | None = None,
+) -> Path:
+    """Generate SUMMARY.md for ``source_dir`` and return the path written.
+
+    The file goes to ``summary_path`` when given, otherwise to
+    ``SUMMARY.md`` inside ``source_dir``.  ``placeholder_prefix`` is forwarded
+    to ``build_title_cache`` so placeholder pages are excluded.  The file is
+    written as UTF-8.
+    """
+    source_dir = source_dir.resolve()
+    if summary_path:
+        destination = summary_path.resolve()
+    else:
+        destination = source_dir / "SUMMARY.md"
+
+    titles = build_title_cache(source_dir, placeholder_prefix=placeholder_prefix)
+    content = build_summary(source_dir, titles)
+    destination.write_text(content, encoding="utf-8")
+    return destination
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command line: ``--source``, ``--summary-output`` and optional ``--placeholder-prefix``."""
+    cli = argparse.ArgumentParser(description="Generate mdBook SUMMARY.md for a chapter directory.")
+    cli.add_argument("--source", required=True, type=Path, help="Source chapter directory")
+    cli.add_argument(
+        "--summary-output",
+        required=True,
+        type=Path,
+        help="Where to write the generated SUMMARY.md",
+    )
+    cli.add_argument(
+        "--placeholder-prefix",
+        help="If set, files whose entire contents start with this prefix are skipped from mdBook output.",
+        default=None,
+    )
+    namespace = cli.parse_args()
+    return namespace
+
+
+def main() -> int:
+    """Command-line entry point: write the summary, report its location, return exit status 0."""
+    options = parse_args()
+    written = write_summary(
+        options.source,
+        summary_path=options.summary_output,
+        placeholder_prefix=options.placeholder_prefix,
+    )
+    print(f"Wrote mdBook summary to {written}")
+    return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/tools/prepare_mdbook_zh.py b/tools/prepare_mdbook_zh.py
index 7da3755..a5e17c5 100644
--- a/tools/prepare_mdbook_zh.py
+++ b/tools/prepare_mdbook_zh.py
@@ -1,580 +1,24 @@
from __future__ import annotations
import argparse
-import os
-import re
-from dataclasses import dataclass
from pathlib import Path
-
-TOC_FENCE = "toc"
-EVAL_RST_FENCE = "eval_rst"
-OPTION_LINE_RE = re.compile(r"^:(width|label):`[^`]+`\s*$", re.MULTILINE)
-NUMREF_RE = re.compile(r":numref:`([^`]+)`")
-EQREF_RE = re.compile(r":eqref:`([^`]+)`")
-CITE_RE = re.compile(r":cite:`([^`]+)`")
-BIB_ENTRY_RE = re.compile(r"@(\w+)\{([^,]+),")
-LATEX_ESCAPE_RE = re.compile(r"\\([_%#&])")
-RAW_HTML_FILE_RE = re.compile(r"^\s*:file:\s*([^\s]+)\s*$")
-TOC_LINK_RE = re.compile(r"^\[([^\]]+)\]\(([^)]+)\)\s*$")
-TOC_PART_RE = re.compile(r"^#+\s+(.+?)\s*$")
-HEAD_TAG_RE = re.compile(r"?head>", re.IGNORECASE)
-STYLE_BLOCK_RE = re.compile(r"", re.IGNORECASE | re.DOTALL)
-FRONTPAGE_LAYOUT_CSS = """
-
-""".strip()
-
-
-@dataclass(frozen=True)
-class TocItem:
- kind: str
- label: str
- target: str | None = None
-
-
-def extract_title(markdown: str, fallback: str = "Untitled") -> str:
- lines = markdown.splitlines()
-
- for index, line in enumerate(lines):
- stripped = line.strip()
- if not stripped:
- continue
- if stripped.startswith("#"):
- heading = stripped.lstrip("#").strip()
- if heading:
- return heading
-
- next_index = index + 1
- if next_index < len(lines):
- underline = lines[next_index].strip()
- if underline and set(underline) <= {"=", "-"}:
- return stripped
-
- return fallback
-
-
-def parse_toc_entries(block_lines: list[str]) -> list[TocItem]:
- entries: list[TocItem] = []
- for line in block_lines:
- stripped = line.strip()
- if not stripped or stripped.startswith(":"):
- continue
- part_match = TOC_PART_RE.match(stripped)
- if part_match:
- entries.append(TocItem(kind="part", label=part_match.group(1).strip()))
- continue
- link_match = TOC_LINK_RE.match(stripped)
- if link_match:
- entries.append(TocItem(kind="chapter", label=link_match.group(1).strip(), target=link_match.group(2).strip()))
- continue
- entries.append(TocItem(kind="chapter", label="", target=stripped))
- return entries
-
-
-def parse_toc_blocks(markdown: str) -> list[list[TocItem]]:
- blocks: list[list[TocItem]] = []
- lines = markdown.splitlines()
- index = 0
-
- while index < len(lines):
- if lines[index].strip() == f"```{TOC_FENCE}":
- index += 1
- block_lines: list[str] = []
- while index < len(lines) and lines[index].strip() != "```":
- block_lines.append(lines[index])
- index += 1
- entries = parse_toc_entries(block_lines)
- blocks.append(entries)
- index += 1
-
- return blocks
-
-
-def resolve_toc_target(current_file: Path, entry: str) -> Path:
- target_name = entry if entry.endswith(".md") else f"{entry}.md"
- target = (current_file.parent / target_name).resolve()
- if not target.exists():
- raise FileNotFoundError(f"TOC entry '{entry}' from '{current_file}' does not exist")
- return target
-
-
-def relative_link(from_file: Path, target_file: Path) -> str:
- return Path(os.path.relpath(target_file, start=from_file.parent)).as_posix()
-
-
-def _strip_latex_escapes_outside_math(line: str) -> str:
- """Remove LaTeX escapes (\\_, \\%, \\#, \\&) from text outside $...$ math spans."""
- parts = line.split("$")
- for i in range(0, len(parts), 2): # even indices are outside math
- parts[i] = LATEX_ESCAPE_RE.sub(r"\1", parts[i])
- return "$".join(parts)
-
-
-def normalize_directives(markdown: str) -> str:
- normalized = OPTION_LINE_RE.sub("", markdown)
- normalized = NUMREF_RE.sub(lambda match: f"`{match.group(1)}`", normalized)
- normalized = EQREF_RE.sub(lambda match: f"`{match.group(1)}`", normalized)
-
- lines = [_strip_latex_escapes_outside_math(line.rstrip()) for line in normalized.splitlines()]
- collapsed: list[str] = []
- previous_blank = False
- for line in lines:
- is_blank = line == ""
- if is_blank and previous_blank:
- continue
- collapsed.append(line)
- previous_blank = is_blank
-
- while collapsed and collapsed[-1] == "":
- collapsed.pop()
-
- return "\n".join(collapsed) + "\n"
-
-
-# ── BibTeX parsing ────────────────────────────────────────────────────────────
-
-
-def clean_bibtex(value: str) -> str:
- """Remove BibTeX formatting (braces, LaTeX accents) from a string."""
- value = re.sub(r"\{\\[`'^\"~=.](\w)\}", r"\1", value)
- value = re.sub(r"\\[`'^\"~=.](\w)", r"\1", value)
- value = value.replace("{", "").replace("}", "")
- return value.strip()
-
-
-def _parse_bib_fields(body: str) -> dict[str, str]:
- """Parse field=value pairs inside a BibTeX entry body."""
- fields: dict[str, str] = {}
- i = 0
- while i < len(body):
- while i < len(body) and body[i] in " \t\n\r,":
- i += 1
- if i >= len(body):
- break
- start = i
- while i < len(body) and body[i] not in "= \t\n\r":
- i += 1
- name = body[start:i].strip().lower()
- while i < len(body) and body[i] != "=":
- i += 1
- if i >= len(body):
- break
- i += 1
- while i < len(body) and body[i] in " \t\n\r":
- i += 1
- if i >= len(body):
- break
- if body[i] == "{":
- depth = 1
- i += 1
- vstart = i
- while i < len(body) and depth > 0:
- if body[i] == "{":
- depth += 1
- elif body[i] == "}":
- depth -= 1
- i += 1
- value = body[vstart : i - 1]
- elif body[i] == '"':
- i += 1
- vstart = i
- while i < len(body) and body[i] != '"':
- i += 1
- value = body[vstart:i]
- i += 1
- else:
- vstart = i
- while i < len(body) and body[i] not in ", \t\n\r}":
- i += 1
- value = body[vstart:i]
- if name:
- fields[name] = value.strip()
- return fields
-
-
-def parse_bib(bib_path: Path) -> dict[str, dict[str, str]]:
- """Parse a BibTeX file and return a dict keyed by citation key."""
- text = bib_path.read_text(encoding="utf-8")
- entries: dict[str, dict[str, str]] = {}
- for match in BIB_ENTRY_RE.finditer(text):
- key = match.group(2).strip()
- start = match.end()
- depth = 1
- pos = start
- while pos < len(text) and depth > 0:
- if text[pos] == "{":
- depth += 1
- elif text[pos] == "}":
- depth -= 1
- pos += 1
- fields = _parse_bib_fields(text[start : pos - 1])
- fields["_type"] = match.group(1).lower()
- entries[key] = fields
- return entries
-
-
-# ── Citation formatting ───────────────────────────────────────────────────────
-
-
-def _first_author_surname(author_str: str) -> str:
- """Extract the first author's surname from a BibTeX author string."""
- author_str = clean_bibtex(author_str)
- authors = [a.strip() for a in author_str.split(" and ")]
- if not authors or not authors[0]:
- return ""
- first = authors[0]
- if "," in first:
- return first.split(",")[0].strip()
- parts = first.split()
- return parts[-1] if parts else first
-
-
-def _format_cite_label(author: str, year: str) -> str:
- """Format an inline citation label like 'Surname et al., Year'."""
- surname = _first_author_surname(author)
- if not surname:
- return year or "?"
- authors = [a.strip() for a in clean_bibtex(author).split(" and ")]
- if len(authors) > 2:
- name_part = f"{surname} et al."
- elif len(authors) == 2:
- second = authors[1]
- if second.lower() == "others":
- name_part = f"{surname} et al."
- else:
- if "," in second:
- surname2 = second.split(",")[0].strip()
- else:
- parts = second.split()
- surname2 = parts[-1] if parts else second
- name_part = f"{surname} and {surname2}"
- else:
- name_part = surname
- if year:
- return f"{name_part}, {year}"
- return name_part
-
-
-def _render_bibliography(
- cited_keys: list[str], bib_db: dict[str, dict[str, str]]
-) -> list[str]:
- """Render a footnote-style bibliography section for the cited keys."""
- lines: list[str] = ["---", "", "## 参考文献", "", "
"]
- for idx, key in enumerate(cited_keys, 1):
- entry = bib_db.get(key)
- if not entry:
- lines.append(f'- {key}. ↩
')
- continue
- author = clean_bibtex(entry.get("author", ""))
- title = clean_bibtex(entry.get("title", ""))
- year = entry.get("year", "")
- venue = clean_bibtex(entry.get("journal", "") or entry.get("booktitle", ""))
- parts: list[str] = []
- if author:
- parts.append(author)
- if title:
- parts.append(f"{title}")
- if venue:
- parts.append(venue)
- if year:
- parts.append(year)
- text = ". ".join(parts) + "." if parts else f"{key}."
- lines.append(f'- {text} ↩
')
- lines.append("
")
- return lines
-
-
-def process_citations(
- markdown: str, bib_db: dict[str, dict[str, str]]
-) -> str:
- """Replace :cite: references with footnote-style numbered citations."""
- cited_keys: list[str] = []
-
- def _replace_cite(match: re.Match[str]) -> str:
- keys = [k.strip() for k in match.group(1).split(",")]
- for key in keys:
- if key not in cited_keys:
- cited_keys.append(key)
- if not bib_db:
- return "[" + ", ".join(keys) + "]"
- nums: list[str] = []
- for key in keys:
- idx = cited_keys.index(key) + 1
- nums.append(
- f'
[{idx}]'
- )
- return "".join(nums)
-
- processed = CITE_RE.sub(_replace_cite, markdown)
- if cited_keys and bib_db:
- bib_lines = _render_bibliography(cited_keys, bib_db)
- processed = processed.rstrip("\n") + "\n\n" + "\n".join(bib_lines) + "\n"
- return processed
-
-
-def resolve_raw_html_file(current_file: Path, filename: str) -> Path:
- direct = (current_file.parent / filename).resolve()
- if direct.exists():
- return direct
-
- static_fallback = (current_file.parent / "static" / filename).resolve()
- if static_fallback.exists():
- return static_fallback
-
- repo_static = (Path(__file__).resolve().parent.parent / "static" / filename)
- if repo_static.exists():
- return repo_static
-
- raise FileNotFoundError(f"Raw HTML include '{filename}' from '{current_file}' does not exist")
-
-
-def rewrite_frontpage_assets(html: str) -> str:
- rewritten = html.replace('./_images/', 'static/image/')
- rewritten = rewritten.replace('_images/', 'static/image/')
- rewritten = HEAD_TAG_RE.sub("", rewritten)
- rewritten = STYLE_BLOCK_RE.sub(_minify_style_block, rewritten)
- return rewritten
-
-
-def _minify_style_block(match: re.Match[str]) -> str:
- content = match.group(1)
- parts = [line.strip() for line in content.splitlines() if line.strip()]
- return f""
-
-
-def wrap_frontpage_html(html: str) -> str:
- return "\n".join([FRONTPAGE_LAYOUT_CSS, '
', html.strip(), '
'])
-
-
-def inline_raw_html(block_lines: list[str], current_file: Path) -> str | None:
- stripped = [line.strip() for line in block_lines if line.strip()]
- if not stripped or stripped[0] != ".. raw:: html":
- return None
-
- filename: str | None = None
- for line in stripped[1:]:
- match = RAW_HTML_FILE_RE.match(line)
- if match:
- filename = match.group(1)
- break
-
- if filename is None:
- return None
-
- html_path = resolve_raw_html_file(current_file, filename)
- html = rewrite_frontpage_assets(html_path.read_text(encoding="utf-8")).strip()
- if Path(filename).name == "frontpage.html":
- return wrap_frontpage_html(html)
- return html
-
-
-def chapter_label(item: TocItem, target: Path, title_cache: dict[Path, str]) -> str:
- return item.label or title_cache[target]
-
-
-def render_toc_list(entries: list[TocItem], current_file: Path, title_cache: dict[Path, str]) -> list[str]:
- rendered: list[str] = []
- current_indent = 0
- for entry in entries:
- if entry.kind == "part":
- rendered.append(f"- {entry.label}")
- current_indent = 1
- continue
-
- if entry.target is None:
- continue
-
- target = resolve_toc_target(current_file, entry.target)
- label = chapter_label(entry, target, title_cache)
- rendered.append(f"{' ' * current_indent}- [{label}]({relative_link(current_file, target)})")
- return rendered
-
-
-def rewrite_markdown(
- markdown: str,
- current_file: Path,
- title_cache: dict[Path, str],
- bib_db: dict[str, dict[str, str]] | None = None,
-) -> str:
- output: list[str] = []
- lines = markdown.splitlines()
- index = 0
-
- while index < len(lines):
- stripped = lines[index].strip()
- if stripped in (f"```{TOC_FENCE}", f"```{EVAL_RST_FENCE}"):
- fence = stripped[3:]
- index += 1
- block_lines: list[str] = []
- while index < len(lines) and lines[index].strip() != "```":
- block_lines.append(lines[index])
- index += 1
-
- if fence == TOC_FENCE:
- entries = parse_toc_entries(block_lines)
- if entries:
- if output and output[-1] != "":
- output.append("")
- output.extend(render_toc_list(entries, current_file, title_cache))
- if output and output[-1] != "":
- output.append("")
- elif fence == EVAL_RST_FENCE:
- raw_html = inline_raw_html(block_lines, current_file)
- if raw_html:
- if output and output[-1] != "":
- output.append("")
- output.extend(raw_html.splitlines())
- if output and output[-1] != "":
- output.append("")
- index += 1
- continue
-
- output.append(lines[index])
- index += 1
-
- while output and output[-1] == "":
- output.pop()
-
- result = normalize_directives("\n".join(output) + "\n")
- result = process_citations(result, bib_db or {})
- return result
-
-
-def build_title_cache(source_dir: Path) -> dict[Path, str]:
- cache: dict[Path, str] = {}
- for markdown_file in sorted(source_dir.rglob("*.md")):
- if "_build" in markdown_file.parts or markdown_file.name == "SUMMARY.md":
- continue
- cache[markdown_file.resolve()] = extract_title(markdown_file.read_text(encoding="utf-8"), fallback=markdown_file.stem)
- return cache
-
-
-def build_summary(source_dir: Path, title_cache: dict[Path, str]) -> str:
- root_index = (source_dir / "index.md").resolve()
- root_markdown = root_index.read_text(encoding="utf-8")
-
- lines = ["# Summary", "", f"[{title_cache[root_index]}](index.md)"]
- seen: set[Path] = {root_index}
-
- def append_entry(target: Path, indent: int, label: str | None = None) -> None:
- target = target.resolve()
- if target in seen:
- return
- seen.add(target)
- rel = target.relative_to(source_dir.resolve()).as_posix()
- title = label or title_cache[target]
- lines.append(f"{' ' * indent}- [{title}]({rel})")
-
- child_markdown = target.read_text(encoding="utf-8")
- for block in parse_toc_blocks(child_markdown):
- for entry in block:
- if entry.kind != "chapter" or entry.target is None:
- continue
- append_entry(resolve_toc_target(target, entry.target), indent + 1, entry.label or None)
-
- def append_prefix_chapter(target: Path, label: str | None = None) -> None:
- target = target.resolve()
- if target in seen:
- return
- seen.add(target)
- rel = target.relative_to(source_dir.resolve()).as_posix()
- title = label or title_cache[target]
- lines.append(f"[{title}]({rel})")
-
- numbered_started = False
- for block in parse_toc_blocks(root_markdown):
- for entry in block:
- if entry.kind == "part":
- if lines and lines[-1] != "":
- lines.append("")
- lines.append(f"# {entry.label}")
- lines.append("")
- numbered_started = True
- continue
-
- if entry.target is None:
- continue
-
- target = resolve_toc_target(root_index, entry.target)
- if numbered_started:
- append_entry(target, 0, entry.label or None)
- else:
- append_prefix_chapter(target, entry.label or None)
-
- return "\n".join(lines) + "\n"
-
-
-def write_summary(source_dir: Path, summary_path: Path | None = None) -> Path:
- source_dir = source_dir.resolve()
- summary_path = summary_path.resolve() if summary_path else (source_dir / "SUMMARY.md")
- title_cache = build_title_cache(source_dir)
- summary_path.write_text(build_summary(source_dir, title_cache), encoding="utf-8")
- return summary_path
+# Prefer the package-qualified import; fall back to the sibling module when
+# ``tools`` cannot be imported as a package (for example when this file is run
+# directly as a script).
+try:
+    from tools.prepare_mdbook import (
+        build_title_cache,
+        extract_title,
+        parse_bib,
+        rewrite_markdown,
+        write_summary,
+    )
+except ModuleNotFoundError as exc:
+    # Fall back only when the shared module itself could not be located. A
+    # missing dependency raised from inside it must surface as-is instead of
+    # being masked by a second, misleading import failure.
+    if exc.name not in ("tools", "tools.prepare_mdbook"):
+        raise
+    from prepare_mdbook import (
+        build_title_cache,
+        extract_title,
+        parse_bib,
+        rewrite_markdown,
+        write_summary,
+    )
def parse_args() -> argparse.Namespace:
@@ -591,7 +35,7 @@ def parse_args() -> argparse.Namespace:
def main() -> int:
args = parse_args()
- summary_path = write_summary(args.source, args.summary_output)
+ summary_path = write_summary(args.source, summary_path=args.summary_output)
print(f"Wrote mdBook summary to {summary_path}")
return 0