mirror of
https://github.com/openmlsys/openmlsys-zh.git
synced 2026-03-31 17:32:06 +08:00
build: migrate docs publishing to mdbook
Move the English root site to mdBook, keep the Chinese site as a sub-book, and update CI/deploy to publish .mdbook outputs to docs/ and docs/cn/. Also add regression coverage for placeholder skipping, publish-tree assembly, and shared resource setup.
This commit is contained in:
58
.github/workflows/main.yml
vendored
58
.github/workflows/main.yml
vendored
@@ -7,7 +7,7 @@ on:
|
||||
|
||||
jobs:
|
||||
build-en:
|
||||
name: Build (English)
|
||||
name: Build (English mdBook)
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
@@ -17,30 +17,28 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install pandoc
|
||||
- name: Install Rust toolchain
|
||||
run: |
|
||||
wget -q https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-amd64.deb
|
||||
sudo dpkg -i pandoc-2.19.2-1-amd64.deb
|
||||
curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal
|
||||
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Install d2lbook
|
||||
- name: Install mdBook
|
||||
run: cargo install mdbook --locked
|
||||
|
||||
- name: Run mdBook regression tests
|
||||
run: |
|
||||
git clone https://github.com/openmlsys/d2l-book.git
|
||||
cd d2l-book
|
||||
# Fix Python 3.10+ incompatibility: bibtex<2.0.0 depends on oset which
|
||||
# uses collections.MutableSet removed in Python 3.10.
|
||||
sed -i "s/'sphinxcontrib-bibtex<2.0.0'/'sphinxcontrib-bibtex>=2.5.0'/" setup.py
|
||||
python3 -m pip install .
|
||||
python3 -m unittest discover -s tests -p 'test_prepare_mdbook.py'
|
||||
python3 -m unittest discover -s tests -p 'test_prepare_mdbook_zh.py'
|
||||
python3 -m unittest discover -s tests -p 'test_assemble_docs_publish_tree.py'
|
||||
python3 -m unittest discover -s tests -p 'test_ensure_book_resources.py'
|
||||
python3 -m unittest discover -s tests -p 'test_mdbook_mathjax.py'
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: python3 -m pip install -r requirements.txt
|
||||
|
||||
- name: Build English HTML
|
||||
run: bash build_html.sh
|
||||
- name: Build English HTML with mdBook
|
||||
run: bash build_mdbook.sh
|
||||
|
||||
build-zh:
|
||||
name: Build (Chinese)
|
||||
name: Build (Chinese mdBook)
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
@@ -50,25 +48,17 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install pandoc
|
||||
|
||||
- name: Install Rust toolchain
|
||||
run: |
|
||||
wget -q https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-amd64.deb
|
||||
sudo dpkg -i pandoc-2.19.2-1-amd64.deb
|
||||
curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal
|
||||
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Install d2lbook
|
||||
run: |
|
||||
git clone https://github.com/openmlsys/d2l-book.git
|
||||
cd d2l-book
|
||||
sed -i "s/'sphinxcontrib-bibtex<2.0.0'/'sphinxcontrib-bibtex>=2.5.0'/" setup.py
|
||||
python3 -m pip install .
|
||||
- name: Install mdBook
|
||||
run: cargo install mdbook --locked
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: python3 -m pip install -r requirements.txt
|
||||
|
||||
- name: Build Chinese HTML
|
||||
run: bash build_html_zh.sh
|
||||
- name: Build Chinese HTML with mdBook
|
||||
run: bash build_mdbook_zh.sh
|
||||
|
||||
build:
|
||||
name: build
|
||||
|
||||
45
.github/workflows/update_docs.yml
vendored
45
.github/workflows/update_docs.yml
vendored
@@ -16,30 +16,28 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install pandoc
|
||||
- name: Install Rust toolchain
|
||||
run: |
|
||||
wget -q https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-amd64.deb
|
||||
sudo dpkg -i pandoc-2.19.2-1-amd64.deb
|
||||
curl -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal
|
||||
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
|
||||
|
||||
- name: Install d2lbook
|
||||
- name: Install mdBook
|
||||
run: cargo install mdbook --locked
|
||||
|
||||
- name: Run mdBook regression tests
|
||||
run: |
|
||||
git clone https://github.com/openmlsys/d2l-book.git
|
||||
cd d2l-book
|
||||
# Fix Python 3.10+ incompatibility: bibtex<2.0.0 depends on oset which
|
||||
# uses collections.MutableSet removed in Python 3.10.
|
||||
sed -i "s/'sphinxcontrib-bibtex<2.0.0'/'sphinxcontrib-bibtex>=2.5.0'/" setup.py
|
||||
python3 -m pip install .
|
||||
python3 -m unittest discover -s tests -p 'test_prepare_mdbook.py'
|
||||
python3 -m unittest discover -s tests -p 'test_prepare_mdbook_zh.py'
|
||||
python3 -m unittest discover -s tests -p 'test_assemble_docs_publish_tree.py'
|
||||
python3 -m unittest discover -s tests -p 'test_ensure_book_resources.py'
|
||||
python3 -m unittest discover -s tests -p 'test_mdbook_mathjax.py'
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: python3 -m pip install -r requirements.txt sphinx-mathjax-offline
|
||||
- name: Build English HTML with mdBook
|
||||
run: bash build_mdbook.sh
|
||||
|
||||
- name: Build English HTML
|
||||
run: bash build_html.sh
|
||||
|
||||
- name: Build Chinese HTML
|
||||
run: bash build_html_zh.sh
|
||||
- name: Build Chinese HTML with mdBook
|
||||
run: bash build_mdbook_zh.sh
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
env:
|
||||
@@ -47,12 +45,11 @@ jobs:
|
||||
run: |
|
||||
git clone https://x-access-token:${DEPLOY_TOKEN}@github.com/openmlsys/openmlsys.github.io.git
|
||||
|
||||
# English → root (default language)
|
||||
cp -r en_chapters/_build/html/* openmlsys.github.io/docs/
|
||||
|
||||
# Chinese → /cn/ subdirectory
|
||||
mkdir -p openmlsys.github.io/docs/cn
|
||||
cp -r zh_chapters/_build/html/* openmlsys.github.io/docs/cn/
|
||||
python3 tools/assemble_docs_publish_tree.py \
|
||||
--destination-root openmlsys.github.io \
|
||||
--docs-subdir docs \
|
||||
--en-source .mdbook/book \
|
||||
--zh-source .mdbook-zh/book
|
||||
|
||||
cd openmlsys.github.io
|
||||
git config user.name "github-actions[bot]"
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -12,6 +12,7 @@ test*.md
|
||||
run.sh
|
||||
.idea
|
||||
env
|
||||
.mdbook/
|
||||
.mdbook-zh/
|
||||
.mdbook-zh-test/
|
||||
task_plan.md
|
||||
@@ -19,7 +20,6 @@ findings.md
|
||||
progress.md
|
||||
d2l-book/
|
||||
docs/
|
||||
tests/
|
||||
en_chapters/img
|
||||
en_chapters/references
|
||||
en_chapters/static
|
||||
|
||||
12
book.toml
12
book.toml
@@ -1,15 +1,15 @@
|
||||
[book]
|
||||
authors = ["OpenMLSys Contributors"]
|
||||
language = "zh-CN"
|
||||
src = "zh_chapters"
|
||||
title = "机器学习系统:设计和实现"
|
||||
language = "en"
|
||||
src = "en_chapters"
|
||||
title = "Machine Learning Systems: Design and Implementation"
|
||||
|
||||
[build]
|
||||
build-dir = ".mdbook-zh/book"
|
||||
build-dir = ".mdbook/book"
|
||||
create-missing = false
|
||||
|
||||
[preprocessor.openmlsys-zh]
|
||||
command = "python3 tools/mdbook_zh_preprocessor.py"
|
||||
[preprocessor.openmlsys]
|
||||
command = "python3 tools/mdbook_preprocessor.py"
|
||||
|
||||
[output.html]
|
||||
git-repository-url = "https://github.com/openmlsys/openmlsys-zh"
|
||||
|
||||
18
books/zh/book.toml
Normal file
18
books/zh/book.toml
Normal file
@@ -0,0 +1,18 @@
|
||||
[book]
|
||||
authors = ["OpenMLSys Contributors"]
|
||||
language = "zh-CN"
|
||||
src = "../../zh_chapters"
|
||||
title = "机器学习系统:设计和实现"
|
||||
|
||||
[build]
|
||||
build-dir = "../../.mdbook-zh/book"
|
||||
create-missing = false
|
||||
|
||||
[preprocessor.openmlsys-zh]
|
||||
command = "python3 ../../tools/mdbook_zh_preprocessor.py"
|
||||
|
||||
[output.html]
|
||||
git-repository-url = "https://github.com/openmlsys/openmlsys-zh"
|
||||
mathjax-support = true
|
||||
preferred-dark-theme = "navy"
|
||||
additional-css = ["theme/dark-mode-images.css"]
|
||||
6
books/zh/theme/dark-mode-images.css
Normal file
6
books/zh/theme/dark-mode-images.css
Normal file
@@ -0,0 +1,6 @@
|
||||
html.light img[src$=".png"],
|
||||
html.light img[src$=".jpg"],
|
||||
html.light img[src$=".jpeg"],
|
||||
html.light img[src$=".gif"] {
|
||||
background-color: #fff;
|
||||
}
|
||||
10
books/zh/theme/head.hbs
Normal file
10
books/zh/theme/head.hbs
Normal file
@@ -0,0 +1,10 @@
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({
|
||||
tex2jax: {
|
||||
inlineMath: [['$', '$'], ['\\(', '\\)']],
|
||||
displayMath: [['$$', '$$'], ['\\[', '\\]']],
|
||||
processEscapes: true,
|
||||
processEnvironments: true
|
||||
}
|
||||
});
|
||||
</script>
|
||||
@@ -10,15 +10,7 @@ set -e
|
||||
ROOT="$(cd "$(dirname "$0")" && pwd)"
|
||||
|
||||
# ── Create resource symlinks ──────────────────────────────────────────────────
|
||||
for target in img references static mlsys.bib; do
|
||||
link="$ROOT/en_chapters/$target"
|
||||
rel_target="../$target"
|
||||
if [ -e "$link" ] && [ ! -L "$link" ]; then
|
||||
echo "Refusing to replace non-symlink path: $link" >&2
|
||||
exit 1
|
||||
fi
|
||||
ln -sfn "$rel_target" "$link"
|
||||
done
|
||||
python3 "$ROOT/tools/ensure_book_resources.py" --chapter-dir "$ROOT/en_chapters"
|
||||
|
||||
# ── Build ─────────────────────────────────────────────────────────────────────
|
||||
cd "$ROOT/en_chapters"
|
||||
|
||||
@@ -10,15 +10,7 @@ set -e
|
||||
ROOT="$(cd "$(dirname "$0")" && pwd)"
|
||||
|
||||
# ── Create resource symlinks ──────────────────────────────────────────────────
|
||||
for target in img references static mlsys.bib; do
|
||||
link="$ROOT/zh_chapters/$target"
|
||||
rel_target="../$target"
|
||||
if [ -e "$link" ] && [ ! -L "$link" ]; then
|
||||
echo "Refusing to replace non-symlink path: $link" >&2
|
||||
exit 1
|
||||
fi
|
||||
ln -sfn "$rel_target" "$link"
|
||||
done
|
||||
python3 "$ROOT/tools/ensure_book_resources.py" --chapter-dir "$ROOT/zh_chapters"
|
||||
|
||||
# ── Build ─────────────────────────────────────────────────────────────────────
|
||||
cd "$ROOT/zh_chapters"
|
||||
|
||||
23
build_mdbook.sh
Normal file
23
build_mdbook.sh
Normal file
@@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PYTHON_BIN="$(command -v python3 || command -v python || true)"
|
||||
|
||||
if [[ -z "${PYTHON_BIN}" ]]; then
|
||||
echo "Python is required to prepare the mdBook staging tree." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! command -v mdbook >/dev/null 2>&1; then
|
||||
echo "mdbook is not installed. Install it first, for example with: cargo install mdbook" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
"${PYTHON_BIN}" "${ROOT}/tools/ensure_book_resources.py" --chapter-dir "${ROOT}/en_chapters"
|
||||
"${PYTHON_BIN}" "${ROOT}/tools/prepare_mdbook.py" \
|
||||
--source "${ROOT}/en_chapters" \
|
||||
--summary-output "${ROOT}/en_chapters/SUMMARY.md" \
|
||||
--placeholder-prefix "[TODO: src = zh_chapters/"
|
||||
|
||||
mdbook build "${ROOT}"
|
||||
@@ -14,22 +14,12 @@ if ! command -v mdbook >/dev/null 2>&1; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ── Create resource symlinks ──────────────────────────────────────────────────
|
||||
# Resources (img/, references/, static/, mlsys.bib) live at the repo root and
|
||||
# are symlinked into zh_chapters/ so mdbook can find them at relative paths.
|
||||
for target in img references static mlsys.bib; do
|
||||
link="${ROOT}/zh_chapters/${target}"
|
||||
rel_target="../${target}"
|
||||
if [[ -e "${link}" ]] && [[ ! -L "${link}" ]]; then
|
||||
echo "Refusing to replace non-symlink path: ${link}" >&2
|
||||
exit 1
|
||||
fi
|
||||
ln -sfn "${rel_target}" "${link}"
|
||||
done
|
||||
# ── Create resource links ─────────────────────────────────────────────────────
|
||||
"${PYTHON_BIN}" "${ROOT}/tools/ensure_book_resources.py" --chapter-dir "${ROOT}/zh_chapters"
|
||||
|
||||
# ── Build ─────────────────────────────────────────────────────────────────────
|
||||
"${PYTHON_BIN}" "${ROOT}/tools/prepare_mdbook_zh.py" \
|
||||
--source "${ROOT}/zh_chapters" \
|
||||
--summary-output "${ROOT}/zh_chapters/SUMMARY.md"
|
||||
|
||||
mdbook build "${ROOT}"
|
||||
mdbook build "${ROOT}/books/zh"
|
||||
|
||||
3
en_chapters/SUMMARY.md
Normal file
3
en_chapters/SUMMARY.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Summary
|
||||
|
||||
[Machine Learning Systems: Design and Implementation](index.md)
|
||||
87
tests/test_assemble_docs_publish_tree.py
Normal file
87
tests/test_assemble_docs_publish_tree.py
Normal file
@@ -0,0 +1,87 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from tools.assemble_docs_publish_tree import assemble_publish_tree
|
||||
|
||||
|
||||
class AssembleDocsPublishTreeTests(unittest.TestCase):
|
||||
def test_assemble_publish_tree_uses_legacy_docs_layout(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
pages_repo = root / "pages"
|
||||
en_source = root / "en-html"
|
||||
zh_source = root / "zh-html"
|
||||
|
||||
pages_repo.mkdir()
|
||||
en_source.mkdir()
|
||||
zh_source.mkdir()
|
||||
|
||||
(en_source / "index.html").write_text("english home", encoding="utf-8")
|
||||
(en_source / "guide.html").write_text("english guide", encoding="utf-8")
|
||||
(zh_source / "index.html").write_text("chinese home", encoding="utf-8")
|
||||
(zh_source / "searchindex.js").write_text("zh search", encoding="utf-8")
|
||||
|
||||
assemble_publish_tree(
|
||||
destination_root=pages_repo,
|
||||
docs_subdir="docs",
|
||||
en_source=en_source,
|
||||
zh_source=zh_source,
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
(pages_repo / "docs" / "index.html").read_text(encoding="utf-8"),
|
||||
"english home",
|
||||
)
|
||||
self.assertEqual(
|
||||
(pages_repo / "docs" / "guide.html").read_text(encoding="utf-8"),
|
||||
"english guide",
|
||||
)
|
||||
self.assertEqual(
|
||||
(pages_repo / "docs" / "cn" / "index.html").read_text(encoding="utf-8"),
|
||||
"chinese home",
|
||||
)
|
||||
self.assertEqual(
|
||||
(pages_repo / "docs" / "cn" / "searchindex.js").read_text(encoding="utf-8"),
|
||||
"zh search",
|
||||
)
|
||||
|
||||
def test_assemble_publish_tree_replaces_stale_docs_content(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
pages_repo = root / "pages"
|
||||
en_source = root / "en-html"
|
||||
zh_source = root / "zh-html"
|
||||
|
||||
(pages_repo / "docs" / "cn").mkdir(parents=True)
|
||||
(pages_repo / "docs" / "old.html").write_text("stale en", encoding="utf-8")
|
||||
(pages_repo / "docs" / "cn" / "old.html").write_text("stale zh", encoding="utf-8")
|
||||
|
||||
en_source.mkdir()
|
||||
zh_source.mkdir()
|
||||
(en_source / "index.html").write_text("fresh en", encoding="utf-8")
|
||||
(zh_source / "index.html").write_text("fresh zh", encoding="utf-8")
|
||||
|
||||
assemble_publish_tree(
|
||||
destination_root=pages_repo,
|
||||
docs_subdir="docs",
|
||||
en_source=en_source,
|
||||
zh_source=zh_source,
|
||||
)
|
||||
|
||||
self.assertFalse((pages_repo / "docs" / "old.html").exists())
|
||||
self.assertFalse((pages_repo / "docs" / "cn" / "old.html").exists())
|
||||
self.assertEqual(
|
||||
(pages_repo / "docs" / "index.html").read_text(encoding="utf-8"),
|
||||
"fresh en",
|
||||
)
|
||||
self.assertEqual(
|
||||
(pages_repo / "docs" / "cn" / "index.html").read_text(encoding="utf-8"),
|
||||
"fresh zh",
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
55
tests/test_ensure_book_resources.py
Normal file
55
tests/test_ensure_book_resources.py
Normal file
@@ -0,0 +1,55 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from tools.ensure_book_resources import ensure_resource_views
|
||||
|
||||
|
||||
class EnsureBookResourcesTests(unittest.TestCase):
|
||||
def test_ensure_resource_views_creates_missing_symlinks(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
chapter_dir = root / "en_chapters"
|
||||
chapter_dir.mkdir()
|
||||
|
||||
for directory in ("img", "references", "static"):
|
||||
(root / directory).mkdir()
|
||||
(root / "mlsys.bib").write_text("bib", encoding="utf-8")
|
||||
|
||||
ensure_resource_views(chapter_dir, root)
|
||||
|
||||
for name in ("img", "references", "static", "mlsys.bib"):
|
||||
path = chapter_dir / name
|
||||
self.assertTrue(path.is_symlink(), f"{name} should be a symlink")
|
||||
self.assertEqual(path.resolve(), (root / name).resolve())
|
||||
|
||||
def test_ensure_resource_views_keeps_existing_non_symlink_paths(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
chapter_dir = root / "en_chapters"
|
||||
chapter_dir.mkdir()
|
||||
|
||||
for directory in ("img", "references", "static"):
|
||||
(root / directory).mkdir()
|
||||
(root / "mlsys.bib").write_text("root bib", encoding="utf-8")
|
||||
|
||||
local_bib = chapter_dir / "mlsys.bib"
|
||||
local_bib.write_text("local bib", encoding="utf-8")
|
||||
local_static = chapter_dir / "static"
|
||||
local_static.mkdir()
|
||||
(local_static / "frontpage.html").write_text("local static", encoding="utf-8")
|
||||
|
||||
ensure_resource_views(chapter_dir, root)
|
||||
|
||||
self.assertFalse(local_bib.is_symlink())
|
||||
self.assertEqual(local_bib.read_text(encoding="utf-8"), "local bib")
|
||||
self.assertFalse(local_static.is_symlink())
|
||||
self.assertTrue((local_static / "frontpage.html").exists())
|
||||
self.assertTrue((chapter_dir / "img").is_symlink())
|
||||
self.assertTrue((chapter_dir / "references").is_symlink())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
114
tests/test_prepare_mdbook.py
Normal file
114
tests/test_prepare_mdbook.py
Normal file
@@ -0,0 +1,114 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from tools.prepare_mdbook import build_title_cache, rewrite_markdown, write_summary
|
||||
|
||||
|
||||
class PrepareMdBookTests(unittest.TestCase):
|
||||
def test_write_summary_skips_placeholder_pages(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
source = root / "en_chapters"
|
||||
source.mkdir()
|
||||
|
||||
(source / "index.md").write_text(
|
||||
"""Machine Learning Systems
|
||||
========================
|
||||
|
||||
```toc
|
||||
:maxdepth: 2
|
||||
|
||||
chapter_preface/index
|
||||
chapter_introduction/index
|
||||
```
|
||||
|
||||
```toc
|
||||
:maxdepth: 1
|
||||
|
||||
appendix/index
|
||||
```
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
chapter_preface = source / "chapter_preface"
|
||||
chapter_preface.mkdir()
|
||||
(chapter_preface / "index.md").write_text(
|
||||
"[TODO: src = zh_chapters/chapter_preface/index.md]\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
chapter_intro = source / "chapter_introduction"
|
||||
chapter_intro.mkdir()
|
||||
(chapter_intro / "index.md").write_text("# Introduction\n", encoding="utf-8")
|
||||
|
||||
appendix = source / "appendix"
|
||||
appendix.mkdir()
|
||||
(appendix / "index.md").write_text("# Appendix\n", encoding="utf-8")
|
||||
|
||||
summary_path = write_summary(
|
||||
source,
|
||||
placeholder_prefix="[TODO: src = zh_chapters/",
|
||||
)
|
||||
summary = summary_path.read_text(encoding="utf-8")
|
||||
|
||||
self.assertEqual(
|
||||
summary,
|
||||
"""# Summary
|
||||
|
||||
[Machine Learning Systems](index.md)
|
||||
[Introduction](chapter_introduction/index.md)
|
||||
[Appendix](appendix/index.md)
|
||||
""",
|
||||
)
|
||||
|
||||
title_cache = build_title_cache(
|
||||
source,
|
||||
placeholder_prefix="[TODO: src = zh_chapters/",
|
||||
)
|
||||
rewritten = rewrite_markdown(
|
||||
(source / "index.md").read_text(encoding="utf-8"),
|
||||
(source / "index.md").resolve(),
|
||||
title_cache,
|
||||
)
|
||||
|
||||
self.assertIn("- [Introduction](chapter_introduction/index.md)", rewritten)
|
||||
self.assertIn("- [Appendix](appendix/index.md)", rewritten)
|
||||
self.assertNotIn("chapter_preface/index.md", rewritten)
|
||||
|
||||
def test_rewrite_markdown_uses_configured_bibliography_title(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
page = root / "chapter.md"
|
||||
page.write_text(
|
||||
"""# Introduction
|
||||
|
||||
Reference :cite:`smith2024`.
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
rewritten = rewrite_markdown(
|
||||
page.read_text(encoding="utf-8"),
|
||||
page.resolve(),
|
||||
{page.resolve(): "Introduction"},
|
||||
bib_db={
|
||||
"smith2024": {
|
||||
"author": "Smith, Alice and Doe, Bob",
|
||||
"title": "Systems Paper",
|
||||
"year": "2024",
|
||||
"journal": "ML Systems Journal",
|
||||
}
|
||||
},
|
||||
bibliography_title="References",
|
||||
)
|
||||
|
||||
self.assertIn("## References", rewritten)
|
||||
self.assertNotIn("## 参考文献", rewritten)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
227
tests/test_prepare_mdbook_zh.py
Normal file
227
tests/test_prepare_mdbook_zh.py
Normal file
@@ -0,0 +1,227 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from tools.prepare_mdbook_zh import extract_title, rewrite_markdown, write_summary
|
||||
|
||||
|
||||
class PrepareMdBookZhTests(unittest.TestCase):
|
||||
def test_extract_title_supports_atx_and_setext_headings(self) -> None:
|
||||
self.assertEqual(extract_title("# 导论\n"), "导论")
|
||||
self.assertEqual(extract_title("前言文字\n\n## 机器学习应用\n"), "机器学习应用")
|
||||
self.assertEqual(extract_title("机器学习系统:设计和实现\n=========================\n"), "机器学习系统:设计和实现")
|
||||
|
||||
def test_write_summary_generates_nested_navigation(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
source = root / "zh_chapters"
|
||||
source.mkdir()
|
||||
|
||||
(source / "index.md").write_text(
|
||||
"""机器学习系统:设计和实现
|
||||
=========================
|
||||
|
||||
```eval_rst
|
||||
.. raw:: html
|
||||
:file: frontpage.html
|
||||
```
|
||||
|
||||
```toc
|
||||
:maxdepth: 2
|
||||
|
||||
[前言](chapter_preface/index)
|
||||
|
||||
# 基础篇
|
||||
chapter_introduction/index
|
||||
|
||||
# 附录
|
||||
[机器学习基础附录](appendix_machine_learning_introduction/index)
|
||||
```
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
chapter_preface = source / "chapter_preface"
|
||||
chapter_preface.mkdir()
|
||||
(chapter_preface / "index.md").write_text("# 前言\n", encoding="utf-8")
|
||||
static_dir = source / "static"
|
||||
static_dir.mkdir()
|
||||
(static_dir / "frontpage.html").write_text(
|
||||
"<div class=\"hero\">frontpage</div>\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
chapter_intro = source / "chapter_introduction"
|
||||
chapter_intro.mkdir()
|
||||
(chapter_intro / "index.md").write_text(
|
||||
"""# 导论
|
||||
|
||||
```toc
|
||||
:maxdepth: 2
|
||||
|
||||
applications
|
||||
design
|
||||
```
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
(chapter_intro / "applications.md").write_text("# 机器学习应用\n", encoding="utf-8")
|
||||
(chapter_intro / "design.md").write_text("# 设计目标\n", encoding="utf-8")
|
||||
|
||||
appendix = source / "appendix_machine_learning_introduction"
|
||||
appendix.mkdir()
|
||||
(appendix / "index.md").write_text("# 机器学习基础附录\n", encoding="utf-8")
|
||||
|
||||
for name in ("img", "static", "references"):
|
||||
(root / name).mkdir()
|
||||
(root / "mlsys.bib").write_text("% bibliography\n", encoding="utf-8")
|
||||
|
||||
summary_path = write_summary(source)
|
||||
summary = summary_path.read_text(encoding="utf-8")
|
||||
self.assertEqual(
|
||||
summary,
|
||||
"""# Summary
|
||||
|
||||
[机器学习系统:设计和实现](index.md)
|
||||
[前言](chapter_preface/index.md)
|
||||
|
||||
# 基础篇
|
||||
|
||||
- [导论](chapter_introduction/index.md)
|
||||
- [机器学习应用](chapter_introduction/applications.md)
|
||||
- [设计目标](chapter_introduction/design.md)
|
||||
|
||||
# 附录
|
||||
|
||||
- [机器学习基础附录](appendix_machine_learning_introduction/index.md)
|
||||
""",
|
||||
)
|
||||
|
||||
title_cache = {
|
||||
(source / "chapter_preface" / "index.md").resolve(): "前言",
|
||||
(source / "chapter_introduction" / "index.md").resolve(): "导论",
|
||||
(source / "chapter_introduction" / "applications.md").resolve(): "机器学习应用",
|
||||
(source / "chapter_introduction" / "design.md").resolve(): "设计目标",
|
||||
(source / "appendix_machine_learning_introduction" / "index.md").resolve(): "机器学习基础附录",
|
||||
}
|
||||
root_index = rewrite_markdown((source / "index.md").read_text(encoding="utf-8"), (source / "index.md").resolve(), title_cache)
|
||||
self.assertNotIn("```eval_rst", root_index)
|
||||
self.assertNotIn("```toc", root_index)
|
||||
self.assertIn("- [前言](chapter_preface/index.md)", root_index)
|
||||
self.assertIn("- 基础篇", root_index)
|
||||
self.assertIn(" - [导论](chapter_introduction/index.md)", root_index)
|
||||
self.assertIn("- 附录", root_index)
|
||||
self.assertIn(" - [机器学习基础附录](appendix_machine_learning_introduction/index.md)", root_index)
|
||||
|
||||
intro_index = rewrite_markdown(
|
||||
(source / "chapter_introduction" / "index.md").read_text(encoding="utf-8"),
|
||||
(source / "chapter_introduction" / "index.md").resolve(),
|
||||
title_cache,
|
||||
)
|
||||
self.assertNotIn("```toc", intro_index)
|
||||
self.assertIn("- [机器学习应用](applications.md)", intro_index)
|
||||
self.assertIn("- [设计目标](design.md)", intro_index)
|
||||
|
||||
def test_write_summary_raises_for_missing_toc_entries(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
source = root / "zh_chapters"
|
||||
source.mkdir()
|
||||
|
||||
(source / "index.md").write_text(
|
||||
"""# 首页
|
||||
|
||||
```toc
|
||||
:maxdepth: 2
|
||||
|
||||
existing
|
||||
missing
|
||||
```
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
(source / "existing.md").write_text("# 现有章节\n", encoding="utf-8")
|
||||
|
||||
with self.assertRaises(FileNotFoundError):
|
||||
write_summary(source)
|
||||
|
||||
def test_rewrite_markdown_normalizes_common_d2l_directives(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
source = root / "zh_chapters"
|
||||
source.mkdir()
|
||||
|
||||
page = source / "chapter.md"
|
||||
page.write_text(
|
||||
"""# 标题
|
||||
|
||||

|
||||
:width:`800px`
|
||||
:label:`fig_example`
|
||||
|
||||
参见 :numref:`fig_example` 和公式 :eqref:`eq_example`,引用 :cite:`foo2024`。
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
rewritten = rewrite_markdown(page.read_text(encoding="utf-8"), page.resolve(), {page.resolve(): "标题"})
|
||||
self.assertNotIn(":width:", rewritten)
|
||||
self.assertNotIn(":label:", rewritten)
|
||||
self.assertNotIn(":numref:", rewritten)
|
||||
self.assertNotIn(":eqref:", rewritten)
|
||||
self.assertNotIn(":cite:", rewritten)
|
||||
self.assertIn("`fig_example`", rewritten)
|
||||
self.assertIn("`eq_example`", rewritten)
|
||||
self.assertIn("[foo2024]", rewritten)
|
||||
|
||||
def test_rewrite_markdown_inlines_frontpage_html_include(self) -> None:
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
root = Path(tmpdir)
|
||||
source = root / "zh_chapters"
|
||||
static_dir = source / "static"
|
||||
static_dir.mkdir(parents=True)
|
||||
|
||||
index = source / "index.md"
|
||||
index.write_text(
|
||||
"""# 首页
|
||||
|
||||
```eval_rst
|
||||
.. raw:: html
|
||||
:file: frontpage.html
|
||||
```
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
(static_dir / "frontpage.html").write_text(
|
||||
"""<head>
|
||||
<style>
|
||||
.hero { color: red; }
|
||||
|
||||
.other { color: blue; }
|
||||
</style>
|
||||
</head>
|
||||
<div class="hero">
|
||||
<img src="_images/logo.png" />
|
||||
<img src="./_images/jinxuefeng.png" />
|
||||
</div>
|
||||
<script>console.log('frontpage');</script>
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
rewritten = rewrite_markdown(index.read_text(encoding="utf-8"), index.resolve(), {index.resolve(): "首页"})
|
||||
self.assertNotIn("```eval_rst", rewritten)
|
||||
self.assertNotIn("<head>", rewritten)
|
||||
self.assertIn('class="openmlsys-frontpage"', rewritten)
|
||||
self.assertIn('<div class="hero">', rewritten)
|
||||
self.assertIn('<style>', rewritten)
|
||||
self.assertIn(".hero { color: red; } .other { color: blue; }", rewritten)
|
||||
self.assertIn("static/image/logo.png", rewritten)
|
||||
self.assertIn("static/image/jinxuefeng.png", rewritten)
|
||||
self.assertIn("console.log('frontpage')", rewritten)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
97
tools/assemble_docs_publish_tree.py
Normal file
97
tools/assemble_docs_publish_tree.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def remove_path(path: Path) -> None:
|
||||
if path.is_symlink() or path.is_file():
|
||||
path.unlink()
|
||||
return
|
||||
if path.is_dir():
|
||||
shutil.rmtree(path)
|
||||
|
||||
|
||||
def copy_site(source: Path, destination: Path) -> None:
|
||||
source = source.resolve()
|
||||
if not source.is_dir():
|
||||
raise FileNotFoundError(f"Site source does not exist or is not a directory: {source}")
|
||||
|
||||
remove_path(destination)
|
||||
destination.parent.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copytree(source, destination)
|
||||
|
||||
|
||||
def assemble_publish_tree(
|
||||
destination_root: Path,
|
||||
docs_subdir: str = "docs",
|
||||
en_source: Path | None = None,
|
||||
zh_source: Path | None = None,
|
||||
) -> tuple[Path, Path | None]:
|
||||
if en_source is None and zh_source is None:
|
||||
raise ValueError("At least one site source must be provided.")
|
||||
|
||||
destination_root = destination_root.resolve()
|
||||
docs_root = (destination_root / docs_subdir).resolve()
|
||||
|
||||
remove_path(docs_root)
|
||||
docs_root.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if en_source is not None:
|
||||
copy_site(en_source, docs_root)
|
||||
else:
|
||||
docs_root.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
zh_destination: Path | None = None
|
||||
if zh_source is not None:
|
||||
zh_destination = docs_root / "cn"
|
||||
copy_site(zh_source, zh_destination)
|
||||
|
||||
return docs_root, zh_destination
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Assemble the publish tree expected by openmlsys.github.io."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--destination-root",
|
||||
type=Path,
|
||||
required=True,
|
||||
help="Root of the checked-out deployment repository.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--docs-subdir",
|
||||
default="docs",
|
||||
help="Subdirectory inside the destination root that hosts the site.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--en-source",
|
||||
type=Path,
|
||||
help="Built site to publish at docs/.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--zh-source",
|
||||
type=Path,
|
||||
help="Built site to publish at docs/cn/.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
docs_root, zh_root = assemble_publish_tree(
|
||||
destination_root=args.destination_root,
|
||||
docs_subdir=args.docs_subdir,
|
||||
en_source=args.en_source,
|
||||
zh_source=args.zh_source,
|
||||
)
|
||||
print(f"Assembled root site at {docs_root}")
|
||||
if zh_root is not None:
|
||||
print(f"Assembled Chinese site at {zh_root}")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
60
tools/ensure_book_resources.py
Normal file
60
tools/ensure_book_resources.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
RESOURCE_NAMES = ("img", "references", "static", "mlsys.bib")


def ensure_resource_views(
    chapter_dir: Path,
    repo_root: Path,
    resource_names: tuple[str, ...] = RESOURCE_NAMES,
) -> None:
    """Expose each shared repo resource inside *chapter_dir* as a relative symlink.

    Existing symlinks (including broken ones) are refreshed; a real file or
    directory with the same name is left untouched. Raises FileNotFoundError
    when a shared resource is missing from *repo_root*.
    """
    chapter_dir = chapter_dir.resolve()
    repo_root = repo_root.resolve()

    for name in resource_names:
        shared = repo_root / name
        # Validate the shared resource first — even when the chapter already
        # has its own copy — so a broken checkout fails loudly.
        if not shared.exists():
            raise FileNotFoundError(f"Resource does not exist: {shared}")

        link_path = chapter_dir / name
        if link_path.is_symlink():
            # Replace stale (possibly dangling) links with a fresh one.
            link_path.unlink()
        elif link_path.exists():
            # A real file/directory takes precedence; keep it as-is.
            continue

        # Relative targets keep the links valid when the tree is relocated.
        link_path.symlink_to(os.path.relpath(shared, start=chapter_dir))
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse command-line options for the shared-resource linking helper."""
    parser = argparse.ArgumentParser(
        description="Ensure chapter directories can see shared book resources."
    )
    parser.add_argument(
        "--chapter-dir",
        type=Path,
        required=True,
        help="Book source directory such as en_chapters or zh_chapters.",
    )
    # By default the repo root is the parent of the tools/ directory.
    default_root = Path(__file__).resolve().parent.parent
    parser.add_argument(
        "--repo-root",
        type=Path,
        default=default_root,
        help="Repository root that owns the shared resources.",
    )
    return parser.parse_args()
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: link the shared resources into the chapter directory."""
    options = parse_args()
    ensure_resource_views(options.chapter_dir, options.repo_root)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
||||
57
tools/mdbook_preprocessor.py
Normal file
57
tools/mdbook_preprocessor.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from tools.prepare_mdbook import build_title_cache, parse_bib, rewrite_markdown
|
||||
except ModuleNotFoundError:
|
||||
from prepare_mdbook import build_title_cache, parse_bib, rewrite_markdown
|
||||
|
||||
|
||||
PLACEHOLDER_PREFIX = "[TODO: src = zh_chapters/"
BIBLIOGRAPHY_TITLE = "References"


def iter_chapters(items: list[dict]) -> list[dict]:
    """Flatten mdBook's item tree into a pre-order list of Chapter dicts.

    Non-chapter items (separators, part titles) are skipped.
    """
    found: list[dict] = []
    for entry in items:
        node = entry.get("Chapter")
        if node:
            found.append(node)
            found.extend(iter_chapters(node.get("sub_items", [])))
    return found
||||
|
||||
|
||||
def main() -> int:
    """mdBook preprocessor entry point: JSON book in on stdin, rewritten book out."""
    # mdBook probes renderer support by invoking `<exe> supports <renderer>`.
    if len(sys.argv) > 1 and sys.argv[1] == "supports":
        return 0

    context, book = json.load(sys.stdin)
    root = Path(context["root"]).resolve()
    source_dir = (root / context["config"]["book"]["src"]).resolve()
    title_cache = build_title_cache(source_dir, placeholder_prefix=PLACEHOLDER_PREFIX)
    # The shared bibliography lives one level above the book source directory.
    bib_path = source_dir.parent / "mlsys.bib"
    bib_db = parse_bib(bib_path) if bib_path.exists() else {}

    for chapter in iter_chapters(book.get("items", [])):
        rel_path = chapter.get("source_path") or chapter.get("path")
        if not rel_path:
            continue
        markdown_file = (source_dir / rel_path).resolve()
        chapter["content"] = rewrite_markdown(
            chapter["content"],
            markdown_file,
            title_cache,
            bib_db,
            bibliography_title=BIBLIOGRAPHY_TITLE,
        )

    json.dump(book, sys.stdout, ensure_ascii=False)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
||||
@@ -4,7 +4,13 @@ import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from prepare_mdbook_zh import build_title_cache, parse_bib, rewrite_markdown
|
||||
try:
|
||||
from tools.prepare_mdbook import build_title_cache, parse_bib, rewrite_markdown
|
||||
except ModuleNotFoundError:
|
||||
from prepare_mdbook import build_title_cache, parse_bib, rewrite_markdown
|
||||
|
||||
|
||||
BIBLIOGRAPHY_TITLE = "参考文献"
|
||||
|
||||
|
||||
def iter_chapters(items: list[dict]) -> list[dict]:
|
||||
@@ -24,9 +30,9 @@ def main() -> int:
|
||||
|
||||
context, book = json.load(sys.stdin)
|
||||
root = Path(context["root"]).resolve()
|
||||
source_dir = root / context["config"]["book"]["src"]
|
||||
source_dir = (root / context["config"]["book"]["src"]).resolve()
|
||||
title_cache = build_title_cache(source_dir)
|
||||
bib_path = root / "mlsys.bib"
|
||||
bib_path = source_dir.parent / "mlsys.bib"
|
||||
bib_db = parse_bib(bib_path) if bib_path.exists() else {}
|
||||
|
||||
for chapter in iter_chapters(book.get("items", [])):
|
||||
@@ -34,7 +40,13 @@ def main() -> int:
|
||||
if not source_path:
|
||||
continue
|
||||
current_file = (source_dir / source_path).resolve()
|
||||
chapter["content"] = rewrite_markdown(chapter["content"], current_file, title_cache, bib_db)
|
||||
chapter["content"] = rewrite_markdown(
|
||||
chapter["content"],
|
||||
current_file,
|
||||
title_cache,
|
||||
bib_db,
|
||||
bibliography_title=BIBLIOGRAPHY_TITLE,
|
||||
)
|
||||
|
||||
json.dump(book, sys.stdout, ensure_ascii=False)
|
||||
return 0
|
||||
|
||||
585
tools/prepare_mdbook.py
Normal file
585
tools/prepare_mdbook.py
Normal file
@@ -0,0 +1,585 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
TOC_FENCE = "toc"
|
||||
EVAL_RST_FENCE = "eval_rst"
|
||||
OPTION_LINE_RE = re.compile(r"^:(width|label):`[^`]+`\s*$", re.MULTILINE)
|
||||
NUMREF_RE = re.compile(r":numref:`([^`]+)`")
|
||||
EQREF_RE = re.compile(r":eqref:`([^`]+)`")
|
||||
CITE_RE = re.compile(r":cite:`([^`]+)`")
|
||||
BIB_ENTRY_RE = re.compile(r"@(\w+)\{([^,]+),")
|
||||
LATEX_ESCAPE_RE = re.compile(r"\\([_%#&])")
|
||||
RAW_HTML_FILE_RE = re.compile(r"^\s*:file:\s*([^\s]+)\s*$")
|
||||
TOC_LINK_RE = re.compile(r"^\[([^\]]+)\]\(([^)]+)\)\s*$")
|
||||
TOC_PART_RE = re.compile(r"^#+\s+(.+?)\s*$")
|
||||
HEAD_TAG_RE = re.compile(r"</?head>", re.IGNORECASE)
|
||||
STYLE_BLOCK_RE = re.compile(r"<style>(.*?)</style>", re.IGNORECASE | re.DOTALL)
|
||||
DEFAULT_BIBLIOGRAPHY_TITLE = "References"
|
||||
FRONTPAGE_LAYOUT_CSS = """
|
||||
<style>
|
||||
.openmlsys-frontpage {
|
||||
width: 100%;
|
||||
margin: 0 auto 3rem;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-grid {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 24px;
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell {
|
||||
box-sizing: border-box;
|
||||
flex: 1 1 220px;
|
||||
min-width: 0;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell--1-col {
|
||||
flex: 0 0 48px;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell--3-col {
|
||||
flex: 0 1 calc(16.666% - 20px);
|
||||
max-width: calc(16.666% - 20px);
|
||||
}
|
||||
.openmlsys-frontpage .authors.mdl-grid {
|
||||
justify-content: center;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell--5-col {
|
||||
flex: 1 1 calc(41.666% - 24px);
|
||||
max-width: calc(41.666% - 18px);
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell--12-col {
|
||||
flex: 1 1 100%;
|
||||
max-width: 100%;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell--middle {
|
||||
align-self: center;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-color-text--primary {
|
||||
color: var(--links, #0b6bcb);
|
||||
}
|
||||
.openmlsys-frontpage img {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
#content,
|
||||
.content {
|
||||
max-width: min(1440px, calc(100vw - 48px));
|
||||
}
|
||||
.content main {
|
||||
max-width: 75%;
|
||||
}
|
||||
.openmlsys-frontpage + ul,
|
||||
.openmlsys-frontpage + ul ul {
|
||||
max-width: 960px;
|
||||
}
|
||||
@media (max-width: 1000px) {
|
||||
.openmlsys-frontpage .mdl-cell,
|
||||
.openmlsys-frontpage .mdl-cell--1-col,
|
||||
.openmlsys-frontpage .mdl-cell--3-col,
|
||||
.openmlsys-frontpage .mdl-cell--5-col {
|
||||
flex: 1 1 100%;
|
||||
max-width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
""".strip()
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TocItem:
    """A single entry parsed from a ``toc`` fenced block.

    Either a part heading (``kind == "part"``) or a chapter link
    (``kind == "chapter"``).
    """

    # "part" for a heading line, "chapter" for a linked or bare target line.
    kind: str
    # Display text; empty string when the TOC line was a bare file target.
    label: str
    # Link target for chapters (relative path, ".md" optional); None for parts.
    target: str | None = None
||||
|
||||
|
||||
def is_placeholder_markdown(markdown: str, placeholder_prefix: str | None = None) -> bool:
|
||||
if not placeholder_prefix:
|
||||
return False
|
||||
|
||||
stripped = markdown.strip()
|
||||
return stripped.startswith(placeholder_prefix) and stripped.endswith("]")
|
||||
|
||||
|
||||
def extract_title(markdown: str, fallback: str = "Untitled") -> str:
    """Return the first ATX (``# ...``) or setext (underlined) heading.

    Falls back to *fallback* when no heading can be found.
    """
    lines = markdown.splitlines()
    for position, raw in enumerate(lines):
        text = raw.strip()
        if not text:
            continue
        if text.startswith("#"):
            candidate = text.lstrip("#").strip()
            if candidate:
                return candidate
        # Setext heading: a non-blank line followed by a row of '=' or '-'.
        if position + 1 < len(lines):
            marker = lines[position + 1].strip()
            if marker and not set(marker) - {"=", "-"}:
                return text
    return fallback
||||
|
||||
|
||||
def parse_toc_entries(block_lines: list[str]) -> list[TocItem]:
    """Turn the lines of a ``toc`` fence into TocItem records.

    Directive lines (``:maxdepth:`` etc.) and blanks are dropped; heading
    lines become parts, ``[label](target)`` lines become labelled chapters,
    and any other line is treated as a bare chapter target.
    """
    items: list[TocItem] = []
    for raw in block_lines:
        text = raw.strip()
        if not text or text.startswith(":"):
            continue
        heading = TOC_PART_RE.match(text)
        if heading is not None:
            items.append(TocItem(kind="part", label=heading.group(1).strip()))
            continue
        link = TOC_LINK_RE.match(text)
        if link is not None:
            items.append(
                TocItem(
                    kind="chapter",
                    label=link.group(1).strip(),
                    target=link.group(2).strip(),
                )
            )
        else:
            items.append(TocItem(kind="chapter", label="", target=text))
    return items
||||
|
||||
|
||||
def parse_toc_blocks(markdown: str) -> list[list[TocItem]]:
    """Collect the parsed entries of every ``toc`` fenced block, in order."""
    blocks: list[list[TocItem]] = []
    lines = markdown.splitlines()
    opener = f"```{TOC_FENCE}"
    cursor = 0
    total = len(lines)

    while cursor < total:
        if lines[cursor].strip() != opener:
            cursor += 1
            continue
        # Consume the opening fence, then the body up to the closing ```.
        cursor += 1
        body: list[str] = []
        while cursor < total and lines[cursor].strip() != "```":
            body.append(lines[cursor])
            cursor += 1
        blocks.append(parse_toc_entries(body))
        cursor += 1  # step past the closing fence

    return blocks
||||
|
||||
|
||||
def resolve_toc_target(current_file: Path, entry: str) -> Path:
    """Resolve a TOC entry (``.md`` suffix optional) relative to *current_file*.

    Raises FileNotFoundError when the referenced page does not exist.
    """
    filename = entry if entry.endswith(".md") else entry + ".md"
    resolved = (current_file.parent / filename).resolve()
    if not resolved.exists():
        raise FileNotFoundError(f"TOC entry '{entry}' from '{current_file}' does not exist")
    return resolved
||||
|
||||
|
||||
def relative_link(from_file: Path, target_file: Path) -> str:
    """Return a POSIX-style relative link from *from_file*'s directory to *target_file*."""
    rel = os.path.relpath(target_file, start=from_file.parent)
    return Path(rel).as_posix()
||||
|
||||
|
||||
def _strip_latex_escapes_outside_math(line: str) -> str:
|
||||
parts = line.split("$")
|
||||
for i in range(0, len(parts), 2):
|
||||
parts[i] = LATEX_ESCAPE_RE.sub(r"\1", parts[i])
|
||||
return "$".join(parts)
|
||||
|
||||
|
||||
def normalize_directives(markdown: str) -> str:
    """Strip d2l-style directives and tidy whitespace.

    Removes ``:width:``/``:label:`` option lines, downgrades ``:numref:`` and
    ``:eqref:`` references to inline code, strips LaTeX escapes outside math,
    collapses runs of blank lines, and guarantees one trailing newline.
    """
    text = OPTION_LINE_RE.sub("", markdown)
    text = NUMREF_RE.sub(lambda m: f"`{m.group(1)}`", text)
    text = EQREF_RE.sub(lambda m: f"`{m.group(1)}`", text)

    cleaned: list[str] = []
    last_was_blank = False
    for raw in text.splitlines():
        line = _strip_latex_escapes_outside_math(raw.rstrip())
        blank = line == ""
        # Collapse consecutive blank lines down to a single one.
        if blank and last_was_blank:
            continue
        cleaned.append(line)
        last_was_blank = blank

    while cleaned and cleaned[-1] == "":
        cleaned.pop()

    return "\n".join(cleaned) + "\n"
||||
|
||||
|
||||
def clean_bibtex(value: str) -> str:
    """Strip BibTeX brace groups and LaTeX accent commands from *value*."""
    # Accents written as {\'e} or \'e collapse to the bare letter.
    value = re.sub(r"\{\\[`'^\"~=.](\w)\}", r"\1", value)
    value = re.sub(r"\\[`'^\"~=.](\w)", r"\1", value)
    # Remaining braces only guard capitalisation; drop them entirely.
    return value.replace("{", "").replace("}", "").strip()
||||
|
||||
|
||||
def _parse_bib_fields(body: str) -> dict[str, str]:
    """Parse ``name = value`` pairs from the body of a BibTeX entry.

    Hand-rolled scanner: values may be brace-delimited (nesting respected),
    quote-delimited, or bare tokens. Field names are lower-cased; values are
    stripped of surrounding whitespace.
    """
    fields: dict[str, str] = {}
    i = 0
    while i < len(body):
        # Skip separators (whitespace/commas) before the next field name.
        while i < len(body) and body[i] in " \t\n\r,":
            i += 1
        if i >= len(body):
            break
        start = i
        # The field name runs until '=' or whitespace.
        while i < len(body) and body[i] not in "= \t\n\r":
            i += 1
        name = body[start:i].strip().lower()
        # Advance to the '=' sign (tolerates whitespace before it).
        while i < len(body) and body[i] != "=":
            i += 1
        if i >= len(body):
            break
        i += 1
        # Skip whitespace between '=' and the start of the value.
        while i < len(body) and body[i] in " \t\n\r":
            i += 1
        if i >= len(body):
            break
        if body[i] == "{":
            # Brace-delimited value; track nesting depth so inner braces survive.
            depth = 1
            i += 1
            vstart = i
            while i < len(body) and depth > 0:
                if body[i] == "{":
                    depth += 1
                elif body[i] == "}":
                    depth -= 1
                i += 1
            value = body[vstart : i - 1]
        elif body[i] == '"':
            # Quote-delimited value (no escape handling — not needed here).
            i += 1
            vstart = i
            while i < len(body) and body[i] != '"':
                i += 1
            value = body[vstart:i]
            i += 1
        else:
            # Bare value: runs until a separator or the entry's closing brace.
            vstart = i
            while i < len(body) and body[i] not in ", \t\n\r}":
                i += 1
            value = body[vstart:i]
        if name:
            fields[name] = value.strip()
    return fields
||||
|
||||
|
||||
def parse_bib(bib_path: Path) -> dict[str, dict[str, str]]:
    """Parse *bib_path* into ``{citation_key: {field: value, "_type": kind}}``."""
    text = bib_path.read_text(encoding="utf-8")
    database: dict[str, dict[str, str]] = {}
    for header in BIB_ENTRY_RE.finditer(text):
        body_start = header.end()
        # Scan forward to the brace matching the one opened by the header.
        depth = 1
        cursor = body_start
        while cursor < len(text) and depth > 0:
            char = text[cursor]
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
            cursor += 1
        record = _parse_bib_fields(text[body_start : cursor - 1])
        record["_type"] = header.group(1).lower()
        database[header.group(2).strip()] = record
    return database
||||
|
||||
|
||||
def _render_bibliography(
    cited_keys: list[str],
    bib_db: dict[str, dict[str, str]],
    bibliography_title: str,
) -> list[str]:
    """Render the cited entries as markdown lines wrapping an HTML ordered list.

    Entries are numbered in citation order; each carries a back-link anchor so
    in-text superscripts can jump to it and back.
    """
    rendered = ["---", "", f"## {bibliography_title}", "", "<ol>"]
    for key in cited_keys:
        entry = bib_db.get(key)
        if not entry:
            # Unknown key: fall back to the raw key so the anchor still works.
            rendered.append(f'<li id="ref-{key}">{key}. <a href="#cite-{key}">↩</a></li>')
            continue
        pieces: list[str] = []
        author = clean_bibtex(entry.get("author", ""))
        if author:
            pieces.append(author)
        title = clean_bibtex(entry.get("title", ""))
        if title:
            pieces.append(f"<em>{title}</em>")
        venue = clean_bibtex(entry.get("journal", "") or entry.get("booktitle", ""))
        if venue:
            pieces.append(venue)
        year = entry.get("year", "")
        if year:
            pieces.append(year)
        text = ". ".join(pieces) + "." if pieces else f"{key}."
        rendered.append(f'<li id="ref-{key}">{text} <a href="#cite-{key}">↩</a></li>')
    rendered.append("</ol>")
    return rendered
||||
|
||||
|
||||
def process_citations(
    markdown: str,
    bib_db: dict[str, dict[str, str]],
    bibliography_title: str = DEFAULT_BIBLIOGRAPHY_TITLE,
) -> str:
    """Expand ``:cite:`` directives and append a bibliography section.

    Without a bibliography database, citations degrade to plain ``[key, key]``
    text and no reference list is emitted.
    """
    order: list[str] = []  # citation keys in first-appearance order

    def _expand(match: re.Match[str]) -> str:
        keys = [part.strip() for part in match.group(1).split(",")]
        for key in keys:
            if key not in order:
                order.append(key)
        if not bib_db:
            return "[" + ", ".join(keys) + "]"
        links = [
            f'<sup id="cite-{key}"><a href="#ref-{key}">[{order.index(key) + 1}]</a></sup>'
            for key in keys
        ]
        return "".join(links)

    rewritten = CITE_RE.sub(_expand, markdown)
    if order and bib_db:
        section = _render_bibliography(order, bib_db, bibliography_title)
        rewritten = rewritten.rstrip("\n") + "\n\n" + "\n".join(section) + "\n"
    return rewritten
||||
|
||||
|
||||
def resolve_raw_html_file(current_file: Path, filename: str) -> Path:
    """Locate the HTML file referenced by a ``.. raw:: html`` directive.

    Search order: next to *current_file*, then its ``static/`` subdirectory,
    then the repository-level ``static/`` directory next to this script.

    Raises:
        FileNotFoundError: if *filename* exists in none of those locations.
    """
    direct = (current_file.parent / filename).resolve()
    if direct.exists():
        return direct

    static_fallback = (current_file.parent / "static" / filename).resolve()
    if static_fallback.exists():
        return static_fallback

    repo_static = (Path(__file__).resolve().parent.parent / "static" / filename)
    if repo_static.exists():
        return repo_static

    # Bug fix: the message previously contained a literal placeholder instead
    # of interpolating the missing file's name.
    raise FileNotFoundError(f"Raw HTML include '{filename}' from '{current_file}' does not exist")
||||
|
||||
|
||||
def rewrite_frontpage_assets(html: str) -> str:
    """Point frontpage image paths at ``static/image/`` and flatten embedded CSS.

    Also drops stray ``<head>``/``</head>`` tags and collapses each ``<style>``
    block onto a single line so mdBook's markdown renderer cannot mangle it.
    """

    def _flatten(match: re.Match[str]) -> str:
        rules = [piece.strip() for piece in match.group(1).splitlines() if piece.strip()]
        return f"<style>{' '.join(rules)}</style>"

    result = html.replace("./_images/", "static/image/").replace("_images/", "static/image/")
    result = re.sub(r"</?head>", "", result, flags=re.IGNORECASE)
    result = re.sub(r"<style>(.*?)</style>", _flatten, result, flags=re.IGNORECASE | re.DOTALL)
    return result
||||
|
||||
|
||||
def _minify_style_block(match: re.Match[str]) -> str:
|
||||
content = match.group(1)
|
||||
parts = [line.strip() for line in content.splitlines() if line.strip()]
|
||||
return f"<style>{' '.join(parts)}</style>"
|
||||
|
||||
|
||||
def wrap_frontpage_html(html: str) -> str:
    """Prefix the frontpage layout CSS and wrap *html* in the frontpage container div."""
    return "\n".join([FRONTPAGE_LAYOUT_CSS, '<div class="openmlsys-frontpage">', html.strip(), "</div>"])
||||
|
||||
|
||||
def inline_raw_html(block_lines: list[str], current_file: Path) -> str | None:
    """Expand a ``.. raw:: html`` eval_rst block into its referenced HTML.

    Returns None when the block is not a raw-html directive or names no file.
    The frontpage include additionally gets the layout CSS wrapper.
    """
    meaningful = [line.strip() for line in block_lines if line.strip()]
    if not meaningful or meaningful[0] != ".. raw:: html":
        return None

    # First ``:file:`` option names the HTML snippet to inline.
    filename: str | None = None
    for option in meaningful[1:]:
        found = RAW_HTML_FILE_RE.match(option)
        if found:
            filename = found.group(1)
            break
    if filename is None:
        return None

    html_path = resolve_raw_html_file(current_file, filename)
    html = rewrite_frontpage_assets(html_path.read_text(encoding="utf-8")).strip()
    return wrap_frontpage_html(html) if Path(filename).name == "frontpage.html" else html
||||
|
||||
|
||||
def chapter_label(item: TocItem, target: Path, title_cache: dict[Path, str]) -> str:
|
||||
return item.label or title_cache[target]
|
||||
|
||||
|
||||
def render_toc_list(entries: list[TocItem], current_file: Path, title_cache: dict[Path, str]) -> list[str]:
    """Render parsed TOC entries as a markdown bullet list.

    Part headings become top-level bullets; chapters that follow a part are
    indented one level beneath it. Entries whose target is missing from
    *title_cache* (e.g. placeholder pages) are silently skipped.
    """
    rendered: list[str] = []
    current_indent = 0
    for entry in entries:
        if entry.kind == "part":
            rendered.append(f"- {entry.label}")
            # Chapters after a part heading nest one level deeper.
            current_indent = 1
            continue

        if entry.target is None:
            continue

        target = resolve_toc_target(current_file, entry.target)
        if target not in title_cache:
            continue

        label = chapter_label(entry, target, title_cache)
        rendered.append(f"{' ' * current_indent}- [{label}]({relative_link(current_file, target)})")
    return rendered
||||
|
||||
|
||||
def rewrite_markdown(
    markdown: str,
    current_file: Path,
    title_cache: dict[Path, str],
    bib_db: dict[str, dict[str, str]] | None = None,
    bibliography_title: str = DEFAULT_BIBLIOGRAPHY_TITLE,
) -> str:
    """Rewrite one chapter's markdown for mdBook.

    ``toc`` fences become rendered link lists, ``eval_rst`` raw-html blocks
    are inlined, d2l directives are normalised, and citations are expanded
    against *bib_db* (when provided) with a trailing bibliography section.
    """
    output: list[str] = []
    lines = markdown.splitlines()
    index = 0

    while index < len(lines):
        stripped = lines[index].strip()
        if stripped in (f"```{TOC_FENCE}", f"```{EVAL_RST_FENCE}"):
            fence = stripped[3:]  # fence name without the leading backticks
            index += 1
            # Collect the fenced block's body up to the closing ```.
            block_lines: list[str] = []
            while index < len(lines) and lines[index].strip() != "```":
                block_lines.append(lines[index])
                index += 1

            if fence == TOC_FENCE:
                entries = parse_toc_entries(block_lines)
                if entries:
                    # Keep exactly one blank line around the rendered list.
                    if output and output[-1] != "":
                        output.append("")
                    rendered = render_toc_list(entries, current_file, title_cache)
                    output.extend(rendered)
                    if rendered and output and output[-1] != "":
                        output.append("")
            elif fence == EVAL_RST_FENCE:
                # Only ``.. raw:: html`` directives survive; other rst is dropped.
                raw_html = inline_raw_html(block_lines, current_file)
                if raw_html:
                    if output and output[-1] != "":
                        output.append("")
                    output.extend(raw_html.splitlines())
                    if output and output[-1] != "":
                        output.append("")
            index += 1  # skip the closing fence
            continue

        output.append(lines[index])
        index += 1

    # Drop trailing blank lines before the final normalisation passes.
    while output and output[-1] == "":
        output.pop()

    result = normalize_directives("\n".join(output) + "\n")
    result = process_citations(result, bib_db or {}, bibliography_title=bibliography_title)
    return result
||||
|
||||
|
||||
def build_title_cache(
    source_dir: Path,
    placeholder_prefix: str | None = None,
) -> dict[Path, str]:
    """Map every publishable markdown file under *source_dir* to its title.

    Skips mdBook build output, the generated SUMMARY.md, and (when
    *placeholder_prefix* is given) untranslated placeholder pages.
    """
    titles: dict[Path, str] = {}
    for page in sorted(source_dir.rglob("*.md")):
        if "_build" in page.parts or page.name == "SUMMARY.md":
            continue
        content = page.read_text(encoding="utf-8")
        if is_placeholder_markdown(content, placeholder_prefix):
            continue
        # Fall back to the file stem when the page has no heading.
        titles[page.resolve()] = extract_title(content, fallback=page.stem)
    return titles
||||
|
||||
|
||||
def build_summary(source_dir: Path, title_cache: dict[Path, str]) -> str:
    """Generate mdBook SUMMARY.md content from the index page's TOC blocks.

    Chapters listed before the first part heading become unnumbered prefix
    chapters; everything after is emitted as nested numbered entries,
    recursing into each chapter's own ``toc`` blocks. Pages missing from
    *title_cache* (e.g. placeholders) are skipped.
    """
    root_index = (source_dir / "index.md").resolve()
    root_markdown = root_index.read_text(encoding="utf-8")

    lines = ["# Summary", "", f"[{title_cache[root_index]}](index.md)"]
    # Track emitted files so a page linked twice appears only once.
    seen: set[Path] = {root_index}

    def append_entry(target: Path, indent: int, label: str | None = None) -> None:
        # Numbered entry; recurses into the child's own TOC blocks.
        target = target.resolve()
        if target in seen or target not in title_cache:
            return
        seen.add(target)
        rel = target.relative_to(source_dir.resolve()).as_posix()
        title = label or title_cache[target]
        lines.append(f"{' ' * indent}- [{title}]({rel})")

        child_markdown = target.read_text(encoding="utf-8")
        for block in parse_toc_blocks(child_markdown):
            for entry in block:
                if entry.kind != "chapter" or entry.target is None:
                    continue
                append_entry(resolve_toc_target(target, entry.target), indent + 1, entry.label or None)

    def append_prefix_chapter(target: Path, label: str | None = None) -> None:
        # Unnumbered chapter before the first part heading (mdBook prefix form).
        target = target.resolve()
        if target in seen or target not in title_cache:
            return
        seen.add(target)
        rel = target.relative_to(source_dir.resolve()).as_posix()
        title = label or title_cache[target]
        lines.append(f"[{title}]({rel})")

    numbered_started = False
    for block in parse_toc_blocks(root_markdown):
        for entry in block:
            if entry.kind == "part":
                # A part heading switches output from prefix to numbered form.
                if lines and lines[-1] != "":
                    lines.append("")
                lines.append(f"# {entry.label}")
                lines.append("")
                numbered_started = True
                continue

            if entry.target is None:
                continue

            target = resolve_toc_target(root_index, entry.target)
            if numbered_started:
                append_entry(target, 0, entry.label or None)
            else:
                append_prefix_chapter(target, entry.label or None)

    return "\n".join(lines) + "\n"
||||
|
||||
|
||||
def write_summary(
    source_dir: Path,
    summary_path: Path | None = None,
    placeholder_prefix: str | None = None,
) -> Path:
    """Build and write SUMMARY.md for *source_dir*; return the written path."""
    source_dir = source_dir.resolve()
    if summary_path is None:
        summary_path = source_dir / "SUMMARY.md"
    else:
        summary_path = summary_path.resolve()
    titles = build_title_cache(source_dir, placeholder_prefix=placeholder_prefix)
    summary_path.write_text(build_summary(source_dir, titles), encoding="utf-8")
    return summary_path
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse command-line options for SUMMARY.md generation."""
    cli = argparse.ArgumentParser(
        description="Generate mdBook SUMMARY.md for a chapter directory."
    )
    cli.add_argument(
        "--source",
        type=Path,
        required=True,
        help="Source chapter directory",
    )
    cli.add_argument(
        "--summary-output",
        type=Path,
        required=True,
        help="Where to write the generated SUMMARY.md",
    )
    cli.add_argument(
        "--placeholder-prefix",
        default=None,
        help="If set, files whose entire contents start with this prefix are skipped from mdBook output.",
    )
    return cli.parse_args()
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: write the SUMMARY.md and report where it went."""
    options = parse_args()
    written = write_summary(
        options.source,
        summary_path=options.summary_output,
        placeholder_prefix=options.placeholder_prefix,
    )
    print(f"Wrote mdBook summary to {written}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
||||
@@ -1,580 +1,24 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
TOC_FENCE = "toc"
|
||||
EVAL_RST_FENCE = "eval_rst"
|
||||
OPTION_LINE_RE = re.compile(r"^:(width|label):`[^`]+`\s*$", re.MULTILINE)
|
||||
NUMREF_RE = re.compile(r":numref:`([^`]+)`")
|
||||
EQREF_RE = re.compile(r":eqref:`([^`]+)`")
|
||||
CITE_RE = re.compile(r":cite:`([^`]+)`")
|
||||
BIB_ENTRY_RE = re.compile(r"@(\w+)\{([^,]+),")
|
||||
LATEX_ESCAPE_RE = re.compile(r"\\([_%#&])")
|
||||
RAW_HTML_FILE_RE = re.compile(r"^\s*:file:\s*([^\s]+)\s*$")
|
||||
TOC_LINK_RE = re.compile(r"^\[([^\]]+)\]\(([^)]+)\)\s*$")
|
||||
TOC_PART_RE = re.compile(r"^#+\s+(.+?)\s*$")
|
||||
HEAD_TAG_RE = re.compile(r"</?head>", re.IGNORECASE)
|
||||
STYLE_BLOCK_RE = re.compile(r"<style>(.*?)</style>", re.IGNORECASE | re.DOTALL)
|
||||
FRONTPAGE_LAYOUT_CSS = """
|
||||
<style>
|
||||
.openmlsys-frontpage {
|
||||
width: 100%;
|
||||
margin: 0 auto 3rem;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-grid {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 24px;
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell {
|
||||
box-sizing: border-box;
|
||||
flex: 1 1 220px;
|
||||
min-width: 0;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell--1-col {
|
||||
flex: 0 0 48px;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell--3-col {
|
||||
flex: 0 1 calc(16.666% - 20px);
|
||||
max-width: calc(16.666% - 20px);
|
||||
}
|
||||
.openmlsys-frontpage .authors.mdl-grid {
|
||||
justify-content: center;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell--5-col {
|
||||
flex: 1 1 calc(41.666% - 24px);
|
||||
max-width: calc(41.666% - 18px);
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell--12-col {
|
||||
flex: 1 1 100%;
|
||||
max-width: 100%;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-cell--middle {
|
||||
align-self: center;
|
||||
}
|
||||
.openmlsys-frontpage .mdl-color-text--primary {
|
||||
color: var(--links, #0b6bcb);
|
||||
}
|
||||
.openmlsys-frontpage img {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
#content,
|
||||
.content {
|
||||
max-width: min(1440px, calc(100vw - 48px));
|
||||
}
|
||||
.content main {
|
||||
max-width: 75%;
|
||||
}
|
||||
.openmlsys-frontpage + ul,
|
||||
.openmlsys-frontpage + ul ul {
|
||||
max-width: 960px;
|
||||
}
|
||||
@media (max-width: 1000px) {
|
||||
.openmlsys-frontpage .mdl-cell,
|
||||
.openmlsys-frontpage .mdl-cell--1-col,
|
||||
.openmlsys-frontpage .mdl-cell--3-col,
|
||||
.openmlsys-frontpage .mdl-cell--5-col {
|
||||
flex: 1 1 100%;
|
||||
max-width: 100%;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
""".strip()
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TocItem:
|
||||
kind: str
|
||||
label: str
|
||||
target: str | None = None
|
||||
|
||||
|
||||
def extract_title(markdown: str, fallback: str = "Untitled") -> str:
|
||||
lines = markdown.splitlines()
|
||||
|
||||
for index, line in enumerate(lines):
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
continue
|
||||
if stripped.startswith("#"):
|
||||
heading = stripped.lstrip("#").strip()
|
||||
if heading:
|
||||
return heading
|
||||
|
||||
next_index = index + 1
|
||||
if next_index < len(lines):
|
||||
underline = lines[next_index].strip()
|
||||
if underline and set(underline) <= {"=", "-"}:
|
||||
return stripped
|
||||
|
||||
return fallback
|
||||
|
||||
|
||||
def parse_toc_entries(block_lines: list[str]) -> list[TocItem]:
|
||||
entries: list[TocItem] = []
|
||||
for line in block_lines:
|
||||
stripped = line.strip()
|
||||
if not stripped or stripped.startswith(":"):
|
||||
continue
|
||||
part_match = TOC_PART_RE.match(stripped)
|
||||
if part_match:
|
||||
entries.append(TocItem(kind="part", label=part_match.group(1).strip()))
|
||||
continue
|
||||
link_match = TOC_LINK_RE.match(stripped)
|
||||
if link_match:
|
||||
entries.append(TocItem(kind="chapter", label=link_match.group(1).strip(), target=link_match.group(2).strip()))
|
||||
continue
|
||||
entries.append(TocItem(kind="chapter", label="", target=stripped))
|
||||
return entries
|
||||
|
||||
|
||||
def parse_toc_blocks(markdown: str) -> list[list[TocItem]]:
|
||||
blocks: list[list[TocItem]] = []
|
||||
lines = markdown.splitlines()
|
||||
index = 0
|
||||
|
||||
while index < len(lines):
|
||||
if lines[index].strip() == f"```{TOC_FENCE}":
|
||||
index += 1
|
||||
block_lines: list[str] = []
|
||||
while index < len(lines) and lines[index].strip() != "```":
|
||||
block_lines.append(lines[index])
|
||||
index += 1
|
||||
entries = parse_toc_entries(block_lines)
|
||||
blocks.append(entries)
|
||||
index += 1
|
||||
|
||||
return blocks
|
||||
|
||||
|
||||
def resolve_toc_target(current_file: Path, entry: str) -> Path:
|
||||
target_name = entry if entry.endswith(".md") else f"{entry}.md"
|
||||
target = (current_file.parent / target_name).resolve()
|
||||
if not target.exists():
|
||||
raise FileNotFoundError(f"TOC entry '{entry}' from '{current_file}' does not exist")
|
||||
return target
|
||||
|
||||
|
||||
def relative_link(from_file: Path, target_file: Path) -> str:
|
||||
return Path(os.path.relpath(target_file, start=from_file.parent)).as_posix()
|
||||
|
||||
|
||||
def _strip_latex_escapes_outside_math(line: str) -> str:
|
||||
"""Remove LaTeX escapes (\\_, \\%, \\#, \\&) from text outside $...$ math spans."""
|
||||
parts = line.split("$")
|
||||
for i in range(0, len(parts), 2): # even indices are outside math
|
||||
parts[i] = LATEX_ESCAPE_RE.sub(r"\1", parts[i])
|
||||
return "$".join(parts)
|
||||
|
||||
|
||||
def normalize_directives(markdown: str) -> str:
|
||||
normalized = OPTION_LINE_RE.sub("", markdown)
|
||||
normalized = NUMREF_RE.sub(lambda match: f"`{match.group(1)}`", normalized)
|
||||
normalized = EQREF_RE.sub(lambda match: f"`{match.group(1)}`", normalized)
|
||||
|
||||
lines = [_strip_latex_escapes_outside_math(line.rstrip()) for line in normalized.splitlines()]
|
||||
collapsed: list[str] = []
|
||||
previous_blank = False
|
||||
for line in lines:
|
||||
is_blank = line == ""
|
||||
if is_blank and previous_blank:
|
||||
continue
|
||||
collapsed.append(line)
|
||||
previous_blank = is_blank
|
||||
|
||||
while collapsed and collapsed[-1] == "":
|
||||
collapsed.pop()
|
||||
|
||||
return "\n".join(collapsed) + "\n"
|
||||
|
||||
|
||||
# ── BibTeX parsing ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def clean_bibtex(value: str) -> str:
|
||||
"""Remove BibTeX formatting (braces, LaTeX accents) from a string."""
|
||||
value = re.sub(r"\{\\[`'^\"~=.](\w)\}", r"\1", value)
|
||||
value = re.sub(r"\\[`'^\"~=.](\w)", r"\1", value)
|
||||
value = value.replace("{", "").replace("}", "")
|
||||
return value.strip()
|
||||
|
||||
|
||||
def _parse_bib_fields(body: str) -> dict[str, str]:
|
||||
"""Parse field=value pairs inside a BibTeX entry body."""
|
||||
fields: dict[str, str] = {}
|
||||
i = 0
|
||||
while i < len(body):
|
||||
while i < len(body) and body[i] in " \t\n\r,":
|
||||
i += 1
|
||||
if i >= len(body):
|
||||
break
|
||||
start = i
|
||||
while i < len(body) and body[i] not in "= \t\n\r":
|
||||
i += 1
|
||||
name = body[start:i].strip().lower()
|
||||
while i < len(body) and body[i] != "=":
|
||||
i += 1
|
||||
if i >= len(body):
|
||||
break
|
||||
i += 1
|
||||
while i < len(body) and body[i] in " \t\n\r":
|
||||
i += 1
|
||||
if i >= len(body):
|
||||
break
|
||||
if body[i] == "{":
|
||||
depth = 1
|
||||
i += 1
|
||||
vstart = i
|
||||
while i < len(body) and depth > 0:
|
||||
if body[i] == "{":
|
||||
depth += 1
|
||||
elif body[i] == "}":
|
||||
depth -= 1
|
||||
i += 1
|
||||
value = body[vstart : i - 1]
|
||||
elif body[i] == '"':
|
||||
i += 1
|
||||
vstart = i
|
||||
while i < len(body) and body[i] != '"':
|
||||
i += 1
|
||||
value = body[vstart:i]
|
||||
i += 1
|
||||
else:
|
||||
vstart = i
|
||||
while i < len(body) and body[i] not in ", \t\n\r}":
|
||||
i += 1
|
||||
value = body[vstart:i]
|
||||
if name:
|
||||
fields[name] = value.strip()
|
||||
return fields
|
||||
|
||||
|
||||
def parse_bib(bib_path: Path) -> dict[str, dict[str, str]]:
    """Parse a BibTeX file and return a dict keyed by citation key.

    Each value is the entry's field dict with an extra ``_type`` key holding
    the lowercase entry type (article, inproceedings, ...).
    """
    source = bib_path.read_text(encoding="utf-8")
    database: dict[str, dict[str, str]] = {}
    for header in BIB_ENTRY_RE.finditer(source):
        body_start = header.end()
        # Scan forward to the brace closing this entry (nesting-aware).
        cursor, open_braces = body_start, 1
        while cursor < len(source) and open_braces > 0:
            char = source[cursor]
            if char == "{":
                open_braces += 1
            elif char == "}":
                open_braces -= 1
            cursor += 1
        entry = _parse_bib_fields(source[body_start : cursor - 1])
        entry["_type"] = header.group(1).lower()
        database[header.group(2).strip()] = entry
    return database
|
||||
|
||||
|
||||
# ── Citation formatting ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _first_author_surname(author_str: str) -> str:
    """Extract the first author's surname from a BibTeX author string."""
    cleaned = clean_bibtex(author_str)
    first = cleaned.split(" and ")[0].strip()
    if not first:
        return ""
    if "," in first:
        # "Surname, Given" form: the surname precedes the comma.
        surname, _, _ = first.partition(",")
        return surname.strip()
    # "Given Surname" form: the surname is the final token.
    tokens = first.split()
    return tokens[-1] if tokens else first
|
||||
|
||||
|
||||
def _format_cite_label(author: str, year: str) -> str:
    """Format an inline citation label like 'Surname et al., Year'."""
    surname = _first_author_surname(author)
    if not surname:
        # No usable author: fall back to the year (or a placeholder).
        return year or "?"

    author_list = [name.strip() for name in clean_bibtex(author).split(" and ")]
    if len(author_list) > 2 or (len(author_list) == 2 and author_list[1].lower() == "others"):
        name_part = f"{surname} et al."
    elif len(author_list) == 2:
        companion = author_list[1]
        if "," in companion:
            companion_surname = companion.partition(",")[0].strip()
        else:
            tokens = companion.split()
            companion_surname = tokens[-1] if tokens else companion
        name_part = f"{surname} and {companion_surname}"
    else:
        name_part = surname

    return f"{name_part}, {year}" if year else name_part
|
||||
|
||||
|
||||
def _render_bibliography(
    cited_keys: list[str], bib_db: dict[str, dict[str, str]]
) -> list[str]:
    """Render a footnote-style bibliography section for the cited keys."""
    lines: list[str] = ["---", "", "## 参考文献", "", "<ol>"]
    for key in cited_keys:
        entry = bib_db.get(key)
        if not entry:
            # Unknown key: emit the raw key so the back-link still works.
            lines.append(f'<li id="ref-{key}">{key}. <a href="#cite-{key}">↩</a></li>')
            continue
        author = clean_bibtex(entry.get("author", ""))
        title = clean_bibtex(entry.get("title", ""))
        venue = clean_bibtex(entry.get("journal", "") or entry.get("booktitle", ""))
        year = entry.get("year", "")
        # Assemble "author. <em>title</em>. venue. year." from whatever is present.
        pieces = [
            piece
            for piece in (author, f"<em>{title}</em>" if title else "", venue, year)
            if piece
        ]
        text = ". ".join(pieces) + "." if pieces else f"{key}."
        lines.append(f'<li id="ref-{key}">{text} <a href="#cite-{key}">↩</a></li>')
    lines.append("</ol>")
    return lines
|
||||
|
||||
|
||||
def process_citations(
    markdown: str, bib_db: dict[str, dict[str, str]]
) -> str:
    """Replace :cite: references with footnote-style numbered citations.

    Citation numbers follow first-appearance order; a bibliography section
    is appended when any citation resolved against *bib_db*.
    """
    order: list[str] = []  # citation keys in first-appearance order

    def _substitute(match: re.Match[str]) -> str:
        keys = [key.strip() for key in match.group(1).split(",")]
        for key in keys:
            if key not in order:
                order.append(key)
        if not bib_db:
            # No bibliography available: keep a readable bracketed fallback.
            return "[" + ", ".join(keys) + "]"
        markers = [
            f'<sup id="cite-{key}"><a href="#ref-{key}">[{order.index(key) + 1}]</a></sup>'
            for key in keys
        ]
        return "".join(markers)

    rewritten = CITE_RE.sub(_substitute, markdown)
    if order and bib_db:
        bibliography = _render_bibliography(order, bib_db)
        rewritten = rewritten.rstrip("\n") + "\n\n" + "\n".join(bibliography) + "\n"
    return rewritten
|
||||
|
||||
|
||||
def resolve_raw_html_file(current_file: Path, filename: str) -> Path:
    """Resolve *filename* referenced by a ``.. raw:: html`` include.

    Lookup order: next to the including file, then the including file's
    local ``static/`` directory, then the repository-level ``static/``
    directory beside this script.

    Raises:
        FileNotFoundError: if none of the candidate locations exist.
    """
    direct = (current_file.parent / filename).resolve()
    if direct.exists():
        return direct

    static_fallback = (current_file.parent / "static" / filename).resolve()
    if static_fallback.exists():
        return static_fallback

    repo_static = (Path(__file__).resolve().parent.parent / "static" / filename)
    if repo_static.exists():
        return repo_static

    # Bug fix: the message previously hard-coded '(unknown)' instead of the
    # actual missing filename, making the error useless for diagnosis.
    raise FileNotFoundError(f"Raw HTML include '{filename}' from '{current_file}' does not exist")
|
||||
|
||||
|
||||
def rewrite_frontpage_assets(html: str) -> str:
    """Repoint image paths at static/image/ and strip/minify head markup."""
    # Replace the './'-prefixed form first so the bare-form replacement
    # does not leave a stray './' behind.
    for old, new in (('./_images/', 'static/image/'), ('_images/', 'static/image/')):
        html = html.replace(old, new)
    html = HEAD_TAG_RE.sub("", html)
    return STYLE_BLOCK_RE.sub(_minify_style_block, html)
|
||||
|
||||
|
||||
def _minify_style_block(match: re.Match[str]) -> str:
|
||||
content = match.group(1)
|
||||
parts = [line.strip() for line in content.splitlines() if line.strip()]
|
||||
return f"<style>{' '.join(parts)}</style>"
|
||||
|
||||
|
||||
def wrap_frontpage_html(html: str) -> str:
    """Wrap the frontpage HTML in its layout CSS and container <div>."""
    pieces = (
        FRONTPAGE_LAYOUT_CSS,
        '<div class="openmlsys-frontpage">',
        html.strip(),
        '</div>',
    )
    return "\n".join(pieces)
|
||||
|
||||
|
||||
def inline_raw_html(block_lines: list[str], current_file: Path) -> str | None:
    """Inline a ``.. raw:: html`` directive block, or return None if not one.

    Returns the asset-rewritten HTML text; the frontpage include is
    additionally wrapped in its layout container.
    """
    meaningful = [text.strip() for text in block_lines if text.strip()]
    if not meaningful or meaningful[0] != ".. raw:: html":
        return None

    # First ':file:'-style line names the HTML file to inline.
    filename = next(
        (m.group(1) for m in map(RAW_HTML_FILE_RE.match, meaningful[1:]) if m),
        None,
    )
    if filename is None:
        return None

    html_path = resolve_raw_html_file(current_file, filename)
    html = rewrite_frontpage_assets(html_path.read_text(encoding="utf-8")).strip()
    if Path(filename).name == "frontpage.html":
        return wrap_frontpage_html(html)
    return html
|
||||
|
||||
|
||||
def chapter_label(item: TocItem, target: Path, title_cache: dict[Path, str]) -> str:
    """Prefer the explicit TOC label; fall back to the target page's title."""
    if item.label:
        return item.label
    return title_cache[target]
|
||||
|
||||
|
||||
def render_toc_list(entries: list[TocItem], current_file: Path, title_cache: dict[Path, str]) -> list[str]:
    """Render TOC entries as a nested Markdown bullet list.

    A "part" entry becomes an unlinked bullet; chapters that follow a part
    are indented one level beneath it.
    """
    bullets: list[str] = []
    indent = 0
    for item in entries:
        if item.kind == "part":
            bullets.append(f"- {item.label}")
            indent = 1  # subsequent chapters nest under this part
        elif item.target is not None:
            destination = resolve_toc_target(current_file, item.target)
            text = chapter_label(item, destination, title_cache)
            link = relative_link(current_file, destination)
            bullets.append(f"{' ' * indent}- [{text}]({link})")
    return bullets
|
||||
|
||||
|
||||
def rewrite_markdown(
    markdown: str,
    current_file: Path,
    title_cache: dict[Path, str],
    bib_db: dict[str, dict[str, str]] | None = None,
) -> str:
    """Convert one d2l-flavoured Markdown file into mdBook-ready Markdown.

    ``toc`` fenced blocks are replaced with rendered link lists, ``eval_rst``
    blocks containing ``.. raw:: html`` directives are inlined, remaining
    directives are normalized, and ``:cite:`` references are expanded.
    """
    output: list[str] = []
    lines = markdown.splitlines()
    index = 0

    while index < len(lines):
        stripped = lines[index].strip()
        if stripped in (f"```{TOC_FENCE}", f"```{EVAL_RST_FENCE}"):
            fence = stripped[3:]  # fence name without the leading backticks
            index += 1
            # Collect the fenced block's body up to (not including) the closing ```.
            block_lines: list[str] = []
            while index < len(lines) and lines[index].strip() != "```":
                block_lines.append(lines[index])
                index += 1

            if fence == TOC_FENCE:
                entries = parse_toc_entries(block_lines)
                if entries:
                    # Surround the rendered list with blank lines so the
                    # Markdown list is not glued to adjacent paragraphs.
                    if output and output[-1] != "":
                        output.append("")
                    output.extend(render_toc_list(entries, current_file, title_cache))
                    if output and output[-1] != "":
                        output.append("")
            elif fence == EVAL_RST_FENCE:
                raw_html = inline_raw_html(block_lines, current_file)
                if raw_html:
                    if output and output[-1] != "":
                        output.append("")
                    output.extend(raw_html.splitlines())
                    if output and output[-1] != "":
                        output.append("")
            # Non-matching eval_rst blocks (no raw html) are dropped entirely.
            index += 1  # step past the closing ``` (or past EOF)
            continue

        output.append(lines[index])
        index += 1

    # Trim trailing blank lines before the normalization passes.
    while output and output[-1] == "":
        output.pop()

    result = normalize_directives("\n".join(output) + "\n")
    result = process_citations(result, bib_db or {})
    return result
|
||||
|
||||
|
||||
def build_title_cache(source_dir: Path) -> dict[Path, str]:
    """Map every content Markdown file under *source_dir* to its title."""
    cache: dict[Path, str] = {}
    for path in sorted(source_dir.rglob("*.md")):
        # Build artifacts and the generated SUMMARY are not content pages.
        if "_build" in path.parts or path.name == "SUMMARY.md":
            continue
        text = path.read_text(encoding="utf-8")
        cache[path.resolve()] = extract_title(text, fallback=path.stem)
    return cache
|
||||
|
||||
|
||||
def build_summary(source_dir: Path, title_cache: dict[Path, str]) -> str:
    """Build mdBook SUMMARY.md content by walking TOC blocks from index.md.

    Chapters listed before the first "part" heading become unnumbered
    prefix chapters; entries after a part heading are emitted as numbered
    chapters, and each chapter's own TOC is followed recursively one
    indent level deeper.
    """
    root_index = (source_dir / "index.md").resolve()
    root_markdown = root_index.read_text(encoding="utf-8")

    lines = ["# Summary", "", f"[{title_cache[root_index]}](index.md)"]
    seen: set[Path] = {root_index}  # guards against duplicates and TOC cycles

    def append_entry(target: Path, indent: int, label: str | None = None) -> None:
        # Numbered chapter entry; recurses into the chapter's own TOC blocks.
        target = target.resolve()
        if target in seen:
            return
        seen.add(target)
        rel = target.relative_to(source_dir.resolve()).as_posix()
        title = label or title_cache[target]
        lines.append(f"{' ' * indent}- [{title}]({rel})")

        child_markdown = target.read_text(encoding="utf-8")
        for block in parse_toc_blocks(child_markdown):
            for entry in block:
                if entry.kind != "chapter" or entry.target is None:
                    continue
                append_entry(resolve_toc_target(target, entry.target), indent + 1, entry.label or None)

    def append_prefix_chapter(target: Path, label: str | None = None) -> None:
        # Unnumbered entry before the first part heading (mdBook prefix chapter).
        target = target.resolve()
        if target in seen:
            return
        seen.add(target)
        rel = target.relative_to(source_dir.resolve()).as_posix()
        title = label or title_cache[target]
        lines.append(f"[{title}]({rel})")

    numbered_started = False
    for block in parse_toc_blocks(root_markdown):
        for entry in block:
            if entry.kind == "part":
                # Part headings separate prefix chapters from numbered ones.
                if lines and lines[-1] != "":
                    lines.append("")
                lines.append(f"# {entry.label}")
                lines.append("")
                numbered_started = True
                continue

            if entry.target is None:
                continue

            target = resolve_toc_target(root_index, entry.target)
            if numbered_started:
                append_entry(target, 0, entry.label or None)
            else:
                append_prefix_chapter(target, entry.label or None)

    return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def write_summary(source_dir: Path, summary_path: Path | None = None) -> Path:
    """Generate SUMMARY.md for *source_dir* and return the path written.

    When *summary_path* is omitted, the file is written to
    ``source_dir/SUMMARY.md``.
    """
    source_dir = source_dir.resolve()
    if summary_path is None:
        summary_path = source_dir / "SUMMARY.md"
    else:
        summary_path = summary_path.resolve()
    titles = build_title_cache(source_dir)
    summary_path.write_text(build_summary(source_dir, titles), encoding="utf-8")
    return summary_path
|
||||
try:
|
||||
from tools.prepare_mdbook import (
|
||||
build_title_cache,
|
||||
extract_title,
|
||||
parse_bib,
|
||||
rewrite_markdown,
|
||||
write_summary,
|
||||
)
|
||||
except ModuleNotFoundError:
|
||||
from prepare_mdbook import (
|
||||
build_title_cache,
|
||||
extract_title,
|
||||
parse_bib,
|
||||
rewrite_markdown,
|
||||
write_summary,
|
||||
)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
@@ -591,7 +35,7 @@ def parse_args() -> argparse.Namespace:
|
||||
|
||||
def main() -> int:
    """CLI entry point: generate the SUMMARY.md and report where it went."""
    args = parse_args()
    # Bug fix: write_summary was invoked twice back-to-back (merge residue
    # left both the old positional call and the new keyword call in place),
    # regenerating the summary a second time for no reason.
    summary_path = write_summary(args.source, summary_path=args.summary_output)
    print(f"Wrote mdBook summary to {summary_path}")
    return 0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user