fix: group mdbook toc by part titles

2026-06-15 06:16:50 +08:00 · 2026-03-10 23:45:56 +00:00
parent a699ae34c1
commit 2fff3c81bf
3 changed files with 114 additions and 34 deletions
--- a/tools/prepare_mdbook_zh.py
+++ b/tools/prepare_mdbook_zh.py
@@ -1,7 +1,9 @@
 from __future__ import annotations

 import argparse
+import os
 import re
+from dataclasses import dataclass
 from pathlib import Path


@@ -12,6 +14,8 @@ NUMREF_RE = re.compile(r":numref:`([^`]+)`")
 EQREF_RE = re.compile(r":eqref:`([^`]+)`")
 CITE_RE = re.compile(r":cite:`([^`]+)`")
 RAW_HTML_FILE_RE = re.compile(r"^\s*:file:\s*([^\s]+)\s*$")
+TOC_LINK_RE = re.compile(r"^\[([^\]]+)\]\(([^)]+)\)\s*$")
+TOC_PART_RE = re.compile(r"^#+\s+(.+?)\s*$")
 HEAD_TAG_RE = re.compile(r"</?head>", re.IGNORECASE)
 STYLE_BLOCK_RE = re.compile(r"<style>(.*?)</style>", re.IGNORECASE | re.DOTALL)
 FRONTPAGE_LAYOUT_CSS = """
@@ -78,6 +82,13 @@ FRONTPAGE_LAYOUT_CSS = """
 """.strip()


+@dataclass(frozen=True)
+class TocItem:
+    kind: str
+    label: str
+    target: str | None = None
+
+
 def extract_title(markdown: str, fallback: str = "Untitled") -> str:
    lines = markdown.splitlines()

@@ -99,20 +110,37 @@ def extract_title(markdown: str, fallback: str = "Untitled") -> str:
    return fallback


-def parse_toc_blocks(markdown: str) -> list[list[str]]:
-    blocks: list[list[str]] = []
+def parse_toc_entries(block_lines: list[str]) -> list[TocItem]:
+    entries: list[TocItem] = []
+    for line in block_lines:
+        stripped = line.strip()
+        if not stripped or stripped.startswith(":"):
+            continue
+        part_match = TOC_PART_RE.match(stripped)
+        if part_match:
+            entries.append(TocItem(kind="part", label=part_match.group(1).strip()))
+            continue
+        link_match = TOC_LINK_RE.match(stripped)
+        if link_match:
+            entries.append(TocItem(kind="chapter", label=link_match.group(1).strip(), target=link_match.group(2).strip()))
+            continue
+        entries.append(TocItem(kind="chapter", label="", target=stripped))
+    return entries
+
+
+def parse_toc_blocks(markdown: str) -> list[list[TocItem]]:
+    blocks: list[list[TocItem]] = []
    lines = markdown.splitlines()
    index = 0

    while index < len(lines):
        if lines[index].strip() == f"```{TOC_FENCE}":
            index += 1
-            entries: list[str] = []
+            block_lines: list[str] = []
            while index < len(lines) and lines[index].strip() != "```":
-                stripped = lines[index].strip()
-                if stripped and not stripped.startswith(":"):
-                    entries.append(stripped)
+                block_lines.append(lines[index])
                index += 1
+            entries = parse_toc_entries(block_lines)
            blocks.append(entries)
        index += 1

@@ -120,14 +148,15 @@ def parse_toc_blocks(markdown: str) -> list[list[str]]:


 def resolve_toc_target(current_file: Path, entry: str) -> Path:
-    target = (current_file.parent / f"{entry}.md").resolve()
+    target_name = entry if entry.endswith(".md") else f"{entry}.md"
+    target = (current_file.parent / target_name).resolve()
    if not target.exists():
        raise FileNotFoundError(f"TOC entry '{entry}' from '{current_file}' does not exist")
    return target


 def relative_link(from_file: Path, target_file: Path) -> str:
-    return target_file.relative_to(from_file.parent).as_posix()
+    return Path(os.path.relpath(target_file, start=from_file.parent)).as_posix()


 def normalize_directives(markdown: str) -> str:
@@ -204,11 +233,25 @@ def inline_raw_html(block_lines: list[str], current_file: Path) -> str | None:
    return html


-def render_toc_list(entries: list[str], current_file: Path, title_cache: dict[Path, str]) -> list[str]:
+def chapter_label(item: TocItem, target: Path, title_cache: dict[Path, str]) -> str:
+    return item.label or title_cache[target]
+
+
+def render_toc_list(entries: list[TocItem], current_file: Path, title_cache: dict[Path, str]) -> list[str]:
    rendered: list[str] = []
+    current_indent = 0
    for entry in entries:
-        target = resolve_toc_target(current_file, entry)
-        rendered.append(f"- [{title_cache[target]}]({relative_link(current_file, target)})")
+        if entry.kind == "part":
+            rendered.append(f"- {entry.label}")
+            current_indent = 1
+            continue
+
+        if entry.target is None:
+            continue
+
+        target = resolve_toc_target(current_file, entry.target)
+        label = chapter_label(entry, target, title_cache)
+        rendered.append(f"{'  ' * current_indent}- [{label}]({relative_link(current_file, target)})")
    return rendered


@@ -228,7 +271,7 @@ def rewrite_markdown(markdown: str, current_file: Path, title_cache: dict[Path,
                index += 1

            if fence == TOC_FENCE:
-                entries = [line.strip() for line in block_lines if line.strip() and not line.strip().startswith(":")]
+                entries = parse_toc_entries(block_lines)
                if entries:
                    if output and output[-1] != "":
                        output.append("")
@@ -268,25 +311,53 @@ def build_summary(source_dir: Path, title_cache: dict[Path, str]) -> str:
    root_index = (source_dir / "index.md").resolve()
    root_markdown = root_index.read_text(encoding="utf-8")

-    lines = ["# Summary", "", f"- [{title_cache[root_index]}](index.md)"]
+    lines = ["# Summary", "", f"[{title_cache[root_index]}](index.md)"]
    seen: set[Path] = {root_index}

-    def append_entry(target: Path, indent: int) -> None:
+    def append_entry(target: Path, indent: int, label: str | None = None) -> None:
        target = target.resolve()
        if target in seen:
            return
        seen.add(target)
        rel = target.relative_to(source_dir.resolve()).as_posix()
-        lines.append(f"{'  ' * indent}- [{title_cache[target]}]({rel})")
+        title = label or title_cache[target]
+        lines.append(f"{'  ' * indent}- [{title}]({rel})")

        child_markdown = target.read_text(encoding="utf-8")
        for block in parse_toc_blocks(child_markdown):
            for entry in block:
-                append_entry(resolve_toc_target(target, entry), indent + 1)
+                if entry.kind != "chapter" or entry.target is None:
+                    continue
+                append_entry(resolve_toc_target(target, entry.target), indent + 1, entry.label or None)

+    def append_prefix_chapter(target: Path, label: str | None = None) -> None:
+        target = target.resolve()
+        if target in seen:
+            return
+        seen.add(target)
+        rel = target.relative_to(source_dir.resolve()).as_posix()
+        title = label or title_cache[target]
+        lines.append(f"[{title}]({rel})")
+
+    numbered_started = False
    for block in parse_toc_blocks(root_markdown):
        for entry in block:
-            append_entry(resolve_toc_target(root_index, entry), 0)
+            if entry.kind == "part":
+                if lines and lines[-1] != "":
+                    lines.append("")
+                lines.append(f"# {entry.label}")
+                lines.append("")
+                numbered_started = True
+                continue
+
+            if entry.target is None:
+                continue
+
+            target = resolve_toc_target(root_index, entry.target)
+            if numbered_started:
+                append_entry(target, 0, entry.label or None)
+            else:
+                append_prefix_chapter(target, entry.label or None)

    return "\n".join(lines) + "\n"

--- a/zh_chapters/SUMMARY.md
+++ b/zh_chapters/SUMMARY.md
@@ -1,7 +1,10 @@
 # Summary

- [机器学习系统：设计和实现](index.md)
- [前言](chapter_preface/index.md)
+[机器学习系统：设计和实现](index.md)
+[前言](chapter_preface/index.md)
+
+# 基础篇
+
 - [导论](chapter_introduction/index.md)
  - [机器学习应用](chapter_introduction/applications.md)
  - [机器学习框架的设计目标](chapter_introduction/design.md)
@@ -21,7 +24,10 @@
  - [计算图的生成](chapter_computational_graph/generation_of_computational_graph.md)
  - [计算图的调度](chapter_computational_graph/schedule_of_computational_graph.md)
  - [总结](chapter_computational_graph/summary.md)
- [第二部分：进阶篇](chapter_preface_advanced/index.md)
+
+# 进阶篇
+
+- [导读](chapter_preface_advanced/index.md)
 - [AI编译器和前端技术](chapter_frontend_and_ir/index.md)
  - [AI编译器设计原理](chapter_frontend_and_ir/ai_compiler_design_principle.md)
  - [AI编译器前端技术概述](chapter_frontend_and_ir/overview_of_frontend.md)
@@ -65,7 +71,10 @@
  - [集合通信](chapter_distributed_training/collective.md)
  - [参数服务器](chapter_distributed_training/parameter_servers.md)
  - [总结](chapter_distributed_training/summary.md)
- [第三部分：拓展篇](chapter_preface_extension/index.md)
+
+# 拓展篇
+
+- [导读](chapter_preface_extension/index.md)
 - [深度学习推荐系统](chapter_recommender_system/index.md)
  - [系统基本组成](chapter_recommender_system/system_architecture.md)
  - [多阶段推荐系统](chapter_recommender_system/multi_stage_recommender_system.md)
@@ -92,7 +101,10 @@
  - [通用机器人操作系统](chapter_rl_sys/ros.md)
  - [案例分析：使用机器人操作系统](chapter_rl_sys/ros_code_ex.md)
  - [总结](chapter_rl_sys/summary.md)
- [附录：机器学习介绍](appendix_machine_learning_introduction/index.md)
+
+# 附录
+
+- [机器学习介绍](appendix_machine_learning_introduction/index.md)
  - [神经网络](appendix_machine_learning_introduction/neural_network.md)
  - [梯度下降与反向传播](appendix_machine_learning_introduction/gradient_descent.md)
  - [经典机器学习方法](appendix_machine_learning_introduction/classic_machine_learning.md)
--- a/zh_chapters/index.md
+++ b/zh_chapters/index.md
@@ -8,15 +8,16 @@

 ```toc
 :maxdepth: 2
-:numbered:

-chapter_preface/index
+[前言](chapter_preface/index)
+
+# 基础篇
 chapter_introduction/index
 chapter_programming_interface/index
 chapter_computational_graph/index

-chapter_preface_advanced/index
-
+# 进阶篇
+[导读](chapter_preface_advanced/index)
 chapter_frontend_and_ir/index
 chapter_backend_and_runtime/index
 chapter_accelerator/index
@@ -24,18 +25,14 @@ chapter_data_processing/index
 chapter_model_deployment/index
 chapter_distributed_training/index

-chapter_preface_extension/index
-
+# 拓展篇
+[导读](chapter_preface_extension/index)
 chapter_recommender_system/index
 chapter_federated_learning/index
 chapter_reinforcement_learning/index
 chapter_explainable_AI/index
 chapter_rl_sys/index

-```
-
-```toc
-:maxdepth: 1
-
-appendix_machine_learning_introduction/index
+# 附录
+[机器学习介绍](appendix_machine_learning_introduction/index)
 ```