From c52a50988209139d0f09c9eca39b05d9a4ba7884 Mon Sep 17 00:00:00 2001 From: cydia2001 Date: Tue, 10 Mar 2026 21:41:36 +0000 Subject: [PATCH] build: add mdbook support for zh chapters Add mdBook configuration rooted at zh_chapters, generate and commit SUMMARY.md, rewrite d2l-specific directives through a Python preprocessor, refresh chapter resource symlinks from the build scripts, and ignore local build-only links and helper directories. --- .gitignore | 16 ++ book.toml | 17 ++ build_html.sh | 7 +- build_html_zh.sh | 7 +- build_mdbook_zh.sh | 21 ++ en_chapters/img | 1 - en_chapters/references | 1 - en_chapters/static | 1 - tools/mdbook_zh_preprocessor.py | 42 ++++ tools/prepare_mdbook_zh.py | 197 ++++++++++++++++++ zh_chapters/SUMMARY.md | 98 +++++++++ .../chapter_reinforcement_learning/index.md | 1 - zh_chapters/img | 1 - zh_chapters/references | 1 - zh_chapters/static | 1 - 15 files changed, 401 insertions(+), 11 deletions(-) create mode 100644 book.toml create mode 100755 build_mdbook_zh.sh delete mode 120000 en_chapters/img delete mode 120000 en_chapters/references delete mode 120000 en_chapters/static create mode 100644 tools/mdbook_zh_preprocessor.py create mode 100644 tools/prepare_mdbook_zh.py create mode 100644 zh_chapters/SUMMARY.md delete mode 120000 zh_chapters/img delete mode 120000 zh_chapters/references delete mode 120000 zh_chapters/static diff --git a/.gitignore b/.gitignore index 2fddff0..fa65c61 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,19 @@ test*.md run.sh .idea env +.mdbook-zh/ +.mdbook-zh-test/ +task_plan.md +findings.md +progress.md +d2l-book/ +docs/ +tests/ +en_chapters/img +en_chapters/references +en_chapters/static +en_chapters/mlsys.bib +zh_chapters/img +zh_chapters/references +zh_chapters/static +zh_chapters/mlsys.bib diff --git a/book.toml b/book.toml new file mode 100644 index 0000000..614bac6 --- /dev/null +++ b/book.toml @@ -0,0 +1,17 @@ +[book] +authors = ["OpenMLSys Contributors"] +language = "zh-CN" +src = "zh_chapters" +title = "机器学习系统:设计和实现" + +[build] +build-dir = ".mdbook-zh/book" +create-missing = false + +[preprocessor.openmlsys-zh] +command = "python3 tools/mdbook_zh_preprocessor.py" + +[output.html] +git-repository-url = "https://github.com/openmlsys/openmlsys-zh" +mathjax-support = true +preferred-dark-theme = "navy" diff --git a/build_html.sh b/build_html.sh index b052657..6e47429 100644 --- a/build_html.sh +++ b/build_html.sh @@ -12,9 +12,12 @@ ROOT="$(cd "$(dirname "$0")" && pwd)" # ── Create resource symlinks ────────────────────────────────────────────────── for target in img references static mlsys.bib; do link="$ROOT/en_chapters/$target" - if [ ! -e "$link" ]; then - ln -sf "$ROOT/$target" "$link" + rel_target="../$target" + if [ -e "$link" ] && [ ! -L "$link" ]; then + echo "Refusing to replace non-symlink path: $link" >&2 + exit 1 fi + ln -sfn "$rel_target" "$link" done # ── Build ───────────────────────────────────────────────────────────────────── diff --git a/build_html_zh.sh b/build_html_zh.sh index 5d9ec90..2949005 100755 --- a/build_html_zh.sh +++ b/build_html_zh.sh @@ -12,9 +12,12 @@ ROOT="$(cd "$(dirname "$0")" && pwd)" # ── Create resource symlinks ────────────────────────────────────────────────── for target in img references static mlsys.bib; do link="$ROOT/zh_chapters/$target" - if [ ! -e "$link" ]; then - ln -sf "$ROOT/$target" "$link" + rel_target="../$target" + if [ -e "$link" ] && [ ! -L "$link" ]; then + echo "Refusing to replace non-symlink path: $link" >&2 + exit 1 fi + ln -sfn "$rel_target" "$link" done # ── Build ───────────────────────────────────────────────────────────────────── diff --git a/build_mdbook_zh.sh b/build_mdbook_zh.sh new file mode 100755 index 0000000..2928777 --- /dev/null +++ b/build_mdbook_zh.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PYTHON_BIN="$(command -v python3 || command -v python || true)" + +if [[ -z "${PYTHON_BIN}" ]]; then + echo "Python is required to prepare the mdBook staging tree." >&2 + exit 1 +fi + +if ! command -v mdbook >/dev/null 2>&1; then + echo "mdbook is not installed. Install it first, for example with: cargo install mdbook" >&2 + exit 1 +fi + +"${PYTHON_BIN}" "${ROOT}/tools/prepare_mdbook_zh.py" \ + --source "${ROOT}/zh_chapters" \ + --summary-output "${ROOT}/zh_chapters/SUMMARY.md" + +mdbook build "${ROOT}" diff --git a/en_chapters/img b/en_chapters/img deleted file mode 120000 index 0af1dd5..0000000 --- a/en_chapters/img +++ /dev/null @@ -1 +0,0 @@ -/chivier-disk/hyq-home/Projects/openmlsys-zh/img \ No newline at end of file diff --git a/en_chapters/references b/en_chapters/references deleted file mode 120000 index 543a78f..0000000 --- a/en_chapters/references +++ /dev/null @@ -1 +0,0 @@ -/chivier-disk/hyq-home/Projects/openmlsys-zh/references \ No newline at end of file diff --git a/en_chapters/static b/en_chapters/static deleted file mode 120000 index 1ca9b6a..0000000 --- a/en_chapters/static +++ /dev/null @@ -1 +0,0 @@ -/chivier-disk/hyq-home/Projects/openmlsys-zh/static \ No newline at end of file diff --git a/tools/mdbook_zh_preprocessor.py b/tools/mdbook_zh_preprocessor.py new file mode 100644 index 0000000..42d05f6 --- /dev/null +++ b/tools/mdbook_zh_preprocessor.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import json +import sys +from pathlib import Path + +from prepare_mdbook_zh import build_title_cache, rewrite_markdown + + +def iter_chapters(items: list[dict]) -> list[dict]: + chapters: list[dict] = [] + for item in items: + chapter = item.get("Chapter") + if not chapter: + continue + chapters.append(chapter) + chapters.extend(iter_chapters(chapter.get("sub_items", []))) + return chapters + + +def main() -> int: + if len(sys.argv) > 1 and sys.argv[1] == "supports": + return 0 + + context, book = json.load(sys.stdin) + root = Path(context["root"]).resolve() + source_dir = root / context["config"]["book"]["src"] + title_cache = build_title_cache(source_dir) + + for chapter in iter_chapters(book.get("items", [])): + source_path = chapter.get("source_path") or chapter.get("path") + if not source_path: + continue + current_file = (source_dir / source_path).resolve() + chapter["content"] = rewrite_markdown(chapter["content"], current_file, title_cache) + + json.dump(book, sys.stdout, ensure_ascii=False) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/prepare_mdbook_zh.py b/tools/prepare_mdbook_zh.py new file mode 100644 index 0000000..031dc15 --- /dev/null +++ b/tools/prepare_mdbook_zh.py @@ -0,0 +1,197 @@ +from __future__ import annotations + +import argparse +import re +from pathlib import Path + + +TOC_FENCE = "toc" +EVAL_RST_FENCE = "eval_rst" +OPTION_LINE_RE = re.compile(r"^:(width|label):`[^`]+`\s*$", re.MULTILINE) +NUMREF_RE = re.compile(r":numref:`([^`]+)`") +EQREF_RE = re.compile(r":eqref:`([^`]+)`") +CITE_RE = re.compile(r":cite:`([^`]+)`") + + +def extract_title(markdown: str, fallback: str = "Untitled") -> str: + lines = markdown.splitlines() + + for index, line in enumerate(lines): + stripped = line.strip() + if not stripped: + continue + if stripped.startswith("#"): + heading = stripped.lstrip("#").strip() + if heading: + return heading + + next_index = index + 1 + if next_index < len(lines): + underline = lines[next_index].strip() + if underline and set(underline) <= {"=", "-"}: + return stripped + + return fallback + + +def parse_toc_blocks(markdown: str) -> list[list[str]]: + blocks: list[list[str]] = [] + lines = markdown.splitlines() + index = 0 + + while index < len(lines): + if lines[index].strip() == f"```{TOC_FENCE}": + index += 1 + entries: list[str] = [] + while index < len(lines) and lines[index].strip() != "```": + stripped = lines[index].strip() + if stripped and not stripped.startswith(":"): + entries.append(stripped) + index += 1 + blocks.append(entries) + index += 1 + + return blocks + + +def resolve_toc_target(current_file: Path, entry: str) -> Path: + target = (current_file.parent / f"{entry}.md").resolve() + if not target.exists(): + raise FileNotFoundError(f"TOC entry '{entry}' from '{current_file}' does not exist") + return target + + +def relative_link(from_file: Path, target_file: Path) -> str: + return target_file.relative_to(from_file.parent).as_posix() + + +def normalize_directives(markdown: str) -> str: + normalized = OPTION_LINE_RE.sub("", markdown) + normalized = NUMREF_RE.sub(lambda match: f"`{match.group(1)}`", normalized) + normalized = EQREF_RE.sub(lambda match: f"`{match.group(1)}`", normalized) + normalized = CITE_RE.sub(lambda match: f"[{match.group(1)}]", normalized) + + lines = [line.rstrip() for line in normalized.splitlines()] + collapsed: list[str] = [] + previous_blank = False + for line in lines: + is_blank = line == "" + if is_blank and previous_blank: + continue + collapsed.append(line) + previous_blank = is_blank + + while collapsed and collapsed[-1] == "": + collapsed.pop() + + return "\n".join(collapsed) + "\n" + + +def render_toc_list(entries: list[str], current_file: Path, title_cache: dict[Path, str]) -> list[str]: + rendered: list[str] = [] + for entry in entries: + target = resolve_toc_target(current_file, entry) + rendered.append(f"- [{title_cache[target]}]({relative_link(current_file, target)})") + return rendered + + +def rewrite_markdown(markdown: str, current_file: Path, title_cache: dict[Path, str]) -> str: + output: list[str] = [] + lines = markdown.splitlines() + index = 0 + + while index < len(lines): + stripped = lines[index].strip() + if stripped in (f"```{TOC_FENCE}", f"```{EVAL_RST_FENCE}"): + fence = stripped[3:] + index += 1 + block_lines: list[str] = [] + while index < len(lines) and lines[index].strip() != "```": + block_lines.append(lines[index]) + index += 1 + + if fence == TOC_FENCE: + entries = [line.strip() for line in block_lines if line.strip() and not line.strip().startswith(":")] + if entries: + if output and output[-1] != "": + output.append("") + output.extend(render_toc_list(entries, current_file, title_cache)) + if output and output[-1] != "": + output.append("") + index += 1 + continue + + output.append(lines[index]) + index += 1 + + while output and output[-1] == "": + output.pop() + + return normalize_directives("\n".join(output) + "\n") + + +def build_title_cache(source_dir: Path) -> dict[Path, str]: + cache: dict[Path, str] = {} + for markdown_file in sorted(source_dir.rglob("*.md")): + if "_build" in markdown_file.parts or markdown_file.name == "SUMMARY.md": + continue + cache[markdown_file.resolve()] = extract_title(markdown_file.read_text(encoding="utf-8"), fallback=markdown_file.stem) + return cache + + +def build_summary(source_dir: Path, title_cache: dict[Path, str]) -> str: + root_index = (source_dir / "index.md").resolve() + root_markdown = root_index.read_text(encoding="utf-8") + + lines = ["# Summary", "", f"- [{title_cache[root_index]}](index.md)"] + seen: set[Path] = {root_index} + + def append_entry(target: Path, indent: int) -> None: + target = target.resolve() + if target in seen: + return + seen.add(target) + rel = target.relative_to(source_dir.resolve()).as_posix() + lines.append(f"{' ' * indent}- [{title_cache[target]}]({rel})") + + child_markdown = target.read_text(encoding="utf-8") + for block in parse_toc_blocks(child_markdown): + for entry in block: + append_entry(resolve_toc_target(target, entry), indent + 1) + + for block in parse_toc_blocks(root_markdown): + for entry in block: + append_entry(resolve_toc_target(root_index, entry), 0) + + return "\n".join(lines) + "\n" + + +def write_summary(source_dir: Path, summary_path: Path | None = None) -> Path: + source_dir = source_dir.resolve() + summary_path = summary_path.resolve() if summary_path else (source_dir / "SUMMARY.md") + title_cache = build_title_cache(source_dir) + summary_path.write_text(build_summary(source_dir, title_cache), encoding="utf-8") + return summary_path + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Generate mdBook SUMMARY.md for zh_chapters.") + parser.add_argument("--source", type=Path, default=Path("zh_chapters"), help="Source chapter directory") + parser.add_argument( + "--summary-output", + type=Path, + default=Path("zh_chapters/SUMMARY.md"), + help="Where to write the generated SUMMARY.md", + ) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + summary_path = write_summary(args.source, args.summary_output) + print(f"Wrote mdBook summary to {summary_path}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/zh_chapters/SUMMARY.md b/zh_chapters/SUMMARY.md new file mode 100644 index 0000000..782b07b --- /dev/null +++ b/zh_chapters/SUMMARY.md @@ -0,0 +1,98 @@ +# Summary + +- [机器学习系统:设计和实现](index.md) +- [前言](chapter_preface/index.md) +- [导论](chapter_introduction/index.md) + - [机器学习应用](chapter_introduction/applications.md) + - [机器学习框架的设计目标](chapter_introduction/design.md) + - [机器学习框架的基本组成原理](chapter_introduction/architecture.md) + - [机器学习系统生态](chapter_introduction/ecosystem.md) + - [图书结构和读者](chapter_introduction/readers.md) +- [编程接口](chapter_programming_interface/index.md) + - [机器学习系统编程模型的演进](chapter_programming_interface/development_history.md) + - [机器学习工作流](chapter_programming_interface/ml_workflow.md) + - [定义深度神经网络](chapter_programming_interface/neural_network_layer.md) + - [C/C++编程接口](chapter_programming_interface/c_python_interaction.md) + - [机器学习框架的编程范式](chapter_programming_interface/ml_programming_paradigm.md) + - [总结](chapter_programming_interface/summary.md) +- [计算图](chapter_computational_graph/index.md) + - [计算图的设计背景和作用](chapter_computational_graph/background_and_functionality.md) + - [计算图的基本构成](chapter_computational_graph/components_of_computational_graph.md) + - [计算图的生成](chapter_computational_graph/generation_of_computational_graph.md) + - [计算图的调度](chapter_computational_graph/schedule_of_computational_graph.md) + - [总结](chapter_computational_graph/summary.md) +- [第二部分:进阶篇](chapter_preface_advanced/index.md) +- [AI编译器和前端技术](chapter_frontend_and_ir/index.md) + - [AI编译器设计原理](chapter_frontend_and_ir/ai_compiler_design_principle.md) + - [AI编译器前端技术概述](chapter_frontend_and_ir/overview_of_frontend.md) + - [中间表示](chapter_frontend_and_ir/intermediate_representation.md) + - [自动微分](chapter_frontend_and_ir/ad.md) + - [类型系统和静态分析](chapter_frontend_and_ir/type_system_and_static_analysis.md) + - [常见前端编译优化方法](chapter_frontend_and_ir/common_frontend_optimization_pass.md) + - [总结](chapter_frontend_and_ir/summary.md) +- [编译器后端和运行时](chapter_backend_and_runtime/index.md) + - [概述](chapter_backend_and_runtime/overview.md) + - [计算图优化](chapter_backend_and_runtime/graph_optimizer.md) + - [算子选择](chapter_backend_and_runtime/kernel_selecter.md) + - [内存分配](chapter_backend_and_runtime/memory_allocator.md) + - [计算调度与执行](chapter_backend_and_runtime/compute_schedule_and_execute.md) + - [算子编译器](chapter_backend_and_runtime/op_compiler.md) + - [总结](chapter_backend_and_runtime/summary.md) +- [硬件加速器](chapter_accelerator/index.md) + - [概述](chapter_accelerator/accelerator_introduction.md) + - [加速器基本组成原理](chapter_accelerator/accelerator_architecture.md) + - [加速器基本编程原理](chapter_accelerator/accelerator_programming.md) + - [加速器实践](chapter_accelerator/accelerator_practise.md) + - [总结](chapter_accelerator/summary.md) +- [数据处理框架](chapter_data_processing/index.md) + - [概述](chapter_data_processing/requirements.md) + - [易用性设计](chapter_data_processing/program_model.md) + - [高效性设计](chapter_data_processing/performance.md) + - [保序性设计](chapter_data_processing/data_order.md) + - [单机数据处理性能的扩展](chapter_data_processing/extension.md) + - [总结](chapter_data_processing/summary.md) +- [模型部署](chapter_model_deployment/index.md) + - [概述](chapter_model_deployment/model_deployment_introduction.md) + - [训练模型到推理模型的转换及优化](chapter_model_deployment/model_converter_and_optimizer.md) + - [模型压缩](chapter_model_deployment/model_compression.md) + - [模型推理](chapter_model_deployment/model_inference.md) + - [模型的安全保护](chapter_model_deployment/model_security.md) + - [总结](chapter_model_deployment/summary.md) +- [分布式训练](chapter_distributed_training/index.md) + - [系统概述](chapter_distributed_training/overview.md) + - [实现方法](chapter_distributed_training/methods.md) + - [机器学习集群架构](chapter_distributed_training/cluster.md) + - [集合通信](chapter_distributed_training/collective.md) + - [参数服务器](chapter_distributed_training/parameter_servers.md) + - [总结](chapter_distributed_training/summary.md) +- [第三部分:拓展篇](chapter_preface_extension/index.md) +- [深度学习推荐系统](chapter_recommender_system/index.md) + - [系统基本组成](chapter_recommender_system/system_architecture.md) + - [多阶段推荐系统](chapter_recommender_system/multi_stage_recommender_system.md) + - [模型更新](chapter_recommender_system/model_update.md) + - [案例分析:支持在线模型更新的大型推荐系统](chapter_recommender_system/case_study.md) + - [小结](chapter_recommender_system/summary.md) +- [联邦学习系统](chapter_federated_learning/index.md) + - [概述](chapter_federated_learning/overview.md) + - [横向联邦学习](chapter_federated_learning/horizontal_fl.md) + - [纵向联邦学习](chapter_federated_learning/vertical_fl.md) + - [隐私加密算法](chapter_federated_learning/privacy_encryption_algorithm.md) + - [展望](chapter_federated_learning/outlook.md) + - [小结](chapter_federated_learning/summary.md) +- [强化学习系统](chapter_reinforcement_learning/index.md) + - [强化学习介绍](chapter_reinforcement_learning/rl_introduction.md) + - [单节点强化学习系统](chapter_reinforcement_learning/single_node_rl.md) + - [多智能体强化学习](chapter_reinforcement_learning/marl.md) + - [多智能体强化学习系统](chapter_reinforcement_learning/marl_sys.md) + - [小结](chapter_reinforcement_learning/summary.md) +- [可解释性AI系统](chapter_explainable_AI/index.md) + - [背景](chapter_explainable_AI/explainable_ai.md) +- [机器人系统](chapter_rl_sys/index.md) + - [机器人系统概述](chapter_rl_sys/rl_sys_intro.md) + - [通用机器人操作系统](chapter_rl_sys/ros.md) + - [案例分析:使用机器人操作系统](chapter_rl_sys/ros_code_ex.md) + - [总结](chapter_rl_sys/summary.md) +- [附录:机器学习介绍](appendix_machine_learning_introduction/index.md) + - [神经网络](appendix_machine_learning_introduction/neural_network.md) + - [梯度下降与反向传播](appendix_machine_learning_introduction/gradient_descent.md) + - [经典机器学习方法](appendix_machine_learning_introduction/classic_machine_learning.md) diff --git a/zh_chapters/chapter_reinforcement_learning/index.md b/zh_chapters/chapter_reinforcement_learning/index.md index 5062091..318893a 100644 --- a/zh_chapters/chapter_reinforcement_learning/index.md +++ b/zh_chapters/chapter_reinforcement_learning/index.md @@ -13,7 +13,6 @@ rl_introduction single_node_rl -distributed_node_rl marl marl_sys summary diff --git a/zh_chapters/img b/zh_chapters/img deleted file mode 120000 index 0af1dd5..0000000 --- a/zh_chapters/img +++ /dev/null @@ -1 +0,0 @@ -/chivier-disk/hyq-home/Projects/openmlsys-zh/img \ No newline at end of file diff --git a/zh_chapters/references b/zh_chapters/references deleted file mode 120000 index 543a78f..0000000 --- a/zh_chapters/references +++ /dev/null @@ -1 +0,0 @@ -/chivier-disk/hyq-home/Projects/openmlsys-zh/references \ No newline at end of file diff --git a/zh_chapters/static b/zh_chapters/static deleted file mode 120000 index 1ca9b6a..0000000 --- a/zh_chapters/static +++ /dev/null @@ -1 +0,0 @@ -/chivier-disk/hyq-home/Projects/openmlsys-zh/static \ No newline at end of file