mirror of
https://github.com/openmlsys/openmlsys-zh.git
synced 2026-04-01 01:41:17 +08:00
build: add mdbook support for zh chapters
Add mdBook configuration rooted at zh_chapters, generate and commit SUMMARY.md, rewrite d2l-specific directives through a Python preprocessor, refresh chapter resource symlinks from the build scripts, and ignore local build-only links and helper directories.
This commit is contained in:
16
.gitignore
vendored
16
.gitignore
vendored
@@ -12,3 +12,19 @@ test*.md
|
||||
run.sh
|
||||
.idea
|
||||
env
|
||||
.mdbook-zh/
|
||||
.mdbook-zh-test/
|
||||
task_plan.md
|
||||
findings.md
|
||||
progress.md
|
||||
d2l-book/
|
||||
docs/
|
||||
tests/
|
||||
en_chapters/img
|
||||
en_chapters/references
|
||||
en_chapters/static
|
||||
en_chapters/mlsys.bib
|
||||
zh_chapters/img
|
||||
zh_chapters/references
|
||||
zh_chapters/static
|
||||
zh_chapters/mlsys.bib
|
||||
|
||||
17
book.toml
Normal file
17
book.toml
Normal file
@@ -0,0 +1,17 @@
|
||||
[book]
|
||||
authors = ["OpenMLSys Contributors"]
|
||||
language = "zh-CN"
|
||||
src = "zh_chapters"
|
||||
title = "机器学习系统:设计和实现"
|
||||
|
||||
[build]
|
||||
build-dir = ".mdbook-zh/book"
|
||||
create-missing = false
|
||||
|
||||
[preprocessor.openmlsys-zh]
|
||||
command = "python3 tools/mdbook_zh_preprocessor.py"
|
||||
|
||||
[output.html]
|
||||
git-repository-url = "https://github.com/openmlsys/openmlsys-zh"
|
||||
mathjax-support = true
|
||||
preferred-dark-theme = "navy"
|
||||
@@ -12,9 +12,12 @@ ROOT="$(cd "$(dirname "$0")" && pwd)"
|
||||
# ── Create resource symlinks ──────────────────────────────────────────────────
|
||||
for target in img references static mlsys.bib; do
|
||||
link="$ROOT/en_chapters/$target"
|
||||
if [ ! -e "$link" ]; then
|
||||
ln -sf "$ROOT/$target" "$link"
|
||||
rel_target="../$target"
|
||||
if [ -e "$link" ] && [ ! -L "$link" ]; then
|
||||
echo "Refusing to replace non-symlink path: $link" >&2
|
||||
exit 1
|
||||
fi
|
||||
ln -sfn "$rel_target" "$link"
|
||||
done
|
||||
|
||||
# ── Build ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -12,9 +12,12 @@ ROOT="$(cd "$(dirname "$0")" && pwd)"
|
||||
# ── Create resource symlinks ──────────────────────────────────────────────────
|
||||
for target in img references static mlsys.bib; do
|
||||
link="$ROOT/zh_chapters/$target"
|
||||
if [ ! -e "$link" ]; then
|
||||
ln -sf "$ROOT/$target" "$link"
|
||||
rel_target="../$target"
|
||||
if [ -e "$link" ] && [ ! -L "$link" ]; then
|
||||
echo "Refusing to replace non-symlink path: $link" >&2
|
||||
exit 1
|
||||
fi
|
||||
ln -sfn "$rel_target" "$link"
|
||||
done
|
||||
|
||||
# ── Build ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
21
build_mdbook_zh.sh
Executable file
21
build_mdbook_zh.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
# Regenerate zh_chapters/SUMMARY.md, then build the book with mdbook.
set -euo pipefail

# Directory containing this script; all paths below are relative to it.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Prefer python3 and fall back to python; `|| true` keeps `set -e` from
# aborting before the friendlier message below is printed.
PYTHON_BIN="$(command -v python3 || command -v python || true)"

[[ -n "${PYTHON_BIN}" ]] || {
  echo "Python is required to prepare the mdBook staging tree." >&2
  exit 1
}

command -v mdbook >/dev/null 2>&1 || {
  echo "mdbook is not installed. Install it first, for example with: cargo install mdbook" >&2
  exit 1
}

"${PYTHON_BIN}" "${ROOT}/tools/prepare_mdbook_zh.py" \
  --source "${ROOT}/zh_chapters" \
  --summary-output "${ROOT}/zh_chapters/SUMMARY.md"

mdbook build "${ROOT}"
|
||||
@@ -1 +0,0 @@
|
||||
/chivier-disk/hyq-home/Projects/openmlsys-zh/img
|
||||
@@ -1 +0,0 @@
|
||||
/chivier-disk/hyq-home/Projects/openmlsys-zh/references
|
||||
@@ -1 +0,0 @@
|
||||
/chivier-disk/hyq-home/Projects/openmlsys-zh/static
|
||||
42
tools/mdbook_zh_preprocessor.py
Normal file
42
tools/mdbook_zh_preprocessor.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from prepare_mdbook_zh import build_title_cache, rewrite_markdown
|
||||
|
||||
|
||||
def iter_chapters(items: list[dict]) -> list[dict]:
    """Flatten mdBook book items into a depth-first list of chapter dicts.

    Args:
        items: The ``items`` (or a chapter's ``sub_items``) list from the book
            JSON. Entries that are not ``{"Chapter": {...}}`` mappings are
            skipped.

    Returns:
        Every chapter dict, each parent immediately followed by its
        descendants. The dicts are the same objects found in ``items``, so
        mutating them mutates the book.
    """
    chapters: list[dict] = []
    for item in items:
        # Non-chapter items (for example a separator serialised as a bare
        # string) have no ``.get``; skip them instead of crashing.
        if not isinstance(item, dict):
            continue
        chapter = item.get("Chapter")
        if not chapter:
            continue
        chapters.append(chapter)
        # ``or []`` also covers an explicit null ``sub_items`` value.
        chapters.extend(iter_chapters(chapter.get("sub_items") or []))
    return chapters
|
||||
|
||||
|
||||
def main() -> int:
    """Entry point for the mdBook preprocessor.

    When the first command-line argument is ``supports`` the function returns
    0 without reading stdin. Otherwise it reads a ``[context, book]`` JSON pair
    from stdin, rewrites the ``content`` of every chapter that has a source
    path, and writes the modified book as JSON to stdout.

    Returns:
        Process exit status (always 0 on success).
    """
    if len(sys.argv) > 1 and sys.argv[1] == "supports":
        return 0

    # The book is full of non-ASCII text and is written with
    # ``ensure_ascii=False``; force UTF-8 so the result does not depend on the
    # locale's default encoding. Replacement streams without ``reconfigure``
    # (for example ``io.StringIO``) are left untouched.
    for stream in (sys.stdin, sys.stdout):
        reconfigure = getattr(stream, "reconfigure", None)
        if reconfigure is not None:
            reconfigure(encoding="utf-8")

    context, book = json.load(sys.stdin)
    root = Path(context["root"]).resolve()
    source_dir = root / context["config"]["book"]["src"]
    title_cache = build_title_cache(source_dir)

    for chapter in iter_chapters(book.get("items", [])):
        # Chapters without any path have no location to resolve TOC entries
        # against, so they are passed through unchanged.
        source_path = chapter.get("source_path") or chapter.get("path")
        if not source_path:
            continue
        current_file = (source_dir / source_path).resolve()
        chapter["content"] = rewrite_markdown(chapter["content"], current_file, title_cache)

    json.dump(book, sys.stdout, ensure_ascii=False)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
197
tools/prepare_mdbook_zh.py
Normal file
197
tools/prepare_mdbook_zh.py
Normal file
@@ -0,0 +1,197 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Info strings of the fenced blocks that get special treatment.
TOC_FENCE = "toc"
EVAL_RST_FENCE = "eval_rst"

# A whole line consisting only of a :width:`...` or :label:`...` option.
OPTION_LINE_RE = re.compile(r"^:(width|label):`[^`]+`\s*$", flags=re.MULTILINE)

# Inline roles; group 1 captures the text between the backticks.
NUMREF_RE = re.compile(r":numref:`([^`]+)`")
EQREF_RE = re.compile(r":eqref:`([^`]+)`")
CITE_RE = re.compile(r":cite:`([^`]+)`")
|
||||
|
||||
|
||||
def extract_title(markdown: str, fallback: str = "Untitled") -> str:
    """Return the first heading found in ``markdown``.

    Lines are scanned top to bottom. A non-blank line starting with ``#``
    yields its text with the leading ``#`` characters stripped (if anything
    remains). Otherwise, a non-blank line whose following line is made up only
    of ``=`` and ``-`` characters is returned as-is. ``fallback`` is returned
    when no line qualifies.
    """
    rows = [row.strip() for row in markdown.splitlines()]
    last = len(rows) - 1

    for position, text in enumerate(rows):
        if not text:
            continue

        if text.startswith("#"):
            atx_text = text.lstrip("#").strip()
            if atx_text:
                return atx_text

        # Setext-style heading: the text sits above a run of '=' / '-'.
        following = rows[position + 1] if position < last else ""
        if following and all(char in "=-" for char in following):
            return text

    return fallback
|
||||
|
||||
|
||||
def parse_toc_blocks(markdown: str) -> list[list[str]]:
    """Collect the entries of every toc fenced block in ``markdown``.

    Returns one list per block, in document order. Each list holds the
    stripped, non-blank lines of the block that do not start with ``:``.
    A block that is never closed runs to the end of the document.
    """
    opening = f"```{TOC_FENCE}"
    found: list[list[str]] = []
    current: list[str] | None = None  # None while outside a toc block

    for raw in markdown.splitlines():
        text = raw.strip()
        if current is None:
            if text == opening:
                current = []
            continue

        if text == "```":
            found.append(current)
            current = None
        elif text and not text.startswith(":"):
            current.append(text)

    # An unterminated block still counts.
    if current is not None:
        found.append(current)

    return found
|
||||
|
||||
|
||||
def resolve_toc_target(current_file: Path, entry: str) -> Path:
    """Map a TOC entry to the resolved path of ``<entry>.md``.

    The entry is interpreted relative to the directory of ``current_file``.

    Raises:
        FileNotFoundError: If the resolved markdown file does not exist.
    """
    candidate = current_file.parent.joinpath(f"{entry}.md").resolve()
    if candidate.exists():
        return candidate
    raise FileNotFoundError(f"TOC entry '{entry}' from '{current_file}' does not exist")
|
||||
|
||||
|
||||
def relative_link(from_file: Path, target_file: Path) -> str:
    """Return the POSIX-style link from ``from_file`` to ``target_file``.

    The link is relative to the directory containing ``from_file``. A target
    outside that directory (for example in a sibling chapter directory) gets
    ``..`` components rather than raising ``ValueError``.

    Args:
        from_file: The markdown file that will contain the link.
        target_file: The file being linked to.

    Returns:
        The relative path, using forward slashes.
    """
    # Local import keeps this block self-contained; the module's top-level
    # imports do not include ``os``.
    import os

    relative = os.path.relpath(target_file, start=from_file.parent)
    return Path(relative).as_posix()
|
||||
|
||||
|
||||
def normalize_directives(markdown: str) -> str:
    """Strip or rewrite directive syntax and tidy blank lines.

    Lines matching ``OPTION_LINE_RE`` are emptied, ``:numref:`` and ``:eqref:``
    roles become inline code, and ``:cite:`` roles become bracketed text.
    Trailing whitespace is then removed from every line, runs of blank lines
    are collapsed to one, and the result ends with exactly one newline.
    """
    text = OPTION_LINE_RE.sub("", markdown)
    for pattern, template in (
        (NUMREF_RE, r"`\1`"),
        (EQREF_RE, r"`\1`"),
        (CITE_RE, r"[\1]"),
    ):
        text = pattern.sub(template, text)

    kept: list[str] = []
    for raw in text.splitlines():
        line = raw.rstrip()
        # Keep a blank line only when the previously kept line was not blank.
        if line or not kept or kept[-1]:
            kept.append(line)

    while kept and not kept[-1]:
        kept.pop()

    return "\n".join(kept) + "\n"
|
||||
|
||||
|
||||
def render_toc_list(entries: list[str], current_file: Path, title_cache: dict[Path, str]) -> list[str]:
    """Render TOC entries as markdown bullet links.

    Each entry is resolved relative to ``current_file``; the link text is
    looked up in ``title_cache`` and the link target is relative to
    ``current_file``.
    """
    # Lazy generator: each entry is resolved right before it is rendered, so
    # a failure surfaces at the first bad entry.
    targets = (resolve_toc_target(current_file, name) for name in entries)
    return [
        f"- [{title_cache[path]}]({relative_link(current_file, path)})"
        for path in targets
    ]
|
||||
|
||||
|
||||
def rewrite_markdown(markdown: str, current_file: Path, title_cache: dict[Path, str]) -> str:
    """Rewrite toc / eval_rst fenced blocks and normalise directives.

    A toc block is replaced by a bullet list of links (surrounded by blank
    lines) when it has at least one entry; an eval_rst block is removed
    entirely. All other lines are copied through, and the assembled text is
    finally passed to ``normalize_directives``.
    """
    special_openers = (f"```{TOC_FENCE}", f"```{EVAL_RST_FENCE}")
    source_lines = markdown.splitlines()
    total = len(source_lines)
    rendered: list[str] = []
    cursor = 0

    while cursor < total:
        opener = source_lines[cursor].strip()
        if opener not in special_openers:
            rendered.append(source_lines[cursor])
            cursor += 1
            continue

        # Gather the block body up to (not including) the closing fence.
        body_start = cursor + 1
        cursor = body_start
        while cursor < total and source_lines[cursor].strip() != "```":
            cursor += 1
        body = source_lines[body_start:cursor]
        cursor += 1  # step past the closing fence

        if opener[3:] != TOC_FENCE:
            continue  # eval_rst blocks produce no output

        entries = [
            text
            for text in (row.strip() for row in body)
            if text and not text.startswith(":")
        ]
        if not entries:
            continue

        if rendered and rendered[-1] != "":
            rendered.append("")
        rendered.extend(render_toc_list(entries, current_file, title_cache))
        if rendered and rendered[-1] != "":
            rendered.append("")

    while rendered and rendered[-1] == "":
        rendered.pop()

    return normalize_directives("\n".join(rendered) + "\n")
|
||||
|
||||
|
||||
def build_title_cache(source_dir: Path) -> dict[Path, str]:
    """Map every markdown file under ``source_dir`` to its title.

    Keys are resolved paths. Files named ``SUMMARY.md`` and files with a
    ``_build`` path component are skipped. The file stem is used as the title
    when no heading is found.
    """
    titles: dict[Path, str] = {}
    for path in sorted(source_dir.rglob("*.md")):
        if path.name == "SUMMARY.md" or "_build" in path.parts:
            continue
        text = path.read_text(encoding="utf-8")
        titles[path.resolve()] = extract_title(text, fallback=path.stem)
    return titles
|
||||
|
||||
|
||||
def build_summary(source_dir: Path, title_cache: dict[Path, str]) -> str:
    """Build the text of an mdBook ``SUMMARY.md`` from the toc blocks.

    Starts at ``source_dir/index.md`` and follows toc entries recursively.
    Entries listed in the root index are emitted at the top level, alongside
    the root index itself; entries found in a child file are nested one level
    below that child. A file is listed only the first time it is reached.

    Args:
        source_dir: Directory containing ``index.md`` and the chapters.
        title_cache: Resolved markdown path -> title, as produced by
            ``build_title_cache``.

    Returns:
        The SUMMARY.md content, ending with a newline.

    Raises:
        FileNotFoundError: If a toc entry points at a missing file.
        KeyError: If a reached file has no entry in ``title_cache``.
    """
    # A nested Markdown list item must be indented at least to the parent
    # item's content column; one space per level would leave every entry a
    # sibling. Four spaces per level nests reliably.
    indent_unit = "    "
    source_root = source_dir.resolve()
    root_index = (source_dir / "index.md").resolve()
    root_markdown = root_index.read_text(encoding="utf-8")

    lines = ["# Summary", "", f"- [{title_cache[root_index]}](index.md)"]
    seen: set[Path] = {root_index}  # guards against duplicates and cycles

    def append_entry(target: Path, indent: int) -> None:
        target = target.resolve()
        if target in seen:
            return
        seen.add(target)
        rel = target.relative_to(source_root).as_posix()
        lines.append(f"{indent_unit * indent}- [{title_cache[target]}]({rel})")

        child_markdown = target.read_text(encoding="utf-8")
        for block in parse_toc_blocks(child_markdown):
            for entry in block:
                append_entry(resolve_toc_target(target, entry), indent + 1)

    for block in parse_toc_blocks(root_markdown):
        for entry in block:
            append_entry(resolve_toc_target(root_index, entry), 0)

    return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def write_summary(source_dir: Path, summary_path: Path | None = None) -> Path:
    """Generate the summary for ``source_dir`` and write it to disk.

    Args:
        source_dir: Chapter source directory.
        summary_path: Output file; defaults to ``SUMMARY.md`` inside the
            resolved ``source_dir``.

    Returns:
        The path that was written.
    """
    resolved_source = source_dir.resolve()
    if summary_path:
        destination = summary_path.resolve()
    else:
        destination = resolved_source / "SUMMARY.md"

    titles = build_title_cache(resolved_source)
    content = build_summary(resolved_source, titles)
    destination.write_text(content, encoding="utf-8")
    return destination
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options ``--source`` and ``--summary-output``."""
    cli = argparse.ArgumentParser(description="Generate mdBook SUMMARY.md for zh_chapters.")
    option_specs = {
        "--source": {
            "type": Path,
            "default": Path("zh_chapters"),
            "help": "Source chapter directory",
        },
        "--summary-output": {
            "type": Path,
            "default": Path("zh_chapters/SUMMARY.md"),
            "help": "Where to write the generated SUMMARY.md",
        },
    }
    # Registered in declaration order.
    for flag, spec in option_specs.items():
        cli.add_argument(flag, **spec)
    return cli.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: write the summary and report its path."""
    options = parse_args()
    written = write_summary(args.source, args.summary_output) if (args := options) else None
    print(f"Wrote mdBook summary to {written}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
98
zh_chapters/SUMMARY.md
Normal file
98
zh_chapters/SUMMARY.md
Normal file
@@ -0,0 +1,98 @@
|
||||
# Summary
|
||||
|
||||
- [机器学习系统:设计和实现](index.md)
|
||||
- [前言](chapter_preface/index.md)
|
||||
- [导论](chapter_introduction/index.md)
|
||||
- [机器学习应用](chapter_introduction/applications.md)
|
||||
- [机器学习框架的设计目标](chapter_introduction/design.md)
|
||||
- [机器学习框架的基本组成原理](chapter_introduction/architecture.md)
|
||||
- [机器学习系统生态](chapter_introduction/ecosystem.md)
|
||||
- [图书结构和读者](chapter_introduction/readers.md)
|
||||
- [编程接口](chapter_programming_interface/index.md)
|
||||
- [机器学习系统编程模型的演进](chapter_programming_interface/development_history.md)
|
||||
- [机器学习工作流](chapter_programming_interface/ml_workflow.md)
|
||||
- [定义深度神经网络](chapter_programming_interface/neural_network_layer.md)
|
||||
- [C/C++编程接口](chapter_programming_interface/c_python_interaction.md)
|
||||
- [机器学习框架的编程范式](chapter_programming_interface/ml_programming_paradigm.md)
|
||||
- [总结](chapter_programming_interface/summary.md)
|
||||
- [计算图](chapter_computational_graph/index.md)
|
||||
- [计算图的设计背景和作用](chapter_computational_graph/background_and_functionality.md)
|
||||
- [计算图的基本构成](chapter_computational_graph/components_of_computational_graph.md)
|
||||
- [计算图的生成](chapter_computational_graph/generation_of_computational_graph.md)
|
||||
- [计算图的调度](chapter_computational_graph/schedule_of_computational_graph.md)
|
||||
- [总结](chapter_computational_graph/summary.md)
|
||||
- [第二部分:进阶篇](chapter_preface_advanced/index.md)
|
||||
- [AI编译器和前端技术](chapter_frontend_and_ir/index.md)
|
||||
- [AI编译器设计原理](chapter_frontend_and_ir/ai_compiler_design_principle.md)
|
||||
- [AI编译器前端技术概述](chapter_frontend_and_ir/overview_of_frontend.md)
|
||||
- [中间表示](chapter_frontend_and_ir/intermediate_representation.md)
|
||||
- [自动微分](chapter_frontend_and_ir/ad.md)
|
||||
- [类型系统和静态分析](chapter_frontend_and_ir/type_system_and_static_analysis.md)
|
||||
- [常见前端编译优化方法](chapter_frontend_and_ir/common_frontend_optimization_pass.md)
|
||||
- [总结](chapter_frontend_and_ir/summary.md)
|
||||
- [编译器后端和运行时](chapter_backend_and_runtime/index.md)
|
||||
- [概述](chapter_backend_and_runtime/overview.md)
|
||||
- [计算图优化](chapter_backend_and_runtime/graph_optimizer.md)
|
||||
- [算子选择](chapter_backend_and_runtime/kernel_selecter.md)
|
||||
- [内存分配](chapter_backend_and_runtime/memory_allocator.md)
|
||||
- [计算调度与执行](chapter_backend_and_runtime/compute_schedule_and_execute.md)
|
||||
- [算子编译器](chapter_backend_and_runtime/op_compiler.md)
|
||||
- [总结](chapter_backend_and_runtime/summary.md)
|
||||
- [硬件加速器](chapter_accelerator/index.md)
|
||||
- [概述](chapter_accelerator/accelerator_introduction.md)
|
||||
- [加速器基本组成原理](chapter_accelerator/accelerator_architecture.md)
|
||||
- [加速器基本编程原理](chapter_accelerator/accelerator_programming.md)
|
||||
- [加速器实践](chapter_accelerator/accelerator_practise.md)
|
||||
- [总结](chapter_accelerator/summary.md)
|
||||
- [数据处理框架](chapter_data_processing/index.md)
|
||||
- [概述](chapter_data_processing/requirements.md)
|
||||
- [易用性设计](chapter_data_processing/program_model.md)
|
||||
- [高效性设计](chapter_data_processing/performance.md)
|
||||
- [保序性设计](chapter_data_processing/data_order.md)
|
||||
- [单机数据处理性能的扩展](chapter_data_processing/extension.md)
|
||||
- [总结](chapter_data_processing/summary.md)
|
||||
- [模型部署](chapter_model_deployment/index.md)
|
||||
- [概述](chapter_model_deployment/model_deployment_introduction.md)
|
||||
- [训练模型到推理模型的转换及优化](chapter_model_deployment/model_converter_and_optimizer.md)
|
||||
- [模型压缩](chapter_model_deployment/model_compression.md)
|
||||
- [模型推理](chapter_model_deployment/model_inference.md)
|
||||
- [模型的安全保护](chapter_model_deployment/model_security.md)
|
||||
- [总结](chapter_model_deployment/summary.md)
|
||||
- [分布式训练](chapter_distributed_training/index.md)
|
||||
- [系统概述](chapter_distributed_training/overview.md)
|
||||
- [实现方法](chapter_distributed_training/methods.md)
|
||||
- [机器学习集群架构](chapter_distributed_training/cluster.md)
|
||||
- [集合通信](chapter_distributed_training/collective.md)
|
||||
- [参数服务器](chapter_distributed_training/parameter_servers.md)
|
||||
- [总结](chapter_distributed_training/summary.md)
|
||||
- [第三部分:拓展篇](chapter_preface_extension/index.md)
|
||||
- [深度学习推荐系统](chapter_recommender_system/index.md)
|
||||
- [系统基本组成](chapter_recommender_system/system_architecture.md)
|
||||
- [多阶段推荐系统](chapter_recommender_system/multi_stage_recommender_system.md)
|
||||
- [模型更新](chapter_recommender_system/model_update.md)
|
||||
- [案例分析:支持在线模型更新的大型推荐系统](chapter_recommender_system/case_study.md)
|
||||
- [小结](chapter_recommender_system/summary.md)
|
||||
- [联邦学习系统](chapter_federated_learning/index.md)
|
||||
- [概述](chapter_federated_learning/overview.md)
|
||||
- [横向联邦学习](chapter_federated_learning/horizontal_fl.md)
|
||||
- [纵向联邦学习](chapter_federated_learning/vertical_fl.md)
|
||||
- [隐私加密算法](chapter_federated_learning/privacy_encryption_algorithm.md)
|
||||
- [展望](chapter_federated_learning/outlook.md)
|
||||
- [小结](chapter_federated_learning/summary.md)
|
||||
- [强化学习系统](chapter_reinforcement_learning/index.md)
|
||||
- [强化学习介绍](chapter_reinforcement_learning/rl_introduction.md)
|
||||
- [单节点强化学习系统](chapter_reinforcement_learning/single_node_rl.md)
|
||||
- [多智能体强化学习](chapter_reinforcement_learning/marl.md)
|
||||
- [多智能体强化学习系统](chapter_reinforcement_learning/marl_sys.md)
|
||||
- [小结](chapter_reinforcement_learning/summary.md)
|
||||
- [可解释性AI系统](chapter_explainable_AI/index.md)
|
||||
- [背景](chapter_explainable_AI/explainable_ai.md)
|
||||
- [机器人系统](chapter_rl_sys/index.md)
|
||||
- [机器人系统概述](chapter_rl_sys/rl_sys_intro.md)
|
||||
- [通用机器人操作系统](chapter_rl_sys/ros.md)
|
||||
- [案例分析:使用机器人操作系统](chapter_rl_sys/ros_code_ex.md)
|
||||
- [总结](chapter_rl_sys/summary.md)
|
||||
- [附录:机器学习介绍](appendix_machine_learning_introduction/index.md)
|
||||
- [神经网络](appendix_machine_learning_introduction/neural_network.md)
|
||||
- [梯度下降与反向传播](appendix_machine_learning_introduction/gradient_descent.md)
|
||||
- [经典机器学习方法](appendix_machine_learning_introduction/classic_machine_learning.md)
|
||||
@@ -13,7 +13,6 @@
|
||||
|
||||
rl_introduction
|
||||
single_node_rl
|
||||
distributed_node_rl
|
||||
marl
|
||||
marl_sys
|
||||
summary
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
/chivier-disk/hyq-home/Projects/openmlsys-zh/img
|
||||
@@ -1 +0,0 @@
|
||||
/chivier-disk/hyq-home/Projects/openmlsys-zh/references
|
||||
@@ -1 +0,0 @@
|
||||
/chivier-disk/hyq-home/Projects/openmlsys-zh/static
|
||||
Reference in New Issue
Block a user