Split book into English and Chinese builds with framework update (#487)

* docs: split the book into English and Chinese builds

* feat: update english version framework

* fix: fix ci
This commit is contained in:
Yeqi Huang
2026-03-09 17:32:24 +00:00
committed by GitHub
parent 1492ec6c41
commit f974d3e841
226 changed files with 4206 additions and 1096 deletions

View File

@@ -1,27 +1,78 @@
# Continuous integration: build the English and Chinese editions of the book
# as two independent jobs, then report one aggregate status.
name: CI

on:
  push:
  pull_request:
  workflow_dispatch:  # allows running this workflow manually from the Actions tab

jobs:
  build-en:
    name: Build (English)
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'
          cache: 'pip'

      - name: Install pandoc
        run: |
          wget -q https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-amd64.deb
          sudo dpkg -i pandoc-2.19.2-1-amd64.deb

      - name: Install d2lbook
        run: |
          git clone https://github.com/openmlsys/d2l-book.git
          cd d2l-book
          # Fix Python 3.10+ incompatibility: bibtex<2.0.0 depends on oset which
          # uses collections.MutableSet removed in Python 3.10.
          sed -i "s/'sphinxcontrib-bibtex<2.0.0'/'sphinxcontrib-bibtex>=2.5.0'/" setup.py
          python3 -m pip install .

      - name: Install Python dependencies
        run: python3 -m pip install -r requirements.txt

      - name: Build English HTML
        run: bash build_html.sh

  build-zh:
    name: Build (Chinese)
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'
          cache: 'pip'

      - name: Install pandoc
        run: |
          wget -q https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-amd64.deb
          sudo dpkg -i pandoc-2.19.2-1-amd64.deb

      - name: Install d2lbook
        run: |
          git clone https://github.com/openmlsys/d2l-book.git
          cd d2l-book
          # Same sphinxcontrib-bibtex pin rewrite as in build-en (Python 3.10+
          # incompatibility of the <2.0.0 dependency chain).
          sed -i "s/'sphinxcontrib-bibtex<2.0.0'/'sphinxcontrib-bibtex>=2.5.0'/" setup.py
          python3 -m pip install .

      - name: Install Python dependencies
        run: python3 -m pip install -r requirements.txt

      - name: Build Chinese HTML
        run: bash build_html_zh.sh

  # Aggregate job: runs only after both language builds have succeeded.
  # NOTE(review): presumably keeps the pre-split job id `build` so existing
  # required status checks still match - confirm against branch protection.
  build:
    name: build
    needs: [build-en, build-zh]
    runs-on: ubuntu-22.04
    steps:
      - run: echo "All builds passed"

View File

@@ -1,5 +1,4 @@
name: CI
name: Deploy Docs
on:
pull_request:
@@ -7,30 +6,56 @@ on:
- closed
jobs:
  # Builds both language editions and publishes them to the
  # openmlsys.github.io repository.
  deploy:
    # Run only for pull requests that were actually merged, not merely closed.
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'
          cache: 'pip'

      - name: Install pandoc
        run: |
          wget -q https://github.com/jgm/pandoc/releases/download/2.19.2/pandoc-2.19.2-1-amd64.deb
          sudo dpkg -i pandoc-2.19.2-1-amd64.deb

      - name: Install d2lbook
        run: |
          git clone https://github.com/openmlsys/d2l-book.git
          cd d2l-book
          # Fix Python 3.10+ incompatibility: bibtex<2.0.0 depends on oset which
          # uses collections.MutableSet removed in Python 3.10.
          sed -i "s/'sphinxcontrib-bibtex<2.0.0'/'sphinxcontrib-bibtex>=2.5.0'/" setup.py
          python3 -m pip install .

      - name: Install Python dependencies
        run: python3 -m pip install -r requirements.txt sphinx-mathjax-offline

      - name: Build English HTML
        run: bash build_html.sh

      - name: Build Chinese HTML
        run: bash build_html_zh.sh

      - name: Deploy to GitHub Pages
        run: |
          # NOTE(review): this clone carries no credentials, so the final
          # `git push` likely needs a token-bearing remote URL - confirm how
          # push access to openmlsys.github.io is provided.
          git clone https://github.com/openmlsys/openmlsys.github.io.git
          # Steps run from the checkout root, so the build output is at
          # <lang>_chapters/_build/html (no repository-name prefix).
          # English → root (default language)
          cp -r en_chapters/_build/html/* openmlsys.github.io/docs/
          # Chinese → /cn/ subdirectory
          mkdir -p openmlsys.github.io/docs/cn
          cp -r zh_chapters/_build/html/* openmlsys.github.io/docs/cn/
          cd openmlsys.github.io
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add .
          # Skip the commit when the build produced no changes; otherwise
          # `git commit` exits non-zero and fails the job.
          git diff --cached --quiet || git commit -m "deploy: update docs (en+zh) from openmlsys-zh@${{ github.sha }}"
          git push

167
README.md
View File

@@ -1,79 +1,132 @@
<p align="center">
<img src="static/logo-with-text.png" alt="OpenMLSys Logo" width="400"/>
</p>
<p align="center">
<a href="https://github.com/openmlsys/openmlsys-zh/actions/workflows/main.yml">
<img src="https://github.com/openmlsys/openmlsys-zh/actions/workflows/main.yml/badge.svg" alt="CI"/>
</a>
<a href="https://openmlsys.github.io/">
<img src="https://img.shields.io/badge/book-online-blue" alt="Book Online"/>
</a>
<a href="https://github.com/openmlsys/openmlsys-zh/blob/main/LICENSE">
<img src="https://img.shields.io/github/license/openmlsys/openmlsys-zh" alt="License"/>
</a>
<a href="https://github.com/openmlsys/openmlsys-zh/stargazers">
<img src="https://img.shields.io/github/stars/openmlsys/openmlsys-zh?style=social" alt="GitHub Stars"/>
</a>
</p>
<p align="center">
<b>中文</b> | <a href="README_EN.md">English</a>
</p>
---
# 机器学习系统:设计和实现
本开源项目试图给读者讲解现代机器学习系统的设计原理和实现经验。
本开源项目讲解现代机器学习系统的设计原理和实现经验,涵盖从编程接口、计算图、编译器到分布式训练的完整技术栈
🔥 **书籍网页版** [机器学习系统:设计和实现](https://openmlsys.github.io/)
**在线阅读** [openmlsys.github.io](https://openmlsys.github.io/)
🔥 **书籍PDF** 将在勘误后2022年中发布。
## 目录
## 发布
- 27/06/2022: OpenMLSys社区发布通俗易懂的高性能AI算子开发教程助力学生和工程师60分钟理解算子性能优化的关键知识点。相应的[技术博客](https://zhuanlan.zhihu.com/p/531498210)和[复现代码](https://github.com/openmlsys/openmlsys-cuda)都已免费公开。感谢@[Jie Ren](https://github.com/JieRen98) 和 @[Wenteng Liang](https://github.com/Went-Liang) 的贡献!🔥
- 17/03/2022: 本书处于勘误阶段。如发现文字和图片错误可创建Issue并@[章节编辑](info/editors.md)。我们非常欢迎社区提交PR直接勘误。
- [适用读者](#适用读者)
- [内容介绍](#内容介绍)
- [构建指南](#构建指南)
- [贡献指南](#贡献指南)
- [社区](#社区)
- [许可证](#许可证)
## 适用读者
本书的常见读者包括:
- **学生:**
随着大量机器学习课程在大学中的普及,学生已经开始掌握大量机器学习的基础理论和神经网络的实现。然而,需要训练出可以实际应用的机器学习模型,需要对现代机器学习系统有充分的认识。
- **科研人员:**
研发新型的机器学习模型不仅仅需要会使用基础的机器学习系统接口。同时新型的模型需要给系统提供新的自定义算子Custom
Operators又或者是会利用高级的分布式执行算子来实现大模型的开发。这一系列需求都需要对底层系统具有充分认识。
- **开发人员:**
大量的数据和AI驱动的公司都部署了机器学习基础设施。这一设施的核心就是机器学习系统。因此了解机器学习系统有助于开发人员对于系统性能调优以定位问题并且根据业务需求对机器学习系统进行深度定制。
- **学生**:掌握机器学习基础理论后,希望深入了解现代机器学习系统设计与实现的同学。
- **科研人员**需要开发自定义算子Custom Operators或利用分布式执行实现大模型的研究者。
- **开发人员**:负责机器学习基础设施建设,需要对系统性能调优和深度定制的工程师。
## 内容介绍
现代机器学习框架具有复杂的内部架构和繁多的外部相关组件。在本书中,我们将对其细致拆分,深入解读
本书分为基础篇、进阶篇和扩展篇三个部分
基础:
### 基础篇
- **编程接口:** 为了支持海量应用机器学习框架的编程接口设计具有大量的设计哲学在易用性和性能之间取得平衡。本书将讲述编程接口的演进机器学习工作流定义深度学习模型以及用C/C++进行框架开发。
| 章节 | 内容 |
|------|------|
| [编程接口](chapter_programming_interface/) | 框架接口设计哲学、机器学习工作流、深度学习模型定义、C/C++ 框架开发 |
| [计算图](chapter_computational_graph/) | 计算图基本构成、生成方法、调度策略、自动微分 |
- **计算图:** 机器学习框架需要支持自动微分硬件加速器多编程前端等。实现这些支持的核心技术是计算图Computational Graph。本书将讲述计算图的基本构成生成方法和调度策略。
### 进阶篇
性能进阶:
| 章节 | 内容 |
|------|------|
| [编译器前端和中间表示](chapter_frontend_and_ir/) | 类型推导、中间表示IR、自动微分、常见优化 Pass |
| [编译器后端和运行时](chapter_backend_and_runtime/) | 计算图优化、算子选择、内存分配、计算调度与执行 |
| [硬件加速器](chapter_accelerator/) | GPU/Ascend 架构原理、高性能编程接口CUDA/CANN |
| [数据处理框架](chapter_data_processing/) | 易用性、高效性、保序性、分布式数据处理 |
| [模型部署](chapter_model_deployment/) | 模型转换、模型压缩、模型推理、安全保护 |
| [分布式训练](chapter_distributed_training/) | 数据并行、模型并行、流水线并行、集合通讯、参数服务器 |
- **编译器前端:**
机器学习框架需要利用编译器前端技术对计算图进行功能拓展和性能优化。本书将讲述常见的前端技术包括类型推导中间表示Intermediate Representation自动微分等。
### 扩展篇
- **编译器后端和运行时:**
机器学习框架的一个核心目标是如何充分利用异构硬件。这其中会涉及编译器后端技术以及将计算图算子Operator调度到硬件上的运行时Runtime。本书将讲述计算图优化算子选择内存分配和计算调度与执行。
- **硬件加速器:**
机器学习框架的基本运行单元是算子而算子的实现必须充分利用硬件加速器GPU和Ascend的特性。本书将会讲述硬件加速器的基本构成原理和常见的高性能编程接口。
- **数据处理框架:**
机器学习框架会集成高性能框架来进行数据预处理。本书将会讲述这一类数据处理框架在设计中需要达到的多个目标:易用性,高效性,保序性,分布式等。
- **模型部署:**
在模型完成训练后,用户需要将模型部署到终端设备(如云服务器,移动终端和无人车)。这其中涉及到的模型转换,模型压缩,模型推理和安全保护等知识也会在本书中讨论。
- **分布式训练:**
机器学习模型的训练需要消耗大量资源。越来越多的机器学习框架因此原生支持分布式训练。在本书中我们将会讨论常见的分布式训练方法(包括数据并行,模型并行和流水线并行),以及实现这些方法的系统架构(包括集合通讯和参数服务器)。
功能拓展:
- **深度学习推荐系统:** 推荐系统是目前机器学习应用最成功的领域之一。本书将会概括推荐系统的运作原理,详细描述大规模工业场景下的推荐系统架构设计。
- **联邦学习系统:** 随着数据保护法规和隐私保护的崛起,联邦学习正成为日益重要的研究领域。本书将会介绍联邦学习的常用方法以及相关系统实现。
- **强化学习系统:** 强化学习是走向通用人工智能的关键技术。本书将会介绍目前常见的强化学习系统(包括单智能体和多智能体等)。
- **可解释性AI系统** 随着机器学习在安全攸关Safety-critical领域的应用机器学习系统越来越需要对决策给出充分解释。本书将会讨论可解释AI系统的常用方法和落地实践经验。
- **机器人系统:** 机器人(无人车,无人机,家用机器人等)作为机器学习技术重要的应用领域,在最近数年得到了广泛应用。在实践中,机器人系统在实时性,安全性,鲁棒性等方面都有极高要求,这要求开发者具有算法和系统的双重思维,从而解决实际问题。本书中我们将结合最新研究成果和机器人系统实践经验讲解该类系统的设计原则和实现细节。
我们在持续拓展本书的内容,如元学习系统、自动并行、深度学习集群调度、绿色AI系统、图学习系统等。我们也非常欢迎社区对于新内容提出建议,贡献章节。
| 章节 | 内容 |
|------|------|
| [深度学习推荐系统](chapter_recommender_system/) | 推荐系统原理、大规模工业场景架构设计 |
| [联邦学习系统](chapter_federated_learning/) | 联邦学习方法、隐私保护、系统实现 |
| [强化学习系统](chapter_reinforcement_learning/) | 单智能体/多智能体强化学习系统 |
| [可解释性 AI 系统](chapter_explainable_AI/) | 可解释 AI 方法与落地实践 |
| [机器人学习系统](chapter_rl_sys/) | 机器人感知、规划、控制与系统安全 |
## 构建指南
请参考[构建指南](info/info.md)来了解如何构建本书的网页版本和PDF版本。
### 环境依赖
## 写作指南
- Python >= 3.10
- pandoc >= 2.19
我们欢迎大家来一起贡献和更新本书的内容。常见的贡献方式是提交PR来更新和添加Markdown文件。写作的风格和图片要求请参考[风格指南](info/style.md)。同时,机器学习领域涉及到大量的中英文翻译,相关的翻译要求请参考[术语指南](info/terminology.md)。
### 安装步骤
```bash
# 克隆仓库
git clone https://github.com/openmlsys/openmlsys-zh.git
cd openmlsys-zh
# 安装 d2lbook
git clone https://github.com/openmlsys/d2l-book.git
cd d2l-book && pip install . && cd ..
# 安装 Python 依赖
pip install -r requirements.txt
```
### 编译 HTML
```bash
bash build_html.sh      # 英文版,生成结果在 en_chapters/_build/html/
bash build_html_zh.sh   # 中文版,生成结果在 zh_chapters/_build/html/
```
更多细节请参考 [构建指南](info/info.md)。
## 贡献指南
我们欢迎任何形式的贡献,包括:
- **勘误**:发现文字或图片错误,请创建 Issue 并 @ [章节编辑](info/editors.md),或直接提交 PR。
- **内容更新**:提交 PR 更新或添加 Markdown 文件。
- **新章节**:欢迎社区对元学习系统、自动并行、集群调度、绿色 AI、图学习等主题贡献章节。
提交前请阅读:
- [写作风格指南](info/style.md)
- [中英文术语对照](info/terminology.md)
## 社区
<p align="center">
<img src="info/mlsys_group.png" alt="微信群二维码" width="200"/>
<br/>
扫码加入微信交流群
</p>
## 许可证
本项目采用 [知识共享 署名-非商业性使用-相同方式共享 4.0 国际许可协议](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.zh) 授权。

128
README_EN.md Normal file
View File

@@ -0,0 +1,128 @@
<p align="center">
<img src="static/logo-with-text.png" alt="OpenMLSys Logo" width="400"/>
</p>
<p align="center">
<a href="https://github.com/openmlsys/openmlsys-zh/actions/workflows/main.yml">
<img src="https://github.com/openmlsys/openmlsys-zh/actions/workflows/main.yml/badge.svg" alt="CI"/>
</a>
<a href="https://openmlsys.github.io/">
<img src="https://img.shields.io/badge/book-online-blue" alt="Book Online"/>
</a>
<a href="https://github.com/openmlsys/openmlsys-zh/blob/main/LICENSE">
<img src="https://img.shields.io/github/license/openmlsys/openmlsys-zh" alt="License"/>
</a>
<a href="https://github.com/openmlsys/openmlsys-zh/stargazers">
<img src="https://img.shields.io/github/stars/openmlsys/openmlsys-zh?style=social" alt="GitHub Stars"/>
</a>
</p>
<p align="center">
<a href="README.md">中文</a> | <b>English</b>
</p>
---
# Machine Learning Systems: Design and Implementation
An open-source book explaining the design principles and implementation experience of modern machine learning systems, covering the complete technology stack from programming interfaces and computational graphs to compilers and distributed training.
**Read Online:** [openmlsys.github.io](https://openmlsys.github.io/)
## Table of Contents
- [Target Audience](#target-audience)
- [Content Overview](#content-overview)
- [Build Guide](#build-guide)
- [Contributing](#contributing)
- [Community](#community)
- [License](#license)
## Target Audience
- **Students**: Those who have mastered machine learning fundamentals and want to deeply understand the design and implementation of modern ML systems.
- **Researchers**: Those who need to develop custom operators or leverage distributed execution for large model development.
- **Engineers**: Those responsible for building ML infrastructure who need to tune system performance or customize ML systems for business needs.
## Content Overview
The book is organized into three parts: Fundamentals, Advanced Topics, and Extensions.
### Part I: Fundamentals
| Chapter | Content |
|---------|---------|
| [Programming Interface](chapter_programming_interface/) | Framework API design, ML workflows, deep learning model definition, C/C++ framework development |
| [Computational Graph](chapter_computational_graph/) | Graph components, generation methods, scheduling strategies, automatic differentiation |
### Part II: Advanced Topics
| Chapter | Content |
|---------|---------|
| [Compiler Frontend & IR](chapter_frontend_and_ir/) | Type inference, intermediate representation (IR), automatic differentiation, common optimization passes |
| [Compiler Backend & Runtime](chapter_backend_and_runtime/) | Graph optimization, operator selection, memory allocation, compute scheduling and execution |
| [Hardware Accelerators](chapter_accelerator/) | GPU/Ascend architecture, high-performance programming interfaces (CUDA/CANN) |
| [Data Processing](chapter_data_processing/) | Usability, efficiency, order preservation, distributed data processing |
| [Model Deployment](chapter_model_deployment/) | Model conversion, compression, inference, and security |
| [Distributed Training](chapter_distributed_training/) | Data parallelism, model parallelism, pipeline parallelism, collective communication, parameter servers |
### Part III: Extensions
| Chapter | Content |
|---------|---------|
| [Recommender Systems](chapter_recommender_system/) | Recommendation principles, large-scale industrial architecture |
| [Federated Learning](chapter_federated_learning/) | Federated learning methods, privacy protection, system implementation |
| [Reinforcement Learning Systems](chapter_reinforcement_learning/) | Single-agent and multi-agent RL systems |
| [Explainable AI Systems](chapter_explainable_AI/) | XAI methods and production practices |
| [Robot Learning Systems](chapter_rl_sys/) | Robot perception, planning, control, and system safety |
## Build Guide
### Prerequisites
- Python >= 3.10
- pandoc >= 2.19
### Installation
```bash
# Clone the repository
git clone https://github.com/openmlsys/openmlsys-zh.git
cd openmlsys-zh
# Install d2lbook
git clone https://github.com/openmlsys/d2l-book.git
cd d2l-book && pip install . && cd ..
# Install Python dependencies
pip install -r requirements.txt
```
### Build HTML
```bash
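bash build_html.sh      # English edition; output is in en_chapters/_build/html/
bash build_html_zh.sh   # Chinese edition; output is in zh_chapters/_build/html/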
```
For more details, see the [Build Guide](info/info.md).
## Contributing
We welcome all forms of contributions, including:
- **Errata**: If you find text or figure errors, please open an Issue and @ the [chapter editors](info/editors.md), or submit a PR directly.
- **Content updates**: Submit PRs to update or add Markdown files.
- **New chapters**: We welcome community contributions on topics such as meta-learning systems, automatic parallelism, cluster scheduling, green AI, and graph learning.
Before contributing, please read:
- [Writing Style Guide](info/style.md)
- [Terminology Guide](info/terminology.md)
## Community
Join our WeChat group by scanning the QR code in [info/mlsys_group.png](info/mlsys_group.png).
## License
This project is licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/).

View File

@@ -1,10 +1,28 @@
#!/bin/bash
# Build the English (en) version of the book from en_chapters/.
# Output: en_chapters/_build/html/
#
# Resources (img/, references/, static/, mlsys.bib) live at the repo root and
# are symlinked into en_chapters/ so d2lbook can find them at relative paths.

# Abort on the first failing command.
set -e

# Absolute path of the directory containing this script (the repo root).
ROOT="$(cd "$(dirname "$0")" && pwd)"

# ── Create resource symlinks ──────────────────────────────────────────────────
for target in img references static mlsys.bib; do
    link="$ROOT/en_chapters/$target"
    # Only create the link when nothing exists at that path yet.
    if [ ! -e "$link" ]; then
        ln -sf "$ROOT/$target" "$link"
    fi
done

# ── Build ─────────────────────────────────────────────────────────────────────
cd "$ROOT/en_chapters"
rm -rf _build/rst _build/html
d2lbook build rst
cp static/frontpage.html _build/rst/
d2lbook build html
# Best-effort copy: a missing or empty static/image/ must not fail the build.
cp -r static/image/* _build/html/_images/ 2>/dev/null || true
# tools/ is not among the symlinked resources, so it is addressed through
# $ROOT rather than relative to en_chapters/.
python3 "$ROOT/tools/format_tables.py"

28
build_html_zh.sh Executable file
View File

@@ -0,0 +1,28 @@
#!/bin/bash
# Chinese (zh) edition builder: renders zh_chapters/ into
# zh_chapters/_build/html/.
#
# Shared resources (img/, references/, static/, mlsys.bib) are kept at the
# repository root; links to them are placed inside zh_chapters/ so d2lbook
# can resolve them through relative paths.

# Stop at the first command that fails.
set -e

# Directory holding this script, resolved to an absolute path.
repo_root="$(cd "$(dirname "$0")" && pwd)"
book_dir="$repo_root/zh_chapters"

# Link each shared resource into the book directory unless something
# already exists at that path.
for res in img references static mlsys.bib; do
    dest="$book_dir/$res"
    [ -e "$dest" ] || ln -sf "$repo_root/$res" "$dest"
done

# Start from clean rst/html output, then run the two d2lbook stages.
cd "$book_dir"
rm -rf _build/rst _build/html
d2lbook build rst
cp static/frontpage.html _build/rst/
d2lbook build html

# Image copy is best-effort; errors are silenced and ignored.
cp -r static/image/* _build/html/_images/ 2>/dev/null || true

# The table formatter is invoked via its absolute path under the repo root.
python3 "$repo_root/tools/format_tables.py"

View File

@@ -1,43 +0,0 @@
## 分布式强化学习系统
分布式强化学习系统是比上面介绍的单节点强化学习系统更强大的一种。它能支持多环境多模型并行处理,主要是能同时在多个实际计算机系统上对多个模型进行更新,将大大提高强化学习系统的学习速度和整体表现。我们这里介绍分布式强化学习常见的算法和系统。
异步优势行动-批判者Asynchronous Advantage Actor-CriticA3C是由DeepMind研究人员 :cite:`mnih2016asynchronous`于2016年提出的可以在多个计算设备上并行更新网络的学习算法。相比于 :numref:`ch12/ch12-rlzoo`中的单节点强化学习系统A3C通过创建一组工作者Worker并将每个工作者分配到不同的计算设备上且为他们各自创建可以交互的环境来实现并行采样和模型更新同时用一个主Master节点维护这些行动者Actor和批判者Critic网络的更新。行动者是策略网络批判者是价值网络分别对应强化学习中的策略和价值函数。通过这样的设计整个算法的各个工作者可以实时将所采集到样本计算出的梯度回传到主节点来更新主节点的模型参数并在主节点模型更新后即时下发到各个工作者进行模型更新。每个工作者可以单独在一个 GPU 上进行运算,从而整个算法可以在一个 GPU 集群上并行更新模型,算法结构由 :numref:`ch12/ch12-a3c`所示。研究表明,分布式强化学习训练除加速模型学习之外,由于其更新梯度是由多个计算节点各自对环境采样计算得到的,还有利于稳定学习表现。
![A3C分布式算法架构](../img/ch12/ch12-a3c.png)
:width:`800px`
:label:`ch12/ch12-a3c`
重要性加权行动-学习者架构Importance Weighted Actor-Learner ArchitectureIMPALA) 是由Lasse Espeholt等人于2018年 :cite:`espeholt2018impala`提出的能够实现多机集群训练的强化学习框架,如:numref:`ch12/ch12-impala`所示。与 A3C 算法类似IMPALA 能够在多个 GPU 上并行进行梯度计算。具体地IMPALA 并行多个行动者Actor和学习者Learner每个行动者包含一个策略网络并用这个策略网络与一个环境进行交互以收集样本。所收集到的样本轨迹由行动者发送到各自的学习者进行梯度计算。所有的学习者中有一个称为主学习者它可以和其他所有学习者通信获取他们计算的梯度从而在主学习者内部对模型进行更新随后下发到各个学习者及行动者做新一轮的采样和梯度计算。IMPALA 被证明是比 A3C 更高效的分布式计算架构,它同时得益于一个特殊设计的学习者内的梯度计算函数,称为 V-轨迹目标V-trace Target通过重要性加权来稳定训练。我们这里侧重对分布式强化学习结构的介绍对此不再赘述。感兴趣的读者可以参考原论文
![IMPALA分布式算法架构](../img/ch12/ch12-impala.png)
:width:`800px`
:label:`ch12/ch12-impala`
以上是两个著名的分布式强化学习算法A3C和IMPALA最近研究中还有许多其他成果如SEED :cite:`espeholt2019seed`、Ape-X :cite:`horgan2018distributed`等都对分布式强化学习有更好的效果,我们不再做过多介绍。下面我们将讨论几个典型的分布式强化学习算法库。
![RLlib系统架构](../img/ch12/ch12-rllib-arch.svg)
:width:`800px`
:label:`ch12/ch12-rllib`
Ray :cite:`moritz2018ray`是由伯克利大学几名研究人员发起的一个分布式计算框架基于Ray之上构建了一个专门针对强化学习的系统RLlib :cite:`liang2017ray`。RLlib 是一个面向工业级应用的开源强化学习框架,同时
包含了强化学习的算法库,没有太多强化学习经验的人也可以很方便地使用 RLlib。
![RLlib分布式训练](../img/ch12/ch12-rllib-distributed.svg)
:width:`600px`
:label:`ch12/ch12-rllib_dist`
RLlib的系统架构如 :numref:`ch12/ch12-rllib`所示,系统底层是构建在 Ray 的分布式计算和通信的基础组件之
上,面向强化学习的领域概念,在 Python 层抽象了 Trainer, Environment, Policy 等基础组件并为各个抽象组件提供了一些常用的内置实现同时用户可以根据自己的算法场景对组件进行扩展通过这些内置以及自定义的算法组件研究人员可以方便快速地实现具体的强化学习算法。RLlib支持多种范式的分布式强化学习训练如 :numref:`ch12/ch12-rllib_dist`所示为基于同步采样的强化学习算法的分布式训练架构。其中每一个 Rollout Worker 为一个独立进程,负责和对应的环境进行交互以完成经验采集,多个 Rollout Worker 可以并行地完成环境交互Trainer 负责 Rollout Worker之间的协调策略优化以及将更新后的策略同步到 Rollout Worker 中。
强化学习中的策略通常可以采用深度神经网络,而基于深度神经网络的分布式强化学习训练,可以采用 RLlib 结合 PyTorch 或者 TensorFlow 等深度学习框架协同完成深度学习框架负责策略网络的训练和更新RLlib 负责强化学习的算法计算。此外 RLlib 支持与并行的向量化Vectorized环境交互允许外接模拟器以及可以进行离线Offline强化学习。
对于分布式系统中样本回放缓冲池的管理我们会提到另一个工作Reverb :cite:`cassirer2021reverb`。回忆本章开头我们介绍了强化学习中的状态、动作、奖励等概念实际强化学习算法进行训练所使用的数据正是存放在经验缓冲池中的这些数据元组而每种数据自身的格式可能又有不同实际使用时也需要对不同的数据做不同类型的操作。常见的数据操作类型如拼接、截取、乘积、转置、部分乘积、取均值、取极值等而每种操作都可能需要对特定数据的特定维度进行这常常给现有的强化学习框架在实践中产生一定的困难。为了方便强化学习过程中灵活使用不同的数据形式Reverb 设计了数据块的概念Chunks所有使用的训练数据在缓冲池中都使用数据块的格式进行管理和调用这一设计基于数据是多维张量的特点增大了数据使用的灵活性和访问速度。Acme :cite:`hoffman2020acme`是近年来由DeepMind提出的一个分布式强化学习框架同样是针对学术界的研究和工业界的应用它基于 Reverb 对样本缓冲池的数据管理结合分布式采样的结构给出了一个更快的分布式强化学习解决方案。Reverb 帮助解决了数据管理和传输的效率问题,使得 Acme得以将分布式计算的效力充分发挥研究人员用 Acme 在大量强化学习基准测试中取得了显著的速度提升。

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/appendix_machine_learning_introduction/classic_machine_learning.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/appendix_machine_learning_introduction/gradient_descent.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/appendix_machine_learning_introduction/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/appendix_machine_learning_introduction/neural_network.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_accelerator/accelerator_architecture.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_accelerator/accelerator_introduction.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_accelerator/accelerator_practise.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_accelerator/accelerator_programming.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_accelerator/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_accelerator/summary.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_backend_and_runtime/compute_schedule_and_execute.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_backend_and_runtime/graph_optimizer.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_backend_and_runtime/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_backend_and_runtime/kernel_selecter.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_backend_and_runtime/memory_allocator.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_backend_and_runtime/op_compiler.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_backend_and_runtime/overview.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_backend_and_runtime/summary.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_computational_graph/background_and_functionality.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_computational_graph/components_of_computational_graph.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_computational_graph/generation_of_computational_graph.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_computational_graph/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_computational_graph/schedule_of_computational_graph.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_computational_graph/summary.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_data_processing/data_order.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_data_processing/extension.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_data_processing/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_data_processing/performance.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_data_processing/program_model.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_data_processing/requirements.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_data_processing/summary.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_distributed_training/cluster.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_distributed_training/collective.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_distributed_training/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_distributed_training/methods.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_distributed_training/overview.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_distributed_training/parameter_servers.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_distributed_training/summary.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_explainable_AI/explainable_ai.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_explainable_AI/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_federated_learning/horizontal_fl.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_federated_learning/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_federated_learning/outlook.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_federated_learning/overview.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_federated_learning/privacy_encryption_algorithm.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_federated_learning/summary.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_federated_learning/vertical_fl.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_frontend_and_ir/ad.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_frontend_and_ir/ai_compiler_design_principle.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_frontend_and_ir/common_frontend_optimization_pass.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_frontend_and_ir/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_frontend_and_ir/intermediate_representation.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_frontend_and_ir/overview_of_frontend.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_frontend_and_ir/summary.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_frontend_and_ir/type_system_and_static_analysis.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_introduction/applications.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_introduction/architecture.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_introduction/design.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_introduction/ecosystem.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_introduction/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_introduction/readers.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_model_deployment/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_model_deployment/model_compression.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_model_deployment/model_converter_and_optimizer.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_model_deployment/model_deployment_introduction.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_model_deployment/model_inference.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_model_deployment/model_security.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_model_deployment/summary.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_preface/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_preface_advanced/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_preface_extension/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_programming_interface/c_python_interaction.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_programming_interface/development_history.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_programming_interface/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_programming_interface/ml_programming_paradigm.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_programming_interface/ml_workflow.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_programming_interface/neural_network_layer.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_programming_interface/summary.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_recommender_system/case_study.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_recommender_system/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_recommender_system/model_update.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_recommender_system/multi_stage_recommender_system.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_recommender_system/summary.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_recommender_system/system_architecture.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_reinforcement_learning/index.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_reinforcement_learning/marl.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_reinforcement_learning/marl_sys.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_reinforcement_learning/rl_introduction.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_reinforcement_learning/single_node_rl.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_reinforcement_learning/summary.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_rl_sys/control.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_rl_sys/control_code_ex.md]

View File

@@ -0,0 +1 @@
[TODO: src = zh_chapters/chapter_rl_sys/index.md]

Some files were not shown because too many files have changed in this diff Show More