fix chinese

This commit is contained in:
quantumiracle
2022-05-01 23:01:29 -04:00
parent 7cf61c2119
commit 0d4d741203
3 changed files with 466 additions and 2 deletions

View File

@@ -20,7 +20,7 @@
以上是两个著名的分布式强化学习算法A3C和IMPALA，最近研究中还有许多其他成果，如SEED :cite:`espeholt2019seed`、Ape-X :cite:`horgan2018distributed`等，都对分布式强化学习有更好的效果，我们不再做过多介绍。下面我们将讨论几个典型的分布式强化学习算法库。
![RLlib系统架构](../img/ch12/ch12-rllib-arch.png)
![RLlib系统架构](../img/ch12/ch12-rllib-arch.svg)
:width:`800px`
@@ -28,7 +28,7 @@
Ray :cite:`moritz2018ray`是由伯克利大学几名研究人员发起的一个分布式计算框架，在Ray之上构建了一个专门针对强化学习的系统RLlib :cite:`liang2017ray`。RLlib是一个面向工业级应用的开源强化学习框架，同时包含了强化学习的算法库，它对非强化学习专家使用也很方便。
![RLlib分布式训练](../img/ch12/ch12-rllib-distributed.png)
![RLlib分布式训练](../img/ch12/ch12-rllib-distributed.svg)
:width:`800px`

View File

@@ -0,0 +1,222 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
width="415.78836mm"
height="227.97214mm"
viewBox="0 0 415.78836 227.97214"
version="1.1"
id="svg5"
inkscape:version="1.1.1 (1:1.1+202109281949+c3084ef5ed)"
sodipodi:docname="rllib1.svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<!-- Editor view state written by Inkscape: page colours, document units (mm),
     zoom/scroll position, window geometry and the layer that was active
     (layer1-2). NOTE(review): presumably ignored by non-Inkscape renderers;
     confirm before stripping. -->
<sodipodi:namedview
id="namedview7"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:document-units="mm"
showgrid="false"
fit-margin-top="0"
fit-margin-left="0"
fit-margin-right="0"
fit-margin-bottom="0"
inkscape:zoom="0.45291836"
inkscape:cx="606.06949"
inkscape:cy="780.49386"
inkscape:window-width="1848"
inkscape:window-height="1136"
inkscape:window-x="72"
inkscape:window-y="27"
inkscape:window-maximized="1"
inkscape:current-layer="layer1-2" />
<!-- Empty definitions section: this drawing declares no markers, gradients
     or other reusable resources. -->
<defs
id="defs2" />
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(74.471364,125.55213)">
<g
inkscape:label="Layer 1"
id="layer1-2"
transform="translate(-56.466556,-170.91559)">
<!-- Top-left box. The fill colour is red but fill-opacity is 0, so only the
     black rounded outline is drawn. -->
<rect
style="fill:#ff0000;fill-opacity:0;stroke:#000000;stroke-width:1;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none"
id="rect2439"
width="196.35287"
height="51.963364"
x="-16.031324"
y="46.388351"
ry="7.2835722" />
<!-- Two-line label, centred on x (text-anchor:middle):
     "Built-in environment adapters" / "(e.g. gym.Env, OpenSpiel, Unity3D)". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="82.013535"
y="66.777641"
id="text3865"><tspan
sodipodi:role="line"
style="text-align:center;text-anchor:middle;stroke-width:0.264583"
x="82.013535"
y="66.777641"
id="tspan3867">内置环境适配器</tspan><tspan
sodipodi:role="line"
style="text-align:center;text-anchor:middle;stroke-width:0.264583"
x="82.013542"
y="81.279243"
id="tspan3869">(如gym.Env, OpenSpiel, Unity3D)</tspan></text>
<!-- Wide box spanning the drawing, below the rows of "built-in" boxes;
     outline only (fill-opacity is 0). -->
<rect
style="fill:#ff0000;fill-opacity:0;stroke:#000000;stroke-width:1;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none"
id="rect2439-4"
width="411.53979"
height="45.594776"
x="-16.918701"
y="181.33687"
ry="6.3909035" />
<!-- Centred two-line label: bold heading "RLlib core abstractions", then a
     normal-weight line "environments, workers, input readers, trainers,
     policies". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="176.1823"
y="200.71672"
id="text3865-4"><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="176.1823"
y="200.71672"
id="tspan3869-3">RLlib内核抽象</tspan><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:sans-serif;text-align:center;text-anchor:middle;stroke-width:0.264583"
x="176.1823"
y="215.21832"
id="tspan86060">环境,工作者,输入阅读器,训练器,策略</tspan></text>
<!-- Bottom wide box (lowest y in the drawing); outline only, with a thinner
     stroke (0.826335) than the other boxes (1). -->
<rect
style="fill:#ff0000;fill-opacity:0;stroke:#000000;stroke-width:0.826335;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none"
id="rect2439-4-5"
width="411.71347"
height="31.120325"
x="-17.59164"
y="241.80211"
ry="4.3620567" />
<!-- Centred label: bold "@ray.remote: " followed by normal-weight
     "tasks and actors" in a nested tspan. The second line tspan
     (tspan86060-2) is empty and contributes no visible text. -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="175.59619"
y="261.2688"
id="text3865-4-9"><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:'sans-serif Bold';text-align:center;text-anchor:middle;stroke-width:0.264583"
x="175.59619"
y="261.2688"
id="tspan3869-3-6">@ray.remote: <tspan
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:sans-serif"
id="tspan71205">任务和行动者</tspan></tspan><tspan
sodipodi:role="line"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:sans-serif;text-align:center;text-anchor:middle;stroke-width:0.264583"
x="175.59619"
y="274.49792"
id="tspan86060-2" /></text>
<!-- Top-right box, same size as the top-left box (196.35287 x 51.963364);
     outline only. -->
<rect
style="fill:#ff0000;fill-opacity:0;stroke:#000000;stroke-width:1;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none"
id="rect2439-9"
width="196.35287"
height="51.963364"
x="200.93066"
y="45.863461"
ry="7.2835722" />
<!-- Centred two-line label: "Built-in algorithms" / "(e.g. PPOTrainer)". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="300.61472"
y="67.605286"
id="text3865-3"><tspan
sodipodi:role="line"
id="tspan3863-9"
style="text-align:center;text-anchor:middle;stroke-width:0.264583"
x="300.61472"
y="67.605286">内置算法</tspan><tspan
sodipodi:role="line"
style="text-align:center;text-anchor:middle;stroke-width:0.264583"
x="300.61472"
y="82.106888"
id="tspan3869-8">(如PPOTrainer)</tspan></text>
<!-- Middle-row left box; outline only. -->
<rect
style="fill:#ff0000;fill-opacity:0;stroke:#000000;stroke-width:1;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none"
id="rect2439-8"
width="144.96021"
height="52.102989"
x="-16.117212"
y="111.84494"
ry="7.303143" />
<!-- Centred two-line label: "Built-in client/server support" /
     "(for external simulators)". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="56.822338"
y="127.51099"
id="text3865-5"><tspan
sodipodi:role="line"
style="text-align:center;text-anchor:middle;stroke-width:0.264583"
x="56.822338"
y="127.51099"
id="tspan3867-9">内置客户端/服务器支持</tspan><tspan
sodipodi:role="line"
style="text-align:center;text-anchor:middle;stroke-width:0.264583"
x="56.822342"
y="142.01259"
id="tspan3869-6">(用于外部模拟器)</tspan></text>
<!-- Middle-row right box, same size as the middle-row left box; outline
     only. -->
<rect
style="fill:#ff0000;fill-opacity:0;stroke:#000000;stroke-width:1;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none"
id="rect2439-8-1"
width="144.96021"
height="52.102989"
x="249.19214"
y="112.4102"
ry="7.303143" />
<!-- Centred two-line label: "Built-in default models" /
     "(including LSTM + attention mechanism)". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="322.29852"
y="131.36505"
id="text3865-5-5"><tspan
sodipodi:role="line"
style="text-align:center;text-anchor:middle;stroke-width:0.264583"
x="322.29852"
y="131.36505"
id="tspan3867-9-8">内置默认模型</tspan><tspan
sodipodi:role="line"
style="text-align:center;text-anchor:middle;stroke-width:0.264583"
x="322.29852"
y="145.86665"
id="tspan3869-6-4">(包括LSTM+注意力机制)</tspan></text>
<!-- Middle-row centre box, narrower (91.98288 wide) than its two
     neighbours; outline only. -->
<rect
style="fill:#ff0000;fill-opacity:0;stroke:#000000;stroke-width:1;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none"
id="rect2439-8-3"
width="91.98288"
height="52.304867"
x="141.09564"
y="112.26492"
ry="7.33144" />
<!-- Centred label split over two lines: "Built-in policies and" /
     "loss functions". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="189.86104"
y="129.81113"
id="text3865-5-8"><tspan
sodipodi:role="line"
style="text-align:center;text-anchor:middle;stroke-width:0.264583"
x="189.86104"
y="129.81113"
id="tspan55122">内置策略和</tspan><tspan
sodipodi:role="line"
style="text-align:center;text-anchor:middle;stroke-width:0.264583"
x="189.86104"
y="144.31273"
id="tspan33591">损失函数</tspan></text>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 9.4 KiB

View File

@@ -0,0 +1,242 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
width="272.17957mm"
height="140.78094mm"
viewBox="0 0 272.17956 140.78095"
version="1.1"
id="svg5"
inkscape:version="1.1.1 (1:1.1+202109281949+c3084ef5ed)"
sodipodi:docname="rllib_dist1.svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<!-- Editor view state written by Inkscape: page colours, document units (mm),
     zoom/scroll position, window geometry and the layer that was active
     (layer1-2). NOTE(review): presumably ignored by non-Inkscape renderers;
     confirm before stripping. -->
<sodipodi:namedview
id="namedview7"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:document-units="mm"
showgrid="false"
fit-margin-top="0"
fit-margin-left="0"
fit-margin-right="0"
fit-margin-bottom="0"
inkscape:zoom="0.45291836"
inkscape:cx="442.68464"
inkscape:cy="325.66576"
inkscape:window-width="1848"
inkscape:window-height="1136"
inkscape:window-x="72"
inkscape:window-y="27"
inkscape:window-maximized="1"
inkscape:current-layer="layer1-2" />
<defs
id="defs2">
<!-- Arrowhead used via marker-end by the two connector paths (path17845 and
     path18295). Its paint is the explicit colour #000000 rather than the
     SVG 2 keyword context-stroke, which SVG 1.1 renderers and converters do
     not understand. Both referencing paths use stroke:#000000, so the result
     is the same where context-stroke is supported. -->
<marker
style="overflow:visible"
id="Arrow2Mend"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow2Mend"
inkscape:isstock="true">
<path
transform="scale(-0.6)"
d="M 8.7185878,4.0337352 -2.2072895,0.01601326 8.7185884,-4.0017078 c -1.7454984,2.3720609 -1.7354408,5.6174519 -6e-7,8.035443 z"
style="fill:#000000;fill-rule:evenodd;stroke:#000000;stroke-width:0.625;stroke-linejoin:round"
id="path17874" />
</marker>
<!-- Second arrowhead definition. No element in this document references
     #Arrow1Lend; it is kept so existing ids stay available. Painted with
     the same explicit colour for consistency. -->
<marker
style="overflow:visible"
id="Arrow1Lend"
refX="0"
refY="0"
orient="auto"
inkscape:stockid="Arrow1Lend"
inkscape:isstock="true">
<path
transform="matrix(-0.8,0,0,-0.8,-10,0)"
style="fill:#000000;fill-rule:evenodd;stroke:#000000;stroke-width:1pt"
d="M 0,0 5,-5 -12.5,0 5,5 Z"
id="path17850" />
</marker>
</defs>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(-2.8301877,53.170878)">
<g
inkscape:label="Layer 1"
id="layer1-2"
transform="translate(27.267558,-90.996319)">
<!-- Large left-hand container box; outline only (fill-opacity is 0). The two
     smaller boxes rect2439-8 and rect2439-8-9 lie inside its bounds. -->
<rect
style="fill:#ff0000;fill-opacity:0;stroke:#000000;stroke-width:1;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none"
id="rect2439"
width="117.86252"
height="67.372093"
x="-23.93737"
y="79.646179"
ry="7.9062257" />
<!-- Start-anchored label near the container's left edge: "Trainer". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="-17.67737"
y="117.43518"
id="text3785"><tspan
sodipodi:role="line"
id="tspan3783"
style="stroke-width:0.264583"
x="-17.67737"
y="117.43518">训练器</tspan></text>
<!-- Upper inner box of the trainer container; outline only. -->
<rect
style="fill:#ff0000;fill-opacity:0;stroke:#000000;stroke-width:1;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none"
id="rect2439-8"
width="54.641663"
height="25.560568"
x="30.502342"
y="82.889626"
ry="2.9995747" />
<!-- Label: "Concat" (concatenation of sample batches). -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="46.489758"
y="98.735794"
id="text3785-5"><tspan
sodipodi:role="line"
id="tspan3783-0"
style="stroke-width:0.264583"
x="46.489758"
y="98.735794">拼接</tspan></text>
<!-- Lower inner box of the trainer container, same size as the box above
     it; outline only. -->
<rect
style="fill:#ff0000;fill-opacity:0;stroke:#000000;stroke-width:1;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none"
id="rect2439-8-9"
width="54.641663"
height="25.560568"
x="30.239189"
y="117.45507"
ry="2.9995747" />
<!-- Label: "Learner". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="41.634937"
y="133.92754"
id="text3785-5-6"><tspan
sodipodi:role="line"
id="tspan3783-0-3"
style="stroke-width:0.264583"
x="41.634937"
y="133.92754">学习器</tspan></text>
<!-- Three identically sized boxes, each offset down and to the right of the
     previous one. Document order is back to front: the first is outline
     only, the next two are filled white so each covers part of the one
     behind it, giving a stacked-cards look. -->
<rect
style="fill:#ff0000;fill-opacity:0;stroke:#000000;stroke-width:1;stroke-linejoin:round"
id="rect2439-9-8"
width="89.30571"
height="38.476437"
x="146.70959"
y="72.034241"
ry="4.5152731" />
<rect
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-linejoin:round"
id="rect2439-9-0"
width="89.30571"
height="38.476437"
x="152.25026"
y="78.543053"
ry="4.5152731" />
<rect
style="fill:#ffffff;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-linejoin:round"
id="rect2439-9"
width="89.30571"
height="38.476437"
x="157.93648"
y="85.490364"
ry="4.5152731" />
<!-- Label placed over the front box: "Sampling workers". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="175.90797"
y="108.62353"
id="text3785-3"><tspan
sodipodi:role="line"
id="tspan3783-9"
style="stroke-width:0.264583"
x="175.90797"
y="108.62353">采样工作者</tspan></text>
<!-- Lower connector with an arrowhead at its end (marker-end Arrow2Mend).
     From the coordinates it starts just below the "Learner" box, runs down,
     then right, then up towards the underside of the stacked worker boxes. -->
<path
style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow2Mend)"
d="m 59.266346,143.17388 -0.137708,18.93118 146.731042,1.06734 0.17197,-37.75235"
id="path17845"
sodipodi:nodetypes="cccc" />
<!-- Short vertical segment without an arrowhead, spanning the gap between
     the "Concat" box and the "Learner" box. -->
<path
style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 59.75092,108.92605 0.03189,8.81987"
id="path18293" />
<!-- Upper connector with an arrowhead at its end. From the coordinates it
     starts just above the rearmost worker box, runs up, then left, then
     down towards the top of the "Concat" box. -->
<path
style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow2Mend)"
d="M 205.96492,71.048464 206.19379,39.397006 58.50728,38.32907 58.51278,81.318156"
id="path18295"
sodipodi:nodetypes="cccc" />
<!-- Italic label: "Sample batches". NOTE(review): positioned near the end of
     the upper connector, so it presumably names the data carried into
     "Concat"; confirm against the rendered figure. -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="19.942596"
y="69.28083"
id="text19014"><tspan
sodipodi:role="line"
id="tspan19012"
style="font-style:italic;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:'sans-serif Italic';stroke-width:0.264583"
x="19.942596"
y="69.28083">批样本</tspan></text>
<!-- Italic label: "New weights". NOTE(review): positioned near the end of
     the lower connector, so it presumably names the data sent to the
     sampling workers; confirm against the rendered figure. -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="158.9675"
y="140.5659"
id="text19014-1"><tspan
sodipodi:role="line"
id="tspan19012-1"
style="font-style:italic;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:'sans-serif Italic';stroke-width:0.264583"
x="158.9675"
y="140.5659">新的权重</tspan></text>
<!-- Green (#008000) label next to the lower connector's horizontal run:
     "Synchronous broadcast". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="109.50063"
y="157.80412"
id="text19014-1-5"><tspan
sodipodi:role="line"
id="tspan19012-1-9"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:sans-serif;fill:#008000;stroke-width:0.264583"
x="109.50063"
y="157.80412">同步广播</tspan></text>
<!-- Green (#008000) label next to the upper connector's horizontal run:
     "Synchronous sampling". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="109.03542"
y="51.166973"
id="text19014-1-5-8"><tspan
sodipodi:role="line"
id="tspan19012-1-9-4"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:sans-serif;-inkscape-font-specification:sans-serif;fill:#008000;stroke-width:0.264583"
x="109.03542"
y="51.166973">同步采样</tspan></text>
<!-- Caption at the largest y of any text in the drawing (bottom of the
     figure): "Synchronous sampling, e.g. A2C, PG, PPO". -->
<text
xml:space="preserve"
style="font-style:normal;font-weight:normal;font-size:10.5833px;line-height:1.25;font-family:sans-serif;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583"
x="63.407452"
y="177.3765"
id="text19014-8"><tspan
sodipodi:role="line"
id="tspan19012-5"
style="stroke-width:0.264583"
x="63.407452"
y="177.3765">同步采样如A2C, PG, PPO</tspan></text>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 9.8 KiB