diff --git a/.vscode/settings.json b/.vscode/settings.json index 87db21fb..1e3b2f16 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -74,5 +74,6 @@ "xstddef": "cpp", "xtr1common": "cpp", "xtree": "cpp" - } + }, + "git.ignoreLimitWarning": true } \ No newline at end of file diff --git a/Python/sklearn/sklearn-cookbook-zh/.gitignore b/Python/sklearn/sklearn-cookbook-zh/.gitignore deleted file mode 100644 index aafa7683..00000000 --- a/Python/sklearn/sklearn-cookbook-zh/.gitignore +++ /dev/null @@ -1 +0,0 @@ -Thumbs.db \ No newline at end of file diff --git a/Python/sklearn/sklearn-cookbook-zh/LICENSE b/Python/sklearn/sklearn-cookbook-zh/LICENSE deleted file mode 100644 index 53202f12..00000000 --- a/Python/sklearn/sklearn-cookbook-zh/LICENSE +++ /dev/null @@ -1,101 +0,0 @@ -Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License (CC BY-NC-SA 4.0) - -Copyright © 2020 ApacheCN(apachecn@163.com) - -By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. - -Section 1 – Definitions. - -a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. -b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. -c. BY-NC-SA Compatible License means a license listed at creativecommons.org/compatiblelicenses, approved by Creative Commons as essentially the equivalent of this Public License. -d. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. -e. Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. -f. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. -g. License Elements means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution, NonCommercial, and ShareAlike. -h. 
Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. -i. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. -j. Licensor means the individual(s) or entity(ies) granting rights under this Public License. -k. NonCommercial means not primarily intended for or directed towards commercial advantage or monetary compensation. For purposes of this Public License, the exchange of the Licensed Material for other material subject to Copyright and Similar Rights by digital file-sharing or similar means is NonCommercial provided there is no payment of monetary compensation in connection with the exchange. -l. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. -m. Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. -n. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. - -Section 2 – Scope. - -a. License grant. - 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: - A. reproduce and Share the Licensed Material, in whole or in part, for NonCommercial purposes only; and - B. produce, reproduce, and Share Adapted Material for NonCommercial purposes only. - 2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. - 3. Term. The term of this Public License is specified in Section 6(a). - 4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. - 5. Downstream recipients. - A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. - B. Additional offer from the Licensor – Adapted Material. 
Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter’s License You apply. - C. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. - 6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). -b. Other rights. - 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. - 2. Patent and trademark rights are not licensed under this Public License. - 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties, including when the Licensed Material is used other than for NonCommercial purposes. - -Section 3 – License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the following conditions. - -a. Attribution. - 1. If You Share the Licensed Material (including in modified form), You must: - A. retain the following if it is supplied by the Licensor with the Licensed Material: - i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); - ii. a copyright notice; - iii. a notice that refers to this Public License; - iv. a notice that refers to the disclaimer of warranties; - v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; - B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and - C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. - 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. - 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. -b. ShareAlike. - In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply. - 1. The Adapter’s License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-NC-SA Compatible License. - 2. 
You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material. - 3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply. - -Section 4 – Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: - -a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database for NonCommercial purposes only; -b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and -c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. - -For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. - -Section 5 – Disclaimer of Warranties and Limitation of Liability. - -a. Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You. -b. To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You. -c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. - -Section 6 – Term and Termination. - -a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. -b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: - 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or - 2. upon express reinstatement by the Licensor. 
- For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. -c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. -d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. - -Section 7 – Other Terms and Conditions. - -a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. -b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. - -Section 8 – Interpretation. - -a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. -b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. -c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. -d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. 
\ No newline at end of file diff --git a/Python/sklearn/sklearn-cookbook-zh/README.md b/Python/sklearn/sklearn-cookbook-zh/README.md deleted file mode 100644 index a3facd90..00000000 --- a/Python/sklearn/sklearn-cookbook-zh/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# Scikit-learn Cookbook (Chinese Translation) - -> Original book: [Scikit-learn Cookbook](https://www.packtpub.com/big-data-and-business-intelligence/scikit-learn-cookbook) -> -> License: [CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/) - -> -> Everyone is welcome to take part and improve it: one person can travel fast, but a group travels farther. - -+ [ApacheCN machine learning chat group 629470233](http://shang.qq.com/wpa/qunwpa?idkey=30e5f1123a79867570f665aa3a483ca404b1c3f77737bc01ec520ed5f078ddef) -+ [ApacheCN learning resources](http://www.apachecn.org/) - - - -+ [Read online](https://www.gitbook.com/book/wizardforcel/sklearn-cookbook/details) -+ [PDF format](https://www.gitbook.com/download/pdf/book/wizardforcel/sklearn-cookbook) -+ [EPUB format](https://www.gitbook.com/download/epub/book/wizardforcel/sklearn-cookbook) -+ [MOBI format](https://www.gitbook.com/download/mobi/book/wizardforcel/sklearn-cookbook) -+ [Code repository](http://git.oschina.net/wizardforcel/sklearn-cb) - -## Translators - -| | Chapter | Translator | -| --- | --- | --- | -| 1 | Preprocessing | [muxuezi](https://muxuezi.github.io/posts/1-premodel-workflow.html) | -| 2 | Regression | [muxuezi](https://muxuezi.github.io/posts/2-working-with-linear-models.html) | -| 3 | Clustering | [飞龙](https://github.com/wizardforcel) | -| 4 | Classification | [飞龙](https://github.com/wizardforcel) | -| 5 | Postprocessing | [飞龙](https://github.com/wizardforcel) | - -## Sponsor me - -![](http://ww1.sinaimg.cn/large/841aea59ly1fx0qnvulnjj2074074747.jpg) diff --git a/Python/sklearn/sklearn-cookbook-zh/cover.jpg b/Python/sklearn/sklearn-cookbook-zh/cover.jpg deleted file mode 100644 index e8674823..00000000 Binary files a/Python/sklearn/sklearn-cookbook-zh/cover.jpg and /dev/null differ diff --git a/Python/sklearn/sklearn-cookbook-zh/styles/ebook.css b/Python/sklearn/sklearn-cookbook-zh/styles/ebook.css deleted file mode 100644 index 1791fd46..00000000 --- a/Python/sklearn/sklearn-cookbook-zh/styles/ebook.css +++ /dev/null @@ -1,284 +0,0 @@ -/* GitHub stylesheet for MarkdownPad (http://markdownpad.com) */ -/* Author: Nicolas Hery - http://nicolashery.com */ -/* Version: b13fe65ca28d2e568c6ed5d7f06581183df8f2ff */ -/* Source: https://github.com/nicolahery/markdownpad-github */ - -/* RESET -=============================================================================*/ - -html, body, div, span, applet, object, iframe, h1, h2, h3, h4, h5, h6, p, blockquote, pre, a, abbr, acronym, address, big, cite, code, del, dfn, em, img, ins, kbd, q, s, samp, small, strike, strong, sub, sup, tt, var, b, u, i, center, dl, dt, dd, ol, ul, li, fieldset, form, label, legend, table, caption, tbody, tfoot, thead, tr, th, td, article, aside, canvas, details, embed, figure, figcaption, footer, header, hgroup, menu, nav, output, ruby, section, summary, time, mark, audio, video { - margin: 0; - padding: 0; - border: 0; -} - -/* BODY -=============================================================================*/ - -body { - font-family: Helvetica, arial, freesans, clean, sans-serif; - font-size: 14px; - line-height: 1.6; - color: #333; - background-color: #fff; - padding: 20px; - max-width: 960px; - margin: 0 auto; -} - -body>*:first-child { - margin-top: 0 !important; -} - -body>*:last-child { - margin-bottom: 0 !important; -} - -/* BLOCKS -=============================================================================*/ - -p, blockquote, ul, ol, dl, table, pre { - margin: 15px 0; -} - -/* HEADERS
-=============================================================================*/ - -h1, h2, h3, h4, h5, h6 { - margin: 20px 0 10px; - padding: 0; - font-weight: bold; - -webkit-font-smoothing: antialiased; -} - -h1 tt, h1 code, h2 tt, h2 code, h3 tt, h3 code, h4 tt, h4 code, h5 tt, h5 code, h6 tt, h6 code { - font-size: inherit; -} - -h1 { - font-size: 24px; - border-bottom: 1px solid #ccc; - color: #000; -} - -h2 { - font-size: 18px; - color: #000; -} - -h3 { - font-size: 14px; -} - -h4 { - font-size: 14px; -} - -h5 { - font-size: 14px; -} - -h6 { - color: #777; - font-size: 14px; -} - -body>h2:first-child, body>h1:first-child, body>h1:first-child+h2, body>h3:first-child, body>h4:first-child, body>h5:first-child, body>h6:first-child { - margin-top: 0; - padding-top: 0; -} - -a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 { - margin-top: 0; - padding-top: 0; -} - -h1+p, h2+p, h3+p, h4+p, h5+p, h6+p { - margin-top: 10px; -} - -/* LINKS -=============================================================================*/ - -a { - color: #4183C4; - text-decoration: none; -} - -a:hover { - text-decoration: underline; -} - -/* LISTS -=============================================================================*/ - -ul, ol { - padding-left: 30px; -} - -ul li > :first-child, -ol li > :first-child, -ul li ul:first-of-type, -ol li ol:first-of-type, -ul li ol:first-of-type, -ol li ul:first-of-type { - margin-top: 0px; -} - -ul ul, ul ol, ol ol, ol ul { - margin-bottom: 0; -} - -dl { - padding: 0; -} - -dl dt { - font-size: 14px; - font-weight: bold; - font-style: italic; - padding: 0; - margin: 15px 0 5px; -} - -dl dt:first-child { - padding: 0; -} - -dl dt>:first-child { - margin-top: 0px; -} - -dl dt>:last-child { - margin-bottom: 0px; -} - -dl dd { - margin: 0 0 15px; - padding: 0 15px; -} - -dl dd>:first-child { - margin-top: 0px; -} - -dl dd>:last-child { - margin-bottom: 0px; -} - -/* CODE -=============================================================================*/ - -pre, code, tt { - font-size: 12px; - font-family: Consolas, "Liberation Mono", Courier, monospace; -} - -code, tt { - margin: 0 0px; - padding: 0px 0px; - white-space: nowrap; - border: 1px solid #eaeaea; - background-color: #f8f8f8; - border-radius: 3px; -} - -pre>code { - margin: 0; - padding: 0; - white-space: pre; - border: none; - background: transparent; -} - -pre { - background-color: #f8f8f8; - border: 1px solid #ccc; - font-size: 13px; - line-height: 19px; - overflow: auto; - padding: 6px 10px; - border-radius: 3px; -} - -pre code, pre tt { - background-color: transparent; - border: none; -} - -kbd { - -moz-border-bottom-colors: none; - -moz-border-left-colors: none; - -moz-border-right-colors: none; - -moz-border-top-colors: none; - background-color: #DDDDDD; - background-image: linear-gradient(#F1F1F1, #DDDDDD); - background-repeat: repeat-x; - border-color: #DDDDDD #CCCCCC #CCCCCC #DDDDDD; - border-image: none; - border-radius: 2px 2px 2px 2px; - border-style: solid; - border-width: 1px; - font-family: "Helvetica Neue",Helvetica,Arial,sans-serif; - line-height: 10px; - padding: 1px 4px; -} - -/* QUOTES -=============================================================================*/ - -blockquote { - border-left: 4px solid #DDD; - padding: 0 15px; - color: #777; -} - -blockquote>:first-child { - margin-top: 0px; -} - -blockquote>:last-child { - margin-bottom: 0px; -} - -/* HORIZONTAL RULES 
-=============================================================================*/ - -hr { - clear: both; - margin: 15px 0; - height: 0px; - overflow: hidden; - border: none; - background: transparent; - border-bottom: 4px solid #ddd; - padding: 0; -} - -/* TABLES -=============================================================================*/ - -table th { - font-weight: bold; -} - -table th, table td { - border: 1px solid #ccc; - padding: 6px 13px; -} - -table tr { - border-top: 1px solid #ccc; - background-color: #fff; -} - -table tr:nth-child(2n) { - background-color: #f8f8f8; -} - -/* IMAGES -=============================================================================*/ - -img { - max-width: 100% -} \ No newline at end of file diff --git a/Python/sklearn/sklearn-doc-zh/README.md b/Python/sklearn/sklearn-doc-zh/README.md deleted file mode 100644 index 6a8a522c..00000000 --- a/Python/sklearn/sklearn-doc-zh/README.md +++ /dev/null @@ -1,216 +0,0 @@ -#
scikit-learn (sklearn) Official Documentation (Chinese Edition) - -[logo] - -[link table: sklearn 0.21.3 Chinese documentation | sklearn 0.21.3 Chinese examples | sklearn official English site] - - --- - -## Introduction - -sklearn (scikit-learn) is a machine learning toolkit built on Python - -1. Simple and efficient tools for data mining and data analysis -2. Reusable by everyone in a variety of settings -3. Built on NumPy, SciPy, and matplotlib -4. Open source and commercially usable - BSD license - -> Organization-built [sites] - -+ GitHub Pages (international): https://sklearn.apachecn.org -+ Gitee Pages (China): https://apachecn.gitee.io/sklearn-doc-zh - -> Third-party [sites] - -+ sklearn Chinese documentation: http://www.scikitlearn.com.cn -+ Mirror A: xxx (leave a comment and we will add it) - -> Other resources - -+ [Official GitHub](https://github.com/apachecn/scikit-learn-doc-zh) -+ [EPUB download](https://github.com/apachecn/sklearn-doc-zh/raw/epub/sklearn_0.21.3_2019_12_13.epub) - -## Download - -### Docker - -``` -docker pull apachecn0/sklearn-doc-zh -docker run -tid -p {port}:80 apachecn0/sklearn-doc-zh -# visit http://localhost:{port} to view the docs -``` - -### PYPI - -``` -pip install sklearn-doc-zh -sklearn-doc-zh {port} -# visit http://localhost:{port} to view the docs -``` - -### NPM - -``` -npm install -g sklearn-doc-zh -sklearn-doc-zh {port} -# visit http://localhost:{port} to view the docs -``` - -## Table of contents - -* [Installing scikit-learn](docs/master/62.md) -* User guide - * [1. Supervised learning](docs/master/1.md) - * [1.1. Generalized linear models](docs/master/2.md) - * [1.2. Linear and quadratic discriminant analysis](docs/master/3.md) - * [1.3. Kernel ridge regression](docs/master/4.md) - * [1.4. Support vector machines](docs/master/5.md) - * [1.5. Stochastic gradient descent](docs/master/6.md) - * [1.6. Nearest neighbors](docs/master/7.md) - * [1.7. Gaussian processes](docs/master/8.md) - * [1.8. Cross decomposition](docs/master/9.md) - * [1.9. Naive Bayes](docs/master/10.md) - * [1.10. Decision trees](docs/master/11.md) - * [1.11. Ensemble methods](docs/master/12.md) - * [1.12. Multiclass and multilabel algorithms](docs/master/13.md) - * [1.13. Feature selection](docs/master/14.md) - * [1.14. Semi-supervised learning](docs/master/15.md) - * [1.15. Isotonic regression](docs/master/16.md) - * [1.16. Probability calibration](docs/master/17.md) - * [1.17. Neural network models (supervised)](docs/master/18.md) - * [2. Unsupervised learning](docs/master/19.md) - * [2.1. Gaussian mixture models](docs/master/20.md) - * [2.2. Manifold learning](docs/master/21.md) - * [2.3. Clustering](docs/master/22.md) - * [2.4. Biclustering](docs/master/23.md) - * [2.5. Decomposing signals in components (matrix factorization problems)](docs/master/24.md) - * [2.6. Covariance estimation](docs/master/25.md) - * [2.7. Novelty and outlier detection](docs/master/26.md) - * [2.8. Density estimation](docs/master/27.md) - * [2.9. Neural network models (unsupervised)](docs/master/28.md) - * [3. Model selection and evaluation](docs/master/29.md) - * [3.1. Cross-validation: evaluating estimator performance](docs/master/30.md) - * [3.2. Tuning the hyper-parameters of an estimator](docs/master/31.md) - * [3.3. Model evaluation: quantifying the quality of predictions](docs/master/32.md) - * [3.4. Model persistence](docs/master/33.md) - * [3.5. Validation curves: plotting scores to evaluate models](docs/master/34.md) - * [4. Inspection](docs/master/35.md) - * [4.1. Partial dependence plots](docs/master/36.md) - * [5. Dataset transformations](docs/master/37.md) - * [5.1. Pipeline and FeatureUnion: combining estimators](docs/master/38.md) - * [5.2. Feature extraction](docs/master/39.md) - * [5.3. Preprocessing data](docs/master/40.md) - * [5.4. Imputation of missing values](docs/master/41.md) - * [5.5. Unsupervised dimensionality reduction](docs/master/42.md) - * [5.6. Random projection](docs/master/43.md) - * [5.7. Kernel approximation](docs/master/44.md) - * [5.8. Pairwise metrics, affinities and kernels](docs/master/45.md) - * [5.9. Transforming the prediction target (`y`)](docs/master/46.md) - * [6. Dataset loading utilities](docs/master/47.md) - * [6.1. General dataset API](docs/master/47.md) - * [6.2. Toy datasets](docs/master/47.md) - * [6.3. Real-world datasets](docs/master/47.md) - * [6.4. Sample generators](docs/master/47.md) - * [6.5. Loading other datasets](docs/master/47.md) - * [7. Computing with scikit-learn](docs/master/48.md) - * [7.1. Strategies to scale computationally: bigger data](docs/master/48.md) - * [7.2. Computational performance](docs/master/48.md) - * [7.3. Parallelism, resource management, and configuration](docs/master/48.md) -* [Tutorials](docs/master/50.md) - * [An introduction to machine learning with scikit-learn](docs/master/51.md) - * [A tutorial on statistical learning for scientific data processing](docs/master/52.md) - * [Machine learning: the problem setting and estimator objects in scikit-learn](docs/master/53.md) - * [Supervised learning: predicting an output variable from high-dimensional observations](docs/master/54.md) - * [Model selection: choosing estimators and their parameters](docs/master/55.md) - * [Unsupervised learning: seeking representations of the data](docs/master/56.md) - * [Putting it all together](docs/master/57.md) - * [Finding help](docs/master/58.md) - * [Working with text data](docs/master/59.md) - * [Choosing the right estimator](docs/master/60.md) - * [External resources, videos and talks](docs/master/61.md) -* [API reference](https://scikit-learn.org/stable/modules/classes.html) -* [FAQ](docs/master/63.md) -* [Timeline](docs/master/64.md) - -## Historical versions - -* [scikit-learn (sklearn) 0.19 official documentation, Chinese edition](https://github.com/apachecn/sklearn-doc-zh/tree/master/docs/0.19.x.zip) -* [scikit-learn (sklearn) 0.18 official documentation, Chinese edition](http://cwiki.apachecn.org/pages/viewpage.action?pageId=10030181) - -How to build and use a historical version: - -* Unzip the `0.19.x.zip` archive -* Copy the image assets from `master/img` into `0.19.x` -* Build with the usual gitbook process; you can run `sh run_website.sh` - -## Contributing guide - -The project is currently in the proofreading stage. Please read the [contributing guide](CONTRIBUTING.md) and claim a task in the [overall progress tracker](https://github.com/apachecn/sklearn-doc-zh/issues/352). - -> Please translate and improve the translation boldly. We pursue excellence, but we do not demand perfection, so do not be afraid of making mistakes while translating: in most cases our server records all translations, so a slip of yours cannot cause irreparable damage. (Adapted from Wikipedia) - -## Project leads - -Format: GitHub + QQ - -> Phase 1 (2017-09-29) - -* [@那伊抹微笑](https://github.com/wangyangting) -* [@片刻](https://github.com/jiangzhonglian) -* [@小瑶](https://github.com/chenyyx) - -> Phase 2 (2019-06-29) - -* [@N!no](https://github.com/lovelybuggies): 1352899627 -* [@mahaoyang](https://github.com/mahaoyang): 992635910 -* [@loopyme](https://github.com/loopyme): 3322728009 -* [飞龙](https://github.com/wizardforcel): 562826179 -* [片刻](https://github.com/jiangzhonglian): 529815144 - --- Requirements for leads (contributions to the `sklearn` Chinese edition are welcome): - -* Love open source (and enjoy showing off a little) -* Long-term sklearn user (at least half a year) + at least 3 submitted pull requests -* Have time to promptly fix page bugs and handle user issues -* Trial period: 2 months -* Contact: [片刻](https://github.com/jiangzhonglian) 529815144 - -## Contributors - -[Contributor list for 0.19.X](https://github.com/apachecn/sklearn-doc-zh/issues/354) - -## Feedback - -* Open an issue on our GitHub repo [apachecn/sklearn-doc-zh](https://github.com/apachecn/sklearn-doc-zh). -* Send an email to `apachecn@163.com`. -* Contact the group owner/admins in our [QQ groups, see: contact info](https://github.com/apachecn/home). - -## **Project license** - -* **Recently a lot of people have contacted us about content licensing!** -* Open source means knowledge should, above all, spread and iterate (not that reposting is forbidden) -* Otherwise, open-sourcing on GitHub and then forbidding reposts makes no sense at all! -* No commercial use; follow the license and credit the source. **Key point: you do not need** to email us for permission -* Projects under the ApacheCN account that carry no license are all treated as [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.zh). - -A friendly reminder: - -* For individuals who want to copy the project and maintain their own fork: -* I have had that experience too, but that kind of enthusiasm rarely lasts more than a few months before fizzling out! -* It wastes your hard work, and it also keeps more people from seeing your translations. A pity, don't you think? -* My suggestion: fork, then send pull requests to `https://github.com/apachecn/sklearn-doc-zh` -* So why choose `ApacheCN`? -* Because we translate for fun and for bragging rights; the motivation is fairly pure! -* If you like it, you can take part in, or even take charge of, this project; there are no education or background requirements - -## Sponsor us - -WeChat & Alipay diff --git a/Python/sklearn/sklearn-doc-zh/master/book.json b/Python/sklearn/sklearn-doc-zh/master/book.json deleted file mode 100644 index 430e399a..00000000 --- a/Python/sklearn/sklearn-doc-zh/master/book.json +++ /dev/null @@ -1,176 +0,0 @@ -{ - "title" : "sklearn 中文文档", - "author" : "ApacheCN", - "description" : "sklearn 中文文档: 教程和文档", - "language" : "zh-hans", - "plugins": [ - "github", - "github-buttons", - "-sharing", - "insert-logo", - "sharing-plus", - "back-to-top-button", - "code", - "copy-code-button", - "katex", - "pageview-count", - "edit-link", - "emphasize", - "alerts", - "auto-scroll-table", - "popup", - "hide-element", - "page-toc-button", - "tbfed-pagefooter", - "sitemap", - "advanced-emoji", - "expandable-chapters", - "splitter", - "search-pro" - ], - "pluginsConfig": { - "github": { - "url": "https://github.com/apachecn/sklearn-doc-zh" - }, - "github-buttons": { - "buttons": [ - { - "user": "apachecn", - "repo": "sklearn-doc-zh", - "type": "star", - "count": true, - "size": "small" - } - ] - }, - "insert-logo": { - "url": "http://data.apachecn.org/img/logo.jpg", - "style": "background: none; max-height: 150px; min-height: 150px" - }, - "hide-element": { - "elements": [".gitbook-link"] - }, - "edit-link": { - "base": "https://github.com/apachecn/sklearn-doc-zh/blob/master/docs/0.21.3", - "label": "编辑本页" - }, - "sharing": { - "qzone": true, - "weibo": true, - "twitter": false, - "facebook": false, - "google": false, - "qq": false, - "line": false, - "whatsapp": false, - "douban": false, - "all": [ - "qq", "douban", "facebook", "google", "linkedin", "twitter", "weibo", "whatsapp" - ] - }, - "page-toc-button": { - "maxTocDepth": 4, - "minTocSize": 4 - }, - "tbfed-pagefooter": { - "copyright":"Copyright © ibooker.org.cn 2019", - "modify_label": "该文件修订时间: ", - "modify_format": "YYYY-MM-DD HH:mm:ss" - }, - "sitemap": { - "hostname": "http://sklearn.apachecn.org" - } - }, - "my_links" : { - "sidebar" : { - "Home" : "https://www.baidu.com" - } - }, - "my_plugins": [ - "donate", - "todo", - "-lunr", - "-search", - "expandable-chapters-small", - "chapter-fold", - "expandable-chapters", - "expandable-chapters-small", - "back-to-top-button", - "ga", - "baidu", - "sitemap", - "tbfed-pagefooter", - "advanced-emoji", - "sectionx", - "page-treeview", - "simple-page-toc", - "ancre-navigation", - "theme-apachecn@git+https://github.com/apachecn/theme-apachecn#HEAD", - "pagefooter-apachecn@git+https://github.com/apachecn/gitbook-plugin-pagefooter-apachecn#HEAD" - ], - "my_pluginsConfig": { - "github-buttons": { - "buttons": [ - { - "user": "apachecn", - "repo": "sklearn-doc-zh", - "type": "star", - "count": true, - "size": "small" - }, - { - "user": "apachecn", - "width": "160", - "type": "follow", - "count": true, - "size": "small" - } - ] - }, - "ignores": ["node_modules"], - "simple-page-toc": { - "maxDepth": 3, - "skipFirstH1": true - }, - "page-toc-button": { - "maxTocDepth": 2, - "minTocSize": 2 - }, - "page-treeview": { - "copyright": "Copyright © aleen42", - "minHeaderCount": "2", - "minHeaderDeep": "2" - }, - "donate": { - "wechat": "微信收款的二维码URL", - "alipay": "支付宝收款的二维码URL", - "title": "", - "button": "赏", - "alipayText": "支付宝打赏", - "wechatText": "微信打赏" - }, - "page-copyright": { - "description": "modified at", - "signature": "你的签名", - "wisdom": "Designer, Frontend Developer & overall web enthusiast", - "format": "YYYY-MM-dd hh:mm:ss", - "copyright": "Copyright © 你的名字", - "timeColor": "#666", - "copyrightColor": "#666", -
"utcOffset": "8", - "style": "normal", - "noPowered": false - }, - "ga": { - "token": "UA-102475051-10" - }, - "baidu": { - "token": "75439e2cbd22bdd813226000e9dcc12f" - }, - "pagefooter-apachecn": { - "copyright":"Copyright © ibooker.org.cn 2019", - "modify_label": "该文件修订时间: ", - "modify_format": "YYYY-MM-DD HH:mm:ss" - } - } -} diff --git a/Python/sklearn/sklearn-cookbook-zh/1.md b/Sklearn/sklearn-cookbook-zh/1.md similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/1.md rename to Sklearn/sklearn-cookbook-zh/1.md diff --git a/Python/sklearn/sklearn-cookbook-zh/2.md b/Sklearn/sklearn-cookbook-zh/2.md similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/2.md rename to Sklearn/sklearn-cookbook-zh/2.md diff --git a/Python/sklearn/sklearn-cookbook-zh/3.md b/Sklearn/sklearn-cookbook-zh/3.md similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/3.md rename to Sklearn/sklearn-cookbook-zh/3.md diff --git a/Python/sklearn/sklearn-cookbook-zh/4.md b/Sklearn/sklearn-cookbook-zh/4.md similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/4.md rename to Sklearn/sklearn-cookbook-zh/4.md diff --git a/Python/sklearn/sklearn-cookbook-zh/5.md b/Sklearn/sklearn-cookbook-zh/5.md similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/5.md rename to Sklearn/sklearn-cookbook-zh/5.md diff --git a/Python/sklearn/sklearn-cookbook-zh/SUMMARY.md b/Sklearn/sklearn-cookbook-zh/SUMMARY.md similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/SUMMARY.md rename to Sklearn/sklearn-cookbook-zh/SUMMARY.md diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-10-1.png b/Sklearn/sklearn-cookbook-zh/img/1-10-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-10-1.png rename to Sklearn/sklearn-cookbook-zh/img/1-10-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-11-1.png b/Sklearn/sklearn-cookbook-zh/img/1-11-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-11-1.png rename to Sklearn/sklearn-cookbook-zh/img/1-11-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-11-2.png b/Sklearn/sklearn-cookbook-zh/img/1-11-2.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-11-2.png rename to Sklearn/sklearn-cookbook-zh/img/1-11-2.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-11-3.png b/Sklearn/sklearn-cookbook-zh/img/1-11-3.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-11-3.png rename to Sklearn/sklearn-cookbook-zh/img/1-11-3.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-12-1.png b/Sklearn/sklearn-cookbook-zh/img/1-12-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-12-1.png rename to Sklearn/sklearn-cookbook-zh/img/1-12-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-13-1.png b/Sklearn/sklearn-cookbook-zh/img/1-13-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-13-1.png rename to Sklearn/sklearn-cookbook-zh/img/1-13-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-13-2.png b/Sklearn/sklearn-cookbook-zh/img/1-13-2.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-13-2.png rename to Sklearn/sklearn-cookbook-zh/img/1-13-2.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-15-1.png b/Sklearn/sklearn-cookbook-zh/img/1-15-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-15-1.png rename to Sklearn/sklearn-cookbook-zh/img/1-15-1.png diff 
--git a/Python/sklearn/sklearn-cookbook-zh/img/1-15-2.png b/Sklearn/sklearn-cookbook-zh/img/1-15-2.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-15-2.png rename to Sklearn/sklearn-cookbook-zh/img/1-15-2.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-15-3.png b/Sklearn/sklearn-cookbook-zh/img/1-15-3.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-15-3.png rename to Sklearn/sklearn-cookbook-zh/img/1-15-3.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-17-1.png b/Sklearn/sklearn-cookbook-zh/img/1-17-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-17-1.png rename to Sklearn/sklearn-cookbook-zh/img/1-17-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-2-1.png b/Sklearn/sklearn-cookbook-zh/img/1-2-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-2-1.png rename to Sklearn/sklearn-cookbook-zh/img/1-2-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/1-9-1.png b/Sklearn/sklearn-cookbook-zh/img/1-9-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/1-9-1.png rename to Sklearn/sklearn-cookbook-zh/img/1-9-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-1-1.png b/Sklearn/sklearn-cookbook-zh/img/2-1-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-1-1.png rename to Sklearn/sklearn-cookbook-zh/img/2-1-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-1-2.png b/Sklearn/sklearn-cookbook-zh/img/2-1-2.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-1-2.png rename to Sklearn/sklearn-cookbook-zh/img/2-1-2.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-2-1.png b/Sklearn/sklearn-cookbook-zh/img/2-2-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-2-1.png rename to Sklearn/sklearn-cookbook-zh/img/2-2-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-2-2.png b/Sklearn/sklearn-cookbook-zh/img/2-2-2.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-2-2.png rename to Sklearn/sklearn-cookbook-zh/img/2-2-2.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-2-3.png b/Sklearn/sklearn-cookbook-zh/img/2-2-3.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-2-3.png rename to Sklearn/sklearn-cookbook-zh/img/2-2-3.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-3-1.png b/Sklearn/sklearn-cookbook-zh/img/2-3-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-3-1.png rename to Sklearn/sklearn-cookbook-zh/img/2-3-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-3-2.png b/Sklearn/sklearn-cookbook-zh/img/2-3-2.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-3-2.png rename to Sklearn/sklearn-cookbook-zh/img/2-3-2.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-4-1.png b/Sklearn/sklearn-cookbook-zh/img/2-4-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-4-1.png rename to Sklearn/sklearn-cookbook-zh/img/2-4-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-6-1.png b/Sklearn/sklearn-cookbook-zh/img/2-6-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-6-1.png rename to Sklearn/sklearn-cookbook-zh/img/2-6-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-7-1.png b/Sklearn/sklearn-cookbook-zh/img/2-7-1.png similarity index 100% rename from 
Python/sklearn/sklearn-cookbook-zh/img/2-7-1.png rename to Sklearn/sklearn-cookbook-zh/img/2-7-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-8-1.png b/Sklearn/sklearn-cookbook-zh/img/2-8-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-8-1.png rename to Sklearn/sklearn-cookbook-zh/img/2-8-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-8-2.png b/Sklearn/sklearn-cookbook-zh/img/2-8-2.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-8-2.png rename to Sklearn/sklearn-cookbook-zh/img/2-8-2.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-9-1.png b/Sklearn/sklearn-cookbook-zh/img/2-9-1.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-9-1.png rename to Sklearn/sklearn-cookbook-zh/img/2-9-1.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/2-9-2.png b/Sklearn/sklearn-cookbook-zh/img/2-9-2.png similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/2-9-2.png rename to Sklearn/sklearn-cookbook-zh/img/2-9-2.png diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-1-1.jpg b/Sklearn/sklearn-cookbook-zh/img/3-1-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-1-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-1-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-1-2.jpg b/Sklearn/sklearn-cookbook-zh/img/3-1-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-1-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-1-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-2-1.jpg b/Sklearn/sklearn-cookbook-zh/img/3-2-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-2-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-2-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-2-2.jpg b/Sklearn/sklearn-cookbook-zh/img/3-2-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-2-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-2-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-3-1.jpg b/Sklearn/sklearn-cookbook-zh/img/3-3-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-3-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-3-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-3-2.jpg b/Sklearn/sklearn-cookbook-zh/img/3-3-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-3-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-3-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-5-1.jpg b/Sklearn/sklearn-cookbook-zh/img/3-5-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-5-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-5-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-5-2.jpg b/Sklearn/sklearn-cookbook-zh/img/3-5-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-5-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-5-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-6-1.jpg b/Sklearn/sklearn-cookbook-zh/img/3-6-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-6-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-6-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-6-2.jpg b/Sklearn/sklearn-cookbook-zh/img/3-6-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-6-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-6-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-6-3.jpg 
b/Sklearn/sklearn-cookbook-zh/img/3-6-3.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-6-3.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-6-3.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-7-1.jpg b/Sklearn/sklearn-cookbook-zh/img/3-7-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-7-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-7-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-7-2.jpg b/Sklearn/sklearn-cookbook-zh/img/3-7-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-7-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-7-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-7-3.jpg b/Sklearn/sklearn-cookbook-zh/img/3-7-3.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-7-3.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-7-3.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-7-4.jpg b/Sklearn/sklearn-cookbook-zh/img/3-7-4.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-7-4.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-7-4.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-8-1.jpg b/Sklearn/sklearn-cookbook-zh/img/3-8-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-8-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-8-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-8-2.jpg b/Sklearn/sklearn-cookbook-zh/img/3-8-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-8-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-8-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-8-3.jpg b/Sklearn/sklearn-cookbook-zh/img/3-8-3.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-8-3.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-8-3.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/3-9-1.jpg b/Sklearn/sklearn-cookbook-zh/img/3-9-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/3-9-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/3-9-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-1-1.jpg b/Sklearn/sklearn-cookbook-zh/img/4-1-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-1-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-1-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-1-2.jpg b/Sklearn/sklearn-cookbook-zh/img/4-1-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-1-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-1-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-1-3.jpg b/Sklearn/sklearn-cookbook-zh/img/4-1-3.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-1-3.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-1-3.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-2-1.jpg b/Sklearn/sklearn-cookbook-zh/img/4-2-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-2-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-2-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-2-2.jpg b/Sklearn/sklearn-cookbook-zh/img/4-2-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-2-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-2-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-2-3.jpg b/Sklearn/sklearn-cookbook-zh/img/4-2-3.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-2-3.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-2-3.jpg 
diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-3-1.jpg b/Sklearn/sklearn-cookbook-zh/img/4-3-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-3-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-3-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-3-2.jpg b/Sklearn/sklearn-cookbook-zh/img/4-3-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-3-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-3-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-4-1.jpg b/Sklearn/sklearn-cookbook-zh/img/4-4-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-4-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-4-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-4-2.jpg b/Sklearn/sklearn-cookbook-zh/img/4-4-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-4-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-4-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-5-1.jpg b/Sklearn/sklearn-cookbook-zh/img/4-5-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-5-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-5-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-5-2.jpg b/Sklearn/sklearn-cookbook-zh/img/4-5-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-5-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-5-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-5-3.jpg b/Sklearn/sklearn-cookbook-zh/img/4-5-3.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-5-3.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-5-3.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-7-1.jpg b/Sklearn/sklearn-cookbook-zh/img/4-7-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-7-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-7-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-7-2.jpg b/Sklearn/sklearn-cookbook-zh/img/4-7-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-7-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-7-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/4-7-3.jpg b/Sklearn/sklearn-cookbook-zh/img/4-7-3.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/4-7-3.jpg rename to Sklearn/sklearn-cookbook-zh/img/4-7-3.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/5-3-1.jpg b/Sklearn/sklearn-cookbook-zh/img/5-3-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/5-3-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/5-3-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/5-3-2.jpg b/Sklearn/sklearn-cookbook-zh/img/5-3-2.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/5-3-2.jpg rename to Sklearn/sklearn-cookbook-zh/img/5-3-2.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/5-3-3.jpg b/Sklearn/sklearn-cookbook-zh/img/5-3-3.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/5-3-3.jpg rename to Sklearn/sklearn-cookbook-zh/img/5-3-3.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/5-4-1.jpg b/Sklearn/sklearn-cookbook-zh/img/5-4-1.jpg similarity index 100% rename from Python/sklearn/sklearn-cookbook-zh/img/5-4-1.jpg rename to Sklearn/sklearn-cookbook-zh/img/5-4-1.jpg diff --git a/Python/sklearn/sklearn-cookbook-zh/img/5-5-1.jpg b/Sklearn/sklearn-cookbook-zh/img/5-5-1.jpg similarity index 100% rename from 
Python/sklearn/sklearn-cookbook-zh/img/5-5-1.jpg
rename to Sklearn/sklearn-cookbook-zh/img/5-5-1.jpg
diff --git a/Python/sklearn/sklearn-cookbook-zh/img/5-8-1.jpg b/Sklearn/sklearn-cookbook-zh/img/5-8-1.jpg
similarity index 100%
rename from Python/sklearn/sklearn-cookbook-zh/img/5-8-1.jpg
rename to Sklearn/sklearn-cookbook-zh/img/5-8-1.jpg
diff --git a/Python/sklearn/sklearn-cookbook-zh/img/5-8-2.jpg b/Sklearn/sklearn-cookbook-zh/img/5-8-2.jpg
similarity index 100%
rename from Python/sklearn/sklearn-cookbook-zh/img/5-8-2.jpg
rename to Sklearn/sklearn-cookbook-zh/img/5-8-2.jpg
diff --git a/Python/sklearn/sklearn-cookbook-zh/img/5-9-1.jpg b/Sklearn/sklearn-cookbook-zh/img/5-9-1.jpg
similarity index 100%
rename from Python/sklearn/sklearn-cookbook-zh/img/5-9-1.jpg
rename to Sklearn/sklearn-cookbook-zh/img/5-9-1.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/SUMMARY.md b/Sklearn/sklearn-doc-zh/SUMMARY.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/SUMMARY.md
rename to Sklearn/sklearn-doc-zh/SUMMARY.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/Biclustering/a_demo_of_the_spectral_clustering_algorithm.md b/Sklearn/sklearn-doc-zh/examples/Biclustering/a_demo_of_the_spectral_clustering_algorithm.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/Biclustering/a_demo_of_the_spectral_clustering_algorithm.md
rename to Sklearn/sklearn-doc-zh/examples/Biclustering/a_demo_of_the_spectral_clustering_algorithm.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/Biclustering/a_demo_of_the_spectral_co-clustering_algorithm.md b/Sklearn/sklearn-doc-zh/examples/Biclustering/a_demo_of_the_spectral_co-clustering_algorithm.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/Biclustering/a_demo_of_the_spectral_co-clustering_algorithm.md
rename to Sklearn/sklearn-doc-zh/examples/Biclustering/a_demo_of_the_spectral_co-clustering_algorithm.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/Biclustering/biclustering_documents_with_the_spectral_co-clustering_algorithm.md b/Sklearn/sklearn-doc-zh/examples/Biclustering/biclustering_documents_with_the_spectral_co-clustering_algorithm.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/Biclustering/biclustering_documents_with_the_spectral_co-clustering_algorithm.md
rename to Sklearn/sklearn-doc-zh/examples/Biclustering/biclustering_documents_with_the_spectral_co-clustering_algorithm.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_document_classification_20newsgroups.md b/Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_document_classification_20newsgroups.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_document_classification_20newsgroups.md
rename to Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_document_classification_20newsgroups.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_and_elasticnet.md b/Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_and_elasticnet.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_and_elasticnet.md
rename to Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_and_elasticnet.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_coordinate_descent_path.md b/Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_coordinate_descent_path.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_coordinate_descent_path.md
rename to Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_coordinate_descent_path.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_model_selection.md b/Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_model_selection.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_model_selection.md
rename to Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_lasso_model_selection.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_multi_task_lasso_support.md b/Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_multi_task_lasso_support.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_multi_task_lasso_support.md
rename to Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_multi_task_lasso_support.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_ols.md b/Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_ols.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_ols.md
rename to Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_ols.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_ridge_path.md b/Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_ridge_path.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_ridge_path.md
rename to Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_ridge_path.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_tomography_l1_reconstruction.md b/Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_tomography_l1_reconstruction.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_tomography_l1_reconstruction.md
rename to Sklearn/sklearn-doc-zh/examples/Generalized_Linear_Models/plot_tomography_l1_reconstruction.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/README.md b/Sklearn/sklearn-doc-zh/examples/README.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/README.md
rename to Sklearn/sklearn-doc-zh/examples/README.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/SUMMARY.md b/Sklearn/sklearn-doc-zh/examples/SUMMARY.md
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/SUMMARY.md
rename to Sklearn/sklearn-doc-zh/examples/SUMMARY.md
diff --git a/Python/sklearn/sklearn-doc-zh/examples/book.json b/Sklearn/sklearn-doc-zh/examples/book.json
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/book.json
rename to Sklearn/sklearn-doc-zh/examples/book.json
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_approximate_nearest_neighbors_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_approximate_nearest_neighbors_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_approximate_nearest_neighbors_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_approximate_nearest_neighbors_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_grid_search_text_feature_extraction_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_grid_search_text_feature_extraction_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_grid_search_text_feature_extraction_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_grid_search_text_feature_extraction_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_hastie_10_2_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_hastie_10_2_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_hastie_10_2_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_hastie_10_2_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_multiclass_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_multiclass_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_multiclass_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_multiclass_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_regression_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_regression_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_regression_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_regression_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_twoclass_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_twoclass_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_twoclass_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adaboost_twoclass_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adjusted_for_chance_measures_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adjusted_for_chance_measures_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adjusted_for_chance_measures_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_adjusted_for_chance_measures_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_affinity_propagation_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_affinity_propagation_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_affinity_propagation_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_affinity_propagation_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_clustering_metrics_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_clustering_metrics_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_clustering_metrics_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_clustering_metrics_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_clustering_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_clustering_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_clustering_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_clustering_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_dendrogram_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_dendrogram_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_dendrogram_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_agglomerative_dendrogram_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_all_scaling_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_all_scaling_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_all_scaling_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_all_scaling_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_anomaly_comparison_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_anomaly_comparison_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_anomaly_comparison_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_anomaly_comparison_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ard_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ard_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ard_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ard_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bayesian_ridge_curvefit_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bayesian_ridge_curvefit_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bayesian_ridge_curvefit_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bayesian_ridge_curvefit_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bayesian_ridge_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bayesian_ridge_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bayesian_ridge_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bayesian_ridge_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_beta_divergence_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_beta_divergence_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_beta_divergence_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_beta_divergence_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bias_variance_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bias_variance_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bias_variance_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bias_variance_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bicluster_newsgroups_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bicluster_newsgroups_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bicluster_newsgroups_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_bicluster_newsgroups_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_birch_vs_minibatchkmeans_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_birch_vs_minibatchkmeans_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_birch_vs_minibatchkmeans_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_birch_vs_minibatchkmeans_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_caching_nearest_neighbors_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_caching_nearest_neighbors_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_caching_nearest_neighbors_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_caching_nearest_neighbors_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_curve_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_curve_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_curve_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_curve_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_multiclass_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_multiclass_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_multiclass_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_multiclass_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_calibration_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_changed_only_pprint_parameter_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_changed_only_pprint_parameter_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_changed_only_pprint_parameter_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_changed_only_pprint_parameter_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classification_probability_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classification_probability_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classification_probability_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classification_probability_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classification_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classification_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classification_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classification_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classifier_chain_yeast_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classifier_chain_yeast_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classifier_chain_yeast_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classifier_chain_yeast_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classifier_comparison_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classifier_comparison_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classifier_comparison_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_classifier_comparison_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cluster_comparison_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cluster_comparison_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cluster_comparison_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cluster_comparison_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cluster_iris_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cluster_iris_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cluster_iris_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cluster_iris_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_coin_segmentation_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_coin_segmentation_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_coin_segmentation_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_coin_segmentation_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_coin_ward_segmentation_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_coin_ward_segmentation_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_coin_ward_segmentation_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_coin_ward_segmentation_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_color_quantization_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_color_quantization_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_color_quantization_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_color_quantization_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_column_transformer_mixed_types_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_column_transformer_mixed_types_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_column_transformer_mixed_types_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_column_transformer_mixed_types_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_column_transformer_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_column_transformer_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_column_transformer_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_column_transformer_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_calibration_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_calibration_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_calibration_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_calibration_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_cross_decomposition_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_cross_decomposition_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_cross_decomposition_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_cross_decomposition_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_gpr_krr_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_gpr_krr_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_gpr_krr_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_gpr_krr_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_methods_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_methods_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_methods_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_methods_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_reduction_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_reduction_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_reduction_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_compare_reduction_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_concentration_prior_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_concentration_prior_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_concentration_prior_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_concentration_prior_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_confusion_matrix_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_confusion_matrix_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_confusion_matrix_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_confusion_matrix_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cost_complexity_pruning_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cost_complexity_pruning_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cost_complexity_pruning_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cost_complexity_pruning_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_covariance_estimation_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_covariance_estimation_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_covariance_estimation_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_covariance_estimation_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_custom_kernel_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_custom_kernel_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_custom_kernel_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_custom_kernel_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_diabetes_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_diabetes_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_diabetes_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_diabetes_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_digits_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_digits_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_digits_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_digits_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_indices_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_indices_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_indices_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_indices_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_predict_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_predict_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_predict_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_cv_predict_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_dbscan_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_dbscan_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_dbscan_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_dbscan_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_dict_face_patches_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_dict_face_patches_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_dict_face_patches_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_dict_face_patches_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_agglomeration_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_agglomeration_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_agglomeration_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_agglomeration_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_classification_exercise_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_classification_exercise_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_classification_exercise_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_classification_exercise_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_classification_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_classification_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_classification_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_classification_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_kde_sampling_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_kde_sampling_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_kde_sampling_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_kde_sampling_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_last_image_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_last_image_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_last_image_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_last_image_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_linkage_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_linkage_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_linkage_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_linkage_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_pipe_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_pipe_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_pipe_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_digits_pipe_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_classification_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_classification_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_classification_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_classification_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_strategies_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_strategies_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_strategies_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_strategies_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_discretization_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_document_classification_20newsgroups_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_document_classification_20newsgroups_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_document_classification_20newsgroups_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_document_classification_20newsgroups_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_document_clustering_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_document_clustering_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_document_clustering_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_document_clustering_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ensemble_oob_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ensemble_oob_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ensemble_oob_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ensemble_oob_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_f_test_vs_mi_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_f_test_vs_mi_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_f_test_vs_mi_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_f_test_vs_mi_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_face_compress_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_face_compress_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_face_compress_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_face_compress_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_face_recognition_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_face_recognition_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_face_recognition_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_face_recognition_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_faces_decomposition_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_faces_decomposition_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_faces_decomposition_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_faces_decomposition_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_agglomeration_vs_univariate_selection_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_agglomeration_vs_univariate_selection_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_agglomeration_vs_univariate_selection_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_agglomeration_vs_univariate_selection_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_selection_pipeline_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_selection_pipeline_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_selection_pipeline_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_selection_pipeline_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_selection_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_selection_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_selection_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_selection_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_transformation_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_transformation_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_transformation_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_transformation_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_union_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_union_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_union_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_feature_union_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_importances_faces_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_importances_faces_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_importances_faces_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_importances_faces_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_importances_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_importances_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_importances_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_importances_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_iris_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_iris_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_iris_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_forest_iris_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_function_transformer_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_function_transformer_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_function_transformer_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_function_transformer_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_covariances_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_covariances_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_covariances_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_covariances_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_pdf_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_pdf_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_pdf_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_pdf_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_selection_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_selection_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_selection_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_selection_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_sin_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_sin_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_sin_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_sin_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gmm_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_iris_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_iris_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_iris_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_iris_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_isoprobability_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_isoprobability_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_isoprobability_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_isoprobability_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_xor_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_xor_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_xor_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpc_xor_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_co2_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_co2_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_co2_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_co2_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_noisy_targets_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_noisy_targets_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_noisy_targets_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_noisy_targets_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_noisy_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_noisy_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_noisy_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_noisy_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_on_structured_data_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_on_structured_data_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_on_structured_data_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_on_structured_data_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_prior_posterior_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_prior_posterior_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_prior_posterior_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gpr_prior_posterior_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_early_stopping_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_early_stopping_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_early_stopping_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_early_stopping_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_oob_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_oob_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_oob_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_oob_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_quantile_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_quantile_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_quantile_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_quantile_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_regression_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_regression_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_regression_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_regression_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_regularization_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_regularization_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_regularization_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_gradient_boosting_regularization_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_grid_search_digits_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_grid_search_digits_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_grid_search_digits_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_grid_search_digits_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_grid_search_refit_callable_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_grid_search_refit_callable_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_grid_search_refit_callable_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_grid_search_refit_callable_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_hashing_vs_dict_vectorizer_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_hashing_vs_dict_vectorizer_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_hashing_vs_dict_vectorizer_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_hashing_vs_dict_vectorizer_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_huber_vs_ridge_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_huber_vs_ridge_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_huber_vs_ridge_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_huber_vs_ridge_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ica_blind_source_separation_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ica_blind_source_separation_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ica_blind_source_separation_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ica_blind_source_separation_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ica_vs_pca_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ica_vs_pca_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ica_vs_pca_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ica_vs_pca_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_image_denoising_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_image_denoising_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_image_denoising_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_image_denoising_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_incremental_pca_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_incremental_pca_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_incremental_pca_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_incremental_pca_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_inductive_clustering_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_inductive_clustering_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_inductive_clustering_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_inductive_clustering_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_dataset_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_dataset_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_dataset_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_dataset_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_dtc_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_dtc_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_dtc_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_dtc_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_exercise_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_exercise_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_exercise_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_exercise_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_logistic_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_logistic_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_logistic_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_logistic_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_svc_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_svc_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_svc_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iris_svc_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_isolation_forest_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_isolation_forest_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_isolation_forest_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_isolation_forest_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_isotonic_regression_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_isotonic_regression_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_isotonic_regression_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_isotonic_regression_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iterative_imputer_variants_comparison_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iterative_imputer_variants_comparison_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iterative_imputer_variants_comparison_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_iterative_imputer_variants_comparison_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_johnson_lindenstrauss_bound_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_johnson_lindenstrauss_bound_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_johnson_lindenstrauss_bound_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_johnson_lindenstrauss_bound_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kde_1d_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kde_1d_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kde_1d_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kde_1d_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_approximation_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_approximation_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_approximation_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_approximation_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_pca_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_pca_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_pca_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_pca_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_ridge_regression_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_ridge_regression_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_ridge_regression_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kernel_ridge_regression_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_assumptions_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_assumptions_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_assumptions_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_assumptions_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_digits_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_digits_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_digits_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_digits_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_silhouette_analysis_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_silhouette_analysis_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_silhouette_analysis_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_silhouette_analysis_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_stability_low_dim_dense_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_stability_low_dim_dense_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_stability_low_dim_dense_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_kmeans_stability_low_dim_dense_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_digits_active_learning_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_digits_active_learning_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_digits_active_learning_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_digits_active_learning_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_digits_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_digits_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_digits_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_digits_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_structure_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_structure_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_structure_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_structure_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_versus_svm_iris_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_versus_svm_iris_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_versus_svm_iris_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_label_propagation_versus_svm_iris_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_and_elasticnet_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_and_elasticnet_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_and_elasticnet_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_and_elasticnet_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_coordinate_descent_path_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_coordinate_descent_path_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_coordinate_descent_path_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_coordinate_descent_path_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_dense_vs_sparse_data_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_dense_vs_sparse_data_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_dense_vs_sparse_data_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_dense_vs_sparse_data_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_lars_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_lars_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_lars_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_lars_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_model_selection_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_model_selection_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_model_selection_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lasso_model_selection_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lda_qda_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lda_qda_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lda_qda_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lda_qda_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lda_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lda_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lda_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lda_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_learning_curve_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_learning_curve_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_learning_curve_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_learning_curve_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_linearsvc_support_vectors_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_linearsvc_support_vectors_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_linearsvc_support_vectors_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_linearsvc_support_vectors_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_linkage_comparison_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_linkage_comparison_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_linkage_comparison_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_linkage_comparison_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lle_digits_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lle_digits_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lle_digits_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lle_digits_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lof_novelty_detection_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lof_novelty_detection_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lof_novelty_detection_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lof_novelty_detection_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lof_outlier_detection_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lof_outlier_detection_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lof_outlier_detection_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lof_outlier_detection_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_l1_l2_sparsity_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_l1_l2_sparsity_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_l1_l2_sparsity_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_l1_l2_sparsity_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_multinomial_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_multinomial_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_multinomial_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_multinomial_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_path_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_path_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_path_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_path_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_logistic_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lw_vs_oas_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lw_vs_oas_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lw_vs_oas_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_lw_vs_oas_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mahalanobis_distances_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mahalanobis_distances_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mahalanobis_distances_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mahalanobis_distances_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_manifold_sphere_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_manifold_sphere_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_manifold_sphere_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_manifold_sphere_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_map_data_to_normal_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_map_data_to_normal_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_map_data_to_normal_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_map_data_to_normal_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mds_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mds_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mds_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mds_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mean_shift_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mean_shift_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mean_shift_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mean_shift_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mini_batch_kmeans_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mini_batch_kmeans_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mini_batch_kmeans_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mini_batch_kmeans_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_missing_values_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_missing_values_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_missing_values_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_missing_values_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mlp_alpha_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mlp_alpha_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mlp_alpha_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mlp_alpha_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mlp_training_curves_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mlp_training_curves_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mlp_training_curves_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mlp_training_curves_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mnist_filters_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mnist_filters_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mnist_filters_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_mnist_filters_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_model_complexity_influence_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_model_complexity_influence_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_model_complexity_influence_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_model_complexity_influence_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multi_metric_evaluation_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multi_metric_evaluation_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multi_metric_evaluation_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multi_metric_evaluation_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multi_task_lasso_support_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multi_task_lasso_support_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multi_task_lasso_support_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multi_task_lasso_support_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multilabel_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multilabel_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multilabel_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multilabel_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multioutput_face_completion_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multioutput_face_completion_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multioutput_face_completion_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_multioutput_face_completion_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_classification_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_classification_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_classification_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_classification_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_dim_reduction_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_dim_reduction_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_dim_reduction_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_dim_reduction_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_illustration_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_illustration_thumb.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_illustration_thumb.png
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nca_illustration_thumb.png
diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nearest_centroid_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nearest_centroid_thumb.png
similarity index 100%
rename from
Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nearest_centroid_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nearest_centroid_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nested_cross_validation_iris_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nested_cross_validation_iris_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nested_cross_validation_iris_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_nested_cross_validation_iris_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_3d_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_3d_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_3d_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_3d_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_ridge_variance_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_ridge_variance_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_ridge_variance_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_ridge_variance_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ols_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_omp_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_omp_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_omp_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_omp_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_oneclass_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_oneclass_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_oneclass_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_oneclass_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_optics_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_optics_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_optics_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_optics_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_out_of_core_classification_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_out_of_core_classification_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_out_of_core_classification_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_out_of_core_classification_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_outlier_detection_housing_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_outlier_detection_housing_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_outlier_detection_housing_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_outlier_detection_housing_thumb.png diff --git 
a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_partial_dependence_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_partial_dependence_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_partial_dependence_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_partial_dependence_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_partial_dependence_visualization_api_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_partial_dependence_visualization_api_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_partial_dependence_visualization_api_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_partial_dependence_visualization_api_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_3d_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_3d_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_3d_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_3d_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_iris_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_iris_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_iris_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_iris_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_vs_fa_model_selection_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_vs_fa_model_selection_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_vs_fa_model_selection_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_vs_fa_model_selection_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_vs_lda_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_vs_lda_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_vs_lda_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_pca_vs_lda_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_importance_multicollinear_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_importance_multicollinear_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_importance_multicollinear_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_importance_multicollinear_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_importance_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_importance_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_importance_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_importance_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_test_for_classification_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_test_for_classification_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_test_for_classification_thumb.png rename to 
Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_permutation_test_for_classification_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_polynomial_interpolation_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_polynomial_interpolation_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_polynomial_interpolation_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_polynomial_interpolation_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_precision_recall_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_precision_recall_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_precision_recall_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_precision_recall_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_prediction_latency_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_prediction_latency_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_prediction_latency_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_prediction_latency_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_dataset_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_dataset_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_dataset_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_dataset_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_forest_embedding_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_forest_embedding_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_forest_embedding_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_forest_embedding_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_forest_regression_multioutput_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_forest_regression_multioutput_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_forest_regression_multioutput_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_forest_regression_multioutput_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_multilabel_dataset_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_multilabel_dataset_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_multilabel_dataset_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_random_multilabel_dataset_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_randomized_search_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_randomized_search_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_randomized_search_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_randomized_search_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ransac_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ransac_thumb.png similarity index 100% rename from 
Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ransac_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ransac_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rbf_parameters_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rbf_parameters_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rbf_parameters_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rbf_parameters_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rbm_logistic_classification_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rbm_logistic_classification_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rbm_logistic_classification_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rbm_logistic_classification_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_regression_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_regression_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_regression_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_regression_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_release_highlights_0_22_0_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_release_highlights_0_22_0_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_release_highlights_0_22_0_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_release_highlights_0_22_0_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rfe_digits_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rfe_digits_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rfe_digits_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rfe_digits_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rfe_with_cross_validation_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rfe_with_cross_validation_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rfe_with_cross_validation_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_rfe_with_cross_validation_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ridge_coeffs_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ridge_coeffs_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ridge_coeffs_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ridge_coeffs_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ridge_path_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ridge_path_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ridge_path_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ridge_path_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_robust_fit_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_robust_fit_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_robust_fit_thumb.png rename to 
Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_robust_fit_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_robust_vs_empirical_covariance_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_robust_vs_empirical_covariance_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_robust_vs_empirical_covariance_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_robust_vs_empirical_covariance_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_crossval_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_crossval_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_crossval_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_crossval_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_curve_visualization_api_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_curve_visualization_api_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_curve_visualization_api_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_curve_visualization_api_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_roc_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_scaling_importance_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_scaling_importance_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_scaling_importance_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_scaling_importance_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_segmentation_toy_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_segmentation_toy_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_segmentation_toy_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_segmentation_toy_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_select_from_model_boston_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_select_from_model_boston_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_select_from_model_boston_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_select_from_model_boston_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_separating_hyperplane_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_separating_hyperplane_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_separating_hyperplane_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_separating_hyperplane_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_separating_hyperplane_unbalanced_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_separating_hyperplane_unbalanced_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_separating_hyperplane_unbalanced_thumb.png 
rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_separating_hyperplane_unbalanced_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_comparison_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_comparison_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_comparison_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_comparison_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_early_stopping_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_early_stopping_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_early_stopping_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_early_stopping_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_iris_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_iris_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_iris_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_iris_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_loss_functions_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_loss_functions_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_loss_functions_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_loss_functions_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_penalties_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_penalties_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_penalties_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_penalties_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_separating_hyperplane_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_separating_hyperplane_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_separating_hyperplane_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_separating_hyperplane_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_weighted_samples_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_weighted_samples_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_weighted_samples_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sgd_weighted_samples_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_coding_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_coding_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_coding_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_coding_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_cov_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_cov_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_cov_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_cov_thumb.png diff --git 
a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_logistic_regression_20newsgroups_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_logistic_regression_20newsgroups_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_logistic_regression_20newsgroups_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_logistic_regression_20newsgroups_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_logistic_regression_mnist_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_logistic_regression_mnist_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_logistic_regression_mnist_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_sparse_logistic_regression_mnist_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_species_distribution_modeling_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_species_distribution_modeling_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_species_distribution_modeling_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_species_distribution_modeling_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_species_kde_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_species_kde_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_species_kde_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_species_kde_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_spectral_biclustering_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_spectral_biclustering_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_spectral_biclustering_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_spectral_biclustering_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_spectral_coclustering_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_spectral_coclustering_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_spectral_coclustering_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_spectral_coclustering_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_stack_predictors_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_stack_predictors_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_stack_predictors_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_stack_predictors_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_stock_market_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_stock_market_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_stock_market_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_stock_market_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_anova_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_anova_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_anova_thumb.png rename to 
Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_anova_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_kernels_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_kernels_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_kernels_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_kernels_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_margin_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_margin_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_margin_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_margin_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_nonlinear_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_nonlinear_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_nonlinear_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_nonlinear_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_regression_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_regression_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_regression_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_regression_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_scale_c_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_scale_c_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_scale_c_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_scale_c_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_tie_breaking_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_tie_breaking_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_tie_breaking_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_svm_tie_breaking_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_swissroll_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_swissroll_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_swissroll_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_swissroll_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_t_sne_perplexity_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_t_sne_perplexity_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_t_sne_perplexity_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_t_sne_perplexity_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_theilsen_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_theilsen_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_theilsen_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_theilsen_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tomography_l1_reconstruction_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tomography_l1_reconstruction_thumb.png 
similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tomography_l1_reconstruction_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tomography_l1_reconstruction_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_topics_extraction_with_nmf_lda_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_topics_extraction_with_nmf_lda_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_topics_extraction_with_nmf_lda_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_topics_extraction_with_nmf_lda_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_train_error_vs_test_error_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_train_error_vs_test_error_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_train_error_vs_test_error_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_train_error_vs_test_error_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_transformed_target_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_transformed_target_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_transformed_target_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_transformed_target_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tree_regression_multioutput_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tree_regression_multioutput_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tree_regression_multioutput_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tree_regression_multioutput_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tree_regression_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tree_regression_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tree_regression_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_tree_regression_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_underfitting_overfitting_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_underfitting_overfitting_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_underfitting_overfitting_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_underfitting_overfitting_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_unveil_tree_structure_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_unveil_tree_structure_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_unveil_tree_structure_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_unveil_tree_structure_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_validation_curve_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_validation_curve_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_validation_curve_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_validation_curve_thumb.png diff --git 
a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_decision_regions_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_decision_regions_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_decision_regions_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_decision_regions_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_probas_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_probas_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_probas_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_probas_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_regressor_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_regressor_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_regressor_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_voting_regressor_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ward_structured_vs_unstructured_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ward_structured_vs_unstructured_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ward_structured_vs_unstructured_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_ward_structured_vs_unstructured_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_weighted_samples_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_weighted_samples_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_weighted_samples_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_plot_weighted_samples_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_svm_gui_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_svm_gui_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_svm_gui_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_svm_gui_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_wikipedia_principal_eigenvector_thumb.png b/Sklearn/sklearn-doc-zh/examples/img/sphx_glr_wikipedia_principal_eigenvector_thumb.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/examples/img/sphx_glr_wikipedia_principal_eigenvector_thumb.png rename to Sklearn/sklearn-doc-zh/examples/img/sphx_glr_wikipedia_principal_eigenvector_thumb.png diff --git a/Python/sklearn/sklearn-doc-zh/master/1.md b/Sklearn/sklearn-doc-zh/master/1.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/1.md rename to Sklearn/sklearn-doc-zh/master/1.md diff --git a/Python/sklearn/sklearn-doc-zh/master/10.md b/Sklearn/sklearn-doc-zh/master/10.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/10.md rename to Sklearn/sklearn-doc-zh/master/10.md diff --git a/Python/sklearn/sklearn-doc-zh/master/11.md b/Sklearn/sklearn-doc-zh/master/11.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/11.md rename to Sklearn/sklearn-doc-zh/master/11.md diff --git a/Python/sklearn/sklearn-doc-zh/master/12.md b/Sklearn/sklearn-doc-zh/master/12.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/12.md rename to 
Sklearn/sklearn-doc-zh/master/12.md diff --git a/Python/sklearn/sklearn-doc-zh/master/13.md b/Sklearn/sklearn-doc-zh/master/13.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/13.md rename to Sklearn/sklearn-doc-zh/master/13.md diff --git a/Python/sklearn/sklearn-doc-zh/master/14.md b/Sklearn/sklearn-doc-zh/master/14.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/14.md rename to Sklearn/sklearn-doc-zh/master/14.md diff --git a/Python/sklearn/sklearn-doc-zh/master/15.md b/Sklearn/sklearn-doc-zh/master/15.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/15.md rename to Sklearn/sklearn-doc-zh/master/15.md diff --git a/Python/sklearn/sklearn-doc-zh/master/16.md b/Sklearn/sklearn-doc-zh/master/16.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/16.md rename to Sklearn/sklearn-doc-zh/master/16.md diff --git a/Python/sklearn/sklearn-doc-zh/master/17.md b/Sklearn/sklearn-doc-zh/master/17.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/17.md rename to Sklearn/sklearn-doc-zh/master/17.md diff --git a/Python/sklearn/sklearn-doc-zh/master/18.md b/Sklearn/sklearn-doc-zh/master/18.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/18.md rename to Sklearn/sklearn-doc-zh/master/18.md diff --git a/Python/sklearn/sklearn-doc-zh/master/19.md b/Sklearn/sklearn-doc-zh/master/19.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/19.md rename to Sklearn/sklearn-doc-zh/master/19.md diff --git a/Python/sklearn/sklearn-doc-zh/master/2.md b/Sklearn/sklearn-doc-zh/master/2.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/2.md rename to Sklearn/sklearn-doc-zh/master/2.md diff --git a/Python/sklearn/sklearn-doc-zh/master/20.md b/Sklearn/sklearn-doc-zh/master/20.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/20.md rename to Sklearn/sklearn-doc-zh/master/20.md diff --git a/Python/sklearn/sklearn-doc-zh/master/21.md b/Sklearn/sklearn-doc-zh/master/21.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/21.md rename to Sklearn/sklearn-doc-zh/master/21.md diff --git a/Python/sklearn/sklearn-doc-zh/master/22.md b/Sklearn/sklearn-doc-zh/master/22.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/22.md rename to Sklearn/sklearn-doc-zh/master/22.md diff --git a/Python/sklearn/sklearn-doc-zh/master/23.md b/Sklearn/sklearn-doc-zh/master/23.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/23.md rename to Sklearn/sklearn-doc-zh/master/23.md diff --git a/Python/sklearn/sklearn-doc-zh/master/24.md b/Sklearn/sklearn-doc-zh/master/24.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/24.md rename to Sklearn/sklearn-doc-zh/master/24.md diff --git a/Python/sklearn/sklearn-doc-zh/master/25.md b/Sklearn/sklearn-doc-zh/master/25.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/25.md rename to Sklearn/sklearn-doc-zh/master/25.md diff --git a/Python/sklearn/sklearn-doc-zh/master/26.md b/Sklearn/sklearn-doc-zh/master/26.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/26.md rename to Sklearn/sklearn-doc-zh/master/26.md diff --git a/Python/sklearn/sklearn-doc-zh/master/27.md b/Sklearn/sklearn-doc-zh/master/27.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/27.md rename to Sklearn/sklearn-doc-zh/master/27.md diff --git 
a/Python/sklearn/sklearn-doc-zh/master/28.md b/Sklearn/sklearn-doc-zh/master/28.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/28.md rename to Sklearn/sklearn-doc-zh/master/28.md diff --git a/Python/sklearn/sklearn-doc-zh/master/29.md b/Sklearn/sklearn-doc-zh/master/29.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/29.md rename to Sklearn/sklearn-doc-zh/master/29.md diff --git a/Python/sklearn/sklearn-doc-zh/master/3.md b/Sklearn/sklearn-doc-zh/master/3.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/3.md rename to Sklearn/sklearn-doc-zh/master/3.md diff --git a/Python/sklearn/sklearn-doc-zh/master/30.md b/Sklearn/sklearn-doc-zh/master/30.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/30.md rename to Sklearn/sklearn-doc-zh/master/30.md diff --git a/Python/sklearn/sklearn-doc-zh/master/31.md b/Sklearn/sklearn-doc-zh/master/31.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/31.md rename to Sklearn/sklearn-doc-zh/master/31.md diff --git a/Python/sklearn/sklearn-doc-zh/master/32.md b/Sklearn/sklearn-doc-zh/master/32.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/32.md rename to Sklearn/sklearn-doc-zh/master/32.md diff --git a/Python/sklearn/sklearn-doc-zh/master/33.md b/Sklearn/sklearn-doc-zh/master/33.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/33.md rename to Sklearn/sklearn-doc-zh/master/33.md diff --git a/Python/sklearn/sklearn-doc-zh/master/34.md b/Sklearn/sklearn-doc-zh/master/34.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/34.md rename to Sklearn/sklearn-doc-zh/master/34.md diff --git a/Python/sklearn/sklearn-doc-zh/master/35.md b/Sklearn/sklearn-doc-zh/master/35.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/35.md rename to Sklearn/sklearn-doc-zh/master/35.md diff --git a/Python/sklearn/sklearn-doc-zh/master/36.md b/Sklearn/sklearn-doc-zh/master/36.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/36.md rename to Sklearn/sklearn-doc-zh/master/36.md diff --git a/Python/sklearn/sklearn-doc-zh/master/37.md b/Sklearn/sklearn-doc-zh/master/37.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/37.md rename to Sklearn/sklearn-doc-zh/master/37.md diff --git a/Python/sklearn/sklearn-doc-zh/master/38.md b/Sklearn/sklearn-doc-zh/master/38.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/38.md rename to Sklearn/sklearn-doc-zh/master/38.md diff --git a/Python/sklearn/sklearn-doc-zh/master/39.md b/Sklearn/sklearn-doc-zh/master/39.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/39.md rename to Sklearn/sklearn-doc-zh/master/39.md diff --git a/Python/sklearn/sklearn-doc-zh/master/4.md b/Sklearn/sklearn-doc-zh/master/4.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/4.md rename to Sklearn/sklearn-doc-zh/master/4.md diff --git a/Python/sklearn/sklearn-doc-zh/master/40.md b/Sklearn/sklearn-doc-zh/master/40.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/40.md rename to Sklearn/sklearn-doc-zh/master/40.md diff --git a/Python/sklearn/sklearn-doc-zh/master/41.md b/Sklearn/sklearn-doc-zh/master/41.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/41.md rename to Sklearn/sklearn-doc-zh/master/41.md diff --git a/Python/sklearn/sklearn-doc-zh/master/42.md b/Sklearn/sklearn-doc-zh/master/42.md 
similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/42.md rename to Sklearn/sklearn-doc-zh/master/42.md diff --git a/Python/sklearn/sklearn-doc-zh/master/43.md b/Sklearn/sklearn-doc-zh/master/43.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/43.md rename to Sklearn/sklearn-doc-zh/master/43.md diff --git a/Python/sklearn/sklearn-doc-zh/master/44.md b/Sklearn/sklearn-doc-zh/master/44.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/44.md rename to Sklearn/sklearn-doc-zh/master/44.md diff --git a/Python/sklearn/sklearn-doc-zh/master/45.md b/Sklearn/sklearn-doc-zh/master/45.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/45.md rename to Sklearn/sklearn-doc-zh/master/45.md diff --git a/Python/sklearn/sklearn-doc-zh/master/46.md b/Sklearn/sklearn-doc-zh/master/46.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/46.md rename to Sklearn/sklearn-doc-zh/master/46.md diff --git a/Python/sklearn/sklearn-doc-zh/master/47.md b/Sklearn/sklearn-doc-zh/master/47.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/47.md rename to Sklearn/sklearn-doc-zh/master/47.md diff --git a/Python/sklearn/sklearn-doc-zh/master/48.md b/Sklearn/sklearn-doc-zh/master/48.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/48.md rename to Sklearn/sklearn-doc-zh/master/48.md diff --git a/Python/sklearn/sklearn-doc-zh/master/5.md b/Sklearn/sklearn-doc-zh/master/5.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/5.md rename to Sklearn/sklearn-doc-zh/master/5.md diff --git a/Python/sklearn/sklearn-doc-zh/master/50.md b/Sklearn/sklearn-doc-zh/master/50.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/50.md rename to Sklearn/sklearn-doc-zh/master/50.md diff --git a/Python/sklearn/sklearn-doc-zh/master/51.md b/Sklearn/sklearn-doc-zh/master/51.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/51.md rename to Sklearn/sklearn-doc-zh/master/51.md diff --git a/Python/sklearn/sklearn-doc-zh/master/52.md b/Sklearn/sklearn-doc-zh/master/52.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/52.md rename to Sklearn/sklearn-doc-zh/master/52.md diff --git a/Python/sklearn/sklearn-doc-zh/master/53.md b/Sklearn/sklearn-doc-zh/master/53.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/53.md rename to Sklearn/sklearn-doc-zh/master/53.md diff --git a/Python/sklearn/sklearn-doc-zh/master/54.md b/Sklearn/sklearn-doc-zh/master/54.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/54.md rename to Sklearn/sklearn-doc-zh/master/54.md diff --git a/Python/sklearn/sklearn-doc-zh/master/55.md b/Sklearn/sklearn-doc-zh/master/55.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/55.md rename to Sklearn/sklearn-doc-zh/master/55.md diff --git a/Python/sklearn/sklearn-doc-zh/master/56.md b/Sklearn/sklearn-doc-zh/master/56.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/56.md rename to Sklearn/sklearn-doc-zh/master/56.md diff --git a/Python/sklearn/sklearn-doc-zh/master/57.md b/Sklearn/sklearn-doc-zh/master/57.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/57.md rename to Sklearn/sklearn-doc-zh/master/57.md diff --git a/Python/sklearn/sklearn-doc-zh/master/58.md b/Sklearn/sklearn-doc-zh/master/58.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/58.md rename to 
Sklearn/sklearn-doc-zh/master/58.md diff --git a/Python/sklearn/sklearn-doc-zh/master/59.md b/Sklearn/sklearn-doc-zh/master/59.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/59.md rename to Sklearn/sklearn-doc-zh/master/59.md diff --git a/Python/sklearn/sklearn-doc-zh/master/6.md b/Sklearn/sklearn-doc-zh/master/6.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/6.md rename to Sklearn/sklearn-doc-zh/master/6.md diff --git a/Python/sklearn/sklearn-doc-zh/master/60.md b/Sklearn/sklearn-doc-zh/master/60.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/60.md rename to Sklearn/sklearn-doc-zh/master/60.md diff --git a/Python/sklearn/sklearn-doc-zh/master/61.md b/Sklearn/sklearn-doc-zh/master/61.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/61.md rename to Sklearn/sklearn-doc-zh/master/61.md diff --git a/Python/sklearn/sklearn-doc-zh/master/62.md b/Sklearn/sklearn-doc-zh/master/62.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/62.md rename to Sklearn/sklearn-doc-zh/master/62.md diff --git a/Python/sklearn/sklearn-doc-zh/master/63.md b/Sklearn/sklearn-doc-zh/master/63.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/63.md rename to Sklearn/sklearn-doc-zh/master/63.md diff --git a/Python/sklearn/sklearn-doc-zh/master/64.md b/Sklearn/sklearn-doc-zh/master/64.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/64.md rename to Sklearn/sklearn-doc-zh/master/64.md diff --git a/Python/sklearn/sklearn-doc-zh/master/7.md b/Sklearn/sklearn-doc-zh/master/7.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/7.md rename to Sklearn/sklearn-doc-zh/master/7.md diff --git a/Python/sklearn/sklearn-doc-zh/master/8.md b/Sklearn/sklearn-doc-zh/master/8.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/8.md rename to Sklearn/sklearn-doc-zh/master/8.md diff --git a/Python/sklearn/sklearn-doc-zh/master/9.md b/Sklearn/sklearn-doc-zh/master/9.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/9.md rename to Sklearn/sklearn-doc-zh/master/9.md diff --git a/Python/sklearn/sklearn-doc-zh/master/SUMMARY.md b/Sklearn/sklearn-doc-zh/master/SUMMARY.md similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/SUMMARY.md rename to Sklearn/sklearn-doc-zh/master/SUMMARY.md diff --git a/Python/sklearn/sklearn-doc-zh/master/img/001d34ad977d110ce0931112c362d07e.jpg b/Sklearn/sklearn-doc-zh/master/img/001d34ad977d110ce0931112c362d07e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/001d34ad977d110ce0931112c362d07e.jpg rename to Sklearn/sklearn-doc-zh/master/img/001d34ad977d110ce0931112c362d07e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/00a1e1837f700cef7352acfafd328607.jpg b/Sklearn/sklearn-doc-zh/master/img/00a1e1837f700cef7352acfafd328607.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/00a1e1837f700cef7352acfafd328607.jpg rename to Sklearn/sklearn-doc-zh/master/img/00a1e1837f700cef7352acfafd328607.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/00af2cbeb1deda7098a17d0491060339.jpg b/Sklearn/sklearn-doc-zh/master/img/00af2cbeb1deda7098a17d0491060339.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/00af2cbeb1deda7098a17d0491060339.jpg rename to Sklearn/sklearn-doc-zh/master/img/00af2cbeb1deda7098a17d0491060339.jpg diff --git 
a/Python/sklearn/sklearn-doc-zh/master/img/01024e528443374ebac4e8cb2f6dc463.jpg b/Sklearn/sklearn-doc-zh/master/img/01024e528443374ebac4e8cb2f6dc463.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/01024e528443374ebac4e8cb2f6dc463.jpg rename to Sklearn/sklearn-doc-zh/master/img/01024e528443374ebac4e8cb2f6dc463.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/014b479ec81146a77562d251269a0f2e.jpg b/Sklearn/sklearn-doc-zh/master/img/014b479ec81146a77562d251269a0f2e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/014b479ec81146a77562d251269a0f2e.jpg rename to Sklearn/sklearn-doc-zh/master/img/014b479ec81146a77562d251269a0f2e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/015fcf78112c08948e66bb51171ae137.jpg b/Sklearn/sklearn-doc-zh/master/img/015fcf78112c08948e66bb51171ae137.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/015fcf78112c08948e66bb51171ae137.jpg rename to Sklearn/sklearn-doc-zh/master/img/015fcf78112c08948e66bb51171ae137.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/017a1400b81bc9ef956adc43050bb5c8.jpg b/Sklearn/sklearn-doc-zh/master/img/017a1400b81bc9ef956adc43050bb5c8.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/017a1400b81bc9ef956adc43050bb5c8.jpg rename to Sklearn/sklearn-doc-zh/master/img/017a1400b81bc9ef956adc43050bb5c8.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/018a312145ba4dee4c257135644ced91.jpg b/Sklearn/sklearn-doc-zh/master/img/018a312145ba4dee4c257135644ced91.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/018a312145ba4dee4c257135644ced91.jpg rename to Sklearn/sklearn-doc-zh/master/img/018a312145ba4dee4c257135644ced91.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/01e7c74ccc13a6832f6bfcd46b442a1b.jpg b/Sklearn/sklearn-doc-zh/master/img/01e7c74ccc13a6832f6bfcd46b442a1b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/01e7c74ccc13a6832f6bfcd46b442a1b.jpg rename to Sklearn/sklearn-doc-zh/master/img/01e7c74ccc13a6832f6bfcd46b442a1b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/021new1.jpg b/Sklearn/sklearn-doc-zh/master/img/021new1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/021new1.jpg rename to Sklearn/sklearn-doc-zh/master/img/021new1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0243e3516b65d89a7e3da13680c1a1b7.jpg b/Sklearn/sklearn-doc-zh/master/img/0243e3516b65d89a7e3da13680c1a1b7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0243e3516b65d89a7e3da13680c1a1b7.jpg rename to Sklearn/sklearn-doc-zh/master/img/0243e3516b65d89a7e3da13680c1a1b7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/02848ebe72029503696b6523e4052c0c.jpg b/Sklearn/sklearn-doc-zh/master/img/02848ebe72029503696b6523e4052c0c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/02848ebe72029503696b6523e4052c0c.jpg rename to Sklearn/sklearn-doc-zh/master/img/02848ebe72029503696b6523e4052c0c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/03269860556d3525e4ae266f30c6982d.jpg b/Sklearn/sklearn-doc-zh/master/img/03269860556d3525e4ae266f30c6982d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/03269860556d3525e4ae266f30c6982d.jpg rename to Sklearn/sklearn-doc-zh/master/img/03269860556d3525e4ae266f30c6982d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0333d4e1607c1cab19f576a212267ec1.jpg 
b/Sklearn/sklearn-doc-zh/master/img/0333d4e1607c1cab19f576a212267ec1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0333d4e1607c1cab19f576a212267ec1.jpg rename to Sklearn/sklearn-doc-zh/master/img/0333d4e1607c1cab19f576a212267ec1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0345993f9c673bd3a928dc6cb07bcae1.jpg b/Sklearn/sklearn-doc-zh/master/img/0345993f9c673bd3a928dc6cb07bcae1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0345993f9c673bd3a928dc6cb07bcae1.jpg rename to Sklearn/sklearn-doc-zh/master/img/0345993f9c673bd3a928dc6cb07bcae1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/035f009eecfdebf82b493f797843a919.jpg b/Sklearn/sklearn-doc-zh/master/img/035f009eecfdebf82b493f797843a919.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/035f009eecfdebf82b493f797843a919.jpg rename to Sklearn/sklearn-doc-zh/master/img/035f009eecfdebf82b493f797843a919.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/03aa3da890dedc42b04c1df154062257.jpg b/Sklearn/sklearn-doc-zh/master/img/03aa3da890dedc42b04c1df154062257.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/03aa3da890dedc42b04c1df154062257.jpg rename to Sklearn/sklearn-doc-zh/master/img/03aa3da890dedc42b04c1df154062257.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/03dc262433e357325639af531c5bf70e.jpg b/Sklearn/sklearn-doc-zh/master/img/03dc262433e357325639af531c5bf70e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/03dc262433e357325639af531c5bf70e.jpg rename to Sklearn/sklearn-doc-zh/master/img/03dc262433e357325639af531c5bf70e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/04388b884d40fc8b56559b6c2364e7ce.jpg b/Sklearn/sklearn-doc-zh/master/img/04388b884d40fc8b56559b6c2364e7ce.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/04388b884d40fc8b56559b6c2364e7ce.jpg rename to Sklearn/sklearn-doc-zh/master/img/04388b884d40fc8b56559b6c2364e7ce.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0449a2a9bce6d759e7253da7d17fa938.jpg b/Sklearn/sklearn-doc-zh/master/img/0449a2a9bce6d759e7253da7d17fa938.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0449a2a9bce6d759e7253da7d17fa938.jpg rename to Sklearn/sklearn-doc-zh/master/img/0449a2a9bce6d759e7253da7d17fa938.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/047826f1c2e6f2687b304cb5217be8d8.jpg b/Sklearn/sklearn-doc-zh/master/img/047826f1c2e6f2687b304cb5217be8d8.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/047826f1c2e6f2687b304cb5217be8d8.jpg rename to Sklearn/sklearn-doc-zh/master/img/047826f1c2e6f2687b304cb5217be8d8.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/05459a925be9207abbb2f72203e48cf2.jpg b/Sklearn/sklearn-doc-zh/master/img/05459a925be9207abbb2f72203e48cf2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/05459a925be9207abbb2f72203e48cf2.jpg rename to Sklearn/sklearn-doc-zh/master/img/05459a925be9207abbb2f72203e48cf2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/05588cdc4e82289930a92b0097f67d2d.jpg b/Sklearn/sklearn-doc-zh/master/img/05588cdc4e82289930a92b0097f67d2d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/05588cdc4e82289930a92b0097f67d2d.jpg rename to Sklearn/sklearn-doc-zh/master/img/05588cdc4e82289930a92b0097f67d2d.jpg diff --git 
a/Python/sklearn/sklearn-doc-zh/master/img/0563dd4e7c322dfdafde0770d8a9dc4a.jpg b/Sklearn/sklearn-doc-zh/master/img/0563dd4e7c322dfdafde0770d8a9dc4a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0563dd4e7c322dfdafde0770d8a9dc4a.jpg rename to Sklearn/sklearn-doc-zh/master/img/0563dd4e7c322dfdafde0770d8a9dc4a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/05c3632395ec8941c82954de930b9d3e.jpg b/Sklearn/sklearn-doc-zh/master/img/05c3632395ec8941c82954de930b9d3e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/05c3632395ec8941c82954de930b9d3e.jpg rename to Sklearn/sklearn-doc-zh/master/img/05c3632395ec8941c82954de930b9d3e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/06731dd14500ea710a28370843818539.jpg b/Sklearn/sklearn-doc-zh/master/img/06731dd14500ea710a28370843818539.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/06731dd14500ea710a28370843818539.jpg rename to Sklearn/sklearn-doc-zh/master/img/06731dd14500ea710a28370843818539.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0679b7c6ff086b5e9ad73b1cb08d9205.jpg b/Sklearn/sklearn-doc-zh/master/img/0679b7c6ff086b5e9ad73b1cb08d9205.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0679b7c6ff086b5e9ad73b1cb08d9205.jpg rename to Sklearn/sklearn-doc-zh/master/img/0679b7c6ff086b5e9ad73b1cb08d9205.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/06d3f93ccdf3b4b5cd0fea7225848848.jpg b/Sklearn/sklearn-doc-zh/master/img/06d3f93ccdf3b4b5cd0fea7225848848.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/06d3f93ccdf3b4b5cd0fea7225848848.jpg rename to Sklearn/sklearn-doc-zh/master/img/06d3f93ccdf3b4b5cd0fea7225848848.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/070018458bf56c0d94293de45828e878.jpg b/Sklearn/sklearn-doc-zh/master/img/070018458bf56c0d94293de45828e878.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/070018458bf56c0d94293de45828e878.jpg rename to Sklearn/sklearn-doc-zh/master/img/070018458bf56c0d94293de45828e878.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/07610ee9d3a524eb0a3fb7ae409614c1.jpg b/Sklearn/sklearn-doc-zh/master/img/07610ee9d3a524eb0a3fb7ae409614c1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/07610ee9d3a524eb0a3fb7ae409614c1.jpg rename to Sklearn/sklearn-doc-zh/master/img/07610ee9d3a524eb0a3fb7ae409614c1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0775c03fc710a24df297dedcec515aaf.jpg b/Sklearn/sklearn-doc-zh/master/img/0775c03fc710a24df297dedcec515aaf.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0775c03fc710a24df297dedcec515aaf.jpg rename to Sklearn/sklearn-doc-zh/master/img/0775c03fc710a24df297dedcec515aaf.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/07794b8fa83c7e18c5d1fb175fd7d7bd.jpg b/Sklearn/sklearn-doc-zh/master/img/07794b8fa83c7e18c5d1fb175fd7d7bd.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/07794b8fa83c7e18c5d1fb175fd7d7bd.jpg rename to Sklearn/sklearn-doc-zh/master/img/07794b8fa83c7e18c5d1fb175fd7d7bd.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/07921ae49a32570fd5559004f1cca103.jpg b/Sklearn/sklearn-doc-zh/master/img/07921ae49a32570fd5559004f1cca103.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/07921ae49a32570fd5559004f1cca103.jpg rename to 
Sklearn/sklearn-doc-zh/master/img/07921ae49a32570fd5559004f1cca103.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/088266f72839f4195c9058dfd17d778b.jpg b/Sklearn/sklearn-doc-zh/master/img/088266f72839f4195c9058dfd17d778b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/088266f72839f4195c9058dfd17d778b.jpg rename to Sklearn/sklearn-doc-zh/master/img/088266f72839f4195c9058dfd17d778b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/08a5f2b42e497598497f5265194ce4a3.jpg b/Sklearn/sklearn-doc-zh/master/img/08a5f2b42e497598497f5265194ce4a3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/08a5f2b42e497598497f5265194ce4a3.jpg rename to Sklearn/sklearn-doc-zh/master/img/08a5f2b42e497598497f5265194ce4a3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/094a3a73abc84f5a6c1e0b72e15152d7.jpg b/Sklearn/sklearn-doc-zh/master/img/094a3a73abc84f5a6c1e0b72e15152d7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/094a3a73abc84f5a6c1e0b72e15152d7.jpg rename to Sklearn/sklearn-doc-zh/master/img/094a3a73abc84f5a6c1e0b72e15152d7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0992b23a98660c7b2102695e74407be2.jpg b/Sklearn/sklearn-doc-zh/master/img/0992b23a98660c7b2102695e74407be2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0992b23a98660c7b2102695e74407be2.jpg rename to Sklearn/sklearn-doc-zh/master/img/0992b23a98660c7b2102695e74407be2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/09eb9862841b1c17d77e2e4830df3770.jpg b/Sklearn/sklearn-doc-zh/master/img/09eb9862841b1c17d77e2e4830df3770.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/09eb9862841b1c17d77e2e4830df3770.jpg rename to Sklearn/sklearn-doc-zh/master/img/09eb9862841b1c17d77e2e4830df3770.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/09ed5f467366506cf3b8d425d00db588.jpg b/Sklearn/sklearn-doc-zh/master/img/09ed5f467366506cf3b8d425d00db588.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/09ed5f467366506cf3b8d425d00db588.jpg rename to Sklearn/sklearn-doc-zh/master/img/09ed5f467366506cf3b8d425d00db588.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0a3546c8f30354c128ef2acb96e91e16.jpg b/Sklearn/sklearn-doc-zh/master/img/0a3546c8f30354c128ef2acb96e91e16.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0a3546c8f30354c128ef2acb96e91e16.jpg rename to Sklearn/sklearn-doc-zh/master/img/0a3546c8f30354c128ef2acb96e91e16.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0a7b173908e1ba21b1132121dd409ded.jpg b/Sklearn/sklearn-doc-zh/master/img/0a7b173908e1ba21b1132121dd409ded.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0a7b173908e1ba21b1132121dd409ded.jpg rename to Sklearn/sklearn-doc-zh/master/img/0a7b173908e1ba21b1132121dd409ded.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0acf1512409eb0a9a90102698304fd52.jpg b/Sklearn/sklearn-doc-zh/master/img/0acf1512409eb0a9a90102698304fd52.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0acf1512409eb0a9a90102698304fd52.jpg rename to Sklearn/sklearn-doc-zh/master/img/0acf1512409eb0a9a90102698304fd52.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0bb5a8e1b524523dcfb5104d9f20ba2b.jpg b/Sklearn/sklearn-doc-zh/master/img/0bb5a8e1b524523dcfb5104d9f20ba2b.jpg similarity index 100% rename from 
Python/sklearn/sklearn-doc-zh/master/img/0bb5a8e1b524523dcfb5104d9f20ba2b.jpg rename to Sklearn/sklearn-doc-zh/master/img/0bb5a8e1b524523dcfb5104d9f20ba2b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0bc78b13595e61ff422e00bb2686c7e8.jpg b/Sklearn/sklearn-doc-zh/master/img/0bc78b13595e61ff422e00bb2686c7e8.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0bc78b13595e61ff422e00bb2686c7e8.jpg rename to Sklearn/sklearn-doc-zh/master/img/0bc78b13595e61ff422e00bb2686c7e8.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0cd05229735908f0f99e59deb90a4434.jpg b/Sklearn/sklearn-doc-zh/master/img/0cd05229735908f0f99e59deb90a4434.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0cd05229735908f0f99e59deb90a4434.jpg rename to Sklearn/sklearn-doc-zh/master/img/0cd05229735908f0f99e59deb90a4434.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0ce3ae4e9a8bbd17b08f5fae78d60f21.jpg b/Sklearn/sklearn-doc-zh/master/img/0ce3ae4e9a8bbd17b08f5fae78d60f21.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0ce3ae4e9a8bbd17b08f5fae78d60f21.jpg rename to Sklearn/sklearn-doc-zh/master/img/0ce3ae4e9a8bbd17b08f5fae78d60f21.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0d0c4e4a12f6e3bb90bf30161951dcc5.jpg b/Sklearn/sklearn-doc-zh/master/img/0d0c4e4a12f6e3bb90bf30161951dcc5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0d0c4e4a12f6e3bb90bf30161951dcc5.jpg rename to Sklearn/sklearn-doc-zh/master/img/0d0c4e4a12f6e3bb90bf30161951dcc5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0dd4aedd59a06ecbaead248c17a3ce80.jpg b/Sklearn/sklearn-doc-zh/master/img/0dd4aedd59a06ecbaead248c17a3ce80.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0dd4aedd59a06ecbaead248c17a3ce80.jpg rename to Sklearn/sklearn-doc-zh/master/img/0dd4aedd59a06ecbaead248c17a3ce80.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0e7d39317aed470ee92522354b5fbe04.jpg b/Sklearn/sklearn-doc-zh/master/img/0e7d39317aed470ee92522354b5fbe04.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0e7d39317aed470ee92522354b5fbe04.jpg rename to Sklearn/sklearn-doc-zh/master/img/0e7d39317aed470ee92522354b5fbe04.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0e8aa67015918fa2807e6ddf7192c32f.jpg b/Sklearn/sklearn-doc-zh/master/img/0e8aa67015918fa2807e6ddf7192c32f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0e8aa67015918fa2807e6ddf7192c32f.jpg rename to Sklearn/sklearn-doc-zh/master/img/0e8aa67015918fa2807e6ddf7192c32f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0f30c5ace43eda2a8d25c6e9365264d7.jpg b/Sklearn/sklearn-doc-zh/master/img/0f30c5ace43eda2a8d25c6e9365264d7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0f30c5ace43eda2a8d25c6e9365264d7.jpg rename to Sklearn/sklearn-doc-zh/master/img/0f30c5ace43eda2a8d25c6e9365264d7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0f92bc682b050115d03c625ce770c77d.jpg b/Sklearn/sklearn-doc-zh/master/img/0f92bc682b050115d03c625ce770c77d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0f92bc682b050115d03c625ce770c77d.jpg rename to Sklearn/sklearn-doc-zh/master/img/0f92bc682b050115d03c625ce770c77d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0faa297883831c0432cf4d72960eeb6c.jpg b/Sklearn/sklearn-doc-zh/master/img/0faa297883831c0432cf4d72960eeb6c.jpg similarity index 
100% rename from Python/sklearn/sklearn-doc-zh/master/img/0faa297883831c0432cf4d72960eeb6c.jpg rename to Sklearn/sklearn-doc-zh/master/img/0faa297883831c0432cf4d72960eeb6c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0fac1c7ec23344da41ff45485bb3ac12.jpg b/Sklearn/sklearn-doc-zh/master/img/0fac1c7ec23344da41ff45485bb3ac12.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0fac1c7ec23344da41ff45485bb3ac12.jpg rename to Sklearn/sklearn-doc-zh/master/img/0fac1c7ec23344da41ff45485bb3ac12.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0fccbdc535b0a4d8003725e8ad606561.jpg b/Sklearn/sklearn-doc-zh/master/img/0fccbdc535b0a4d8003725e8ad606561.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0fccbdc535b0a4d8003725e8ad606561.jpg rename to Sklearn/sklearn-doc-zh/master/img/0fccbdc535b0a4d8003725e8ad606561.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0fd5cc61b1ca3db3b190fbfad2a38813.jpg b/Sklearn/sklearn-doc-zh/master/img/0fd5cc61b1ca3db3b190fbfad2a38813.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0fd5cc61b1ca3db3b190fbfad2a38813.jpg rename to Sklearn/sklearn-doc-zh/master/img/0fd5cc61b1ca3db3b190fbfad2a38813.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/0fec9d3a9833abc417480a03be883b3e.jpg b/Sklearn/sklearn-doc-zh/master/img/0fec9d3a9833abc417480a03be883b3e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/0fec9d3a9833abc417480a03be883b3e.jpg rename to Sklearn/sklearn-doc-zh/master/img/0fec9d3a9833abc417480a03be883b3e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/10026a0d2a4dcca9387e58932c14f0e7.jpg b/Sklearn/sklearn-doc-zh/master/img/10026a0d2a4dcca9387e58932c14f0e7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/10026a0d2a4dcca9387e58932c14f0e7.jpg rename to Sklearn/sklearn-doc-zh/master/img/10026a0d2a4dcca9387e58932c14f0e7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/107556ec49c074270575d6b99f3d2029.jpg b/Sklearn/sklearn-doc-zh/master/img/107556ec49c074270575d6b99f3d2029.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/107556ec49c074270575d6b99f3d2029.jpg rename to Sklearn/sklearn-doc-zh/master/img/107556ec49c074270575d6b99f3d2029.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1091409a4db7e77230536d6b05b4fae6.jpg b/Sklearn/sklearn-doc-zh/master/img/1091409a4db7e77230536d6b05b4fae6.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1091409a4db7e77230536d6b05b4fae6.jpg rename to Sklearn/sklearn-doc-zh/master/img/1091409a4db7e77230536d6b05b4fae6.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/10a8344b866fca53744b728ef788a668.jpg b/Sklearn/sklearn-doc-zh/master/img/10a8344b866fca53744b728ef788a668.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/10a8344b866fca53744b728ef788a668.jpg rename to Sklearn/sklearn-doc-zh/master/img/10a8344b866fca53744b728ef788a668.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/10bfda066ccebace59b1d11135e10196.jpg b/Sklearn/sklearn-doc-zh/master/img/10bfda066ccebace59b1d11135e10196.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/10bfda066ccebace59b1d11135e10196.jpg rename to Sklearn/sklearn-doc-zh/master/img/10bfda066ccebace59b1d11135e10196.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/10ce8fc56aa233e3e9cec9776dc315e6.jpg b/Sklearn/sklearn-doc-zh/master/img/10ce8fc56aa233e3e9cec9776dc315e6.jpg 
similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/10ce8fc56aa233e3e9cec9776dc315e6.jpg rename to Sklearn/sklearn-doc-zh/master/img/10ce8fc56aa233e3e9cec9776dc315e6.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/11265c80ea298a58e0a1010736d28b38.jpg b/Sklearn/sklearn-doc-zh/master/img/11265c80ea298a58e0a1010736d28b38.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/11265c80ea298a58e0a1010736d28b38.jpg rename to Sklearn/sklearn-doc-zh/master/img/11265c80ea298a58e0a1010736d28b38.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/11336a74b43f75a360b60ce81f9cbdc0.jpg b/Sklearn/sklearn-doc-zh/master/img/11336a74b43f75a360b60ce81f9cbdc0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/11336a74b43f75a360b60ce81f9cbdc0.jpg rename to Sklearn/sklearn-doc-zh/master/img/11336a74b43f75a360b60ce81f9cbdc0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/114b22cba4861a82ce7df1eab3219a0d.jpg b/Sklearn/sklearn-doc-zh/master/img/114b22cba4861a82ce7df1eab3219a0d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/114b22cba4861a82ce7df1eab3219a0d.jpg rename to Sklearn/sklearn-doc-zh/master/img/114b22cba4861a82ce7df1eab3219a0d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/11c00539ec3e5944afd76511830591db.jpg b/Sklearn/sklearn-doc-zh/master/img/11c00539ec3e5944afd76511830591db.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/11c00539ec3e5944afd76511830591db.jpg rename to Sklearn/sklearn-doc-zh/master/img/11c00539ec3e5944afd76511830591db.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/11cde057716cf1a820780a60c8ffa8e4.jpg b/Sklearn/sklearn-doc-zh/master/img/11cde057716cf1a820780a60c8ffa8e4.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/11cde057716cf1a820780a60c8ffa8e4.jpg rename to Sklearn/sklearn-doc-zh/master/img/11cde057716cf1a820780a60c8ffa8e4.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1252aa7af065b0afd424b2ff01b4e2a5.jpg b/Sklearn/sklearn-doc-zh/master/img/1252aa7af065b0afd424b2ff01b4e2a5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1252aa7af065b0afd424b2ff01b4e2a5.jpg rename to Sklearn/sklearn-doc-zh/master/img/1252aa7af065b0afd424b2ff01b4e2a5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/12867664a0e0e6047ee303c542b4deac.jpg b/Sklearn/sklearn-doc-zh/master/img/12867664a0e0e6047ee303c542b4deac.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/12867664a0e0e6047ee303c542b4deac.jpg rename to Sklearn/sklearn-doc-zh/master/img/12867664a0e0e6047ee303c542b4deac.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/12ab1980b4b3f069be032c0d4f1184ed.jpg b/Sklearn/sklearn-doc-zh/master/img/12ab1980b4b3f069be032c0d4f1184ed.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/12ab1980b4b3f069be032c0d4f1184ed.jpg rename to Sklearn/sklearn-doc-zh/master/img/12ab1980b4b3f069be032c0d4f1184ed.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/12b2c1da1f9041738fa7153efc651372.jpg b/Sklearn/sklearn-doc-zh/master/img/12b2c1da1f9041738fa7153efc651372.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/12b2c1da1f9041738fa7153efc651372.jpg rename to Sklearn/sklearn-doc-zh/master/img/12b2c1da1f9041738fa7153efc651372.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/12ecd862769bee1e71c75c134b6423bb.jpg 
b/Sklearn/sklearn-doc-zh/master/img/12ecd862769bee1e71c75c134b6423bb.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/12ecd862769bee1e71c75c134b6423bb.jpg rename to Sklearn/sklearn-doc-zh/master/img/12ecd862769bee1e71c75c134b6423bb.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1375f487efd6b9db955b7f7aafecc441.jpg b/Sklearn/sklearn-doc-zh/master/img/1375f487efd6b9db955b7f7aafecc441.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1375f487efd6b9db955b7f7aafecc441.jpg rename to Sklearn/sklearn-doc-zh/master/img/1375f487efd6b9db955b7f7aafecc441.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/13e06e04807641041a1d2df7a80043e2.jpg b/Sklearn/sklearn-doc-zh/master/img/13e06e04807641041a1d2df7a80043e2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/13e06e04807641041a1d2df7a80043e2.jpg rename to Sklearn/sklearn-doc-zh/master/img/13e06e04807641041a1d2df7a80043e2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/14120d5c5d7ad74513d356eed762622b.jpg b/Sklearn/sklearn-doc-zh/master/img/14120d5c5d7ad74513d356eed762622b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/14120d5c5d7ad74513d356eed762622b.jpg rename to Sklearn/sklearn-doc-zh/master/img/14120d5c5d7ad74513d356eed762622b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/148aed7690723555d32f36019c3d6948.jpg b/Sklearn/sklearn-doc-zh/master/img/148aed7690723555d32f36019c3d6948.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/148aed7690723555d32f36019c3d6948.jpg rename to Sklearn/sklearn-doc-zh/master/img/148aed7690723555d32f36019c3d6948.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/14d65d3148b0ea7c9ecb364423ecb0ed.jpg b/Sklearn/sklearn-doc-zh/master/img/14d65d3148b0ea7c9ecb364423ecb0ed.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/14d65d3148b0ea7c9ecb364423ecb0ed.jpg rename to Sklearn/sklearn-doc-zh/master/img/14d65d3148b0ea7c9ecb364423ecb0ed.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/14f6506599a88a5297ea712fa70eece4.jpg b/Sklearn/sklearn-doc-zh/master/img/14f6506599a88a5297ea712fa70eece4.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/14f6506599a88a5297ea712fa70eece4.jpg rename to Sklearn/sklearn-doc-zh/master/img/14f6506599a88a5297ea712fa70eece4.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/153aceb3cdac953277c6c840339ac023.jpg b/Sklearn/sklearn-doc-zh/master/img/153aceb3cdac953277c6c840339ac023.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/153aceb3cdac953277c6c840339ac023.jpg rename to Sklearn/sklearn-doc-zh/master/img/153aceb3cdac953277c6c840339ac023.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/156554c81cfe5f0230627ac0487fd07f.jpg b/Sklearn/sklearn-doc-zh/master/img/156554c81cfe5f0230627ac0487fd07f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/156554c81cfe5f0230627ac0487fd07f.jpg rename to Sklearn/sklearn-doc-zh/master/img/156554c81cfe5f0230627ac0487fd07f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/15a03cac7d4ab66fdf23e702ee75da2a.jpg b/Sklearn/sklearn-doc-zh/master/img/15a03cac7d4ab66fdf23e702ee75da2a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/15a03cac7d4ab66fdf23e702ee75da2a.jpg rename to Sklearn/sklearn-doc-zh/master/img/15a03cac7d4ab66fdf23e702ee75da2a.jpg diff --git 
a/Python/sklearn/sklearn-doc-zh/master/img/163f83188195d81958bfb733b7a3daa2.jpg b/Sklearn/sklearn-doc-zh/master/img/163f83188195d81958bfb733b7a3daa2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/163f83188195d81958bfb733b7a3daa2.jpg rename to Sklearn/sklearn-doc-zh/master/img/163f83188195d81958bfb733b7a3daa2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/165303a7d56136efa39130cd3cd9539e.jpg b/Sklearn/sklearn-doc-zh/master/img/165303a7d56136efa39130cd3cd9539e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/165303a7d56136efa39130cd3cd9539e.jpg rename to Sklearn/sklearn-doc-zh/master/img/165303a7d56136efa39130cd3cd9539e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/16622481c2bbb001363e20660b549ae9.jpg b/Sklearn/sklearn-doc-zh/master/img/16622481c2bbb001363e20660b549ae9.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/16622481c2bbb001363e20660b549ae9.jpg rename to Sklearn/sklearn-doc-zh/master/img/16622481c2bbb001363e20660b549ae9.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/170bd587959dabf132e4e0f39fa0a7b7.jpg b/Sklearn/sklearn-doc-zh/master/img/170bd587959dabf132e4e0f39fa0a7b7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/170bd587959dabf132e4e0f39fa0a7b7.jpg rename to Sklearn/sklearn-doc-zh/master/img/170bd587959dabf132e4e0f39fa0a7b7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/17430579d0bcbef3e2d99eba470792c8.jpg b/Sklearn/sklearn-doc-zh/master/img/17430579d0bcbef3e2d99eba470792c8.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/17430579d0bcbef3e2d99eba470792c8.jpg rename to Sklearn/sklearn-doc-zh/master/img/17430579d0bcbef3e2d99eba470792c8.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/17689bafe240fb42feab1cca674b5b88.jpg b/Sklearn/sklearn-doc-zh/master/img/17689bafe240fb42feab1cca674b5b88.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/17689bafe240fb42feab1cca674b5b88.jpg rename to Sklearn/sklearn-doc-zh/master/img/17689bafe240fb42feab1cca674b5b88.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/187eddee2de4e12860dc001c5f74b2b4.jpg b/Sklearn/sklearn-doc-zh/master/img/187eddee2de4e12860dc001c5f74b2b4.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/187eddee2de4e12860dc001c5f74b2b4.jpg rename to Sklearn/sklearn-doc-zh/master/img/187eddee2de4e12860dc001c5f74b2b4.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/18906a7fe0c5d78e0a291e472ded58ce.jpg b/Sklearn/sklearn-doc-zh/master/img/18906a7fe0c5d78e0a291e472ded58ce.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/18906a7fe0c5d78e0a291e472ded58ce.jpg rename to Sklearn/sklearn-doc-zh/master/img/18906a7fe0c5d78e0a291e472ded58ce.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/195ff50d437f1bb16125cdb1c311f3ab.jpg b/Sklearn/sklearn-doc-zh/master/img/195ff50d437f1bb16125cdb1c311f3ab.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/195ff50d437f1bb16125cdb1c311f3ab.jpg rename to Sklearn/sklearn-doc-zh/master/img/195ff50d437f1bb16125cdb1c311f3ab.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1_1_3.gif b/Sklearn/sklearn-doc-zh/master/img/1_1_3.gif similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1_1_3.gif rename to Sklearn/sklearn-doc-zh/master/img/1_1_3.gif diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1_1_4.gif 
b/Sklearn/sklearn-doc-zh/master/img/1_1_4.gif similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1_1_4.gif rename to Sklearn/sklearn-doc-zh/master/img/1_1_4.gif diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1_1_5.gif b/Sklearn/sklearn-doc-zh/master/img/1_1_5.gif similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1_1_5.gif rename to Sklearn/sklearn-doc-zh/master/img/1_1_5.gif diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1_1_6.gif b/Sklearn/sklearn-doc-zh/master/img/1_1_6.gif similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1_1_6.gif rename to Sklearn/sklearn-doc-zh/master/img/1_1_6.gif diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1_1_7.gif b/Sklearn/sklearn-doc-zh/master/img/1_1_7.gif similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1_1_7.gif rename to Sklearn/sklearn-doc-zh/master/img/1_1_7.gif diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1_6_1.png b/Sklearn/sklearn-doc-zh/master/img/1_6_1.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1_6_1.png rename to Sklearn/sklearn-doc-zh/master/img/1_6_1.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1_6_2.png b/Sklearn/sklearn-doc-zh/master/img/1_6_2.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1_6_2.png rename to Sklearn/sklearn-doc-zh/master/img/1_6_2.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1a125d0568be1ce93a6bc7b3ee1071e1.jpg b/Sklearn/sklearn-doc-zh/master/img/1a125d0568be1ce93a6bc7b3ee1071e1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1a125d0568be1ce93a6bc7b3ee1071e1.jpg rename to Sklearn/sklearn-doc-zh/master/img/1a125d0568be1ce93a6bc7b3ee1071e1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1a1bc66f06af187108d4250f068748c9.jpg b/Sklearn/sklearn-doc-zh/master/img/1a1bc66f06af187108d4250f068748c9.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1a1bc66f06af187108d4250f068748c9.jpg rename to Sklearn/sklearn-doc-zh/master/img/1a1bc66f06af187108d4250f068748c9.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1a78828504944887ab23097011f807d5.jpg b/Sklearn/sklearn-doc-zh/master/img/1a78828504944887ab23097011f807d5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1a78828504944887ab23097011f807d5.jpg rename to Sklearn/sklearn-doc-zh/master/img/1a78828504944887ab23097011f807d5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1a91bab921cf39f58a522ed15f475235.jpg b/Sklearn/sklearn-doc-zh/master/img/1a91bab921cf39f58a522ed15f475235.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1a91bab921cf39f58a522ed15f475235.jpg rename to Sklearn/sklearn-doc-zh/master/img/1a91bab921cf39f58a522ed15f475235.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1ae6d373d81c5f3f50905f336b4a070a.jpg b/Sklearn/sklearn-doc-zh/master/img/1ae6d373d81c5f3f50905f336b4a070a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1ae6d373d81c5f3f50905f336b4a070a.jpg rename to Sklearn/sklearn-doc-zh/master/img/1ae6d373d81c5f3f50905f336b4a070a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1b6228a71a038f66ac7b8a2743adf4e7.jpg b/Sklearn/sklearn-doc-zh/master/img/1b6228a71a038f66ac7b8a2743adf4e7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1b6228a71a038f66ac7b8a2743adf4e7.jpg rename to 
Sklearn/sklearn-doc-zh/master/img/1b6228a71a038f66ac7b8a2743adf4e7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1c12ea7ea179efd16ce513645034d41a.jpg b/Sklearn/sklearn-doc-zh/master/img/1c12ea7ea179efd16ce513645034d41a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1c12ea7ea179efd16ce513645034d41a.jpg rename to Sklearn/sklearn-doc-zh/master/img/1c12ea7ea179efd16ce513645034d41a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1c25ce7b0d4e2c7da1f0e73d2565c431.jpg b/Sklearn/sklearn-doc-zh/master/img/1c25ce7b0d4e2c7da1f0e73d2565c431.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1c25ce7b0d4e2c7da1f0e73d2565c431.jpg rename to Sklearn/sklearn-doc-zh/master/img/1c25ce7b0d4e2c7da1f0e73d2565c431.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1cde7e5f92efe9056f9f53e23ea04102.jpg b/Sklearn/sklearn-doc-zh/master/img/1cde7e5f92efe9056f9f53e23ea04102.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1cde7e5f92efe9056f9f53e23ea04102.jpg rename to Sklearn/sklearn-doc-zh/master/img/1cde7e5f92efe9056f9f53e23ea04102.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1ce6ae6c075734e41812dc91b67d16e5.jpg b/Sklearn/sklearn-doc-zh/master/img/1ce6ae6c075734e41812dc91b67d16e5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1ce6ae6c075734e41812dc91b67d16e5.jpg rename to Sklearn/sklearn-doc-zh/master/img/1ce6ae6c075734e41812dc91b67d16e5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1d57cc4e24d28b8283f98bda247d84e7.jpg b/Sklearn/sklearn-doc-zh/master/img/1d57cc4e24d28b8283f98bda247d84e7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1d57cc4e24d28b8283f98bda247d84e7.jpg rename to Sklearn/sklearn-doc-zh/master/img/1d57cc4e24d28b8283f98bda247d84e7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1d9a1eb9200948482ebfd5811679276a.jpg b/Sklearn/sklearn-doc-zh/master/img/1d9a1eb9200948482ebfd5811679276a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1d9a1eb9200948482ebfd5811679276a.jpg rename to Sklearn/sklearn-doc-zh/master/img/1d9a1eb9200948482ebfd5811679276a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1da4a8f74cdb166cdc91e2f691cf3ac5.jpg b/Sklearn/sklearn-doc-zh/master/img/1da4a8f74cdb166cdc91e2f691cf3ac5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1da4a8f74cdb166cdc91e2f691cf3ac5.jpg rename to Sklearn/sklearn-doc-zh/master/img/1da4a8f74cdb166cdc91e2f691cf3ac5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1daad9ca4ebf0fe49529674b890b3df2.jpg b/Sklearn/sklearn-doc-zh/master/img/1daad9ca4ebf0fe49529674b890b3df2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1daad9ca4ebf0fe49529674b890b3df2.jpg rename to Sklearn/sklearn-doc-zh/master/img/1daad9ca4ebf0fe49529674b890b3df2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1ddb602277c74f97c01c61af96da4a87.jpg b/Sklearn/sklearn-doc-zh/master/img/1ddb602277c74f97c01c61af96da4a87.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1ddb602277c74f97c01c61af96da4a87.jpg rename to Sklearn/sklearn-doc-zh/master/img/1ddb602277c74f97c01c61af96da4a87.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1e4e584cd8a99da7f18a5581de1f7be3.jpg b/Sklearn/sklearn-doc-zh/master/img/1e4e584cd8a99da7f18a5581de1f7be3.jpg similarity index 100% rename from 
Python/sklearn/sklearn-doc-zh/master/img/1e4e584cd8a99da7f18a5581de1f7be3.jpg rename to Sklearn/sklearn-doc-zh/master/img/1e4e584cd8a99da7f18a5581de1f7be3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1f1667a67d885f419222cbd85c70dd56.jpg b/Sklearn/sklearn-doc-zh/master/img/1f1667a67d885f419222cbd85c70dd56.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1f1667a67d885f419222cbd85c70dd56.jpg rename to Sklearn/sklearn-doc-zh/master/img/1f1667a67d885f419222cbd85c70dd56.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1f7b275b5002d3772b809055d9199f91.jpg b/Sklearn/sklearn-doc-zh/master/img/1f7b275b5002d3772b809055d9199f91.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1f7b275b5002d3772b809055d9199f91.jpg rename to Sklearn/sklearn-doc-zh/master/img/1f7b275b5002d3772b809055d9199f91.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1f8c02d4fdbdbcaa014972bca5620cf3.jpg b/Sklearn/sklearn-doc-zh/master/img/1f8c02d4fdbdbcaa014972bca5620cf3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1f8c02d4fdbdbcaa014972bca5620cf3.jpg rename to Sklearn/sklearn-doc-zh/master/img/1f8c02d4fdbdbcaa014972bca5620cf3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1f9000a4bf057edcb9b87d7a4abb8e8d.jpg b/Sklearn/sklearn-doc-zh/master/img/1f9000a4bf057edcb9b87d7a4abb8e8d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1f9000a4bf057edcb9b87d7a4abb8e8d.jpg rename to Sklearn/sklearn-doc-zh/master/img/1f9000a4bf057edcb9b87d7a4abb8e8d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1fa419ff78b610bf4a5b3b71df728cec.jpg b/Sklearn/sklearn-doc-zh/master/img/1fa419ff78b610bf4a5b3b71df728cec.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1fa419ff78b610bf4a5b3b71df728cec.jpg rename to Sklearn/sklearn-doc-zh/master/img/1fa419ff78b610bf4a5b3b71df728cec.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1fc7cc5cbdba693962c7708456165810.jpg b/Sklearn/sklearn-doc-zh/master/img/1fc7cc5cbdba693962c7708456165810.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1fc7cc5cbdba693962c7708456165810.jpg rename to Sklearn/sklearn-doc-zh/master/img/1fc7cc5cbdba693962c7708456165810.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1fc92e9d8efa5433f7346284592e9ea0.jpg b/Sklearn/sklearn-doc-zh/master/img/1fc92e9d8efa5433f7346284592e9ea0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1fc92e9d8efa5433f7346284592e9ea0.jpg rename to Sklearn/sklearn-doc-zh/master/img/1fc92e9d8efa5433f7346284592e9ea0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1ff26934befcf3ca9623f1e729a8824c.jpg b/Sklearn/sklearn-doc-zh/master/img/1ff26934befcf3ca9623f1e729a8824c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1ff26934befcf3ca9623f1e729a8824c.jpg rename to Sklearn/sklearn-doc-zh/master/img/1ff26934befcf3ca9623f1e729a8824c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/1ff751c4de3bbad5543f0dbbad73dd35.jpg b/Sklearn/sklearn-doc-zh/master/img/1ff751c4de3bbad5543f0dbbad73dd35.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/1ff751c4de3bbad5543f0dbbad73dd35.jpg rename to Sklearn/sklearn-doc-zh/master/img/1ff751c4de3bbad5543f0dbbad73dd35.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/20310556eb1fb84146ff2584e166fd9c.jpg b/Sklearn/sklearn-doc-zh/master/img/20310556eb1fb84146ff2584e166fd9c.jpg similarity index 
100% rename from Python/sklearn/sklearn-doc-zh/master/img/20310556eb1fb84146ff2584e166fd9c.jpg rename to Sklearn/sklearn-doc-zh/master/img/20310556eb1fb84146ff2584e166fd9c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/203c5a2c58d6567a86dbc86faa92209e.jpg b/Sklearn/sklearn-doc-zh/master/img/203c5a2c58d6567a86dbc86faa92209e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/203c5a2c58d6567a86dbc86faa92209e.jpg rename to Sklearn/sklearn-doc-zh/master/img/203c5a2c58d6567a86dbc86faa92209e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/207e92cfc624372bc9c72a160c02114f.jpg b/Sklearn/sklearn-doc-zh/master/img/207e92cfc624372bc9c72a160c02114f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/207e92cfc624372bc9c72a160c02114f.jpg rename to Sklearn/sklearn-doc-zh/master/img/207e92cfc624372bc9c72a160c02114f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/20d6857e752f6ffdfdd20a88c32f837c.jpg b/Sklearn/sklearn-doc-zh/master/img/20d6857e752f6ffdfdd20a88c32f837c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/20d6857e752f6ffdfdd20a88c32f837c.jpg rename to Sklearn/sklearn-doc-zh/master/img/20d6857e752f6ffdfdd20a88c32f837c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/21937e85250a7aaa8aea86e4fbf93452.jpg b/Sklearn/sklearn-doc-zh/master/img/21937e85250a7aaa8aea86e4fbf93452.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/21937e85250a7aaa8aea86e4fbf93452.jpg rename to Sklearn/sklearn-doc-zh/master/img/21937e85250a7aaa8aea86e4fbf93452.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/21b6e31779ad3b1a382b13e65ec917c7.jpg b/Sklearn/sklearn-doc-zh/master/img/21b6e31779ad3b1a382b13e65ec917c7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/21b6e31779ad3b1a382b13e65ec917c7.jpg rename to Sklearn/sklearn-doc-zh/master/img/21b6e31779ad3b1a382b13e65ec917c7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/21e9f42c5b6730d593e37a11c6ffb13a.jpg b/Sklearn/sklearn-doc-zh/master/img/21e9f42c5b6730d593e37a11c6ffb13a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/21e9f42c5b6730d593e37a11c6ffb13a.jpg rename to Sklearn/sklearn-doc-zh/master/img/21e9f42c5b6730d593e37a11c6ffb13a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2238e05d9e3ae45b81577c9902a9cfbb.jpg b/Sklearn/sklearn-doc-zh/master/img/2238e05d9e3ae45b81577c9902a9cfbb.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2238e05d9e3ae45b81577c9902a9cfbb.jpg rename to Sklearn/sklearn-doc-zh/master/img/2238e05d9e3ae45b81577c9902a9cfbb.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/223988a8bef489edcaa2f198e5e3a9a5.jpg b/Sklearn/sklearn-doc-zh/master/img/223988a8bef489edcaa2f198e5e3a9a5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/223988a8bef489edcaa2f198e5e3a9a5.jpg rename to Sklearn/sklearn-doc-zh/master/img/223988a8bef489edcaa2f198e5e3a9a5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/224162d8cfff6c8ad85718be6b261b70.jpg b/Sklearn/sklearn-doc-zh/master/img/224162d8cfff6c8ad85718be6b261b70.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/224162d8cfff6c8ad85718be6b261b70.jpg rename to Sklearn/sklearn-doc-zh/master/img/224162d8cfff6c8ad85718be6b261b70.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/22c1b7663568bf6c404a07778507a93e.jpg b/Sklearn/sklearn-doc-zh/master/img/22c1b7663568bf6c404a07778507a93e.jpg 
similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/22c1b7663568bf6c404a07778507a93e.jpg rename to Sklearn/sklearn-doc-zh/master/img/22c1b7663568bf6c404a07778507a93e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/22f3a10ad9acceb77ea6193f945b11cf.jpg b/Sklearn/sklearn-doc-zh/master/img/22f3a10ad9acceb77ea6193f945b11cf.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/22f3a10ad9acceb77ea6193f945b11cf.jpg rename to Sklearn/sklearn-doc-zh/master/img/22f3a10ad9acceb77ea6193f945b11cf.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/23bd4397a3e30a81d2ee26977f708e63.jpg b/Sklearn/sklearn-doc-zh/master/img/23bd4397a3e30a81d2ee26977f708e63.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/23bd4397a3e30a81d2ee26977f708e63.jpg rename to Sklearn/sklearn-doc-zh/master/img/23bd4397a3e30a81d2ee26977f708e63.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/23dac8b2be31a1cbe914b59ff2670dbf.jpg b/Sklearn/sklearn-doc-zh/master/img/23dac8b2be31a1cbe914b59ff2670dbf.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/23dac8b2be31a1cbe914b59ff2670dbf.jpg rename to Sklearn/sklearn-doc-zh/master/img/23dac8b2be31a1cbe914b59ff2670dbf.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2490002eaeb5d875d22825fc267ef925.jpg b/Sklearn/sklearn-doc-zh/master/img/2490002eaeb5d875d22825fc267ef925.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2490002eaeb5d875d22825fc267ef925.jpg rename to Sklearn/sklearn-doc-zh/master/img/2490002eaeb5d875d22825fc267ef925.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/25a334612cb5a1736ebcc7eec00c7b29.jpg b/Sklearn/sklearn-doc-zh/master/img/25a334612cb5a1736ebcc7eec00c7b29.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/25a334612cb5a1736ebcc7eec00c7b29.jpg rename to Sklearn/sklearn-doc-zh/master/img/25a334612cb5a1736ebcc7eec00c7b29.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2639e09c454322633772269753a50710.jpg b/Sklearn/sklearn-doc-zh/master/img/2639e09c454322633772269753a50710.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2639e09c454322633772269753a50710.jpg rename to Sklearn/sklearn-doc-zh/master/img/2639e09c454322633772269753a50710.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/264ba68c53c2e2867b9e307e8b940d49.jpg b/Sklearn/sklearn-doc-zh/master/img/264ba68c53c2e2867b9e307e8b940d49.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/264ba68c53c2e2867b9e307e8b940d49.jpg rename to Sklearn/sklearn-doc-zh/master/img/264ba68c53c2e2867b9e307e8b940d49.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/264dc5b617a5aa98151c4ea6975e9a90.jpg b/Sklearn/sklearn-doc-zh/master/img/264dc5b617a5aa98151c4ea6975e9a90.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/264dc5b617a5aa98151c4ea6975e9a90.jpg rename to Sklearn/sklearn-doc-zh/master/img/264dc5b617a5aa98151c4ea6975e9a90.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/26f66401927461c0129b7f4ad33e5322.jpg b/Sklearn/sklearn-doc-zh/master/img/26f66401927461c0129b7f4ad33e5322.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/26f66401927461c0129b7f4ad33e5322.jpg rename to Sklearn/sklearn-doc-zh/master/img/26f66401927461c0129b7f4ad33e5322.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2736dbaab8f81e4cb2d0e388f2b0c6b2.jpg 
b/Sklearn/sklearn-doc-zh/master/img/2736dbaab8f81e4cb2d0e388f2b0c6b2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2736dbaab8f81e4cb2d0e388f2b0c6b2.jpg rename to Sklearn/sklearn-doc-zh/master/img/2736dbaab8f81e4cb2d0e388f2b0c6b2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/27449ee75d40c9391b04e2ca48c4d83b.jpg b/Sklearn/sklearn-doc-zh/master/img/27449ee75d40c9391b04e2ca48c4d83b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/27449ee75d40c9391b04e2ca48c4d83b.jpg rename to Sklearn/sklearn-doc-zh/master/img/27449ee75d40c9391b04e2ca48c4d83b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/27eaae520bfaa9c4bdbef494c5029741.jpg b/Sklearn/sklearn-doc-zh/master/img/27eaae520bfaa9c4bdbef494c5029741.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/27eaae520bfaa9c4bdbef494c5029741.jpg rename to Sklearn/sklearn-doc-zh/master/img/27eaae520bfaa9c4bdbef494c5029741.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/280a00b263d3144cd3a9c424ed44ee51.jpg b/Sklearn/sklearn-doc-zh/master/img/280a00b263d3144cd3a9c424ed44ee51.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/280a00b263d3144cd3a9c424ed44ee51.jpg rename to Sklearn/sklearn-doc-zh/master/img/280a00b263d3144cd3a9c424ed44ee51.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/287e15c4b3d9b3f227fdc8e364609382.jpg b/Sklearn/sklearn-doc-zh/master/img/287e15c4b3d9b3f227fdc8e364609382.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/287e15c4b3d9b3f227fdc8e364609382.jpg rename to Sklearn/sklearn-doc-zh/master/img/287e15c4b3d9b3f227fdc8e364609382.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2953c3498ec0877c5ebcc172050cce88.jpg b/Sklearn/sklearn-doc-zh/master/img/2953c3498ec0877c5ebcc172050cce88.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2953c3498ec0877c5ebcc172050cce88.jpg rename to Sklearn/sklearn-doc-zh/master/img/2953c3498ec0877c5ebcc172050cce88.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2a0c137e7b86ad939e131293a273579b.jpg b/Sklearn/sklearn-doc-zh/master/img/2a0c137e7b86ad939e131293a273579b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2a0c137e7b86ad939e131293a273579b.jpg rename to Sklearn/sklearn-doc-zh/master/img/2a0c137e7b86ad939e131293a273579b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2a0c137e7b86ad939e131293a273579b.png b/Sklearn/sklearn-doc-zh/master/img/2a0c137e7b86ad939e131293a273579b.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2a0c137e7b86ad939e131293a273579b.png rename to Sklearn/sklearn-doc-zh/master/img/2a0c137e7b86ad939e131293a273579b.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2a332823ff053f404ac53657eb86b1a0.jpg b/Sklearn/sklearn-doc-zh/master/img/2a332823ff053f404ac53657eb86b1a0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2a332823ff053f404ac53657eb86b1a0.jpg rename to Sklearn/sklearn-doc-zh/master/img/2a332823ff053f404ac53657eb86b1a0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2a58cf81e4ccf9022d6033557d49aed5.jpg b/Sklearn/sklearn-doc-zh/master/img/2a58cf81e4ccf9022d6033557d49aed5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2a58cf81e4ccf9022d6033557d49aed5.jpg rename to Sklearn/sklearn-doc-zh/master/img/2a58cf81e4ccf9022d6033557d49aed5.jpg diff --git 
a/Python/sklearn/sklearn-doc-zh/master/img/2a96390b6e7eb8fc07579c2f9066fc4d.jpg b/Sklearn/sklearn-doc-zh/master/img/2a96390b6e7eb8fc07579c2f9066fc4d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2a96390b6e7eb8fc07579c2f9066fc4d.jpg rename to Sklearn/sklearn-doc-zh/master/img/2a96390b6e7eb8fc07579c2f9066fc4d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2ad6b07024498864a0ce275913a42d9f.jpg b/Sklearn/sklearn-doc-zh/master/img/2ad6b07024498864a0ce275913a42d9f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2ad6b07024498864a0ce275913a42d9f.jpg rename to Sklearn/sklearn-doc-zh/master/img/2ad6b07024498864a0ce275913a42d9f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2af4d75ca07ede34c7d38b8f7708723d.jpg b/Sklearn/sklearn-doc-zh/master/img/2af4d75ca07ede34c7d38b8f7708723d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2af4d75ca07ede34c7d38b8f7708723d.jpg rename to Sklearn/sklearn-doc-zh/master/img/2af4d75ca07ede34c7d38b8f7708723d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2b0d9f09a2b8a107ace9ce7aa234481e.jpg b/Sklearn/sklearn-doc-zh/master/img/2b0d9f09a2b8a107ace9ce7aa234481e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2b0d9f09a2b8a107ace9ce7aa234481e.jpg rename to Sklearn/sklearn-doc-zh/master/img/2b0d9f09a2b8a107ace9ce7aa234481e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2b117c8d0556a027e7ee3bb265a6bf63.jpg b/Sklearn/sklearn-doc-zh/master/img/2b117c8d0556a027e7ee3bb265a6bf63.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2b117c8d0556a027e7ee3bb265a6bf63.jpg rename to Sklearn/sklearn-doc-zh/master/img/2b117c8d0556a027e7ee3bb265a6bf63.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2bd24ed32bcf24db79058c3cc81f5331.jpg b/Sklearn/sklearn-doc-zh/master/img/2bd24ed32bcf24db79058c3cc81f5331.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2bd24ed32bcf24db79058c3cc81f5331.jpg rename to Sklearn/sklearn-doc-zh/master/img/2bd24ed32bcf24db79058c3cc81f5331.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2bf67de05d37f31bd2154fdc96690102.jpg b/Sklearn/sklearn-doc-zh/master/img/2bf67de05d37f31bd2154fdc96690102.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2bf67de05d37f31bd2154fdc96690102.jpg rename to Sklearn/sklearn-doc-zh/master/img/2bf67de05d37f31bd2154fdc96690102.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2c1da71c882c95ba6660cdad0d976f6d.jpg b/Sklearn/sklearn-doc-zh/master/img/2c1da71c882c95ba6660cdad0d976f6d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2c1da71c882c95ba6660cdad0d976f6d.jpg rename to Sklearn/sklearn-doc-zh/master/img/2c1da71c882c95ba6660cdad0d976f6d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2c2dcc83fc38e46810a36e59b2614a5c.jpg b/Sklearn/sklearn-doc-zh/master/img/2c2dcc83fc38e46810a36e59b2614a5c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2c2dcc83fc38e46810a36e59b2614a5c.jpg rename to Sklearn/sklearn-doc-zh/master/img/2c2dcc83fc38e46810a36e59b2614a5c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2c90187d36ba884ee9ae4c99334fb3b4.jpg b/Sklearn/sklearn-doc-zh/master/img/2c90187d36ba884ee9ae4c99334fb3b4.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2c90187d36ba884ee9ae4c99334fb3b4.jpg rename to 
Sklearn/sklearn-doc-zh/master/img/2c90187d36ba884ee9ae4c99334fb3b4.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2ca002ed0f4e27f9040d3f3ec58fbb38.jpg b/Sklearn/sklearn-doc-zh/master/img/2ca002ed0f4e27f9040d3f3ec58fbb38.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2ca002ed0f4e27f9040d3f3ec58fbb38.jpg rename to Sklearn/sklearn-doc-zh/master/img/2ca002ed0f4e27f9040d3f3ec58fbb38.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2d3029206649000f40ed9f51bbeceafb.jpg b/Sklearn/sklearn-doc-zh/master/img/2d3029206649000f40ed9f51bbeceafb.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2d3029206649000f40ed9f51bbeceafb.jpg rename to Sklearn/sklearn-doc-zh/master/img/2d3029206649000f40ed9f51bbeceafb.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2d4c303729e327500afa8bdb343713ff.jpg b/Sklearn/sklearn-doc-zh/master/img/2d4c303729e327500afa8bdb343713ff.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2d4c303729e327500afa8bdb343713ff.jpg rename to Sklearn/sklearn-doc-zh/master/img/2d4c303729e327500afa8bdb343713ff.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2d7a3ddf62ceb125c15ba1947173e790.jpg b/Sklearn/sklearn-doc-zh/master/img/2d7a3ddf62ceb125c15ba1947173e790.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2d7a3ddf62ceb125c15ba1947173e790.jpg rename to Sklearn/sklearn-doc-zh/master/img/2d7a3ddf62ceb125c15ba1947173e790.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2e06713c93719ff874fb9f4fab7a6fbf.jpg b/Sklearn/sklearn-doc-zh/master/img/2e06713c93719ff874fb9f4fab7a6fbf.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2e06713c93719ff874fb9f4fab7a6fbf.jpg rename to Sklearn/sklearn-doc-zh/master/img/2e06713c93719ff874fb9f4fab7a6fbf.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2e2461d59015f9759fa0612965e2425e.jpg b/Sklearn/sklearn-doc-zh/master/img/2e2461d59015f9759fa0612965e2425e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2e2461d59015f9759fa0612965e2425e.jpg rename to Sklearn/sklearn-doc-zh/master/img/2e2461d59015f9759fa0612965e2425e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2edeef5a5007d4bd8b4f43fe2670cf85.jpg b/Sklearn/sklearn-doc-zh/master/img/2edeef5a5007d4bd8b4f43fe2670cf85.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2edeef5a5007d4bd8b4f43fe2670cf85.jpg rename to Sklearn/sklearn-doc-zh/master/img/2edeef5a5007d4bd8b4f43fe2670cf85.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2f373d871220ee042a8c2ee44e6fff3a.jpg b/Sklearn/sklearn-doc-zh/master/img/2f373d871220ee042a8c2ee44e6fff3a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2f373d871220ee042a8c2ee44e6fff3a.jpg rename to Sklearn/sklearn-doc-zh/master/img/2f373d871220ee042a8c2ee44e6fff3a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/2f6a285b749960084841d17c3c97f2d7.jpg b/Sklearn/sklearn-doc-zh/master/img/2f6a285b749960084841d17c3c97f2d7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/2f6a285b749960084841d17c3c97f2d7.jpg rename to Sklearn/sklearn-doc-zh/master/img/2f6a285b749960084841d17c3c97f2d7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3-5-001.png b/Sklearn/sklearn-doc-zh/master/img/3-5-001.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/3-5-001.png rename to Sklearn/sklearn-doc-zh/master/img/3-5-001.png diff --git 
a/Python/sklearn/sklearn-doc-zh/master/img/3001d4b4ed72087e1a6010f43c053cc6.jpg b/Sklearn/sklearn-doc-zh/master/img/3001d4b4ed72087e1a6010f43c053cc6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3001d4b4ed72087e1a6010f43c053cc6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3001d4b4ed72087e1a6010f43c053cc6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/300d1995dc6050bbfd575b2c14ec81ae.jpg b/Sklearn/sklearn-doc-zh/master/img/300d1995dc6050bbfd575b2c14ec81ae.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/300d1995dc6050bbfd575b2c14ec81ae.jpg
rename to Sklearn/sklearn-doc-zh/master/img/300d1995dc6050bbfd575b2c14ec81ae.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/30641b10b766d35775b6bbb4d21e74b7.jpg b/Sklearn/sklearn-doc-zh/master/img/30641b10b766d35775b6bbb4d21e74b7.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/30641b10b766d35775b6bbb4d21e74b7.jpg
rename to Sklearn/sklearn-doc-zh/master/img/30641b10b766d35775b6bbb4d21e74b7.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/30fc2a610fc7de5c19317e1fc584765f.jpg b/Sklearn/sklearn-doc-zh/master/img/30fc2a610fc7de5c19317e1fc584765f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/30fc2a610fc7de5c19317e1fc584765f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/30fc2a610fc7de5c19317e1fc584765f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3109bc087a626380237668dfcc4ecd96.jpg b/Sklearn/sklearn-doc-zh/master/img/3109bc087a626380237668dfcc4ecd96.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3109bc087a626380237668dfcc4ecd96.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3109bc087a626380237668dfcc4ecd96.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/319e234a072e86b6b55ce431ca56b43e.jpg b/Sklearn/sklearn-doc-zh/master/img/319e234a072e86b6b55ce431ca56b43e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/319e234a072e86b6b55ce431ca56b43e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/319e234a072e86b6b55ce431ca56b43e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/32246af90101d1607825a589ebea6880.jpg b/Sklearn/sklearn-doc-zh/master/img/32246af90101d1607825a589ebea6880.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/32246af90101d1607825a589ebea6880.jpg
rename to Sklearn/sklearn-doc-zh/master/img/32246af90101d1607825a589ebea6880.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/32f500a4e2eba65727c1e003699dff90.jpg b/Sklearn/sklearn-doc-zh/master/img/32f500a4e2eba65727c1e003699dff90.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/32f500a4e2eba65727c1e003699dff90.jpg
rename to Sklearn/sklearn-doc-zh/master/img/32f500a4e2eba65727c1e003699dff90.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/337f0e255aa71dafb655629cb09a0c14.jpg b/Sklearn/sklearn-doc-zh/master/img/337f0e255aa71dafb655629cb09a0c14.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/337f0e255aa71dafb655629cb09a0c14.jpg
rename to Sklearn/sklearn-doc-zh/master/img/337f0e255aa71dafb655629cb09a0c14.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/33a8ceddf8e3edfad259a804819c2637.jpg b/Sklearn/sklearn-doc-zh/master/img/33a8ceddf8e3edfad259a804819c2637.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/33a8ceddf8e3edfad259a804819c2637.jpg
rename to Sklearn/sklearn-doc-zh/master/img/33a8ceddf8e3edfad259a804819c2637.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/33b1cdc0654561cadac36a1232552b99.jpg b/Sklearn/sklearn-doc-zh/master/img/33b1cdc0654561cadac36a1232552b99.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/33b1cdc0654561cadac36a1232552b99.jpg
rename to Sklearn/sklearn-doc-zh/master/img/33b1cdc0654561cadac36a1232552b99.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/33b99b5c21f0cf5b03e92fe60cbe6ad0.jpg b/Sklearn/sklearn-doc-zh/master/img/33b99b5c21f0cf5b03e92fe60cbe6ad0.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/33b99b5c21f0cf5b03e92fe60cbe6ad0.jpg
rename to Sklearn/sklearn-doc-zh/master/img/33b99b5c21f0cf5b03e92fe60cbe6ad0.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/33d1bf322bf0f6046a1145dbc264803b.jpg b/Sklearn/sklearn-doc-zh/master/img/33d1bf322bf0f6046a1145dbc264803b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/33d1bf322bf0f6046a1145dbc264803b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/33d1bf322bf0f6046a1145dbc264803b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/33eb0dacfcc0df16c84bfaed52d31859.jpg b/Sklearn/sklearn-doc-zh/master/img/33eb0dacfcc0df16c84bfaed52d31859.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/33eb0dacfcc0df16c84bfaed52d31859.jpg
rename to Sklearn/sklearn-doc-zh/master/img/33eb0dacfcc0df16c84bfaed52d31859.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/33f1b6fb64999d2af571c675b7f17f34.jpg b/Sklearn/sklearn-doc-zh/master/img/33f1b6fb64999d2af571c675b7f17f34.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/33f1b6fb64999d2af571c675b7f17f34.jpg
rename to Sklearn/sklearn-doc-zh/master/img/33f1b6fb64999d2af571c675b7f17f34.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3405852dc63c9a78447d479784f1ee7e.jpg b/Sklearn/sklearn-doc-zh/master/img/3405852dc63c9a78447d479784f1ee7e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3405852dc63c9a78447d479784f1ee7e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3405852dc63c9a78447d479784f1ee7e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/341145ae3a937e5fa152262d13dc6fcf.jpg b/Sklearn/sklearn-doc-zh/master/img/341145ae3a937e5fa152262d13dc6fcf.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/341145ae3a937e5fa152262d13dc6fcf.jpg
rename to Sklearn/sklearn-doc-zh/master/img/341145ae3a937e5fa152262d13dc6fcf.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/343401666d8fc0aeeea395495b9dc570.jpg b/Sklearn/sklearn-doc-zh/master/img/343401666d8fc0aeeea395495b9dc570.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/343401666d8fc0aeeea395495b9dc570.jpg
rename to Sklearn/sklearn-doc-zh/master/img/343401666d8fc0aeeea395495b9dc570.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/345ab99b5a1246fb019e249dae570191.jpg b/Sklearn/sklearn-doc-zh/master/img/345ab99b5a1246fb019e249dae570191.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/345ab99b5a1246fb019e249dae570191.jpg
rename to Sklearn/sklearn-doc-zh/master/img/345ab99b5a1246fb019e249dae570191.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/352bc5f9f9d6aefcdaf8deca4f7964ff.jpg b/Sklearn/sklearn-doc-zh/master/img/352bc5f9f9d6aefcdaf8deca4f7964ff.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/352bc5f9f9d6aefcdaf8deca4f7964ff.jpg
rename to Sklearn/sklearn-doc-zh/master/img/352bc5f9f9d6aefcdaf8deca4f7964ff.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/355df435a05593b653d7d988c06e5d3c.jpg b/Sklearn/sklearn-doc-zh/master/img/355df435a05593b653d7d988c06e5d3c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/355df435a05593b653d7d988c06e5d3c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/355df435a05593b653d7d988c06e5d3c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3579ce0b5c145fb891d865367eeba3ac.jpg b/Sklearn/sklearn-doc-zh/master/img/3579ce0b5c145fb891d865367eeba3ac.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3579ce0b5c145fb891d865367eeba3ac.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3579ce0b5c145fb891d865367eeba3ac.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/35a2693b8dbfe5cf9335dc2659c6ef96.jpg b/Sklearn/sklearn-doc-zh/master/img/35a2693b8dbfe5cf9335dc2659c6ef96.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/35a2693b8dbfe5cf9335dc2659c6ef96.jpg
rename to Sklearn/sklearn-doc-zh/master/img/35a2693b8dbfe5cf9335dc2659c6ef96.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/35b3276dd7e50cda7dd79a91161a1a26.jpg b/Sklearn/sklearn-doc-zh/master/img/35b3276dd7e50cda7dd79a91161a1a26.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/35b3276dd7e50cda7dd79a91161a1a26.jpg
rename to Sklearn/sklearn-doc-zh/master/img/35b3276dd7e50cda7dd79a91161a1a26.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/362af37df82b07d11576fc5e45db7828.jpg b/Sklearn/sklearn-doc-zh/master/img/362af37df82b07d11576fc5e45db7828.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/362af37df82b07d11576fc5e45db7828.jpg
rename to Sklearn/sklearn-doc-zh/master/img/362af37df82b07d11576fc5e45db7828.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/36846302fd24ac609e0cfef5fe6f8678.jpg b/Sklearn/sklearn-doc-zh/master/img/36846302fd24ac609e0cfef5fe6f8678.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/36846302fd24ac609e0cfef5fe6f8678.jpg
rename to Sklearn/sklearn-doc-zh/master/img/36846302fd24ac609e0cfef5fe6f8678.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/369b6e6bd43ee84fe99e14c8d78cdc9f.jpg b/Sklearn/sklearn-doc-zh/master/img/369b6e6bd43ee84fe99e14c8d78cdc9f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/369b6e6bd43ee84fe99e14c8d78cdc9f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/369b6e6bd43ee84fe99e14c8d78cdc9f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/36aff9afacf42a6a0b903bb6cd3409dc.jpg b/Sklearn/sklearn-doc-zh/master/img/36aff9afacf42a6a0b903bb6cd3409dc.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/36aff9afacf42a6a0b903bb6cd3409dc.jpg
rename to Sklearn/sklearn-doc-zh/master/img/36aff9afacf42a6a0b903bb6cd3409dc.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/36c2dba9ae7680cd09eff62c73e37963.jpg b/Sklearn/sklearn-doc-zh/master/img/36c2dba9ae7680cd09eff62c73e37963.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/36c2dba9ae7680cd09eff62c73e37963.jpg
rename to Sklearn/sklearn-doc-zh/master/img/36c2dba9ae7680cd09eff62c73e37963.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/36f54997ff4df647587d1bfd2ddb3ee2.jpg b/Sklearn/sklearn-doc-zh/master/img/36f54997ff4df647587d1bfd2ddb3ee2.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/36f54997ff4df647587d1bfd2ddb3ee2.jpg
rename to Sklearn/sklearn-doc-zh/master/img/36f54997ff4df647587d1bfd2ddb3ee2.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/375fd0c32c15c4547b00ae36c0ec0a6a.jpg b/Sklearn/sklearn-doc-zh/master/img/375fd0c32c15c4547b00ae36c0ec0a6a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/375fd0c32c15c4547b00ae36c0ec0a6a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/375fd0c32c15c4547b00ae36c0ec0a6a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3771db7af1e3b7bf33e15ec20d278f39.jpg b/Sklearn/sklearn-doc-zh/master/img/3771db7af1e3b7bf33e15ec20d278f39.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3771db7af1e3b7bf33e15ec20d278f39.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3771db7af1e3b7bf33e15ec20d278f39.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3776f899ba5f1d7432c553c6c3aae381.jpg b/Sklearn/sklearn-doc-zh/master/img/3776f899ba5f1d7432c553c6c3aae381.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3776f899ba5f1d7432c553c6c3aae381.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3776f899ba5f1d7432c553c6c3aae381.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/377b02d08a6b388008fc8cb132080d20.jpg b/Sklearn/sklearn-doc-zh/master/img/377b02d08a6b388008fc8cb132080d20.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/377b02d08a6b388008fc8cb132080d20.jpg
rename to Sklearn/sklearn-doc-zh/master/img/377b02d08a6b388008fc8cb132080d20.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/37e4251726a37bc02df4ef4390572e9a.jpg b/Sklearn/sklearn-doc-zh/master/img/37e4251726a37bc02df4ef4390572e9a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/37e4251726a37bc02df4ef4390572e9a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/37e4251726a37bc02df4ef4390572e9a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/38320089278fc639e640f3f772eac6b1.jpg b/Sklearn/sklearn-doc-zh/master/img/38320089278fc639e640f3f772eac6b1.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/38320089278fc639e640f3f772eac6b1.jpg
rename to Sklearn/sklearn-doc-zh/master/img/38320089278fc639e640f3f772eac6b1.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/38437ee82743c886e2ebfbb5bd5e0c89.jpg b/Sklearn/sklearn-doc-zh/master/img/38437ee82743c886e2ebfbb5bd5e0c89.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/38437ee82743c886e2ebfbb5bd5e0c89.jpg
rename to Sklearn/sklearn-doc-zh/master/img/38437ee82743c886e2ebfbb5bd5e0c89.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/385a9104b38457eeb59acf86cf974472.jpg b/Sklearn/sklearn-doc-zh/master/img/385a9104b38457eeb59acf86cf974472.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/385a9104b38457eeb59acf86cf974472.jpg
rename to Sklearn/sklearn-doc-zh/master/img/385a9104b38457eeb59acf86cf974472.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/38a411931f9f49e71b888f7998427122.jpg b/Sklearn/sklearn-doc-zh/master/img/38a411931f9f49e71b888f7998427122.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/38a411931f9f49e71b888f7998427122.jpg
rename to Sklearn/sklearn-doc-zh/master/img/38a411931f9f49e71b888f7998427122.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/390e2bdab30b6e7421082f13e8cfd6b0.jpg b/Sklearn/sklearn-doc-zh/master/img/390e2bdab30b6e7421082f13e8cfd6b0.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/390e2bdab30b6e7421082f13e8cfd6b0.jpg
rename to Sklearn/sklearn-doc-zh/master/img/390e2bdab30b6e7421082f13e8cfd6b0.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3944d32654b4bf939d248b496f950b10.jpg b/Sklearn/sklearn-doc-zh/master/img/3944d32654b4bf939d248b496f950b10.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3944d32654b4bf939d248b496f950b10.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3944d32654b4bf939d248b496f950b10.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/395ca6ce9617a4fc0695db973496d29b.jpg b/Sklearn/sklearn-doc-zh/master/img/395ca6ce9617a4fc0695db973496d29b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/395ca6ce9617a4fc0695db973496d29b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/395ca6ce9617a4fc0695db973496d29b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3a03009ea272ed427cfa033086b89c72.jpg b/Sklearn/sklearn-doc-zh/master/img/3a03009ea272ed427cfa033086b89c72.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3a03009ea272ed427cfa033086b89c72.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3a03009ea272ed427cfa033086b89c72.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3a378bb516408c0bd771667be478b787.jpg b/Sklearn/sklearn-doc-zh/master/img/3a378bb516408c0bd771667be478b787.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3a378bb516408c0bd771667be478b787.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3a378bb516408c0bd771667be478b787.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3a876d35f8a2c82a19a71b0fd52f7153.jpg b/Sklearn/sklearn-doc-zh/master/img/3a876d35f8a2c82a19a71b0fd52f7153.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3a876d35f8a2c82a19a71b0fd52f7153.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3a876d35f8a2c82a19a71b0fd52f7153.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3ad7487dba3ebdba90bc11d2016d0c99.jpg b/Sklearn/sklearn-doc-zh/master/img/3ad7487dba3ebdba90bc11d2016d0c99.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3ad7487dba3ebdba90bc11d2016d0c99.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3ad7487dba3ebdba90bc11d2016d0c99.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3ae54efdf60edfa32ccbe7388834b0f2.jpg b/Sklearn/sklearn-doc-zh/master/img/3ae54efdf60edfa32ccbe7388834b0f2.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3ae54efdf60edfa32ccbe7388834b0f2.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3ae54efdf60edfa32ccbe7388834b0f2.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3af1cb0026efe5789c3acf3669ff5bc6.jpg b/Sklearn/sklearn-doc-zh/master/img/3af1cb0026efe5789c3acf3669ff5bc6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3af1cb0026efe5789c3acf3669ff5bc6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3af1cb0026efe5789c3acf3669ff5bc6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3b1e10150e98ef95e977c12ad0607620.jpg b/Sklearn/sklearn-doc-zh/master/img/3b1e10150e98ef95e977c12ad0607620.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3b1e10150e98ef95e977c12ad0607620.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3b1e10150e98ef95e977c12ad0607620.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3b70a99c882760b6d8ed230e145ed742.jpg b/Sklearn/sklearn-doc-zh/master/img/3b70a99c882760b6d8ed230e145ed742.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3b70a99c882760b6d8ed230e145ed742.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3b70a99c882760b6d8ed230e145ed742.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3b87b19c6aee2b7c40b6ede968a45c07.jpg b/Sklearn/sklearn-doc-zh/master/img/3b87b19c6aee2b7c40b6ede968a45c07.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3b87b19c6aee2b7c40b6ede968a45c07.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3b87b19c6aee2b7c40b6ede968a45c07.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3bdd2a9b74f6a2e0db32e159c63ffec0.jpg b/Sklearn/sklearn-doc-zh/master/img/3bdd2a9b74f6a2e0db32e159c63ffec0.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3bdd2a9b74f6a2e0db32e159c63ffec0.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3bdd2a9b74f6a2e0db32e159c63ffec0.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3bf32d926cdf24f440b6b831f0d9cc37.jpg b/Sklearn/sklearn-doc-zh/master/img/3bf32d926cdf24f440b6b831f0d9cc37.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3bf32d926cdf24f440b6b831f0d9cc37.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3bf32d926cdf24f440b6b831f0d9cc37.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3cc550ecff73666ed35ae1efee48b4f4.jpg b/Sklearn/sklearn-doc-zh/master/img/3cc550ecff73666ed35ae1efee48b4f4.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3cc550ecff73666ed35ae1efee48b4f4.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3cc550ecff73666ed35ae1efee48b4f4.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3cca81fd08a4732dc7061cd246b323ed.jpg b/Sklearn/sklearn-doc-zh/master/img/3cca81fd08a4732dc7061cd246b323ed.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3cca81fd08a4732dc7061cd246b323ed.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3cca81fd08a4732dc7061cd246b323ed.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3d43f503466701e58ca2d516de2db505.jpg b/Sklearn/sklearn-doc-zh/master/img/3d43f503466701e58ca2d516de2db505.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3d43f503466701e58ca2d516de2db505.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3d43f503466701e58ca2d516de2db505.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3d4ce0d796d2662b24eb8fa7b4f9710c.jpg b/Sklearn/sklearn-doc-zh/master/img/3d4ce0d796d2662b24eb8fa7b4f9710c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3d4ce0d796d2662b24eb8fa7b4f9710c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3d4ce0d796d2662b24eb8fa7b4f9710c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3d52bf36b893b26195748e89c94273f0.jpg b/Sklearn/sklearn-doc-zh/master/img/3d52bf36b893b26195748e89c94273f0.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3d52bf36b893b26195748e89c94273f0.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3d52bf36b893b26195748e89c94273f0.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3d59995ed97bdad674b6afd6fbd928ec.jpg b/Sklearn/sklearn-doc-zh/master/img/3d59995ed97bdad674b6afd6fbd928ec.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3d59995ed97bdad674b6afd6fbd928ec.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3d59995ed97bdad674b6afd6fbd928ec.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3dae1c97513f643047c2e33ee90ca8b5.jpg b/Sklearn/sklearn-doc-zh/master/img/3dae1c97513f643047c2e33ee90ca8b5.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3dae1c97513f643047c2e33ee90ca8b5.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3dae1c97513f643047c2e33ee90ca8b5.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3e0dffeddefe6ba1e809bd9b6276c771.jpg b/Sklearn/sklearn-doc-zh/master/img/3e0dffeddefe6ba1e809bd9b6276c771.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3e0dffeddefe6ba1e809bd9b6276c771.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3e0dffeddefe6ba1e809bd9b6276c771.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3e233cefc937a43bb4481dd23d728b54.jpg b/Sklearn/sklearn-doc-zh/master/img/3e233cefc937a43bb4481dd23d728b54.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3e233cefc937a43bb4481dd23d728b54.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3e233cefc937a43bb4481dd23d728b54.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/3f5adc0c9b0e51a0759ed6ac49f94431.jpg b/Sklearn/sklearn-doc-zh/master/img/3f5adc0c9b0e51a0759ed6ac49f94431.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/3f5adc0c9b0e51a0759ed6ac49f94431.jpg
rename to Sklearn/sklearn-doc-zh/master/img/3f5adc0c9b0e51a0759ed6ac49f94431.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/401b1228a76ba9190680851b9d095653.jpg b/Sklearn/sklearn-doc-zh/master/img/401b1228a76ba9190680851b9d095653.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/401b1228a76ba9190680851b9d095653.jpg
rename to Sklearn/sklearn-doc-zh/master/img/401b1228a76ba9190680851b9d095653.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/403595258114953d3411fd1bfbf335f8.jpg b/Sklearn/sklearn-doc-zh/master/img/403595258114953d3411fd1bfbf335f8.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/403595258114953d3411fd1bfbf335f8.jpg
rename to Sklearn/sklearn-doc-zh/master/img/403595258114953d3411fd1bfbf335f8.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/405095229d24f3525298dc6f99077666.jpg b/Sklearn/sklearn-doc-zh/master/img/405095229d24f3525298dc6f99077666.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/405095229d24f3525298dc6f99077666.jpg
rename to Sklearn/sklearn-doc-zh/master/img/405095229d24f3525298dc6f99077666.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/407341c3d4d055b857bb3229003b9daf.jpg b/Sklearn/sklearn-doc-zh/master/img/407341c3d4d055b857bb3229003b9daf.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/407341c3d4d055b857bb3229003b9daf.jpg
rename to Sklearn/sklearn-doc-zh/master/img/407341c3d4d055b857bb3229003b9daf.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/412f5988c3736daa68f47a0dc9fba659.jpg b/Sklearn/sklearn-doc-zh/master/img/412f5988c3736daa68f47a0dc9fba659.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/412f5988c3736daa68f47a0dc9fba659.jpg
rename to Sklearn/sklearn-doc-zh/master/img/412f5988c3736daa68f47a0dc9fba659.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/418f3dbcb32031257a948ec23d05e53e.jpg b/Sklearn/sklearn-doc-zh/master/img/418f3dbcb32031257a948ec23d05e53e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/418f3dbcb32031257a948ec23d05e53e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/418f3dbcb32031257a948ec23d05e53e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/41c9612e6e74708a274b11f770810663.jpg b/Sklearn/sklearn-doc-zh/master/img/41c9612e6e74708a274b11f770810663.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/41c9612e6e74708a274b11f770810663.jpg
rename to Sklearn/sklearn-doc-zh/master/img/41c9612e6e74708a274b11f770810663.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/423f64b70bdfeba3566e0bbcca01c277.jpg b/Sklearn/sklearn-doc-zh/master/img/423f64b70bdfeba3566e0bbcca01c277.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/423f64b70bdfeba3566e0bbcca01c277.jpg
rename to Sklearn/sklearn-doc-zh/master/img/423f64b70bdfeba3566e0bbcca01c277.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4246a718076893e37084bc69a7e16007.jpg b/Sklearn/sklearn-doc-zh/master/img/4246a718076893e37084bc69a7e16007.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4246a718076893e37084bc69a7e16007.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4246a718076893e37084bc69a7e16007.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/42edb18b0951c4f7ab739e5c24bf9ba3.jpg b/Sklearn/sklearn-doc-zh/master/img/42edb18b0951c4f7ab739e5c24bf9ba3.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/42edb18b0951c4f7ab739e5c24bf9ba3.jpg
rename to Sklearn/sklearn-doc-zh/master/img/42edb18b0951c4f7ab739e5c24bf9ba3.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/42f93b2b294f585223e6c663f86504d0.jpg b/Sklearn/sklearn-doc-zh/master/img/42f93b2b294f585223e6c663f86504d0.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/42f93b2b294f585223e6c663f86504d0.jpg
rename to Sklearn/sklearn-doc-zh/master/img/42f93b2b294f585223e6c663f86504d0.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/433674c5864f3cec96b82f9e63b80fb7.jpg b/Sklearn/sklearn-doc-zh/master/img/433674c5864f3cec96b82f9e63b80fb7.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/433674c5864f3cec96b82f9e63b80fb7.jpg
rename to Sklearn/sklearn-doc-zh/master/img/433674c5864f3cec96b82f9e63b80fb7.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/433fedd575581cddbd612624b65e5dac.jpg b/Sklearn/sklearn-doc-zh/master/img/433fedd575581cddbd612624b65e5dac.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/433fedd575581cddbd612624b65e5dac.jpg
rename to Sklearn/sklearn-doc-zh/master/img/433fedd575581cddbd612624b65e5dac.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4341393efadcef482cea0dd54509e011.jpg b/Sklearn/sklearn-doc-zh/master/img/4341393efadcef482cea0dd54509e011.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4341393efadcef482cea0dd54509e011.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4341393efadcef482cea0dd54509e011.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/43996aff9311511e6e2f81912a249c7e.jpg b/Sklearn/sklearn-doc-zh/master/img/43996aff9311511e6e2f81912a249c7e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/43996aff9311511e6e2f81912a249c7e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/43996aff9311511e6e2f81912a249c7e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/43c1fea57579e54f80c0535bc582626f.jpg b/Sklearn/sklearn-doc-zh/master/img/43c1fea57579e54f80c0535bc582626f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/43c1fea57579e54f80c0535bc582626f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/43c1fea57579e54f80c0535bc582626f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/43e13b580daefe5ba754b790dfbd216c.jpg b/Sklearn/sklearn-doc-zh/master/img/43e13b580daefe5ba754b790dfbd216c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/43e13b580daefe5ba754b790dfbd216c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/43e13b580daefe5ba754b790dfbd216c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/43fc75930300610d8cb41a8d15899c15.jpg b/Sklearn/sklearn-doc-zh/master/img/43fc75930300610d8cb41a8d15899c15.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/43fc75930300610d8cb41a8d15899c15.jpg
rename to Sklearn/sklearn-doc-zh/master/img/43fc75930300610d8cb41a8d15899c15.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/43fc8286f3bb11d7c8eb1e83e6538ac6.jpg b/Sklearn/sklearn-doc-zh/master/img/43fc8286f3bb11d7c8eb1e83e6538ac6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/43fc8286f3bb11d7c8eb1e83e6538ac6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/43fc8286f3bb11d7c8eb1e83e6538ac6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/446d6d36c20a79508f1cc84c737a597b.jpg b/Sklearn/sklearn-doc-zh/master/img/446d6d36c20a79508f1cc84c737a597b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/446d6d36c20a79508f1cc84c737a597b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/446d6d36c20a79508f1cc84c737a597b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/451ef7ed1a14a6cdc38324c8a5c7c683.jpg b/Sklearn/sklearn-doc-zh/master/img/451ef7ed1a14a6cdc38324c8a5c7c683.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/451ef7ed1a14a6cdc38324c8a5c7c683.jpg
rename to Sklearn/sklearn-doc-zh/master/img/451ef7ed1a14a6cdc38324c8a5c7c683.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/45c6e9fedc8fe61dd8d3f388a0464d30.jpg b/Sklearn/sklearn-doc-zh/master/img/45c6e9fedc8fe61dd8d3f388a0464d30.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/45c6e9fedc8fe61dd8d3f388a0464d30.jpg
rename to Sklearn/sklearn-doc-zh/master/img/45c6e9fedc8fe61dd8d3f388a0464d30.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/45e0e70e4566592b549e6835d6cba2a8.jpg b/Sklearn/sklearn-doc-zh/master/img/45e0e70e4566592b549e6835d6cba2a8.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/45e0e70e4566592b549e6835d6cba2a8.jpg
rename to Sklearn/sklearn-doc-zh/master/img/45e0e70e4566592b549e6835d6cba2a8.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4609693b88f682790da8203535625471.jpg b/Sklearn/sklearn-doc-zh/master/img/4609693b88f682790da8203535625471.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4609693b88f682790da8203535625471.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4609693b88f682790da8203535625471.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/468283fb7514d3373112cb7db7c43356.jpg b/Sklearn/sklearn-doc-zh/master/img/468283fb7514d3373112cb7db7c43356.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/468283fb7514d3373112cb7db7c43356.jpg
rename to Sklearn/sklearn-doc-zh/master/img/468283fb7514d3373112cb7db7c43356.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/46cc434e4ffc05f8d38712603b34bf7f.jpg b/Sklearn/sklearn-doc-zh/master/img/46cc434e4ffc05f8d38712603b34bf7f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/46cc434e4ffc05f8d38712603b34bf7f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/46cc434e4ffc05f8d38712603b34bf7f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/47738e3e36a9bddb5bc708e8fc666204.jpg b/Sklearn/sklearn-doc-zh/master/img/47738e3e36a9bddb5bc708e8fc666204.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/47738e3e36a9bddb5bc708e8fc666204.jpg
rename to Sklearn/sklearn-doc-zh/master/img/47738e3e36a9bddb5bc708e8fc666204.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/47d90c837620a14d53233bae4fe8fe57.jpg b/Sklearn/sklearn-doc-zh/master/img/47d90c837620a14d53233bae4fe8fe57.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/47d90c837620a14d53233bae4fe8fe57.jpg
rename to Sklearn/sklearn-doc-zh/master/img/47d90c837620a14d53233bae4fe8fe57.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/48a36c240dcfa54de5ea4cc6250087fa.jpg b/Sklearn/sklearn-doc-zh/master/img/48a36c240dcfa54de5ea4cc6250087fa.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/48a36c240dcfa54de5ea4cc6250087fa.jpg
rename to Sklearn/sklearn-doc-zh/master/img/48a36c240dcfa54de5ea4cc6250087fa.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/48cf684e54ca494e275074761259069c.jpg b/Sklearn/sklearn-doc-zh/master/img/48cf684e54ca494e275074761259069c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/48cf684e54ca494e275074761259069c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/48cf684e54ca494e275074761259069c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4953c9da8999e3eb76b63a4dd0432896.jpg b/Sklearn/sklearn-doc-zh/master/img/4953c9da8999e3eb76b63a4dd0432896.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4953c9da8999e3eb76b63a4dd0432896.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4953c9da8999e3eb76b63a4dd0432896.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/499e262369261799dec950eb33da9ccf.jpg b/Sklearn/sklearn-doc-zh/master/img/499e262369261799dec950eb33da9ccf.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/499e262369261799dec950eb33da9ccf.jpg
rename to Sklearn/sklearn-doc-zh/master/img/499e262369261799dec950eb33da9ccf.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/49b0512284893ed2ca56a2b8c0b7d0b5.jpg b/Sklearn/sklearn-doc-zh/master/img/49b0512284893ed2ca56a2b8c0b7d0b5.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/49b0512284893ed2ca56a2b8c0b7d0b5.jpg
rename to Sklearn/sklearn-doc-zh/master/img/49b0512284893ed2ca56a2b8c0b7d0b5.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4a22ca544916918b2358e5fc7c71b8e6.jpg b/Sklearn/sklearn-doc-zh/master/img/4a22ca544916918b2358e5fc7c71b8e6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4a22ca544916918b2358e5fc7c71b8e6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4a22ca544916918b2358e5fc7c71b8e6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4a22ca544916918b2358e5fc7c71b8e7.png b/Sklearn/sklearn-doc-zh/master/img/4a22ca544916918b2358e5fc7c71b8e7.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4a22ca544916918b2358e5fc7c71b8e7.png
rename to Sklearn/sklearn-doc-zh/master/img/4a22ca544916918b2358e5fc7c71b8e7.png
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4a733ee899c074bde7a4d5292c9fc83e.jpg b/Sklearn/sklearn-doc-zh/master/img/4a733ee899c074bde7a4d5292c9fc83e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4a733ee899c074bde7a4d5292c9fc83e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4a733ee899c074bde7a4d5292c9fc83e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4aafe42b7f9cf8d06d93b9246d01bbfd.jpg b/Sklearn/sklearn-doc-zh/master/img/4aafe42b7f9cf8d06d93b9246d01bbfd.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4aafe42b7f9cf8d06d93b9246d01bbfd.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4aafe42b7f9cf8d06d93b9246d01bbfd.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4b3d9c4467b467af3714ba45c54e5c2e.jpg b/Sklearn/sklearn-doc-zh/master/img/4b3d9c4467b467af3714ba45c54e5c2e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4b3d9c4467b467af3714ba45c54e5c2e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4b3d9c4467b467af3714ba45c54e5c2e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4b64f9acb85d7f2b6169e5a58f255e44.jpg b/Sklearn/sklearn-doc-zh/master/img/4b64f9acb85d7f2b6169e5a58f255e44.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4b64f9acb85d7f2b6169e5a58f255e44.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4b64f9acb85d7f2b6169e5a58f255e44.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4b6d782a67ac392e97215c46b7590bf7.jpg b/Sklearn/sklearn-doc-zh/master/img/4b6d782a67ac392e97215c46b7590bf7.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4b6d782a67ac392e97215c46b7590bf7.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4b6d782a67ac392e97215c46b7590bf7.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4bb6ac59e053fd48275c31c9af35b2d1.jpg b/Sklearn/sklearn-doc-zh/master/img/4bb6ac59e053fd48275c31c9af35b2d1.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4bb6ac59e053fd48275c31c9af35b2d1.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4bb6ac59e053fd48275c31c9af35b2d1.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4bcd849c1a2c10ab9857df1dc223e175.jpg b/Sklearn/sklearn-doc-zh/master/img/4bcd849c1a2c10ab9857df1dc223e175.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4bcd849c1a2c10ab9857df1dc223e175.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4bcd849c1a2c10ab9857df1dc223e175.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4bfe956324cef23278c5192b0fb8029b.jpg b/Sklearn/sklearn-doc-zh/master/img/4bfe956324cef23278c5192b0fb8029b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4bfe956324cef23278c5192b0fb8029b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4bfe956324cef23278c5192b0fb8029b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4c0a0003e110c44c538fbf113c159a3a.jpg b/Sklearn/sklearn-doc-zh/master/img/4c0a0003e110c44c538fbf113c159a3a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4c0a0003e110c44c538fbf113c159a3a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4c0a0003e110c44c538fbf113c159a3a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4d1bc681619acee3db7da4d570bcb4cd.jpg b/Sklearn/sklearn-doc-zh/master/img/4d1bc681619acee3db7da4d570bcb4cd.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4d1bc681619acee3db7da4d570bcb4cd.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4d1bc681619acee3db7da4d570bcb4cd.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4d831898e5c564ee2d4df9fa647a97ce.jpg b/Sklearn/sklearn-doc-zh/master/img/4d831898e5c564ee2d4df9fa647a97ce.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4d831898e5c564ee2d4df9fa647a97ce.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4d831898e5c564ee2d4df9fa647a97ce.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4dee38783cbd4faef5d5639ce23a5c59.jpg b/Sklearn/sklearn-doc-zh/master/img/4dee38783cbd4faef5d5639ce23a5c59.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4dee38783cbd4faef5d5639ce23a5c59.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4dee38783cbd4faef5d5639ce23a5c59.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4e0d8935ff82f26fc3a46a3202bd1fa3.jpg b/Sklearn/sklearn-doc-zh/master/img/4e0d8935ff82f26fc3a46a3202bd1fa3.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4e0d8935ff82f26fc3a46a3202bd1fa3.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4e0d8935ff82f26fc3a46a3202bd1fa3.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4e6d5ce51d78cff57187dc09b6710a7c.jpg b/Sklearn/sklearn-doc-zh/master/img/4e6d5ce51d78cff57187dc09b6710a7c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4e6d5ce51d78cff57187dc09b6710a7c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4e6d5ce51d78cff57187dc09b6710a7c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4ee9f6c666393981b6458e54c3ec89d0.jpg b/Sklearn/sklearn-doc-zh/master/img/4ee9f6c666393981b6458e54c3ec89d0.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4ee9f6c666393981b6458e54c3ec89d0.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4ee9f6c666393981b6458e54c3ec89d0.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4f11727a275459ce82826a9e02800c28.jpg b/Sklearn/sklearn-doc-zh/master/img/4f11727a275459ce82826a9e02800c28.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4f11727a275459ce82826a9e02800c28.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4f11727a275459ce82826a9e02800c28.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/4f8138b00b37d9734bb93aec7e00ac5e.jpg b/Sklearn/sklearn-doc-zh/master/img/4f8138b00b37d9734bb93aec7e00ac5e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/4f8138b00b37d9734bb93aec7e00ac5e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/4f8138b00b37d9734bb93aec7e00ac5e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/500efeff217bde3e862144e9bc90b049.jpg b/Sklearn/sklearn-doc-zh/master/img/500efeff217bde3e862144e9bc90b049.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/500efeff217bde3e862144e9bc90b049.jpg
rename to Sklearn/sklearn-doc-zh/master/img/500efeff217bde3e862144e9bc90b049.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/502926bb104c175c6f3e809b0207830c.jpg b/Sklearn/sklearn-doc-zh/master/img/502926bb104c175c6f3e809b0207830c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/502926bb104c175c6f3e809b0207830c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/502926bb104c175c6f3e809b0207830c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/505ffca1dc9570f24fd66272d18abb1f.jpg b/Sklearn/sklearn-doc-zh/master/img/505ffca1dc9570f24fd66272d18abb1f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/505ffca1dc9570f24fd66272d18abb1f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/505ffca1dc9570f24fd66272d18abb1f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5062c88fba7988fa39aca3bc91857721.jpg b/Sklearn/sklearn-doc-zh/master/img/5062c88fba7988fa39aca3bc91857721.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5062c88fba7988fa39aca3bc91857721.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5062c88fba7988fa39aca3bc91857721.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/507fd1a87cb6a0196c0203a0af5e9bbb.jpg b/Sklearn/sklearn-doc-zh/master/img/507fd1a87cb6a0196c0203a0af5e9bbb.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/507fd1a87cb6a0196c0203a0af5e9bbb.jpg
rename to Sklearn/sklearn-doc-zh/master/img/507fd1a87cb6a0196c0203a0af5e9bbb.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/50bc02ed6fb21594c72e30d1a33bbf89.jpg b/Sklearn/sklearn-doc-zh/master/img/50bc02ed6fb21594c72e30d1a33bbf89.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/50bc02ed6fb21594c72e30d1a33bbf89.jpg
rename to Sklearn/sklearn-doc-zh/master/img/50bc02ed6fb21594c72e30d1a33bbf89.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/50eda5a92ebcfda1468e1508393b748a.jpg b/Sklearn/sklearn-doc-zh/master/img/50eda5a92ebcfda1468e1508393b748a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/50eda5a92ebcfda1468e1508393b748a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/50eda5a92ebcfda1468e1508393b748a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/515ee7781876d7344cc383bb43cb30ea.jpg b/Sklearn/sklearn-doc-zh/master/img/515ee7781876d7344cc383bb43cb30ea.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/515ee7781876d7344cc383bb43cb30ea.jpg
rename to Sklearn/sklearn-doc-zh/master/img/515ee7781876d7344cc383bb43cb30ea.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/51d052e3e4c7f694f3c05eb4159ba243.jpg b/Sklearn/sklearn-doc-zh/master/img/51d052e3e4c7f694f3c05eb4159ba243.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/51d052e3e4c7f694f3c05eb4159ba243.jpg
rename to Sklearn/sklearn-doc-zh/master/img/51d052e3e4c7f694f3c05eb4159ba243.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/51d70ae60903891457d75099cc46e450.jpg b/Sklearn/sklearn-doc-zh/master/img/51d70ae60903891457d75099cc46e450.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/51d70ae60903891457d75099cc46e450.jpg
rename to Sklearn/sklearn-doc-zh/master/img/51d70ae60903891457d75099cc46e450.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/51fa9007646861e0569f8f66731c64e7.jpg b/Sklearn/sklearn-doc-zh/master/img/51fa9007646861e0569f8f66731c64e7.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/51fa9007646861e0569f8f66731c64e7.jpg
rename to Sklearn/sklearn-doc-zh/master/img/51fa9007646861e0569f8f66731c64e7.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/520d26d427ec8afe74b5538d779f5f49.jpg b/Sklearn/sklearn-doc-zh/master/img/520d26d427ec8afe74b5538d779f5f49.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/520d26d427ec8afe74b5538d779f5f49.jpg
rename to Sklearn/sklearn-doc-zh/master/img/520d26d427ec8afe74b5538d779f5f49.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/522bc8957a5d77edbdc533813dbef086.jpg b/Sklearn/sklearn-doc-zh/master/img/522bc8957a5d77edbdc533813dbef086.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/522bc8957a5d77edbdc533813dbef086.jpg
rename to Sklearn/sklearn-doc-zh/master/img/522bc8957a5d77edbdc533813dbef086.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/52654d94cd29c421bad069f802bb69c4.jpg b/Sklearn/sklearn-doc-zh/master/img/52654d94cd29c421bad069f802bb69c4.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/52654d94cd29c421bad069f802bb69c4.jpg
rename to Sklearn/sklearn-doc-zh/master/img/52654d94cd29c421bad069f802bb69c4.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/526e2da298d085b5fd557f49433d4143.jpg b/Sklearn/sklearn-doc-zh/master/img/526e2da298d085b5fd557f49433d4143.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/526e2da298d085b5fd557f49433d4143.jpg
rename to Sklearn/sklearn-doc-zh/master/img/526e2da298d085b5fd557f49433d4143.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5303ecbc70bf5189b8785555c03c54ee.jpg b/Sklearn/sklearn-doc-zh/master/img/5303ecbc70bf5189b8785555c03c54ee.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5303ecbc70bf5189b8785555c03c54ee.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5303ecbc70bf5189b8785555c03c54ee.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/533e54759d696211ebe7819cc107d3bc.jpg b/Sklearn/sklearn-doc-zh/master/img/533e54759d696211ebe7819cc107d3bc.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/533e54759d696211ebe7819cc107d3bc.jpg
rename to Sklearn/sklearn-doc-zh/master/img/533e54759d696211ebe7819cc107d3bc.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/535f86af715e90b9c394e3cbf53d99eb.jpg b/Sklearn/sklearn-doc-zh/master/img/535f86af715e90b9c394e3cbf53d99eb.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/535f86af715e90b9c394e3cbf53d99eb.jpg
rename to Sklearn/sklearn-doc-zh/master/img/535f86af715e90b9c394e3cbf53d99eb.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/53f6b3d47807f65fe25b4fa232cd7abc.jpg b/Sklearn/sklearn-doc-zh/master/img/53f6b3d47807f65fe25b4fa232cd7abc.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/53f6b3d47807f65fe25b4fa232cd7abc.jpg
rename to Sklearn/sklearn-doc-zh/master/img/53f6b3d47807f65fe25b4fa232cd7abc.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/53fd9843c9af9a7ea05df92bce997456.jpg b/Sklearn/sklearn-doc-zh/master/img/53fd9843c9af9a7ea05df92bce997456.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/53fd9843c9af9a7ea05df92bce997456.jpg
rename to Sklearn/sklearn-doc-zh/master/img/53fd9843c9af9a7ea05df92bce997456.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5421b26a31de754ee8d186d038006fa3.jpg b/Sklearn/sklearn-doc-zh/master/img/5421b26a31de754ee8d186d038006fa3.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5421b26a31de754ee8d186d038006fa3.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5421b26a31de754ee8d186d038006fa3.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/55f40b1e092983fff81024042966adec.jpg b/Sklearn/sklearn-doc-zh/master/img/55f40b1e092983fff81024042966adec.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/55f40b1e092983fff81024042966adec.jpg
rename to Sklearn/sklearn-doc-zh/master/img/55f40b1e092983fff81024042966adec.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/55f44df097de0ddde791d3084a69a1bf.jpg b/Sklearn/sklearn-doc-zh/master/img/55f44df097de0ddde791d3084a69a1bf.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/55f44df097de0ddde791d3084a69a1bf.jpg
rename to Sklearn/sklearn-doc-zh/master/img/55f44df097de0ddde791d3084a69a1bf.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5656d5270c0ee866d09e2b271ed04a67.jpg b/Sklearn/sklearn-doc-zh/master/img/5656d5270c0ee866d09e2b271ed04a67.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5656d5270c0ee866d09e2b271ed04a67.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5656d5270c0ee866d09e2b271ed04a67.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5679f1fec5dfb7ab05db5e5aa9fa11a2.jpg b/Sklearn/sklearn-doc-zh/master/img/5679f1fec5dfb7ab05db5e5aa9fa11a2.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5679f1fec5dfb7ab05db5e5aa9fa11a2.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5679f1fec5dfb7ab05db5e5aa9fa11a2.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/56c751b0714a570fdcef0caf63f81580.jpg b/Sklearn/sklearn-doc-zh/master/img/56c751b0714a570fdcef0caf63f81580.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/56c751b0714a570fdcef0caf63f81580.jpg
rename to Sklearn/sklearn-doc-zh/master/img/56c751b0714a570fdcef0caf63f81580.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/572f614c4b9bc376ebbf6ca259b6558e.jpg b/Sklearn/sklearn-doc-zh/master/img/572f614c4b9bc376ebbf6ca259b6558e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/572f614c4b9bc376ebbf6ca259b6558e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/572f614c4b9bc376ebbf6ca259b6558e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/578c95150175e4efdf851fe66d503079.jpg b/Sklearn/sklearn-doc-zh/master/img/578c95150175e4efdf851fe66d503079.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/578c95150175e4efdf851fe66d503079.jpg
rename to Sklearn/sklearn-doc-zh/master/img/578c95150175e4efdf851fe66d503079.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/57e15e43b846791e47a202e1a9a5d8ce.jpg b/Sklearn/sklearn-doc-zh/master/img/57e15e43b846791e47a202e1a9a5d8ce.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/57e15e43b846791e47a202e1a9a5d8ce.jpg
rename to Sklearn/sklearn-doc-zh/master/img/57e15e43b846791e47a202e1a9a5d8ce.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/580270908cf4e5ba3907b7267fcfbb44.jpg b/Sklearn/sklearn-doc-zh/master/img/580270908cf4e5ba3907b7267fcfbb44.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/580270908cf4e5ba3907b7267fcfbb44.jpg
rename to Sklearn/sklearn-doc-zh/master/img/580270908cf4e5ba3907b7267fcfbb44.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/587f27ca8cf947779c1929d65c697e0c.jpg b/Sklearn/sklearn-doc-zh/master/img/587f27ca8cf947779c1929d65c697e0c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/587f27ca8cf947779c1929d65c697e0c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/587f27ca8cf947779c1929d65c697e0c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/58d06eb9b8003c392af19e09ce5ab1a4.jpg b/Sklearn/sklearn-doc-zh/master/img/58d06eb9b8003c392af19e09ce5ab1a4.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/58d06eb9b8003c392af19e09ce5ab1a4.jpg
rename to Sklearn/sklearn-doc-zh/master/img/58d06eb9b8003c392af19e09ce5ab1a4.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/58d86a5573e0796f320435a8ce8346ea.jpg b/Sklearn/sklearn-doc-zh/master/img/58d86a5573e0796f320435a8ce8346ea.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/58d86a5573e0796f320435a8ce8346ea.jpg
rename to Sklearn/sklearn-doc-zh/master/img/58d86a5573e0796f320435a8ce8346ea.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/58ef9e1b5d2ee139dcb588a3879ca1a6.jpg b/Sklearn/sklearn-doc-zh/master/img/58ef9e1b5d2ee139dcb588a3879ca1a6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/58ef9e1b5d2ee139dcb588a3879ca1a6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/58ef9e1b5d2ee139dcb588a3879ca1a6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/59100a001bb4b110e00f7ddf1354cd5b.jpg b/Sklearn/sklearn-doc-zh/master/img/59100a001bb4b110e00f7ddf1354cd5b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/59100a001bb4b110e00f7ddf1354cd5b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/59100a001bb4b110e00f7ddf1354cd5b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/59420186f988199ba986eefc023fb637.jpg b/Sklearn/sklearn-doc-zh/master/img/59420186f988199ba986eefc023fb637.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/59420186f988199ba986eefc023fb637.jpg
rename to Sklearn/sklearn-doc-zh/master/img/59420186f988199ba986eefc023fb637.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5959a6fe3c27570b7d474f26126eb628.jpg b/Sklearn/sklearn-doc-zh/master/img/5959a6fe3c27570b7d474f26126eb628.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5959a6fe3c27570b7d474f26126eb628.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5959a6fe3c27570b7d474f26126eb628.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5959a6fe3c27570b7d474f26126eb628.png b/Sklearn/sklearn-doc-zh/master/img/5959a6fe3c27570b7d474f26126eb628.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5959a6fe3c27570b7d474f26126eb628.png
rename to Sklearn/sklearn-doc-zh/master/img/5959a6fe3c27570b7d474f26126eb628.png
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5a5de287d8a2c74dd12f86219cc19697.jpg b/Sklearn/sklearn-doc-zh/master/img/5a5de287d8a2c74dd12f86219cc19697.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5a5de287d8a2c74dd12f86219cc19697.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5a5de287d8a2c74dd12f86219cc19697.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5a8d4539001fa07eb00b24f2e74adeca.jpg b/Sklearn/sklearn-doc-zh/master/img/5a8d4539001fa07eb00b24f2e74adeca.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5a8d4539001fa07eb00b24f2e74adeca.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5a8d4539001fa07eb00b24f2e74adeca.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5b84281b8f1a26c9e9cba1b6cb0126ce.jpg b/Sklearn/sklearn-doc-zh/master/img/5b84281b8f1a26c9e9cba1b6cb0126ce.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5b84281b8f1a26c9e9cba1b6cb0126ce.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5b84281b8f1a26c9e9cba1b6cb0126ce.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5b87e1a1b34a0ac402ef602b152ee2f9.jpg b/Sklearn/sklearn-doc-zh/master/img/5b87e1a1b34a0ac402ef602b152ee2f9.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5b87e1a1b34a0ac402ef602b152ee2f9.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5b87e1a1b34a0ac402ef602b152ee2f9.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5bb034cee5851ab5105aca4c40a4e16e.jpg b/Sklearn/sklearn-doc-zh/master/img/5bb034cee5851ab5105aca4c40a4e16e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5bb034cee5851ab5105aca4c40a4e16e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5bb034cee5851ab5105aca4c40a4e16e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5bd6037aeb804486a6f7cc0415ace8fc.jpg b/Sklearn/sklearn-doc-zh/master/img/5bd6037aeb804486a6f7cc0415ace8fc.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5bd6037aeb804486a6f7cc0415ace8fc.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5bd6037aeb804486a6f7cc0415ace8fc.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5c0b2807058791d6069327b709fae60c.jpg b/Sklearn/sklearn-doc-zh/master/img/5c0b2807058791d6069327b709fae60c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5c0b2807058791d6069327b709fae60c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5c0b2807058791d6069327b709fae60c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5c3cdb1b3bec4126d850a52d7fe8dc18.jpg b/Sklearn/sklearn-doc-zh/master/img/5c3cdb1b3bec4126d850a52d7fe8dc18.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5c3cdb1b3bec4126d850a52d7fe8dc18.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5c3cdb1b3bec4126d850a52d7fe8dc18.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5c82dbae35dc43d2f556f9f284d9d184.jpg b/Sklearn/sklearn-doc-zh/master/img/5c82dbae35dc43d2f556f9f284d9d184.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5c82dbae35dc43d2f556f9f284d9d184.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5c82dbae35dc43d2f556f9f284d9d184.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5cc4d35f246f0aeb95f154a5343635c2.jpg b/Sklearn/sklearn-doc-zh/master/img/5cc4d35f246f0aeb95f154a5343635c2.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5cc4d35f246f0aeb95f154a5343635c2.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5cc4d35f246f0aeb95f154a5343635c2.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5cf078f7cdbb5755436e396cad3c37a0.jpg b/Sklearn/sklearn-doc-zh/master/img/5cf078f7cdbb5755436e396cad3c37a0.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5cf078f7cdbb5755436e396cad3c37a0.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5cf078f7cdbb5755436e396cad3c37a0.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5d0c433dc4dc7ca883ac8173e6e2096f.jpg b/Sklearn/sklearn-doc-zh/master/img/5d0c433dc4dc7ca883ac8173e6e2096f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5d0c433dc4dc7ca883ac8173e6e2096f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5d0c433dc4dc7ca883ac8173e6e2096f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5d0d4b3cde172f7fd71da81c2e1b21a9.jpg b/Sklearn/sklearn-doc-zh/master/img/5d0d4b3cde172f7fd71da81c2e1b21a9.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5d0d4b3cde172f7fd71da81c2e1b21a9.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5d0d4b3cde172f7fd71da81c2e1b21a9.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5d197c73e8e0bc6ba78f74b2a205886e.jpg b/Sklearn/sklearn-doc-zh/master/img/5d197c73e8e0bc6ba78f74b2a205886e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5d197c73e8e0bc6ba78f74b2a205886e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5d197c73e8e0bc6ba78f74b2a205886e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5d71bcc9676eddcac89936397b2cd79c.jpg b/Sklearn/sklearn-doc-zh/master/img/5d71bcc9676eddcac89936397b2cd79c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5d71bcc9676eddcac89936397b2cd79c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5d71bcc9676eddcac89936397b2cd79c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5d8cf5fcf13a72776158a787bc29143c.jpg b/Sklearn/sklearn-doc-zh/master/img/5d8cf5fcf13a72776158a787bc29143c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5d8cf5fcf13a72776158a787bc29143c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5d8cf5fcf13a72776158a787bc29143c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5daa1b5d6a3d63020722cb0f4b41eee2.jpg b/Sklearn/sklearn-doc-zh/master/img/5daa1b5d6a3d63020722cb0f4b41eee2.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5daa1b5d6a3d63020722cb0f4b41eee2.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5daa1b5d6a3d63020722cb0f4b41eee2.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5db611c8f58fbd9a9776c013656a16ff.jpg b/Sklearn/sklearn-doc-zh/master/img/5db611c8f58fbd9a9776c013656a16ff.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5db611c8f58fbd9a9776c013656a16ff.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5db611c8f58fbd9a9776c013656a16ff.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5df8f915c528f34f0ada91db5228605f.jpg b/Sklearn/sklearn-doc-zh/master/img/5df8f915c528f34f0ada91db5228605f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5df8f915c528f34f0ada91db5228605f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5df8f915c528f34f0ada91db5228605f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5e45807b4775fcfaca64f6363102dc5e.jpg b/Sklearn/sklearn-doc-zh/master/img/5e45807b4775fcfaca64f6363102dc5e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5e45807b4775fcfaca64f6363102dc5e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5e45807b4775fcfaca64f6363102dc5e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5ec012661471fa940c27472afcce01a2.jpg b/Sklearn/sklearn-doc-zh/master/img/5ec012661471fa940c27472afcce01a2.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5ec012661471fa940c27472afcce01a2.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5ec012661471fa940c27472afcce01a2.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5eea9f6c78020e75b9cc37d038d297ab.jpg b/Sklearn/sklearn-doc-zh/master/img/5eea9f6c78020e75b9cc37d038d297ab.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5eea9f6c78020e75b9cc37d038d297ab.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5eea9f6c78020e75b9cc37d038d297ab.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5f0a6e9a20a071d688e183c9675544e5.jpg b/Sklearn/sklearn-doc-zh/master/img/5f0a6e9a20a071d688e183c9675544e5.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5f0a6e9a20a071d688e183c9675544e5.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5f0a6e9a20a071d688e183c9675544e5.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/5fc2f399717cfe5187dc09896972a850.jpg b/Sklearn/sklearn-doc-zh/master/img/5fc2f399717cfe5187dc09896972a850.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/5fc2f399717cfe5187dc09896972a850.jpg
rename to Sklearn/sklearn-doc-zh/master/img/5fc2f399717cfe5187dc09896972a850.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/60337a9162822d71dc32e68952b4e02a.jpg b/Sklearn/sklearn-doc-zh/master/img/60337a9162822d71dc32e68952b4e02a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/60337a9162822d71dc32e68952b4e02a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/60337a9162822d71dc32e68952b4e02a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6042b714de932f6ed841e71bfe9acede.jpg b/Sklearn/sklearn-doc-zh/master/img/6042b714de932f6ed841e71bfe9acede.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6042b714de932f6ed841e71bfe9acede.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6042b714de932f6ed841e71bfe9acede.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6081a672a0d5d6cc7563c531599dde91.jpg b/Sklearn/sklearn-doc-zh/master/img/6081a672a0d5d6cc7563c531599dde91.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6081a672a0d5d6cc7563c531599dde91.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6081a672a0d5d6cc7563c531599dde91.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/60fef7a79e647e4e8dc02f0b0dc25772.jpg b/Sklearn/sklearn-doc-zh/master/img/60fef7a79e647e4e8dc02f0b0dc25772.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/60fef7a79e647e4e8dc02f0b0dc25772.jpg
rename to Sklearn/sklearn-doc-zh/master/img/60fef7a79e647e4e8dc02f0b0dc25772.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/610475f160f25407a547e5430c792460.jpg b/Sklearn/sklearn-doc-zh/master/img/610475f160f25407a547e5430c792460.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/610475f160f25407a547e5430c792460.jpg
rename to Sklearn/sklearn-doc-zh/master/img/610475f160f25407a547e5430c792460.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/611639bdcfd73c857a43842913d6e826.jpg b/Sklearn/sklearn-doc-zh/master/img/611639bdcfd73c857a43842913d6e826.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/611639bdcfd73c857a43842913d6e826.jpg
rename to Sklearn/sklearn-doc-zh/master/img/611639bdcfd73c857a43842913d6e826.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6122e23454910f4f076c71a84c068291.jpg b/Sklearn/sklearn-doc-zh/master/img/6122e23454910f4f076c71a84c068291.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6122e23454910f4f076c71a84c068291.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6122e23454910f4f076c71a84c068291.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/61a540d6591602c8f513910fd2f33b40.jpg b/Sklearn/sklearn-doc-zh/master/img/61a540d6591602c8f513910fd2f33b40.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/61a540d6591602c8f513910fd2f33b40.jpg
rename to Sklearn/sklearn-doc-zh/master/img/61a540d6591602c8f513910fd2f33b40.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/61a79d63783315d8e68d8ecf1324105f.jpg b/Sklearn/sklearn-doc-zh/master/img/61a79d63783315d8e68d8ecf1324105f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/61a79d63783315d8e68d8ecf1324105f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/61a79d63783315d8e68d8ecf1324105f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/61b05c3bf030b831f23f257ca8182f51.jpg b/Sklearn/sklearn-doc-zh/master/img/61b05c3bf030b831f23f257ca8182f51.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/61b05c3bf030b831f23f257ca8182f51.jpg
rename to Sklearn/sklearn-doc-zh/master/img/61b05c3bf030b831f23f257ca8182f51.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6211fb320c2cdb794a80e9e0b800a6a1.jpg b/Sklearn/sklearn-doc-zh/master/img/6211fb320c2cdb794a80e9e0b800a6a1.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6211fb320c2cdb794a80e9e0b800a6a1.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6211fb320c2cdb794a80e9e0b800a6a1.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/62eb544f1f6e234c61099fea1517300b.jpg b/Sklearn/sklearn-doc-zh/master/img/62eb544f1f6e234c61099fea1517300b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/62eb544f1f6e234c61099fea1517300b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/62eb544f1f6e234c61099fea1517300b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/638e0b526b8a69a226bc23950f6eeb3f.jpg b/Sklearn/sklearn-doc-zh/master/img/638e0b526b8a69a226bc23950f6eeb3f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/638e0b526b8a69a226bc23950f6eeb3f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/638e0b526b8a69a226bc23950f6eeb3f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/639e82f3829a0ad677110cc33a028c98.jpg b/Sklearn/sklearn-doc-zh/master/img/639e82f3829a0ad677110cc33a028c98.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/639e82f3829a0ad677110cc33a028c98.jpg
rename to Sklearn/sklearn-doc-zh/master/img/639e82f3829a0ad677110cc33a028c98.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/63f146cd209ad922f402bf81bfdeb621.jpg b/Sklearn/sklearn-doc-zh/master/img/63f146cd209ad922f402bf81bfdeb621.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/63f146cd209ad922f402bf81bfdeb621.jpg
rename to Sklearn/sklearn-doc-zh/master/img/63f146cd209ad922f402bf81bfdeb621.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/642372b631f22b9db0dc4f30d9ab67e6.jpg b/Sklearn/sklearn-doc-zh/master/img/642372b631f22b9db0dc4f30d9ab67e6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/642372b631f22b9db0dc4f30d9ab67e6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/642372b631f22b9db0dc4f30d9ab67e6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6447f40a171271e014eed37b28b4a711.jpg b/Sklearn/sklearn-doc-zh/master/img/6447f40a171271e014eed37b28b4a711.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6447f40a171271e014eed37b28b4a711.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6447f40a171271e014eed37b28b4a711.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6479476ebd2e5d741f1a712f671fccb6.jpg b/Sklearn/sklearn-doc-zh/master/img/6479476ebd2e5d741f1a712f671fccb6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6479476ebd2e5d741f1a712f671fccb6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6479476ebd2e5d741f1a712f671fccb6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/64ccaf1b6c08784a30158f809c081987.jpg b/Sklearn/sklearn-doc-zh/master/img/64ccaf1b6c08784a30158f809c081987.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/64ccaf1b6c08784a30158f809c081987.jpg
rename to Sklearn/sklearn-doc-zh/master/img/64ccaf1b6c08784a30158f809c081987.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/64ecb0afa71752378a987a33e1e4e76f.jpg b/Sklearn/sklearn-doc-zh/master/img/64ecb0afa71752378a987a33e1e4e76f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/64ecb0afa71752378a987a33e1e4e76f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/64ecb0afa71752378a987a33e1e4e76f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6518e8fbaaadd8a258c9a3f96b2ef42e.jpg b/Sklearn/sklearn-doc-zh/master/img/6518e8fbaaadd8a258c9a3f96b2ef42e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6518e8fbaaadd8a258c9a3f96b2ef42e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6518e8fbaaadd8a258c9a3f96b2ef42e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6521e34e11e73c0fae9a5bd3c7980a9f.jpg b/Sklearn/sklearn-doc-zh/master/img/6521e34e11e73c0fae9a5bd3c7980a9f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6521e34e11e73c0fae9a5bd3c7980a9f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6521e34e11e73c0fae9a5bd3c7980a9f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6526868397aa8da766b3dc60bbcc30ef.jpg b/Sklearn/sklearn-doc-zh/master/img/6526868397aa8da766b3dc60bbcc30ef.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6526868397aa8da766b3dc60bbcc30ef.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6526868397aa8da766b3dc60bbcc30ef.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6552bde3d3999c1a9728016416932af7.jpg b/Sklearn/sklearn-doc-zh/master/img/6552bde3d3999c1a9728016416932af7.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6552bde3d3999c1a9728016416932af7.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6552bde3d3999c1a9728016416932af7.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6569ca3d831148970ddb4c7dfc3f2572.jpg b/Sklearn/sklearn-doc-zh/master/img/6569ca3d831148970ddb4c7dfc3f2572.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6569ca3d831148970ddb4c7dfc3f2572.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6569ca3d831148970ddb4c7dfc3f2572.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/658840c7508dc5a73ca6180323904862.jpg b/Sklearn/sklearn-doc-zh/master/img/658840c7508dc5a73ca6180323904862.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/658840c7508dc5a73ca6180323904862.jpg
rename to Sklearn/sklearn-doc-zh/master/img/658840c7508dc5a73ca6180323904862.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6653de9b4dea7e5e9a897b5f34e7a4f0.jpg b/Sklearn/sklearn-doc-zh/master/img/6653de9b4dea7e5e9a897b5f34e7a4f0.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6653de9b4dea7e5e9a897b5f34e7a4f0.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6653de9b4dea7e5e9a897b5f34e7a4f0.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6689aa593e8e42bb5c2caa474e642b5f.jpg b/Sklearn/sklearn-doc-zh/master/img/6689aa593e8e42bb5c2caa474e642b5f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/6689aa593e8e42bb5c2caa474e642b5f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/6689aa593e8e42bb5c2caa474e642b5f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/67f18f488d2173299bc076b212f6aee9.jpg b/Sklearn/sklearn-doc-zh/master/img/67f18f488d2173299bc076b212f6aee9.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/67f18f488d2173299bc076b212f6aee9.jpg
rename to Sklearn/sklearn-doc-zh/master/img/67f18f488d2173299bc076b212f6aee9.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/67f62308a1f409829599e546c843d53f.jpg b/Sklearn/sklearn-doc-zh/master/img/67f62308a1f409829599e546c843d53f.jpg
similarity index
100% rename from Python/sklearn/sklearn-doc-zh/master/img/67f62308a1f409829599e546c843d53f.jpg rename to Sklearn/sklearn-doc-zh/master/img/67f62308a1f409829599e546c843d53f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/67f9d3900eb064f6354d23271f16c2b0.jpg b/Sklearn/sklearn-doc-zh/master/img/67f9d3900eb064f6354d23271f16c2b0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/67f9d3900eb064f6354d23271f16c2b0.jpg rename to Sklearn/sklearn-doc-zh/master/img/67f9d3900eb064f6354d23271f16c2b0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/685006d43de154949bfb11efd87df4f1.jpg b/Sklearn/sklearn-doc-zh/master/img/685006d43de154949bfb11efd87df4f1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/685006d43de154949bfb11efd87df4f1.jpg rename to Sklearn/sklearn-doc-zh/master/img/685006d43de154949bfb11efd87df4f1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6875a3a68e07bfa51a631f014fcf8a4f.jpg b/Sklearn/sklearn-doc-zh/master/img/6875a3a68e07bfa51a631f014fcf8a4f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6875a3a68e07bfa51a631f014fcf8a4f.jpg rename to Sklearn/sklearn-doc-zh/master/img/6875a3a68e07bfa51a631f014fcf8a4f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/69bbc745b66051792cd1b5166ce18420.jpg b/Sklearn/sklearn-doc-zh/master/img/69bbc745b66051792cd1b5166ce18420.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/69bbc745b66051792cd1b5166ce18420.jpg rename to Sklearn/sklearn-doc-zh/master/img/69bbc745b66051792cd1b5166ce18420.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/69be855df457ccfee81f59ace2595a23.jpg b/Sklearn/sklearn-doc-zh/master/img/69be855df457ccfee81f59ace2595a23.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/69be855df457ccfee81f59ace2595a23.jpg rename to Sklearn/sklearn-doc-zh/master/img/69be855df457ccfee81f59ace2595a23.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6a0ac3a1f45ae7072f58ae85160eca33.jpg b/Sklearn/sklearn-doc-zh/master/img/6a0ac3a1f45ae7072f58ae85160eca33.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6a0ac3a1f45ae7072f58ae85160eca33.jpg rename to Sklearn/sklearn-doc-zh/master/img/6a0ac3a1f45ae7072f58ae85160eca33.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6a0cf5d5f1d5ad90f9713a46fa55111f.jpg b/Sklearn/sklearn-doc-zh/master/img/6a0cf5d5f1d5ad90f9713a46fa55111f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6a0cf5d5f1d5ad90f9713a46fa55111f.jpg rename to Sklearn/sklearn-doc-zh/master/img/6a0cf5d5f1d5ad90f9713a46fa55111f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6a8621a4ada40acd48b43436ca6a4527.jpg b/Sklearn/sklearn-doc-zh/master/img/6a8621a4ada40acd48b43436ca6a4527.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6a8621a4ada40acd48b43436ca6a4527.jpg rename to Sklearn/sklearn-doc-zh/master/img/6a8621a4ada40acd48b43436ca6a4527.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6ade66280d509fe54883d82b315259e1.jpg b/Sklearn/sklearn-doc-zh/master/img/6ade66280d509fe54883d82b315259e1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6ade66280d509fe54883d82b315259e1.jpg rename to Sklearn/sklearn-doc-zh/master/img/6ade66280d509fe54883d82b315259e1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6ae91fb0f3221b92d2dd4e22204d8008.jpg b/Sklearn/sklearn-doc-zh/master/img/6ae91fb0f3221b92d2dd4e22204d8008.jpg 
similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6ae91fb0f3221b92d2dd4e22204d8008.jpg rename to Sklearn/sklearn-doc-zh/master/img/6ae91fb0f3221b92d2dd4e22204d8008.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6b0371e0d33eebe00fe5ec3d59de5b43.jpg b/Sklearn/sklearn-doc-zh/master/img/6b0371e0d33eebe00fe5ec3d59de5b43.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6b0371e0d33eebe00fe5ec3d59de5b43.jpg rename to Sklearn/sklearn-doc-zh/master/img/6b0371e0d33eebe00fe5ec3d59de5b43.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6b1c74edd599db63c339ead392e8e54a.jpg b/Sklearn/sklearn-doc-zh/master/img/6b1c74edd599db63c339ead392e8e54a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6b1c74edd599db63c339ead392e8e54a.jpg rename to Sklearn/sklearn-doc-zh/master/img/6b1c74edd599db63c339ead392e8e54a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6b474f60cd7fcc77b4a950334fc6483f.jpg b/Sklearn/sklearn-doc-zh/master/img/6b474f60cd7fcc77b4a950334fc6483f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6b474f60cd7fcc77b4a950334fc6483f.jpg rename to Sklearn/sklearn-doc-zh/master/img/6b474f60cd7fcc77b4a950334fc6483f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6b7248d635f4161b925734dbc60de37a.jpg b/Sklearn/sklearn-doc-zh/master/img/6b7248d635f4161b925734dbc60de37a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6b7248d635f4161b925734dbc60de37a.jpg rename to Sklearn/sklearn-doc-zh/master/img/6b7248d635f4161b925734dbc60de37a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6bcc641ece97b81c42261e28eaad3ad7.jpg b/Sklearn/sklearn-doc-zh/master/img/6bcc641ece97b81c42261e28eaad3ad7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6bcc641ece97b81c42261e28eaad3ad7.jpg rename to Sklearn/sklearn-doc-zh/master/img/6bcc641ece97b81c42261e28eaad3ad7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6c281997fc8d9f34a530a7e2bc854adf.jpg b/Sklearn/sklearn-doc-zh/master/img/6c281997fc8d9f34a530a7e2bc854adf.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6c281997fc8d9f34a530a7e2bc854adf.jpg rename to Sklearn/sklearn-doc-zh/master/img/6c281997fc8d9f34a530a7e2bc854adf.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6c446734a6837b7541db12e2b55f1a2b.jpg b/Sklearn/sklearn-doc-zh/master/img/6c446734a6837b7541db12e2b55f1a2b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6c446734a6837b7541db12e2b55f1a2b.jpg rename to Sklearn/sklearn-doc-zh/master/img/6c446734a6837b7541db12e2b55f1a2b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6c70b46b88f05e00e292f1a0f98d2aa8.jpg b/Sklearn/sklearn-doc-zh/master/img/6c70b46b88f05e00e292f1a0f98d2aa8.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6c70b46b88f05e00e292f1a0f98d2aa8.jpg rename to Sklearn/sklearn-doc-zh/master/img/6c70b46b88f05e00e292f1a0f98d2aa8.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6c8db7614197ace6a4bf0f437c085e6d.jpg b/Sklearn/sklearn-doc-zh/master/img/6c8db7614197ace6a4bf0f437c085e6d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6c8db7614197ace6a4bf0f437c085e6d.jpg rename to Sklearn/sklearn-doc-zh/master/img/6c8db7614197ace6a4bf0f437c085e6d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6d8a1d709ef804f4629126d6e1c449f1.jpg 
b/Sklearn/sklearn-doc-zh/master/img/6d8a1d709ef804f4629126d6e1c449f1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6d8a1d709ef804f4629126d6e1c449f1.jpg rename to Sklearn/sklearn-doc-zh/master/img/6d8a1d709ef804f4629126d6e1c449f1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6d8b62cf31afb168e2b2acb89d6abccd.jpg b/Sklearn/sklearn-doc-zh/master/img/6d8b62cf31afb168e2b2acb89d6abccd.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6d8b62cf31afb168e2b2acb89d6abccd.jpg rename to Sklearn/sklearn-doc-zh/master/img/6d8b62cf31afb168e2b2acb89d6abccd.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6da3d48746433a02996c5821013ac4e7.jpg b/Sklearn/sklearn-doc-zh/master/img/6da3d48746433a02996c5821013ac4e7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6da3d48746433a02996c5821013ac4e7.jpg rename to Sklearn/sklearn-doc-zh/master/img/6da3d48746433a02996c5821013ac4e7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6db85b1ad926d9ad860d58629ff5f235.jpg b/Sklearn/sklearn-doc-zh/master/img/6db85b1ad926d9ad860d58629ff5f235.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6db85b1ad926d9ad860d58629ff5f235.jpg rename to Sklearn/sklearn-doc-zh/master/img/6db85b1ad926d9ad860d58629ff5f235.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6e87bd4511cfd9af64076cc1cf8f1bbc.jpg b/Sklearn/sklearn-doc-zh/master/img/6e87bd4511cfd9af64076cc1cf8f1bbc.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6e87bd4511cfd9af64076cc1cf8f1bbc.jpg rename to Sklearn/sklearn-doc-zh/master/img/6e87bd4511cfd9af64076cc1cf8f1bbc.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6edba8b762eb4f9d843ba76d9e344c2a.jpg b/Sklearn/sklearn-doc-zh/master/img/6edba8b762eb4f9d843ba76d9e344c2a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6edba8b762eb4f9d843ba76d9e344c2a.jpg rename to Sklearn/sklearn-doc-zh/master/img/6edba8b762eb4f9d843ba76d9e344c2a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6efb484bc0e0c91b3ba13708bfe46aba.jpg b/Sklearn/sklearn-doc-zh/master/img/6efb484bc0e0c91b3ba13708bfe46aba.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6efb484bc0e0c91b3ba13708bfe46aba.jpg rename to Sklearn/sklearn-doc-zh/master/img/6efb484bc0e0c91b3ba13708bfe46aba.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6f0cdccb5dc60bae6e7a303075ddbdf6.jpg b/Sklearn/sklearn-doc-zh/master/img/6f0cdccb5dc60bae6e7a303075ddbdf6.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6f0cdccb5dc60bae6e7a303075ddbdf6.jpg rename to Sklearn/sklearn-doc-zh/master/img/6f0cdccb5dc60bae6e7a303075ddbdf6.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6f25bd1d6d3abb565ca3007f8ac1d855.jpg b/Sklearn/sklearn-doc-zh/master/img/6f25bd1d6d3abb565ca3007f8ac1d855.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6f25bd1d6d3abb565ca3007f8ac1d855.jpg rename to Sklearn/sklearn-doc-zh/master/img/6f25bd1d6d3abb565ca3007f8ac1d855.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/6f98755e60bfb560a671770b8e51cb28.jpg b/Sklearn/sklearn-doc-zh/master/img/6f98755e60bfb560a671770b8e51cb28.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6f98755e60bfb560a671770b8e51cb28.jpg rename to Sklearn/sklearn-doc-zh/master/img/6f98755e60bfb560a671770b8e51cb28.jpg diff --git 
a/Python/sklearn/sklearn-doc-zh/master/img/6fcf3a401454fd3c65ac740912e12467.jpg b/Sklearn/sklearn-doc-zh/master/img/6fcf3a401454fd3c65ac740912e12467.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/6fcf3a401454fd3c65ac740912e12467.jpg rename to Sklearn/sklearn-doc-zh/master/img/6fcf3a401454fd3c65ac740912e12467.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7057da795219773c01c26cde1adf90e5.jpg b/Sklearn/sklearn-doc-zh/master/img/7057da795219773c01c26cde1adf90e5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7057da795219773c01c26cde1adf90e5.jpg rename to Sklearn/sklearn-doc-zh/master/img/7057da795219773c01c26cde1adf90e5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7085885523458d64e2f9981f39c7b038.jpg b/Sklearn/sklearn-doc-zh/master/img/7085885523458d64e2f9981f39c7b038.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7085885523458d64e2f9981f39c7b038.jpg rename to Sklearn/sklearn-doc-zh/master/img/7085885523458d64e2f9981f39c7b038.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/70abd4aa320170aa6dbe8204a5ed846e.jpg b/Sklearn/sklearn-doc-zh/master/img/70abd4aa320170aa6dbe8204a5ed846e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/70abd4aa320170aa6dbe8204a5ed846e.jpg rename to Sklearn/sklearn-doc-zh/master/img/70abd4aa320170aa6dbe8204a5ed846e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/71581bfc44b992a82bd0bc7a6eee38f4.jpg b/Sklearn/sklearn-doc-zh/master/img/71581bfc44b992a82bd0bc7a6eee38f4.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/71581bfc44b992a82bd0bc7a6eee38f4.jpg rename to Sklearn/sklearn-doc-zh/master/img/71581bfc44b992a82bd0bc7a6eee38f4.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/715cb9aaddb7a5ea10028eb49f509f61.jpg b/Sklearn/sklearn-doc-zh/master/img/715cb9aaddb7a5ea10028eb49f509f61.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/715cb9aaddb7a5ea10028eb49f509f61.jpg rename to Sklearn/sklearn-doc-zh/master/img/715cb9aaddb7a5ea10028eb49f509f61.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/725082a3e3f2eacec65e9c1435a6960d.jpg b/Sklearn/sklearn-doc-zh/master/img/725082a3e3f2eacec65e9c1435a6960d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/725082a3e3f2eacec65e9c1435a6960d.jpg rename to Sklearn/sklearn-doc-zh/master/img/725082a3e3f2eacec65e9c1435a6960d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7304e7fb0302be38d7fa1688bcd14df4.jpg b/Sklearn/sklearn-doc-zh/master/img/7304e7fb0302be38d7fa1688bcd14df4.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7304e7fb0302be38d7fa1688bcd14df4.jpg rename to Sklearn/sklearn-doc-zh/master/img/7304e7fb0302be38d7fa1688bcd14df4.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/73658f99647e50786817b44416d09df1.jpg b/Sklearn/sklearn-doc-zh/master/img/73658f99647e50786817b44416d09df1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/73658f99647e50786817b44416d09df1.jpg rename to Sklearn/sklearn-doc-zh/master/img/73658f99647e50786817b44416d09df1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/74f4ea0e25b673d30d56ab4269f03f3b.jpg b/Sklearn/sklearn-doc-zh/master/img/74f4ea0e25b673d30d56ab4269f03f3b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/74f4ea0e25b673d30d56ab4269f03f3b.jpg rename to 
Sklearn/sklearn-doc-zh/master/img/74f4ea0e25b673d30d56ab4269f03f3b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/75a6f2c15bfb418edcb993c4e27873d0.jpg b/Sklearn/sklearn-doc-zh/master/img/75a6f2c15bfb418edcb993c4e27873d0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/75a6f2c15bfb418edcb993c4e27873d0.jpg rename to Sklearn/sklearn-doc-zh/master/img/75a6f2c15bfb418edcb993c4e27873d0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/75d98860b528f3fcd2b060ad5e624ca0.jpg b/Sklearn/sklearn-doc-zh/master/img/75d98860b528f3fcd2b060ad5e624ca0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/75d98860b528f3fcd2b060ad5e624ca0.jpg rename to Sklearn/sklearn-doc-zh/master/img/75d98860b528f3fcd2b060ad5e624ca0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/75fd7809a46f43dcd922f39ff8f91026.jpg b/Sklearn/sklearn-doc-zh/master/img/75fd7809a46f43dcd922f39ff8f91026.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/75fd7809a46f43dcd922f39ff8f91026.jpg rename to Sklearn/sklearn-doc-zh/master/img/75fd7809a46f43dcd922f39ff8f91026.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/76530e85b09bd8385fad05337b968caf.jpg b/Sklearn/sklearn-doc-zh/master/img/76530e85b09bd8385fad05337b968caf.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/76530e85b09bd8385fad05337b968caf.jpg rename to Sklearn/sklearn-doc-zh/master/img/76530e85b09bd8385fad05337b968caf.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/76814b51cd880ede8da9a2b5ad3d4143.jpg b/Sklearn/sklearn-doc-zh/master/img/76814b51cd880ede8da9a2b5ad3d4143.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/76814b51cd880ede8da9a2b5ad3d4143.jpg rename to Sklearn/sklearn-doc-zh/master/img/76814b51cd880ede8da9a2b5ad3d4143.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7682696b3b598c55d49ca030059f0a18.jpg b/Sklearn/sklearn-doc-zh/master/img/7682696b3b598c55d49ca030059f0a18.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7682696b3b598c55d49ca030059f0a18.jpg rename to Sklearn/sklearn-doc-zh/master/img/7682696b3b598c55d49ca030059f0a18.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7695a05e60c9dc0ec13f779fc19da966.jpg b/Sklearn/sklearn-doc-zh/master/img/7695a05e60c9dc0ec13f779fc19da966.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7695a05e60c9dc0ec13f779fc19da966.jpg rename to Sklearn/sklearn-doc-zh/master/img/7695a05e60c9dc0ec13f779fc19da966.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/769ae28b43c9c1f02556a7d6d878c7e0.jpg b/Sklearn/sklearn-doc-zh/master/img/769ae28b43c9c1f02556a7d6d878c7e0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/769ae28b43c9c1f02556a7d6d878c7e0.jpg rename to Sklearn/sklearn-doc-zh/master/img/769ae28b43c9c1f02556a7d6d878c7e0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/77e01a63c0620550a5f11a7613001120.jpg b/Sklearn/sklearn-doc-zh/master/img/77e01a63c0620550a5f11a7613001120.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/77e01a63c0620550a5f11a7613001120.jpg rename to Sklearn/sklearn-doc-zh/master/img/77e01a63c0620550a5f11a7613001120.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/77e9cd089beb314666ac8397f95afc0a.jpg b/Sklearn/sklearn-doc-zh/master/img/77e9cd089beb314666ac8397f95afc0a.jpg similarity index 100% rename from 
Python/sklearn/sklearn-doc-zh/master/img/77e9cd089beb314666ac8397f95afc0a.jpg rename to Sklearn/sklearn-doc-zh/master/img/77e9cd089beb314666ac8397f95afc0a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/77ee769c7c80ba4738fa4b34ff922e25.jpg b/Sklearn/sklearn-doc-zh/master/img/77ee769c7c80ba4738fa4b34ff922e25.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/77ee769c7c80ba4738fa4b34ff922e25.jpg rename to Sklearn/sklearn-doc-zh/master/img/77ee769c7c80ba4738fa4b34ff922e25.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/77eee205b1d286584f4002a39c9b32a3.jpg b/Sklearn/sklearn-doc-zh/master/img/77eee205b1d286584f4002a39c9b32a3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/77eee205b1d286584f4002a39c9b32a3.jpg rename to Sklearn/sklearn-doc-zh/master/img/77eee205b1d286584f4002a39c9b32a3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7881dd425f1090aadc25eca46dc0daec.jpg b/Sklearn/sklearn-doc-zh/master/img/7881dd425f1090aadc25eca46dc0daec.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7881dd425f1090aadc25eca46dc0daec.jpg rename to Sklearn/sklearn-doc-zh/master/img/7881dd425f1090aadc25eca46dc0daec.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7947f721109ac76f1366b72715d3e7e3.jpg b/Sklearn/sklearn-doc-zh/master/img/7947f721109ac76f1366b72715d3e7e3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7947f721109ac76f1366b72715d3e7e3.jpg rename to Sklearn/sklearn-doc-zh/master/img/7947f721109ac76f1366b72715d3e7e3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/79b52c5c00ce59ba04383f6a0d670c6d.jpg b/Sklearn/sklearn-doc-zh/master/img/79b52c5c00ce59ba04383f6a0d670c6d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/79b52c5c00ce59ba04383f6a0d670c6d.jpg rename to Sklearn/sklearn-doc-zh/master/img/79b52c5c00ce59ba04383f6a0d670c6d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7a6d4f1e7672a61d5284203113eb5b2a.jpg b/Sklearn/sklearn-doc-zh/master/img/7a6d4f1e7672a61d5284203113eb5b2a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7a6d4f1e7672a61d5284203113eb5b2a.jpg rename to Sklearn/sklearn-doc-zh/master/img/7a6d4f1e7672a61d5284203113eb5b2a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7a79f764bd3db11876c1065b6677af80.jpg b/Sklearn/sklearn-doc-zh/master/img/7a79f764bd3db11876c1065b6677af80.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7a79f764bd3db11876c1065b6677af80.jpg rename to Sklearn/sklearn-doc-zh/master/img/7a79f764bd3db11876c1065b6677af80.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7a958d3bae3db42cc0811b186bbc4e00.jpg b/Sklearn/sklearn-doc-zh/master/img/7a958d3bae3db42cc0811b186bbc4e00.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7a958d3bae3db42cc0811b186bbc4e00.jpg rename to Sklearn/sklearn-doc-zh/master/img/7a958d3bae3db42cc0811b186bbc4e00.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7a95e7e32309847d96c207051da29ea9.jpg b/Sklearn/sklearn-doc-zh/master/img/7a95e7e32309847d96c207051da29ea9.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7a95e7e32309847d96c207051da29ea9.jpg rename to Sklearn/sklearn-doc-zh/master/img/7a95e7e32309847d96c207051da29ea9.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7a9775b9051c948f74639f1856f6c585.jpg b/Sklearn/sklearn-doc-zh/master/img/7a9775b9051c948f74639f1856f6c585.jpg similarity index 
100% rename from Python/sklearn/sklearn-doc-zh/master/img/7a9775b9051c948f74639f1856f6c585.jpg rename to Sklearn/sklearn-doc-zh/master/img/7a9775b9051c948f74639f1856f6c585.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7ab8c51f211ad5aea8e4e78337ca3624.jpg b/Sklearn/sklearn-doc-zh/master/img/7ab8c51f211ad5aea8e4e78337ca3624.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7ab8c51f211ad5aea8e4e78337ca3624.jpg rename to Sklearn/sklearn-doc-zh/master/img/7ab8c51f211ad5aea8e4e78337ca3624.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7acaacd6f5e4219eb0e02cc0ce0aff63.jpg b/Sklearn/sklearn-doc-zh/master/img/7acaacd6f5e4219eb0e02cc0ce0aff63.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7acaacd6f5e4219eb0e02cc0ce0aff63.jpg rename to Sklearn/sklearn-doc-zh/master/img/7acaacd6f5e4219eb0e02cc0ce0aff63.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7afe3c56e3473a3a7f18cf983ed5e79c.jpg b/Sklearn/sklearn-doc-zh/master/img/7afe3c56e3473a3a7f18cf983ed5e79c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7afe3c56e3473a3a7f18cf983ed5e79c.jpg rename to Sklearn/sklearn-doc-zh/master/img/7afe3c56e3473a3a7f18cf983ed5e79c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7b0e2ed0273c0a1650cc9f78eabe93c4.jpg b/Sklearn/sklearn-doc-zh/master/img/7b0e2ed0273c0a1650cc9f78eabe93c4.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7b0e2ed0273c0a1650cc9f78eabe93c4.jpg rename to Sklearn/sklearn-doc-zh/master/img/7b0e2ed0273c0a1650cc9f78eabe93c4.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7b215f2882ce8aaa33a97e43ad626314.jpg b/Sklearn/sklearn-doc-zh/master/img/7b215f2882ce8aaa33a97e43ad626314.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7b215f2882ce8aaa33a97e43ad626314.jpg rename to Sklearn/sklearn-doc-zh/master/img/7b215f2882ce8aaa33a97e43ad626314.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7b6cebf625d680ab33eba86d34885910.jpg b/Sklearn/sklearn-doc-zh/master/img/7b6cebf625d680ab33eba86d34885910.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7b6cebf625d680ab33eba86d34885910.jpg rename to Sklearn/sklearn-doc-zh/master/img/7b6cebf625d680ab33eba86d34885910.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7ba11d33e68a1e32f2d8d9387bbc1eba.jpg b/Sklearn/sklearn-doc-zh/master/img/7ba11d33e68a1e32f2d8d9387bbc1eba.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7ba11d33e68a1e32f2d8d9387bbc1eba.jpg rename to Sklearn/sklearn-doc-zh/master/img/7ba11d33e68a1e32f2d8d9387bbc1eba.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7c228fb3025521b5d9c2aef929547d1d.jpg b/Sklearn/sklearn-doc-zh/master/img/7c228fb3025521b5d9c2aef929547d1d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7c228fb3025521b5d9c2aef929547d1d.jpg rename to Sklearn/sklearn-doc-zh/master/img/7c228fb3025521b5d9c2aef929547d1d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7c9ce57d4e2ba5270352f4e118af7fac.jpg b/Sklearn/sklearn-doc-zh/master/img/7c9ce57d4e2ba5270352f4e118af7fac.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7c9ce57d4e2ba5270352f4e118af7fac.jpg rename to Sklearn/sklearn-doc-zh/master/img/7c9ce57d4e2ba5270352f4e118af7fac.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7ca5945518e2f3eff72bd67b029e919a.jpg b/Sklearn/sklearn-doc-zh/master/img/7ca5945518e2f3eff72bd67b029e919a.jpg 
similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7ca5945518e2f3eff72bd67b029e919a.jpg rename to Sklearn/sklearn-doc-zh/master/img/7ca5945518e2f3eff72bd67b029e919a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7cda18e6ccd378c8ef9714e3035506c9.jpg b/Sklearn/sklearn-doc-zh/master/img/7cda18e6ccd378c8ef9714e3035506c9.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7cda18e6ccd378c8ef9714e3035506c9.jpg rename to Sklearn/sklearn-doc-zh/master/img/7cda18e6ccd378c8ef9714e3035506c9.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7ce09555ac9e490df7f81ef7eb0e58e8.jpg b/Sklearn/sklearn-doc-zh/master/img/7ce09555ac9e490df7f81ef7eb0e58e8.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7ce09555ac9e490df7f81ef7eb0e58e8.jpg rename to Sklearn/sklearn-doc-zh/master/img/7ce09555ac9e490df7f81ef7eb0e58e8.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7d1cd85c2f165c7bde33eccf4be29e75.jpg b/Sklearn/sklearn-doc-zh/master/img/7d1cd85c2f165c7bde33eccf4be29e75.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7d1cd85c2f165c7bde33eccf4be29e75.jpg rename to Sklearn/sklearn-doc-zh/master/img/7d1cd85c2f165c7bde33eccf4be29e75.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7d1d547b1a46ad827caeb7f0e85b213d.jpg b/Sklearn/sklearn-doc-zh/master/img/7d1d547b1a46ad827caeb7f0e85b213d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7d1d547b1a46ad827caeb7f0e85b213d.jpg rename to Sklearn/sklearn-doc-zh/master/img/7d1d547b1a46ad827caeb7f0e85b213d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7d32ef1e04f69e63d69e04b09b973946.jpg b/Sklearn/sklearn-doc-zh/master/img/7d32ef1e04f69e63d69e04b09b973946.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7d32ef1e04f69e63d69e04b09b973946.jpg rename to Sklearn/sklearn-doc-zh/master/img/7d32ef1e04f69e63d69e04b09b973946.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7d379816608eb84009d45f0e26772256.jpg b/Sklearn/sklearn-doc-zh/master/img/7d379816608eb84009d45f0e26772256.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7d379816608eb84009d45f0e26772256.jpg rename to Sklearn/sklearn-doc-zh/master/img/7d379816608eb84009d45f0e26772256.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7d4aae450c85e9ea83d3746485791394.jpg b/Sklearn/sklearn-doc-zh/master/img/7d4aae450c85e9ea83d3746485791394.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7d4aae450c85e9ea83d3746485791394.jpg rename to Sklearn/sklearn-doc-zh/master/img/7d4aae450c85e9ea83d3746485791394.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7d9b5103fb50fe740fbc421247d2a5c7.jpg b/Sklearn/sklearn-doc-zh/master/img/7d9b5103fb50fe740fbc421247d2a5c7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7d9b5103fb50fe740fbc421247d2a5c7.jpg rename to Sklearn/sklearn-doc-zh/master/img/7d9b5103fb50fe740fbc421247d2a5c7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7df17fc33fdb4c71b329c593ad30f47e.jpg b/Sklearn/sklearn-doc-zh/master/img/7df17fc33fdb4c71b329c593ad30f47e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7df17fc33fdb4c71b329c593ad30f47e.jpg rename to Sklearn/sklearn-doc-zh/master/img/7df17fc33fdb4c71b329c593ad30f47e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7e76cbbbf685f7ec4bec704a9b5ea007.jpg 
b/Sklearn/sklearn-doc-zh/master/img/7e76cbbbf685f7ec4bec704a9b5ea007.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7e76cbbbf685f7ec4bec704a9b5ea007.jpg rename to Sklearn/sklearn-doc-zh/master/img/7e76cbbbf685f7ec4bec704a9b5ea007.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7e8b544e8ce168b079607ff9674a2c91.jpg b/Sklearn/sklearn-doc-zh/master/img/7e8b544e8ce168b079607ff9674a2c91.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7e8b544e8ce168b079607ff9674a2c91.jpg rename to Sklearn/sklearn-doc-zh/master/img/7e8b544e8ce168b079607ff9674a2c91.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7ea4ef13cdad8ec1dea90fc195d058b1.jpg b/Sklearn/sklearn-doc-zh/master/img/7ea4ef13cdad8ec1dea90fc195d058b1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7ea4ef13cdad8ec1dea90fc195d058b1.jpg rename to Sklearn/sklearn-doc-zh/master/img/7ea4ef13cdad8ec1dea90fc195d058b1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7eb576473ec1de4500c33294115b0719.jpg b/Sklearn/sklearn-doc-zh/master/img/7eb576473ec1de4500c33294115b0719.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7eb576473ec1de4500c33294115b0719.jpg rename to Sklearn/sklearn-doc-zh/master/img/7eb576473ec1de4500c33294115b0719.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7ec998f799acf05b040856bc6b37657f.jpg b/Sklearn/sklearn-doc-zh/master/img/7ec998f799acf05b040856bc6b37657f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7ec998f799acf05b040856bc6b37657f.jpg rename to Sklearn/sklearn-doc-zh/master/img/7ec998f799acf05b040856bc6b37657f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7ece088a96a66cb9675fde4610d67980.jpg b/Sklearn/sklearn-doc-zh/master/img/7ece088a96a66cb9675fde4610d67980.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7ece088a96a66cb9675fde4610d67980.jpg rename to Sklearn/sklearn-doc-zh/master/img/7ece088a96a66cb9675fde4610d67980.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7efe29500f4af973643a15b3ed29a926.jpg b/Sklearn/sklearn-doc-zh/master/img/7efe29500f4af973643a15b3ed29a926.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7efe29500f4af973643a15b3ed29a926.jpg rename to Sklearn/sklearn-doc-zh/master/img/7efe29500f4af973643a15b3ed29a926.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7f1cafe91ec3b94b27f8759724287242.jpg b/Sklearn/sklearn-doc-zh/master/img/7f1cafe91ec3b94b27f8759724287242.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7f1cafe91ec3b94b27f8759724287242.jpg rename to Sklearn/sklearn-doc-zh/master/img/7f1cafe91ec3b94b27f8759724287242.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7f35ead97a9f7be07b87ff7b860bcab9.jpg b/Sklearn/sklearn-doc-zh/master/img/7f35ead97a9f7be07b87ff7b860bcab9.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7f35ead97a9f7be07b87ff7b860bcab9.jpg rename to Sklearn/sklearn-doc-zh/master/img/7f35ead97a9f7be07b87ff7b860bcab9.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7f37b8cd4340f3d0981f518f14670c26.jpg b/Sklearn/sklearn-doc-zh/master/img/7f37b8cd4340f3d0981f518f14670c26.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7f37b8cd4340f3d0981f518f14670c26.jpg rename to Sklearn/sklearn-doc-zh/master/img/7f37b8cd4340f3d0981f518f14670c26.jpg diff --git 
a/Python/sklearn/sklearn-doc-zh/master/img/7fb5b8aaa79d55e35332a1f02a5aee04.jpg b/Sklearn/sklearn-doc-zh/master/img/7fb5b8aaa79d55e35332a1f02a5aee04.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7fb5b8aaa79d55e35332a1f02a5aee04.jpg rename to Sklearn/sklearn-doc-zh/master/img/7fb5b8aaa79d55e35332a1f02a5aee04.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/7ff85adefbea266b138eec7868e87fa9.jpg b/Sklearn/sklearn-doc-zh/master/img/7ff85adefbea266b138eec7868e87fa9.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/7ff85adefbea266b138eec7868e87fa9.jpg rename to Sklearn/sklearn-doc-zh/master/img/7ff85adefbea266b138eec7868e87fa9.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/80a5660d27392922e501744cab3623da.jpg b/Sklearn/sklearn-doc-zh/master/img/80a5660d27392922e501744cab3623da.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/80a5660d27392922e501744cab3623da.jpg rename to Sklearn/sklearn-doc-zh/master/img/80a5660d27392922e501744cab3623da.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/81da9e05103270be5e500ebc67cd1b45.jpg b/Sklearn/sklearn-doc-zh/master/img/81da9e05103270be5e500ebc67cd1b45.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/81da9e05103270be5e500ebc67cd1b45.jpg rename to Sklearn/sklearn-doc-zh/master/img/81da9e05103270be5e500ebc67cd1b45.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/81dfab5bd4f0d37601684acb3d714e9d.jpg b/Sklearn/sklearn-doc-zh/master/img/81dfab5bd4f0d37601684acb3d714e9d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/81dfab5bd4f0d37601684acb3d714e9d.jpg rename to Sklearn/sklearn-doc-zh/master/img/81dfab5bd4f0d37601684acb3d714e9d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/827de4e90947894fc96dd0432ff0d7dd.jpg b/Sklearn/sklearn-doc-zh/master/img/827de4e90947894fc96dd0432ff0d7dd.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/827de4e90947894fc96dd0432ff0d7dd.jpg rename to Sklearn/sklearn-doc-zh/master/img/827de4e90947894fc96dd0432ff0d7dd.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/841ad1e8353dcbf5fed2a58b2008873f.jpg b/Sklearn/sklearn-doc-zh/master/img/841ad1e8353dcbf5fed2a58b2008873f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/841ad1e8353dcbf5fed2a58b2008873f.jpg rename to Sklearn/sklearn-doc-zh/master/img/841ad1e8353dcbf5fed2a58b2008873f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/84298218bd1f4087ce7f8d55a4de1843.jpg b/Sklearn/sklearn-doc-zh/master/img/84298218bd1f4087ce7f8d55a4de1843.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/84298218bd1f4087ce7f8d55a4de1843.jpg rename to Sklearn/sklearn-doc-zh/master/img/84298218bd1f4087ce7f8d55a4de1843.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/842c30bfbcf39ee4d2ac5d7587f7168c.jpg b/Sklearn/sklearn-doc-zh/master/img/842c30bfbcf39ee4d2ac5d7587f7168c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/842c30bfbcf39ee4d2ac5d7587f7168c.jpg rename to Sklearn/sklearn-doc-zh/master/img/842c30bfbcf39ee4d2ac5d7587f7168c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/843ad36e77423c1d0d068bf1b0af24fb.jpg b/Sklearn/sklearn-doc-zh/master/img/843ad36e77423c1d0d068bf1b0af24fb.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/843ad36e77423c1d0d068bf1b0af24fb.jpg rename to 
Sklearn/sklearn-doc-zh/master/img/843ad36e77423c1d0d068bf1b0af24fb.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8480724cd60359c7a8ceda7bee5590bd.jpg b/Sklearn/sklearn-doc-zh/master/img/8480724cd60359c7a8ceda7bee5590bd.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/8480724cd60359c7a8ceda7bee5590bd.jpg rename to Sklearn/sklearn-doc-zh/master/img/8480724cd60359c7a8ceda7bee5590bd.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/848835d5b40c5bd74a6e592a65eed5d6.jpg b/Sklearn/sklearn-doc-zh/master/img/848835d5b40c5bd74a6e592a65eed5d6.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/848835d5b40c5bd74a6e592a65eed5d6.jpg rename to Sklearn/sklearn-doc-zh/master/img/848835d5b40c5bd74a6e592a65eed5d6.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8494ed588e1c79e569f4d2add722ecf5.jpg b/Sklearn/sklearn-doc-zh/master/img/8494ed588e1c79e569f4d2add722ecf5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/8494ed588e1c79e569f4d2add722ecf5.jpg rename to Sklearn/sklearn-doc-zh/master/img/8494ed588e1c79e569f4d2add722ecf5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/851c667ab0811688c25c6819aafacba0.jpg b/Sklearn/sklearn-doc-zh/master/img/851c667ab0811688c25c6819aafacba0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/851c667ab0811688c25c6819aafacba0.jpg rename to Sklearn/sklearn-doc-zh/master/img/851c667ab0811688c25c6819aafacba0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/855d4e5dae2b0286042ee7eef0c91ab5.jpg b/Sklearn/sklearn-doc-zh/master/img/855d4e5dae2b0286042ee7eef0c91ab5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/855d4e5dae2b0286042ee7eef0c91ab5.jpg rename to Sklearn/sklearn-doc-zh/master/img/855d4e5dae2b0286042ee7eef0c91ab5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/85f7fc9836edfbdcd2a7533674940b46.jpg b/Sklearn/sklearn-doc-zh/master/img/85f7fc9836edfbdcd2a7533674940b46.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/85f7fc9836edfbdcd2a7533674940b46.jpg rename to Sklearn/sklearn-doc-zh/master/img/85f7fc9836edfbdcd2a7533674940b46.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/86003b5287219bcbec1586985a110629.jpg b/Sklearn/sklearn-doc-zh/master/img/86003b5287219bcbec1586985a110629.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/86003b5287219bcbec1586985a110629.jpg rename to Sklearn/sklearn-doc-zh/master/img/86003b5287219bcbec1586985a110629.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/860af894555b3d5f52e893bab6cafc11.jpg b/Sklearn/sklearn-doc-zh/master/img/860af894555b3d5f52e893bab6cafc11.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/860af894555b3d5f52e893bab6cafc11.jpg rename to Sklearn/sklearn-doc-zh/master/img/860af894555b3d5f52e893bab6cafc11.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8610705cf45aa68b12197abd65653479.jpg b/Sklearn/sklearn-doc-zh/master/img/8610705cf45aa68b12197abd65653479.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/8610705cf45aa68b12197abd65653479.jpg rename to Sklearn/sklearn-doc-zh/master/img/8610705cf45aa68b12197abd65653479.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8615ac5c8ea1fb8ae6e726f7659a29e9.jpg b/Sklearn/sklearn-doc-zh/master/img/8615ac5c8ea1fb8ae6e726f7659a29e9.jpg similarity index 100% rename from 
Python/sklearn/sklearn-doc-zh/master/img/8615ac5c8ea1fb8ae6e726f7659a29e9.jpg rename to Sklearn/sklearn-doc-zh/master/img/8615ac5c8ea1fb8ae6e726f7659a29e9.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/866891e7bebe66615daa04976af79f99.jpg b/Sklearn/sklearn-doc-zh/master/img/866891e7bebe66615daa04976af79f99.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/866891e7bebe66615daa04976af79f99.jpg rename to Sklearn/sklearn-doc-zh/master/img/866891e7bebe66615daa04976af79f99.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/86d43b01a4c13ac9d69265ff0d546720.jpg b/Sklearn/sklearn-doc-zh/master/img/86d43b01a4c13ac9d69265ff0d546720.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/86d43b01a4c13ac9d69265ff0d546720.jpg rename to Sklearn/sklearn-doc-zh/master/img/86d43b01a4c13ac9d69265ff0d546720.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/86f7969b00fb3d0914f0bababac102a0.jpg b/Sklearn/sklearn-doc-zh/master/img/86f7969b00fb3d0914f0bababac102a0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/86f7969b00fb3d0914f0bababac102a0.jpg rename to Sklearn/sklearn-doc-zh/master/img/86f7969b00fb3d0914f0bababac102a0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/87ae31ef4f2e7b4385b9a25aa8fed533.jpg b/Sklearn/sklearn-doc-zh/master/img/87ae31ef4f2e7b4385b9a25aa8fed533.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/87ae31ef4f2e7b4385b9a25aa8fed533.jpg rename to Sklearn/sklearn-doc-zh/master/img/87ae31ef4f2e7b4385b9a25aa8fed533.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/87c11f44388ae80ae9244e8d56a2dc86.jpg b/Sklearn/sklearn-doc-zh/master/img/87c11f44388ae80ae9244e8d56a2dc86.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/87c11f44388ae80ae9244e8d56a2dc86.jpg rename to Sklearn/sklearn-doc-zh/master/img/87c11f44388ae80ae9244e8d56a2dc86.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/87dfb2676632ee8a92713f4861ccc84e.jpg b/Sklearn/sklearn-doc-zh/master/img/87dfb2676632ee8a92713f4861ccc84e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/87dfb2676632ee8a92713f4861ccc84e.jpg rename to Sklearn/sklearn-doc-zh/master/img/87dfb2676632ee8a92713f4861ccc84e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/87fef5b961bf8a90d58faa5e4084a081.jpg b/Sklearn/sklearn-doc-zh/master/img/87fef5b961bf8a90d58faa5e4084a081.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/87fef5b961bf8a90d58faa5e4084a081.jpg rename to Sklearn/sklearn-doc-zh/master/img/87fef5b961bf8a90d58faa5e4084a081.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8851bd0fe9749b4841b30cee41fb040d.jpg b/Sklearn/sklearn-doc-zh/master/img/8851bd0fe9749b4841b30cee41fb040d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/8851bd0fe9749b4841b30cee41fb040d.jpg rename to Sklearn/sklearn-doc-zh/master/img/8851bd0fe9749b4841b30cee41fb040d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/887928b507a2f01847b6ae5f5b0e733f.jpg b/Sklearn/sklearn-doc-zh/master/img/887928b507a2f01847b6ae5f5b0e733f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/887928b507a2f01847b6ae5f5b0e733f.jpg rename to Sklearn/sklearn-doc-zh/master/img/887928b507a2f01847b6ae5f5b0e733f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/88c766439d728ffb597e180a42f41d00.jpg b/Sklearn/sklearn-doc-zh/master/img/88c766439d728ffb597e180a42f41d00.jpg similarity index 
100% rename from Python/sklearn/sklearn-doc-zh/master/img/88c766439d728ffb597e180a42f41d00.jpg rename to Sklearn/sklearn-doc-zh/master/img/88c766439d728ffb597e180a42f41d00.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/88d9981d54cd398165b20856ed5043e1.jpg b/Sklearn/sklearn-doc-zh/master/img/88d9981d54cd398165b20856ed5043e1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/88d9981d54cd398165b20856ed5043e1.jpg rename to Sklearn/sklearn-doc-zh/master/img/88d9981d54cd398165b20856ed5043e1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/88ef3c9a51bdadd21593bf89887a04b5.jpg b/Sklearn/sklearn-doc-zh/master/img/88ef3c9a51bdadd21593bf89887a04b5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/88ef3c9a51bdadd21593bf89887a04b5.jpg rename to Sklearn/sklearn-doc-zh/master/img/88ef3c9a51bdadd21593bf89887a04b5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/89807b9fa9f5924b027486f280324b1a.jpg b/Sklearn/sklearn-doc-zh/master/img/89807b9fa9f5924b027486f280324b1a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/89807b9fa9f5924b027486f280324b1a.jpg rename to Sklearn/sklearn-doc-zh/master/img/89807b9fa9f5924b027486f280324b1a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8a58e8df6a985a3273e39bac7dd72b1f.jpg b/Sklearn/sklearn-doc-zh/master/img/8a58e8df6a985a3273e39bac7dd72b1f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/8a58e8df6a985a3273e39bac7dd72b1f.jpg rename to Sklearn/sklearn-doc-zh/master/img/8a58e8df6a985a3273e39bac7dd72b1f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8a76c85f2de3d3777fe72f5d8e32e0cf.jpg b/Sklearn/sklearn-doc-zh/master/img/8a76c85f2de3d3777fe72f5d8e32e0cf.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/8a76c85f2de3d3777fe72f5d8e32e0cf.jpg rename to Sklearn/sklearn-doc-zh/master/img/8a76c85f2de3d3777fe72f5d8e32e0cf.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8aca5aa85ff13bf8e8687220b137f9d3.jpg b/Sklearn/sklearn-doc-zh/master/img/8aca5aa85ff13bf8e8687220b137f9d3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/8aca5aa85ff13bf8e8687220b137f9d3.jpg rename to Sklearn/sklearn-doc-zh/master/img/8aca5aa85ff13bf8e8687220b137f9d3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8ae33567ac5e4d4e9b1b14ae12220ea9.jpg b/Sklearn/sklearn-doc-zh/master/img/8ae33567ac5e4d4e9b1b14ae12220ea9.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/8ae33567ac5e4d4e9b1b14ae12220ea9.jpg rename to Sklearn/sklearn-doc-zh/master/img/8ae33567ac5e4d4e9b1b14ae12220ea9.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8b3be66a25e0c33787b341667b261360.jpg b/Sklearn/sklearn-doc-zh/master/img/8b3be66a25e0c33787b341667b261360.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/8b3be66a25e0c33787b341667b261360.jpg rename to Sklearn/sklearn-doc-zh/master/img/8b3be66a25e0c33787b341667b261360.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8bce1ab1dca39a6b7fcb97dcf959a1f7.jpg b/Sklearn/sklearn-doc-zh/master/img/8bce1ab1dca39a6b7fcb97dcf959a1f7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/8bce1ab1dca39a6b7fcb97dcf959a1f7.jpg rename to Sklearn/sklearn-doc-zh/master/img/8bce1ab1dca39a6b7fcb97dcf959a1f7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8c0b5b6a48349ecfb1f20d9168d166b7.jpg b/Sklearn/sklearn-doc-zh/master/img/8c0b5b6a48349ecfb1f20d9168d166b7.jpg 
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8c0b5b6a48349ecfb1f20d9168d166b7.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8c0b5b6a48349ecfb1f20d9168d166b7.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8c187292cd29fea23a4983db349e7545.jpg b/Sklearn/sklearn-doc-zh/master/img/8c187292cd29fea23a4983db349e7545.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8c187292cd29fea23a4983db349e7545.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8c187292cd29fea23a4983db349e7545.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8c43dd6816e66709ef3f9d681ec3941a.jpg b/Sklearn/sklearn-doc-zh/master/img/8c43dd6816e66709ef3f9d681ec3941a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8c43dd6816e66709ef3f9d681ec3941a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8c43dd6816e66709ef3f9d681ec3941a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8c4a5c99b21079b9fb1be49910ff96e3.jpg b/Sklearn/sklearn-doc-zh/master/img/8c4a5c99b21079b9fb1be49910ff96e3.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8c4a5c99b21079b9fb1be49910ff96e3.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8c4a5c99b21079b9fb1be49910ff96e3.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8c4beae51da320d8fffd739a9e9e3852.jpg b/Sklearn/sklearn-doc-zh/master/img/8c4beae51da320d8fffd739a9e9e3852.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8c4beae51da320d8fffd739a9e9e3852.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8c4beae51da320d8fffd739a9e9e3852.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8c8211edce4dbaeb44032f8d71a12135.jpg b/Sklearn/sklearn-doc-zh/master/img/8c8211edce4dbaeb44032f8d71a12135.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8c8211edce4dbaeb44032f8d71a12135.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8c8211edce4dbaeb44032f8d71a12135.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8c8c09a18e398935473d8b69cf1b617e.jpg b/Sklearn/sklearn-doc-zh/master/img/8c8c09a18e398935473d8b69cf1b617e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8c8c09a18e398935473d8b69cf1b617e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8c8c09a18e398935473d8b69cf1b617e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8cc09b4045e2fece3222e52d7333c1e5.jpg b/Sklearn/sklearn-doc-zh/master/img/8cc09b4045e2fece3222e52d7333c1e5.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8cc09b4045e2fece3222e52d7333c1e5.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8cc09b4045e2fece3222e52d7333c1e5.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8cddd8c0c85ca4a1b6dce8bbf145a8aa.jpg b/Sklearn/sklearn-doc-zh/master/img/8cddd8c0c85ca4a1b6dce8bbf145a8aa.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8cddd8c0c85ca4a1b6dce8bbf145a8aa.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8cddd8c0c85ca4a1b6dce8bbf145a8aa.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8dab78bd2e80188f99e0c88c4c83472a.jpg b/Sklearn/sklearn-doc-zh/master/img/8dab78bd2e80188f99e0c88c4c83472a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8dab78bd2e80188f99e0c88c4c83472a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8dab78bd2e80188f99e0c88c4c83472a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8dcf0f01f9d255c37e21948ad3821885.jpg b/Sklearn/sklearn-doc-zh/master/img/8dcf0f01f9d255c37e21948ad3821885.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8dcf0f01f9d255c37e21948ad3821885.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8dcf0f01f9d255c37e21948ad3821885.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8dd0e1ef4fdc9bbfcc8b0ce3fa8a4321.jpg b/Sklearn/sklearn-doc-zh/master/img/8dd0e1ef4fdc9bbfcc8b0ce3fa8a4321.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8dd0e1ef4fdc9bbfcc8b0ce3fa8a4321.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8dd0e1ef4fdc9bbfcc8b0ce3fa8a4321.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8ef970a809e9beef56eb7a78d0133978.jpg b/Sklearn/sklearn-doc-zh/master/img/8ef970a809e9beef56eb7a78d0133978.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8ef970a809e9beef56eb7a78d0133978.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8ef970a809e9beef56eb7a78d0133978.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8f4e82e4dfa89ac81c42992c603a953e.jpg b/Sklearn/sklearn-doc-zh/master/img/8f4e82e4dfa89ac81c42992c603a953e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8f4e82e4dfa89ac81c42992c603a953e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8f4e82e4dfa89ac81c42992c603a953e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8f4f76678eb50ebccaba25e86961ff3e.jpg b/Sklearn/sklearn-doc-zh/master/img/8f4f76678eb50ebccaba25e86961ff3e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8f4f76678eb50ebccaba25e86961ff3e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8f4f76678eb50ebccaba25e86961ff3e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8f62a01deaf67a7f9960d853abd9dcca.jpg b/Sklearn/sklearn-doc-zh/master/img/8f62a01deaf67a7f9960d853abd9dcca.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8f62a01deaf67a7f9960d853abd9dcca.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8f62a01deaf67a7f9960d853abd9dcca.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8f839ebe5b506fef19bd8cc121b3f557.jpg b/Sklearn/sklearn-doc-zh/master/img/8f839ebe5b506fef19bd8cc121b3f557.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8f839ebe5b506fef19bd8cc121b3f557.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8f839ebe5b506fef19bd8cc121b3f557.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8fae035cff5a2ccfbc80e38fab4907cd.jpg b/Sklearn/sklearn-doc-zh/master/img/8fae035cff5a2ccfbc80e38fab4907cd.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8fae035cff5a2ccfbc80e38fab4907cd.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8fae035cff5a2ccfbc80e38fab4907cd.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/8fe0d726b364f224c93e31cd3248a1f6.jpg b/Sklearn/sklearn-doc-zh/master/img/8fe0d726b364f224c93e31cd3248a1f6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/8fe0d726b364f224c93e31cd3248a1f6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/8fe0d726b364f224c93e31cd3248a1f6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9127c3e2b5748eee602354fed5570605.jpg b/Sklearn/sklearn-doc-zh/master/img/9127c3e2b5748eee602354fed5570605.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9127c3e2b5748eee602354fed5570605.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9127c3e2b5748eee602354fed5570605.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9255ba83a88cb73b04d1ca968f9c2b4e.jpg b/Sklearn/sklearn-doc-zh/master/img/9255ba83a88cb73b04d1ca968f9c2b4e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9255ba83a88cb73b04d1ca968f9c2b4e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9255ba83a88cb73b04d1ca968f9c2b4e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/92570652e9c52768c76f5b9cf8f97a28.jpg b/Sklearn/sklearn-doc-zh/master/img/92570652e9c52768c76f5b9cf8f97a28.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/92570652e9c52768c76f5b9cf8f97a28.jpg
rename to Sklearn/sklearn-doc-zh/master/img/92570652e9c52768c76f5b9cf8f97a28.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9259b19a18f30f67db9e45b8c0b361c7.jpg b/Sklearn/sklearn-doc-zh/master/img/9259b19a18f30f67db9e45b8c0b361c7.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9259b19a18f30f67db9e45b8c0b361c7.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9259b19a18f30f67db9e45b8c0b361c7.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/929e25fd2cb34bf9709d68d266786fd3.jpg b/Sklearn/sklearn-doc-zh/master/img/929e25fd2cb34bf9709d68d266786fd3.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/929e25fd2cb34bf9709d68d266786fd3.jpg
rename to Sklearn/sklearn-doc-zh/master/img/929e25fd2cb34bf9709d68d266786fd3.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/92c470d884176abfcc29a5218ccf0aef.jpg b/Sklearn/sklearn-doc-zh/master/img/92c470d884176abfcc29a5218ccf0aef.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/92c470d884176abfcc29a5218ccf0aef.jpg
rename to Sklearn/sklearn-doc-zh/master/img/92c470d884176abfcc29a5218ccf0aef.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/92e5a41435bd53653e9ad36f030cbd61.jpg b/Sklearn/sklearn-doc-zh/master/img/92e5a41435bd53653e9ad36f030cbd61.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/92e5a41435bd53653e9ad36f030cbd61.jpg
rename to Sklearn/sklearn-doc-zh/master/img/92e5a41435bd53653e9ad36f030cbd61.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/93074566222e67121a8ab55e90d8e1af.jpg b/Sklearn/sklearn-doc-zh/master/img/93074566222e67121a8ab55e90d8e1af.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/93074566222e67121a8ab55e90d8e1af.jpg
rename to Sklearn/sklearn-doc-zh/master/img/93074566222e67121a8ab55e90d8e1af.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/93401b902ac1e2a94ff3ce04e7f05882.jpg b/Sklearn/sklearn-doc-zh/master/img/93401b902ac1e2a94ff3ce04e7f05882.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/93401b902ac1e2a94ff3ce04e7f05882.jpg
rename to Sklearn/sklearn-doc-zh/master/img/93401b902ac1e2a94ff3ce04e7f05882.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/938ee5c0c620fd2298ea16abe621e7bb.jpg b/Sklearn/sklearn-doc-zh/master/img/938ee5c0c620fd2298ea16abe621e7bb.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/938ee5c0c620fd2298ea16abe621e7bb.jpg
rename to Sklearn/sklearn-doc-zh/master/img/938ee5c0c620fd2298ea16abe621e7bb.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/93cdc140e4de4cbb0d093fc7ad5c6a36.jpg b/Sklearn/sklearn-doc-zh/master/img/93cdc140e4de4cbb0d093fc7ad5c6a36.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/93cdc140e4de4cbb0d093fc7ad5c6a36.jpg
rename to Sklearn/sklearn-doc-zh/master/img/93cdc140e4de4cbb0d093fc7ad5c6a36.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/93d2f2876517637396e99e36132252f3.jpg b/Sklearn/sklearn-doc-zh/master/img/93d2f2876517637396e99e36132252f3.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/93d2f2876517637396e99e36132252f3.jpg
rename to Sklearn/sklearn-doc-zh/master/img/93d2f2876517637396e99e36132252f3.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/94156b879a7455cb0d516efa9c9c0991.jpg b/Sklearn/sklearn-doc-zh/master/img/94156b879a7455cb0d516efa9c9c0991.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/94156b879a7455cb0d516efa9c9c0991.jpg
rename to Sklearn/sklearn-doc-zh/master/img/94156b879a7455cb0d516efa9c9c0991.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/942734d190e4b1d2c51b0e2ee6c24428.jpg b/Sklearn/sklearn-doc-zh/master/img/942734d190e4b1d2c51b0e2ee6c24428.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/942734d190e4b1d2c51b0e2ee6c24428.jpg
rename to Sklearn/sklearn-doc-zh/master/img/942734d190e4b1d2c51b0e2ee6c24428.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/943add7649d85f7ef63a83356dd6f234.jpg b/Sklearn/sklearn-doc-zh/master/img/943add7649d85f7ef63a83356dd6f234.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/943add7649d85f7ef63a83356dd6f234.jpg
rename to Sklearn/sklearn-doc-zh/master/img/943add7649d85f7ef63a83356dd6f234.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/94435503540f66cab82015a35139213d.jpg b/Sklearn/sklearn-doc-zh/master/img/94435503540f66cab82015a35139213d.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/94435503540f66cab82015a35139213d.jpg
rename to Sklearn/sklearn-doc-zh/master/img/94435503540f66cab82015a35139213d.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/947ae691edda29c53c3b962665b052c6.jpg b/Sklearn/sklearn-doc-zh/master/img/947ae691edda29c53c3b962665b052c6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/947ae691edda29c53c3b962665b052c6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/947ae691edda29c53c3b962665b052c6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/94a5c73d8f351280b6313519455a11c7.jpg b/Sklearn/sklearn-doc-zh/master/img/94a5c73d8f351280b6313519455a11c7.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/94a5c73d8f351280b6313519455a11c7.jpg
rename to Sklearn/sklearn-doc-zh/master/img/94a5c73d8f351280b6313519455a11c7.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/94f627411c005fe4911552b1dd5b6ff1.jpg b/Sklearn/sklearn-doc-zh/master/img/94f627411c005fe4911552b1dd5b6ff1.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/94f627411c005fe4911552b1dd5b6ff1.jpg
rename to Sklearn/sklearn-doc-zh/master/img/94f627411c005fe4911552b1dd5b6ff1.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9521899a181a367c5873e61b9f7785ce.jpg b/Sklearn/sklearn-doc-zh/master/img/9521899a181a367c5873e61b9f7785ce.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9521899a181a367c5873e61b9f7785ce.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9521899a181a367c5873e61b9f7785ce.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9642d01a97f06869baba6159e3438677.jpg b/Sklearn/sklearn-doc-zh/master/img/9642d01a97f06869baba6159e3438677.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9642d01a97f06869baba6159e3438677.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9642d01a97f06869baba6159e3438677.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/966bd0680e0e71a4df98abab98818724.jpg b/Sklearn/sklearn-doc-zh/master/img/966bd0680e0e71a4df98abab98818724.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/966bd0680e0e71a4df98abab98818724.jpg
rename to Sklearn/sklearn-doc-zh/master/img/966bd0680e0e71a4df98abab98818724.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/971b86cde9801a3bb1a80af70bd05466.jpg b/Sklearn/sklearn-doc-zh/master/img/971b86cde9801a3bb1a80af70bd05466.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/971b86cde9801a3bb1a80af70bd05466.jpg
rename to Sklearn/sklearn-doc-zh/master/img/971b86cde9801a3bb1a80af70bd05466.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/97b3ab2cc18a43b00c07a1cf6e118798.jpg b/Sklearn/sklearn-doc-zh/master/img/97b3ab2cc18a43b00c07a1cf6e118798.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/97b3ab2cc18a43b00c07a1cf6e118798.jpg
rename to Sklearn/sklearn-doc-zh/master/img/97b3ab2cc18a43b00c07a1cf6e118798.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/97f4093251d6c6f6f7d0902a86a08dbe.jpg b/Sklearn/sklearn-doc-zh/master/img/97f4093251d6c6f6f7d0902a86a08dbe.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/97f4093251d6c6f6f7d0902a86a08dbe.jpg
rename to Sklearn/sklearn-doc-zh/master/img/97f4093251d6c6f6f7d0902a86a08dbe.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/97f450040417800904df33c9702d2c66.jpg b/Sklearn/sklearn-doc-zh/master/img/97f450040417800904df33c9702d2c66.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/97f450040417800904df33c9702d2c66.jpg
rename to Sklearn/sklearn-doc-zh/master/img/97f450040417800904df33c9702d2c66.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9812effbd6ddac1053fd0b63ebe8c2fb.jpg b/Sklearn/sklearn-doc-zh/master/img/9812effbd6ddac1053fd0b63ebe8c2fb.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9812effbd6ddac1053fd0b63ebe8c2fb.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9812effbd6ddac1053fd0b63ebe8c2fb.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/983133e80141fbf289a10f379c11b34f.jpg b/Sklearn/sklearn-doc-zh/master/img/983133e80141fbf289a10f379c11b34f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/983133e80141fbf289a10f379c11b34f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/983133e80141fbf289a10f379c11b34f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9839512f63b7e5de021f13f7e6bd6b22.jpg b/Sklearn/sklearn-doc-zh/master/img/9839512f63b7e5de021f13f7e6bd6b22.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9839512f63b7e5de021f13f7e6bd6b22.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9839512f63b7e5de021f13f7e6bd6b22.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/987fc6b717a40e57a95fb79a8e809309.jpg b/Sklearn/sklearn-doc-zh/master/img/987fc6b717a40e57a95fb79a8e809309.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/987fc6b717a40e57a95fb79a8e809309.jpg
rename to Sklearn/sklearn-doc-zh/master/img/987fc6b717a40e57a95fb79a8e809309.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/995e683eac95f8e8e65d96c6516858e7.jpg b/Sklearn/sklearn-doc-zh/master/img/995e683eac95f8e8e65d96c6516858e7.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/995e683eac95f8e8e65d96c6516858e7.jpg
rename to Sklearn/sklearn-doc-zh/master/img/995e683eac95f8e8e65d96c6516858e7.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/997e17f4d9f18abfb95e6c9f1f289098.jpg b/Sklearn/sklearn-doc-zh/master/img/997e17f4d9f18abfb95e6c9f1f289098.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/997e17f4d9f18abfb95e6c9f1f289098.jpg
rename to Sklearn/sklearn-doc-zh/master/img/997e17f4d9f18abfb95e6c9f1f289098.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9997b300f697e018f955724f7106ad09.jpg b/Sklearn/sklearn-doc-zh/master/img/9997b300f697e018f955724f7106ad09.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9997b300f697e018f955724f7106ad09.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9997b300f697e018f955724f7106ad09.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/99988260d9d836d14b2569c2fc921e81.jpg b/Sklearn/sklearn-doc-zh/master/img/99988260d9d836d14b2569c2fc921e81.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/99988260d9d836d14b2569c2fc921e81.jpg
rename to Sklearn/sklearn-doc-zh/master/img/99988260d9d836d14b2569c2fc921e81.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/99d4804dc3d2ef82e10d91de99d0142a.jpg b/Sklearn/sklearn-doc-zh/master/img/99d4804dc3d2ef82e10d91de99d0142a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/99d4804dc3d2ef82e10d91de99d0142a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/99d4804dc3d2ef82e10d91de99d0142a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/99dfcad081b3f6e1f4648a9f7d24f103.jpg b/Sklearn/sklearn-doc-zh/master/img/99dfcad081b3f6e1f4648a9f7d24f103.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/99dfcad081b3f6e1f4648a9f7d24f103.jpg
rename to Sklearn/sklearn-doc-zh/master/img/99dfcad081b3f6e1f4648a9f7d24f103.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/99e96cb74c925ba51098fe6167e22c44.jpg b/Sklearn/sklearn-doc-zh/master/img/99e96cb74c925ba51098fe6167e22c44.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/99e96cb74c925ba51098fe6167e22c44.jpg
rename to Sklearn/sklearn-doc-zh/master/img/99e96cb74c925ba51098fe6167e22c44.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9a2b4cb70f12a0e7419ffde362e1bed4.jpg b/Sklearn/sklearn-doc-zh/master/img/9a2b4cb70f12a0e7419ffde362e1bed4.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9a2b4cb70f12a0e7419ffde362e1bed4.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9a2b4cb70f12a0e7419ffde362e1bed4.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9a55689143b2e4d90adcdfe1f95b9ffd.jpg b/Sklearn/sklearn-doc-zh/master/img/9a55689143b2e4d90adcdfe1f95b9ffd.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9a55689143b2e4d90adcdfe1f95b9ffd.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9a55689143b2e4d90adcdfe1f95b9ffd.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9a5f8912e7fe77be2acea88fd091a5d8.jpg b/Sklearn/sklearn-doc-zh/master/img/9a5f8912e7fe77be2acea88fd091a5d8.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9a5f8912e7fe77be2acea88fd091a5d8.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9a5f8912e7fe77be2acea88fd091a5d8.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9ad9b5d15124615ced9c9721a8ef4d3b.jpg b/Sklearn/sklearn-doc-zh/master/img/9ad9b5d15124615ced9c9721a8ef4d3b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9ad9b5d15124615ced9c9721a8ef4d3b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9ad9b5d15124615ced9c9721a8ef4d3b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9afe9de54aeed2a857e99cf6444ff0e5.jpg b/Sklearn/sklearn-doc-zh/master/img/9afe9de54aeed2a857e99cf6444ff0e5.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9afe9de54aeed2a857e99cf6444ff0e5.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9afe9de54aeed2a857e99cf6444ff0e5.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9b4b00422c0cec29f80a03fe1d772100.jpg b/Sklearn/sklearn-doc-zh/master/img/9b4b00422c0cec29f80a03fe1d772100.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9b4b00422c0cec29f80a03fe1d772100.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9b4b00422c0cec29f80a03fe1d772100.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9b5fb6c6e0f320a3e8e0ba606d601c98.jpg b/Sklearn/sklearn-doc-zh/master/img/9b5fb6c6e0f320a3e8e0ba606d601c98.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9b5fb6c6e0f320a3e8e0ba606d601c98.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9b5fb6c6e0f320a3e8e0ba606d601c98.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9b6c97851ffb568abc5688d5c9e81800.jpg b/Sklearn/sklearn-doc-zh/master/img/9b6c97851ffb568abc5688d5c9e81800.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9b6c97851ffb568abc5688d5c9e81800.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9b6c97851ffb568abc5688d5c9e81800.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9b7a1b832e3f1e4ea340038115e6b00d.jpg b/Sklearn/sklearn-doc-zh/master/img/9b7a1b832e3f1e4ea340038115e6b00d.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9b7a1b832e3f1e4ea340038115e6b00d.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9b7a1b832e3f1e4ea340038115e6b00d.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9b85a5f7098321120773184b4673cbeb.jpg b/Sklearn/sklearn-doc-zh/master/img/9b85a5f7098321120773184b4673cbeb.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9b85a5f7098321120773184b4673cbeb.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9b85a5f7098321120773184b4673cbeb.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9b9ee41d276ad49322856b95cb6c7e43.jpg b/Sklearn/sklearn-doc-zh/master/img/9b9ee41d276ad49322856b95cb6c7e43.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9b9ee41d276ad49322856b95cb6c7e43.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9b9ee41d276ad49322856b95cb6c7e43.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9ca39b9e9aa5f1a4660e45f3c9b5ef7b.jpg b/Sklearn/sklearn-doc-zh/master/img/9ca39b9e9aa5f1a4660e45f3c9b5ef7b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9ca39b9e9aa5f1a4660e45f3c9b5ef7b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9ca39b9e9aa5f1a4660e45f3c9b5ef7b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9cb7de99579cbd4664159c8a06417d13.jpg b/Sklearn/sklearn-doc-zh/master/img/9cb7de99579cbd4664159c8a06417d13.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9cb7de99579cbd4664159c8a06417d13.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9cb7de99579cbd4664159c8a06417d13.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9d2e3befcfa08a4b6a7cfed8dadbd5c0.jpg b/Sklearn/sklearn-doc-zh/master/img/9d2e3befcfa08a4b6a7cfed8dadbd5c0.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9d2e3befcfa08a4b6a7cfed8dadbd5c0.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9d2e3befcfa08a4b6a7cfed8dadbd5c0.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9e49f9c5d629711b032b027324d531b9.jpg b/Sklearn/sklearn-doc-zh/master/img/9e49f9c5d629711b032b027324d531b9.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9e49f9c5d629711b032b027324d531b9.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9e49f9c5d629711b032b027324d531b9.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9ef5bf146675caa32b298d7e8318fc43.jpg b/Sklearn/sklearn-doc-zh/master/img/9ef5bf146675caa32b298d7e8318fc43.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9ef5bf146675caa32b298d7e8318fc43.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9ef5bf146675caa32b298d7e8318fc43.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9f2872d8b84b398d8dd6408fd880d3b4.jpg b/Sklearn/sklearn-doc-zh/master/img/9f2872d8b84b398d8dd6408fd880d3b4.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9f2872d8b84b398d8dd6408fd880d3b4.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9f2872d8b84b398d8dd6408fd880d3b4.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9f4178a540cbe2b9f093702d71bafbe5.jpg b/Sklearn/sklearn-doc-zh/master/img/9f4178a540cbe2b9f093702d71bafbe5.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9f4178a540cbe2b9f093702d71bafbe5.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9f4178a540cbe2b9f093702d71bafbe5.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/9fd83615429a9be9e5698d35bec8642a.jpg b/Sklearn/sklearn-doc-zh/master/img/9fd83615429a9be9e5698d35bec8642a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/9fd83615429a9be9e5698d35bec8642a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/9fd83615429a9be9e5698d35bec8642a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a036c2c31320cfaea7959236c1b81d4c.jpg b/Sklearn/sklearn-doc-zh/master/img/a036c2c31320cfaea7959236c1b81d4c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a036c2c31320cfaea7959236c1b81d4c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a036c2c31320cfaea7959236c1b81d4c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a060693e746caf8e0ff030ed5411520f.jpg b/Sklearn/sklearn-doc-zh/master/img/a060693e746caf8e0ff030ed5411520f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a060693e746caf8e0ff030ed5411520f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a060693e746caf8e0ff030ed5411520f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a0bb00db4979d538e9ca2f0a8b423286.jpg b/Sklearn/sklearn-doc-zh/master/img/a0bb00db4979d538e9ca2f0a8b423286.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a0bb00db4979d538e9ca2f0a8b423286.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a0bb00db4979d538e9ca2f0a8b423286.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a0c2e3fe9c6a7a8416435260aa55dc4a.jpg b/Sklearn/sklearn-doc-zh/master/img/a0c2e3fe9c6a7a8416435260aa55dc4a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a0c2e3fe9c6a7a8416435260aa55dc4a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a0c2e3fe9c6a7a8416435260aa55dc4a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a0fc07cc08abaf336142bf23fb4f5cc2.jpg b/Sklearn/sklearn-doc-zh/master/img/a0fc07cc08abaf336142bf23fb4f5cc2.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a0fc07cc08abaf336142bf23fb4f5cc2.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a0fc07cc08abaf336142bf23fb4f5cc2.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a1670c1fcb5b7ad10830f43812ed50da.jpg b/Sklearn/sklearn-doc-zh/master/img/a1670c1fcb5b7ad10830f43812ed50da.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a1670c1fcb5b7ad10830f43812ed50da.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a1670c1fcb5b7ad10830f43812ed50da.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a19f2294e2649252ad2b5766d295e75e.jpg b/Sklearn/sklearn-doc-zh/master/img/a19f2294e2649252ad2b5766d295e75e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a19f2294e2649252ad2b5766d295e75e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a19f2294e2649252ad2b5766d295e75e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a1bd06d03e764db224f0e10b4f024bdd.jpg b/Sklearn/sklearn-doc-zh/master/img/a1bd06d03e764db224f0e10b4f024bdd.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a1bd06d03e764db224f0e10b4f024bdd.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a1bd06d03e764db224f0e10b4f024bdd.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a201561ab545f4fd9cba5a2e0eae9a94.jpg b/Sklearn/sklearn-doc-zh/master/img/a201561ab545f4fd9cba5a2e0eae9a94.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a201561ab545f4fd9cba5a2e0eae9a94.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a201561ab545f4fd9cba5a2e0eae9a94.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a243fb2cabe46c32a3a66214f514d623.jpg b/Sklearn/sklearn-doc-zh/master/img/a243fb2cabe46c32a3a66214f514d623.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a243fb2cabe46c32a3a66214f514d623.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a243fb2cabe46c32a3a66214f514d623.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a24e299927ed136dd98d6c87904c973d.jpg b/Sklearn/sklearn-doc-zh/master/img/a24e299927ed136dd98d6c87904c973d.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a24e299927ed136dd98d6c87904c973d.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a24e299927ed136dd98d6c87904c973d.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a25320a2e009abd4269f291f85062a5d.jpg b/Sklearn/sklearn-doc-zh/master/img/a25320a2e009abd4269f291f85062a5d.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a25320a2e009abd4269f291f85062a5d.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a25320a2e009abd4269f291f85062a5d.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a25ab5d7faac5be22a6d242286376161.jpg b/Sklearn/sklearn-doc-zh/master/img/a25ab5d7faac5be22a6d242286376161.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a25ab5d7faac5be22a6d242286376161.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a25ab5d7faac5be22a6d242286376161.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a35122280170c396ab3c9d8fa3b62446.jpg b/Sklearn/sklearn-doc-zh/master/img/a35122280170c396ab3c9d8fa3b62446.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a35122280170c396ab3c9d8fa3b62446.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a35122280170c396ab3c9d8fa3b62446.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a44a7c045f2217894a894c482861387a.jpg b/Sklearn/sklearn-doc-zh/master/img/a44a7c045f2217894a894c482861387a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a44a7c045f2217894a894c482861387a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a44a7c045f2217894a894c482861387a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a4775baaa990a4fbffcfc2688e3b5578.jpg b/Sklearn/sklearn-doc-zh/master/img/a4775baaa990a4fbffcfc2688e3b5578.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a4775baaa990a4fbffcfc2688e3b5578.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a4775baaa990a4fbffcfc2688e3b5578.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a4a20739f22e7059a927ba615ec373da.jpg b/Sklearn/sklearn-doc-zh/master/img/a4a20739f22e7059a927ba615ec373da.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a4a20739f22e7059a927ba615ec373da.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a4a20739f22e7059a927ba615ec373da.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a4dd5119f3eeb13b99180aab64917975.jpg b/Sklearn/sklearn-doc-zh/master/img/a4dd5119f3eeb13b99180aab64917975.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a4dd5119f3eeb13b99180aab64917975.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a4dd5119f3eeb13b99180aab64917975.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a51f44dfa9b1942326c669c5ffe3f9f6.jpg b/Sklearn/sklearn-doc-zh/master/img/a51f44dfa9b1942326c669c5ffe3f9f6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a51f44dfa9b1942326c669c5ffe3f9f6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a51f44dfa9b1942326c669c5ffe3f9f6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a539f6901c50549f8d30f6d7f6f3e177.jpg b/Sklearn/sklearn-doc-zh/master/img/a539f6901c50549f8d30f6d7f6f3e177.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a539f6901c50549f8d30f6d7f6f3e177.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a539f6901c50549f8d30f6d7f6f3e177.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a561362ff63affeb799b9d33423235a3.jpg b/Sklearn/sklearn-doc-zh/master/img/a561362ff63affeb799b9d33423235a3.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a561362ff63affeb799b9d33423235a3.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a561362ff63affeb799b9d33423235a3.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a6334506478d6feb4025038294ccfa00.jpg b/Sklearn/sklearn-doc-zh/master/img/a6334506478d6feb4025038294ccfa00.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a6334506478d6feb4025038294ccfa00.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a6334506478d6feb4025038294ccfa00.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a6509a4f7b1838eb6d76d30036a00ffd.jpg b/Sklearn/sklearn-doc-zh/master/img/a6509a4f7b1838eb6d76d30036a00ffd.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a6509a4f7b1838eb6d76d30036a00ffd.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a6509a4f7b1838eb6d76d30036a00ffd.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a6dba4aa006d4689de18a4de5acaa949.jpg b/Sklearn/sklearn-doc-zh/master/img/a6dba4aa006d4689de18a4de5acaa949.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a6dba4aa006d4689de18a4de5acaa949.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a6dba4aa006d4689de18a4de5acaa949.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a70da26e29baa95b2f65f3cbc2e5fbfa.jpg b/Sklearn/sklearn-doc-zh/master/img/a70da26e29baa95b2f65f3cbc2e5fbfa.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a70da26e29baa95b2f65f3cbc2e5fbfa.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a70da26e29baa95b2f65f3cbc2e5fbfa.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a71a1d9e35b09d284da476b2175edf6f.jpg b/Sklearn/sklearn-doc-zh/master/img/a71a1d9e35b09d284da476b2175edf6f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a71a1d9e35b09d284da476b2175edf6f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a71a1d9e35b09d284da476b2175edf6f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a8e210a94f6eac6c32bc219dbc049288.jpg b/Sklearn/sklearn-doc-zh/master/img/a8e210a94f6eac6c32bc219dbc049288.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a8e210a94f6eac6c32bc219dbc049288.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a8e210a94f6eac6c32bc219dbc049288.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a8f0b97723e99a7ddae947c83f43ff45.jpg b/Sklearn/sklearn-doc-zh/master/img/a8f0b97723e99a7ddae947c83f43ff45.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a8f0b97723e99a7ddae947c83f43ff45.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a8f0b97723e99a7ddae947c83f43ff45.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a921f24886997e2a5b94e2abb46d8402.jpg b/Sklearn/sklearn-doc-zh/master/img/a921f24886997e2a5b94e2abb46d8402.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a921f24886997e2a5b94e2abb46d8402.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a921f24886997e2a5b94e2abb46d8402.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a960dd9452e5e5da7b211f6fdc120a0d.jpg b/Sklearn/sklearn-doc-zh/master/img/a960dd9452e5e5da7b211f6fdc120a0d.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a960dd9452e5e5da7b211f6fdc120a0d.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a960dd9452e5e5da7b211f6fdc120a0d.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a98f0fb22381bfc1d14fc1e3f7e737e5.jpg b/Sklearn/sklearn-doc-zh/master/img/a98f0fb22381bfc1d14fc1e3f7e737e5.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a98f0fb22381bfc1d14fc1e3f7e737e5.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a98f0fb22381bfc1d14fc1e3f7e737e5.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a9cd44019704ddd9877089f25d6a229e.jpg b/Sklearn/sklearn-doc-zh/master/img/a9cd44019704ddd9877089f25d6a229e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a9cd44019704ddd9877089f25d6a229e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a9cd44019704ddd9877089f25d6a229e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/a9e58e92cdae50e285a704a6d1ccbafc.jpg b/Sklearn/sklearn-doc-zh/master/img/a9e58e92cdae50e285a704a6d1ccbafc.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/a9e58e92cdae50e285a704a6d1ccbafc.jpg
rename to Sklearn/sklearn-doc-zh/master/img/a9e58e92cdae50e285a704a6d1ccbafc.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/aa0c61cd560f0fdab4fe10c7b12e5082.jpg b/Sklearn/sklearn-doc-zh/master/img/aa0c61cd560f0fdab4fe10c7b12e5082.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/aa0c61cd560f0fdab4fe10c7b12e5082.jpg
rename to Sklearn/sklearn-doc-zh/master/img/aa0c61cd560f0fdab4fe10c7b12e5082.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/aa5e9ace265afd9e1f881564a1923a17.jpg b/Sklearn/sklearn-doc-zh/master/img/aa5e9ace265afd9e1f881564a1923a17.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/aa5e9ace265afd9e1f881564a1923a17.jpg
rename to Sklearn/sklearn-doc-zh/master/img/aa5e9ace265afd9e1f881564a1923a17.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/aaa84c285eb96ed446fd34be4b51bbec.jpg b/Sklearn/sklearn-doc-zh/master/img/aaa84c285eb96ed446fd34be4b51bbec.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/aaa84c285eb96ed446fd34be4b51bbec.jpg
rename to Sklearn/sklearn-doc-zh/master/img/aaa84c285eb96ed446fd34be4b51bbec.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ab2096ee4087e644cca732d92d241edf.jpg b/Sklearn/sklearn-doc-zh/master/img/ab2096ee4087e644cca732d92d241edf.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ab2096ee4087e644cca732d92d241edf.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ab2096ee4087e644cca732d92d241edf.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ab81f225a7e452d651b4888d437d07d2.jpg b/Sklearn/sklearn-doc-zh/master/img/ab81f225a7e452d651b4888d437d07d2.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ab81f225a7e452d651b4888d437d07d2.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ab81f225a7e452d651b4888d437d07d2.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/aba080a369ca542a90a264a04dd518c5.jpg b/Sklearn/sklearn-doc-zh/master/img/aba080a369ca542a90a264a04dd518c5.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/aba080a369ca542a90a264a04dd518c5.jpg
rename to Sklearn/sklearn-doc-zh/master/img/aba080a369ca542a90a264a04dd518c5.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/aba64ff85b1f99c5d1c4f8e1ace15f89.jpg b/Sklearn/sklearn-doc-zh/master/img/aba64ff85b1f99c5d1c4f8e1ace15f89.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/aba64ff85b1f99c5d1c4f8e1ace15f89.jpg
rename to Sklearn/sklearn-doc-zh/master/img/aba64ff85b1f99c5d1c4f8e1ace15f89.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/abd232bd41a9d1b51021b788e2a82dd9.jpg b/Sklearn/sklearn-doc-zh/master/img/abd232bd41a9d1b51021b788e2a82dd9.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/abd232bd41a9d1b51021b788e2a82dd9.jpg
rename to Sklearn/sklearn-doc-zh/master/img/abd232bd41a9d1b51021b788e2a82dd9.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/abed66e83395b34fe3c020cfcab3dce9.jpg b/Sklearn/sklearn-doc-zh/master/img/abed66e83395b34fe3c020cfcab3dce9.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/abed66e83395b34fe3c020cfcab3dce9.jpg
rename to Sklearn/sklearn-doc-zh/master/img/abed66e83395b34fe3c020cfcab3dce9.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/abee3460000f8532d0df4e1b1d1928e8.jpg b/Sklearn/sklearn-doc-zh/master/img/abee3460000f8532d0df4e1b1d1928e8.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/abee3460000f8532d0df4e1b1d1928e8.jpg
rename to Sklearn/sklearn-doc-zh/master/img/abee3460000f8532d0df4e1b1d1928e8.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ac11972df0ad68aba63757a4ba1ee02b.jpg b/Sklearn/sklearn-doc-zh/master/img/ac11972df0ad68aba63757a4ba1ee02b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ac11972df0ad68aba63757a4ba1ee02b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ac11972df0ad68aba63757a4ba1ee02b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ac23b915eb44e4fa668b8d56785e2c20.jpg b/Sklearn/sklearn-doc-zh/master/img/ac23b915eb44e4fa668b8d56785e2c20.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ac23b915eb44e4fa668b8d56785e2c20.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ac23b915eb44e4fa668b8d56785e2c20.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ac9afe17947eb296e1701877b220779d.jpg b/Sklearn/sklearn-doc-zh/master/img/ac9afe17947eb296e1701877b220779d.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ac9afe17947eb296e1701877b220779d.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ac9afe17947eb296e1701877b220779d.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/accc37ed7ec2ed38ec70c71f5d6aeebe.jpg b/Sklearn/sklearn-doc-zh/master/img/accc37ed7ec2ed38ec70c71f5d6aeebe.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/accc37ed7ec2ed38ec70c71f5d6aeebe.jpg
rename to Sklearn/sklearn-doc-zh/master/img/accc37ed7ec2ed38ec70c71f5d6aeebe.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ad8371d06b000849fa4e2fbd6b386c7d.jpg b/Sklearn/sklearn-doc-zh/master/img/ad8371d06b000849fa4e2fbd6b386c7d.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ad8371d06b000849fa4e2fbd6b386c7d.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ad8371d06b000849fa4e2fbd6b386c7d.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ad854ab6b0056f9b521d823a98548d3f.jpg b/Sklearn/sklearn-doc-zh/master/img/ad854ab6b0056f9b521d823a98548d3f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ad854ab6b0056f9b521d823a98548d3f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ad854ab6b0056f9b521d823a98548d3f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ad946e6478bb10e60ac9663066f26ee8.jpg b/Sklearn/sklearn-doc-zh/master/img/ad946e6478bb10e60ac9663066f26ee8.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ad946e6478bb10e60ac9663066f26ee8.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ad946e6478bb10e60ac9663066f26ee8.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ad9ce0e67dc83e5380738d3193819195.jpg b/Sklearn/sklearn-doc-zh/master/img/ad9ce0e67dc83e5380738d3193819195.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ad9ce0e67dc83e5380738d3193819195.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ad9ce0e67dc83e5380738d3193819195.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/adc60d285d73d89dac7cb76f51617e64.jpg b/Sklearn/sklearn-doc-zh/master/img/adc60d285d73d89dac7cb76f51617e64.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/adc60d285d73d89dac7cb76f51617e64.jpg
rename to Sklearn/sklearn-doc-zh/master/img/adc60d285d73d89dac7cb76f51617e64.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/adf83056bc2bd05628e24c40cb728b3d.jpg b/Sklearn/sklearn-doc-zh/master/img/adf83056bc2bd05628e24c40cb728b3d.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/adf83056bc2bd05628e24c40cb728b3d.jpg
rename to Sklearn/sklearn-doc-zh/master/img/adf83056bc2bd05628e24c40cb728b3d.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ae1e2652c240448ae994f4b236379d6d.jpg b/Sklearn/sklearn-doc-zh/master/img/ae1e2652c240448ae994f4b236379d6d.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ae1e2652c240448ae994f4b236379d6d.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ae1e2652c240448ae994f4b236379d6d.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ae484baf10384efcf4d993631f4641e7.jpg b/Sklearn/sklearn-doc-zh/master/img/ae484baf10384efcf4d993631f4641e7.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ae484baf10384efcf4d993631f4641e7.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ae484baf10384efcf4d993631f4641e7.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ae82b9adb507cb166d4721c004ae5f40.jpg b/Sklearn/sklearn-doc-zh/master/img/ae82b9adb507cb166d4721c004ae5f40.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ae82b9adb507cb166d4721c004ae5f40.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ae82b9adb507cb166d4721c004ae5f40.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/aee8fc1c253da16851991ef3ceff663b.jpg b/Sklearn/sklearn-doc-zh/master/img/aee8fc1c253da16851991ef3ceff663b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/aee8fc1c253da16851991ef3ceff663b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/aee8fc1c253da16851991ef3ceff663b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/af78ae4aadd0f0961cf4d9564897b1ff.jpg b/Sklearn/sklearn-doc-zh/master/img/af78ae4aadd0f0961cf4d9564897b1ff.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/af78ae4aadd0f0961cf4d9564897b1ff.jpg
rename to Sklearn/sklearn-doc-zh/master/img/af78ae4aadd0f0961cf4d9564897b1ff.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/af7b81123e6cdf0b42acec802041beef.jpg b/Sklearn/sklearn-doc-zh/master/img/af7b81123e6cdf0b42acec802041beef.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/af7b81123e6cdf0b42acec802041beef.jpg
rename to Sklearn/sklearn-doc-zh/master/img/af7b81123e6cdf0b42acec802041beef.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/af875bce0483bd18603c4d247e6a3745.jpg b/Sklearn/sklearn-doc-zh/master/img/af875bce0483bd18603c4d247e6a3745.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/af875bce0483bd18603c4d247e6a3745.jpg
rename to Sklearn/sklearn-doc-zh/master/img/af875bce0483bd18603c4d247e6a3745.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/afcad7956ba0a3a4a6771ee9810280c2.jpg b/Sklearn/sklearn-doc-zh/master/img/afcad7956ba0a3a4a6771ee9810280c2.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/afcad7956ba0a3a4a6771ee9810280c2.jpg
rename to Sklearn/sklearn-doc-zh/master/img/afcad7956ba0a3a4a6771ee9810280c2.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/afee107e82d448558078835ad82086e5.jpg b/Sklearn/sklearn-doc-zh/master/img/afee107e82d448558078835ad82086e5.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/afee107e82d448558078835ad82086e5.jpg
rename to Sklearn/sklearn-doc-zh/master/img/afee107e82d448558078835ad82086e5.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/algo.jpg b/Sklearn/sklearn-doc-zh/master/img/algo.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/algo.jpg
rename to Sklearn/sklearn-doc-zh/master/img/algo.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b003858334d1ad594207911e84219151.jpg b/Sklearn/sklearn-doc-zh/master/img/b003858334d1ad594207911e84219151.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b003858334d1ad594207911e84219151.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b003858334d1ad594207911e84219151.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b019b19dda07f07208f1bd2576ebad30.jpg b/Sklearn/sklearn-doc-zh/master/img/b019b19dda07f07208f1bd2576ebad30.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b019b19dda07f07208f1bd2576ebad30.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b019b19dda07f07208f1bd2576ebad30.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b0471e4ce10b5565385c611ba381fa0d.jpg b/Sklearn/sklearn-doc-zh/master/img/b0471e4ce10b5565385c611ba381fa0d.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b0471e4ce10b5565385c611ba381fa0d.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b0471e4ce10b5565385c611ba381fa0d.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b10afb1bc4c68539a74de379254c7655.jpg b/Sklearn/sklearn-doc-zh/master/img/b10afb1bc4c68539a74de379254c7655.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b10afb1bc4c68539a74de379254c7655.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b10afb1bc4c68539a74de379254c7655.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b146555411b22bcf0ad73d4720455038.jpg b/Sklearn/sklearn-doc-zh/master/img/b146555411b22bcf0ad73d4720455038.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b146555411b22bcf0ad73d4720455038.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b146555411b22bcf0ad73d4720455038.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b25f834ac79280901c702fb1449740a3.jpg b/Sklearn/sklearn-doc-zh/master/img/b25f834ac79280901c702fb1449740a3.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b25f834ac79280901c702fb1449740a3.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b25f834ac79280901c702fb1449740a3.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b2996ad4866e8a26c7ba42c0229385af.jpg b/Sklearn/sklearn-doc-zh/master/img/b2996ad4866e8a26c7ba42c0229385af.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b2996ad4866e8a26c7ba42c0229385af.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b2996ad4866e8a26c7ba42c0229385af.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b29b59eca5c581c3f54d92c1671f2b2e.jpg b/Sklearn/sklearn-doc-zh/master/img/b29b59eca5c581c3f54d92c1671f2b2e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b29b59eca5c581c3f54d92c1671f2b2e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b29b59eca5c581c3f54d92c1671f2b2e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b2b4edffe444aa936ad59a769317f692.jpg b/Sklearn/sklearn-doc-zh/master/img/b2b4edffe444aa936ad59a769317f692.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b2b4edffe444aa936ad59a769317f692.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b2b4edffe444aa936ad59a769317f692.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b2ea359213f8f5b01eead0821e29e856.jpg b/Sklearn/sklearn-doc-zh/master/img/b2ea359213f8f5b01eead0821e29e856.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b2ea359213f8f5b01eead0821e29e856.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b2ea359213f8f5b01eead0821e29e856.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b3206aa7b52a9c0918727730873d1363.jpg b/Sklearn/sklearn-doc-zh/master/img/b3206aa7b52a9c0918727730873d1363.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b3206aa7b52a9c0918727730873d1363.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b3206aa7b52a9c0918727730873d1363.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b3348b4a0dc8e97bcefc5c7489b006db.jpg b/Sklearn/sklearn-doc-zh/master/img/b3348b4a0dc8e97bcefc5c7489b006db.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b3348b4a0dc8e97bcefc5c7489b006db.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b3348b4a0dc8e97bcefc5c7489b006db.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b335c88114c4fec7e72304006810c82c.jpg b/Sklearn/sklearn-doc-zh/master/img/b335c88114c4fec7e72304006810c82c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b335c88114c4fec7e72304006810c82c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b335c88114c4fec7e72304006810c82c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b36270d22513e9645235b5ad4c3cd7dd.jpg b/Sklearn/sklearn-doc-zh/master/img/b36270d22513e9645235b5ad4c3cd7dd.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b36270d22513e9645235b5ad4c3cd7dd.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b36270d22513e9645235b5ad4c3cd7dd.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b382a1d99ddfadf17b35d32b0b156b5b.jpg b/Sklearn/sklearn-doc-zh/master/img/b382a1d99ddfadf17b35d32b0b156b5b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b382a1d99ddfadf17b35d32b0b156b5b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b382a1d99ddfadf17b35d32b0b156b5b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b38a0de3f98aa9b8837354765bc9e3f6.jpg b/Sklearn/sklearn-doc-zh/master/img/b38a0de3f98aa9b8837354765bc9e3f6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b38a0de3f98aa9b8837354765bc9e3f6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b38a0de3f98aa9b8837354765bc9e3f6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b38aca53acb7894dca026d3325f61a00.jpg b/Sklearn/sklearn-doc-zh/master/img/b38aca53acb7894dca026d3325f61a00.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b38aca53acb7894dca026d3325f61a00.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b38aca53acb7894dca026d3325f61a00.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b3ca25c4cc9cb42878f6fc39525b7446.jpg b/Sklearn/sklearn-doc-zh/master/img/b3ca25c4cc9cb42878f6fc39525b7446.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b3ca25c4cc9cb42878f6fc39525b7446.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b3ca25c4cc9cb42878f6fc39525b7446.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b3ea6ae2442e72f261f037571e580979.jpg b/Sklearn/sklearn-doc-zh/master/img/b3ea6ae2442e72f261f037571e580979.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b3ea6ae2442e72f261f037571e580979.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b3ea6ae2442e72f261f037571e580979.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b3edbb24837112f795a22e3574457416.jpg b/Sklearn/sklearn-doc-zh/master/img/b3edbb24837112f795a22e3574457416.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b3edbb24837112f795a22e3574457416.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b3edbb24837112f795a22e3574457416.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b47a023e9edbe1976993ce76c7dbc286.jpg b/Sklearn/sklearn-doc-zh/master/img/b47a023e9edbe1976993ce76c7dbc286.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b47a023e9edbe1976993ce76c7dbc286.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b47a023e9edbe1976993ce76c7dbc286.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b4e752f6314fe52f8c066964d26145a8.jpg b/Sklearn/sklearn-doc-zh/master/img/b4e752f6314fe52f8c066964d26145a8.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b4e752f6314fe52f8c066964d26145a8.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b4e752f6314fe52f8c066964d26145a8.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b6305894a6f400569f3ff2b899370b54.jpg b/Sklearn/sklearn-doc-zh/master/img/b6305894a6f400569f3ff2b899370b54.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b6305894a6f400569f3ff2b899370b54.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b6305894a6f400569f3ff2b899370b54.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b67ce2997477f658a6a39026c01e07c4.jpg b/Sklearn/sklearn-doc-zh/master/img/b67ce2997477f658a6a39026c01e07c4.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b67ce2997477f658a6a39026c01e07c4.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b67ce2997477f658a6a39026c01e07c4.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b68e95efa751d5e14b6517cff553419b.jpg b/Sklearn/sklearn-doc-zh/master/img/b68e95efa751d5e14b6517cff553419b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b68e95efa751d5e14b6517cff553419b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b68e95efa751d5e14b6517cff553419b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b6a1f98637a242005be08bb10541a524.jpg b/Sklearn/sklearn-doc-zh/master/img/b6a1f98637a242005be08bb10541a524.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b6a1f98637a242005be08bb10541a524.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b6a1f98637a242005be08bb10541a524.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b6cea83c1722562f844aebd98fb3f59d.jpg b/Sklearn/sklearn-doc-zh/master/img/b6cea83c1722562f844aebd98fb3f59d.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b6cea83c1722562f844aebd98fb3f59d.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b6cea83c1722562f844aebd98fb3f59d.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b714492d7f23932738745c4ed05fe7ae.jpg b/Sklearn/sklearn-doc-zh/master/img/b714492d7f23932738745c4ed05fe7ae.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b714492d7f23932738745c4ed05fe7ae.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b714492d7f23932738745c4ed05fe7ae.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b74decc4f9ee591a92a5281d0187f05a.jpg b/Sklearn/sklearn-doc-zh/master/img/b74decc4f9ee591a92a5281d0187f05a.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b74decc4f9ee591a92a5281d0187f05a.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b74decc4f9ee591a92a5281d0187f05a.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b7a68272ceb9cb09f48103e44f469470.jpg b/Sklearn/sklearn-doc-zh/master/img/b7a68272ceb9cb09f48103e44f469470.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b7a68272ceb9cb09f48103e44f469470.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b7a68272ceb9cb09f48103e44f469470.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b8c91e64943f8e5e311a51bc901a2c7e.jpg b/Sklearn/sklearn-doc-zh/master/img/b8c91e64943f8e5e311a51bc901a2c7e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b8c91e64943f8e5e311a51bc901a2c7e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b8c91e64943f8e5e311a51bc901a2c7e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b8d08f24f6c909a25190cb38127ec809.jpg b/Sklearn/sklearn-doc-zh/master/img/b8d08f24f6c909a25190cb38127ec809.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b8d08f24f6c909a25190cb38127ec809.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b8d08f24f6c909a25190cb38127ec809.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b8e446d22ed902423187ead2d67af58c.jpg b/Sklearn/sklearn-doc-zh/master/img/b8e446d22ed902423187ead2d67af58c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b8e446d22ed902423187ead2d67af58c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b8e446d22ed902423187ead2d67af58c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b91e4507d9fd7068b02f689d697f8714.jpg b/Sklearn/sklearn-doc-zh/master/img/b91e4507d9fd7068b02f689d697f8714.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b91e4507d9fd7068b02f689d697f8714.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b91e4507d9fd7068b02f689d697f8714.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/b94b3a3837e7741f704e3b9b23ba0880.jpg b/Sklearn/sklearn-doc-zh/master/img/b94b3a3837e7741f704e3b9b23ba0880.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/b94b3a3837e7741f704e3b9b23ba0880.jpg
rename to Sklearn/sklearn-doc-zh/master/img/b94b3a3837e7741f704e3b9b23ba0880.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ba3c1f82d7a66df41015761326619e26.jpg b/Sklearn/sklearn-doc-zh/master/img/ba3c1f82d7a66df41015761326619e26.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ba3c1f82d7a66df41015761326619e26.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ba3c1f82d7a66df41015761326619e26.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ba5932f77767fa05771311d1f926e3ee.jpg b/Sklearn/sklearn-doc-zh/master/img/ba5932f77767fa05771311d1f926e3ee.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ba5932f77767fa05771311d1f926e3ee.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ba5932f77767fa05771311d1f926e3ee.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ba90d4193a98b9023d3d8526a20fe1ac.jpg b/Sklearn/sklearn-doc-zh/master/img/ba90d4193a98b9023d3d8526a20fe1ac.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/ba90d4193a98b9023d3d8526a20fe1ac.jpg
rename to Sklearn/sklearn-doc-zh/master/img/ba90d4193a98b9023d3d8526a20fe1ac.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bafb2b9486fa2f91dcc020843770eef6.jpg b/Sklearn/sklearn-doc-zh/master/img/bafb2b9486fa2f91dcc020843770eef6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bafb2b9486fa2f91dcc020843770eef6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bafb2b9486fa2f91dcc020843770eef6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bayes01.png b/Sklearn/sklearn-doc-zh/master/img/bayes01.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bayes01.png
rename to Sklearn/sklearn-doc-zh/master/img/bayes01.png
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bayes02.png b/Sklearn/sklearn-doc-zh/master/img/bayes02.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bayes02.png
rename to Sklearn/sklearn-doc-zh/master/img/bayes02.png
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bayse03.png b/Sklearn/sklearn-doc-zh/master/img/bayse03.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bayse03.png
rename to Sklearn/sklearn-doc-zh/master/img/bayse03.png
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bayse04.png b/Sklearn/sklearn-doc-zh/master/img/bayse04.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bayse04.png
rename to Sklearn/sklearn-doc-zh/master/img/bayse04.png
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bayse05.png b/Sklearn/sklearn-doc-zh/master/img/bayse05.png
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bayse05.png
rename to Sklearn/sklearn-doc-zh/master/img/bayse05.png
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bb0a3257b0276e1ade46d7fa84c49ad0.jpg b/Sklearn/sklearn-doc-zh/master/img/bb0a3257b0276e1ade46d7fa84c49ad0.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bb0a3257b0276e1ade46d7fa84c49ad0.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bb0a3257b0276e1ade46d7fa84c49ad0.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bbdbd3f496f907578ff01cc798adde74.jpg b/Sklearn/sklearn-doc-zh/master/img/bbdbd3f496f907578ff01cc798adde74.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bbdbd3f496f907578ff01cc798adde74.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bbdbd3f496f907578ff01cc798adde74.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bc08d23c9d8a06975f20eb514faae9ef.jpg b/Sklearn/sklearn-doc-zh/master/img/bc08d23c9d8a06975f20eb514faae9ef.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bc08d23c9d8a06975f20eb514faae9ef.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bc08d23c9d8a06975f20eb514faae9ef.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bc0cfc8c8661055fd60ca8e90b21d1dd.jpg b/Sklearn/sklearn-doc-zh/master/img/bc0cfc8c8661055fd60ca8e90b21d1dd.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bc0cfc8c8661055fd60ca8e90b21d1dd.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bc0cfc8c8661055fd60ca8e90b21d1dd.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bc35316fd422689a9bc65845d63c1433.jpg b/Sklearn/sklearn-doc-zh/master/img/bc35316fd422689a9bc65845d63c1433.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bc35316fd422689a9bc65845d63c1433.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bc35316fd422689a9bc65845d63c1433.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bc7418a3ab8f749f1abd139faa96bee2.jpg b/Sklearn/sklearn-doc-zh/master/img/bc7418a3ab8f749f1abd139faa96bee2.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bc7418a3ab8f749f1abd139faa96bee2.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bc7418a3ab8f749f1abd139faa96bee2.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bd3f0762bab34671ec8bfb8ace2cc129.jpg b/Sklearn/sklearn-doc-zh/master/img/bd3f0762bab34671ec8bfb8ace2cc129.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bd3f0762bab34671ec8bfb8ace2cc129.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bd3f0762bab34671ec8bfb8ace2cc129.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bdc1e4261347e1c74950e91fa4f2230f.jpg b/Sklearn/sklearn-doc-zh/master/img/bdc1e4261347e1c74950e91fa4f2230f.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bdc1e4261347e1c74950e91fa4f2230f.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bdc1e4261347e1c74950e91fa4f2230f.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/be2b3bbef9fe377c6f748dd05355b58b.jpg b/Sklearn/sklearn-doc-zh/master/img/be2b3bbef9fe377c6f748dd05355b58b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/be2b3bbef9fe377c6f748dd05355b58b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/be2b3bbef9fe377c6f748dd05355b58b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/be4190a760361bd7ae65c77218465778.jpg b/Sklearn/sklearn-doc-zh/master/img/be4190a760361bd7ae65c77218465778.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/be4190a760361bd7ae65c77218465778.jpg
rename to Sklearn/sklearn-doc-zh/master/img/be4190a760361bd7ae65c77218465778.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/be71aa00cd62f24b4657f7993d1b3a45.jpg b/Sklearn/sklearn-doc-zh/master/img/be71aa00cd62f24b4657f7993d1b3a45.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/be71aa00cd62f24b4657f7993d1b3a45.jpg
rename to Sklearn/sklearn-doc-zh/master/img/be71aa00cd62f24b4657f7993d1b3a45.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/be8c80153a3cafbe4309f1fe3b62d96b.jpg b/Sklearn/sklearn-doc-zh/master/img/be8c80153a3cafbe4309f1fe3b62d96b.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/be8c80153a3cafbe4309f1fe3b62d96b.jpg
rename to Sklearn/sklearn-doc-zh/master/img/be8c80153a3cafbe4309f1fe3b62d96b.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bec3afcb1362068f9caf79c5c58ea816.jpg b/Sklearn/sklearn-doc-zh/master/img/bec3afcb1362068f9caf79c5c58ea816.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bec3afcb1362068f9caf79c5c58ea816.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bec3afcb1362068f9caf79c5c58ea816.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bee2cb707f91d8e36ae11638b6698fe4.jpg b/Sklearn/sklearn-doc-zh/master/img/bee2cb707f91d8e36ae11638b6698fe4.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bee2cb707f91d8e36ae11638b6698fe4.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bee2cb707f91d8e36ae11638b6698fe4.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/beee3f6e512d1e3caf1d1f6cfff468ae.jpg b/Sklearn/sklearn-doc-zh/master/img/beee3f6e512d1e3caf1d1f6cfff468ae.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/beee3f6e512d1e3caf1d1f6cfff468ae.jpg
rename to Sklearn/sklearn-doc-zh/master/img/beee3f6e512d1e3caf1d1f6cfff468ae.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bf95d88f4f17676409c7bab64ba036dc.jpg b/Sklearn/sklearn-doc-zh/master/img/bf95d88f4f17676409c7bab64ba036dc.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bf95d88f4f17676409c7bab64ba036dc.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bf95d88f4f17676409c7bab64ba036dc.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bf9baf4863bf6d025348b7d91c888066.jpg b/Sklearn/sklearn-doc-zh/master/img/bf9baf4863bf6d025348b7d91c888066.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bf9baf4863bf6d025348b7d91c888066.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bf9baf4863bf6d025348b7d91c888066.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/bf9fb1354c2e0ea50d37e5cad7866314.jpg b/Sklearn/sklearn-doc-zh/master/img/bf9fb1354c2e0ea50d37e5cad7866314.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/bf9fb1354c2e0ea50d37e5cad7866314.jpg
rename to Sklearn/sklearn-doc-zh/master/img/bf9fb1354c2e0ea50d37e5cad7866314.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c001247a5528df69cdcc243c70f61d01.jpg b/Sklearn/sklearn-doc-zh/master/img/c001247a5528df69cdcc243c70f61d01.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/c001247a5528df69cdcc243c70f61d01.jpg
rename to Sklearn/sklearn-doc-zh/master/img/c001247a5528df69cdcc243c70f61d01.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c009aed89f1926875e5637127af6000c.jpg b/Sklearn/sklearn-doc-zh/master/img/c009aed89f1926875e5637127af6000c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/c009aed89f1926875e5637127af6000c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/c009aed89f1926875e5637127af6000c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c019126f38fb92a868a7155bd707a5f8.jpg b/Sklearn/sklearn-doc-zh/master/img/c019126f38fb92a868a7155bd707a5f8.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/c019126f38fb92a868a7155bd707a5f8.jpg
rename to Sklearn/sklearn-doc-zh/master/img/c019126f38fb92a868a7155bd707a5f8.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c03e9014ab434e11e9323f87908ed15e.jpg b/Sklearn/sklearn-doc-zh/master/img/c03e9014ab434e11e9323f87908ed15e.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/c03e9014ab434e11e9323f87908ed15e.jpg
rename to Sklearn/sklearn-doc-zh/master/img/c03e9014ab434e11e9323f87908ed15e.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c236b4ae30d04ba7fae7fa499a2ba9ea.jpg b/Sklearn/sklearn-doc-zh/master/img/c236b4ae30d04ba7fae7fa499a2ba9ea.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/c236b4ae30d04ba7fae7fa499a2ba9ea.jpg
rename to Sklearn/sklearn-doc-zh/master/img/c236b4ae30d04ba7fae7fa499a2ba9ea.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c3567127ff1f678758b338a50e9c4880.jpg b/Sklearn/sklearn-doc-zh/master/img/c3567127ff1f678758b338a50e9c4880.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/c3567127ff1f678758b338a50e9c4880.jpg
rename to Sklearn/sklearn-doc-zh/master/img/c3567127ff1f678758b338a50e9c4880.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c39b576ee6e4fc82b4d9edd06ffc8c9c.jpg b/Sklearn/sklearn-doc-zh/master/img/c39b576ee6e4fc82b4d9edd06ffc8c9c.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/c39b576ee6e4fc82b4d9edd06ffc8c9c.jpg
rename to Sklearn/sklearn-doc-zh/master/img/c39b576ee6e4fc82b4d9edd06ffc8c9c.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c3c22734435fdd94f6819708bc55d8a6.jpg b/Sklearn/sklearn-doc-zh/master/img/c3c22734435fdd94f6819708bc55d8a6.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/c3c22734435fdd94f6819708bc55d8a6.jpg
rename to Sklearn/sklearn-doc-zh/master/img/c3c22734435fdd94f6819708bc55d8a6.jpg
diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c3de98827a889725d91141a5780692b5.jpg b/Sklearn/sklearn-doc-zh/master/img/c3de98827a889725d91141a5780692b5.jpg
similarity index 100%
rename from Python/sklearn/sklearn-doc-zh/master/img/c3de98827a889725d91141a5780692b5.jpg
rename to
Sklearn/sklearn-doc-zh/master/img/c3de98827a889725d91141a5780692b5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c3e91e4b6783eb9fe281bc3e2e29aaea.jpg b/Sklearn/sklearn-doc-zh/master/img/c3e91e4b6783eb9fe281bc3e2e29aaea.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c3e91e4b6783eb9fe281bc3e2e29aaea.jpg rename to Sklearn/sklearn-doc-zh/master/img/c3e91e4b6783eb9fe281bc3e2e29aaea.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c4373cf7ea98d1425608569103286d28.jpg b/Sklearn/sklearn-doc-zh/master/img/c4373cf7ea98d1425608569103286d28.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c4373cf7ea98d1425608569103286d28.jpg rename to Sklearn/sklearn-doc-zh/master/img/c4373cf7ea98d1425608569103286d28.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c458cf14df5a22e0d44d7c4fa458361d.jpg b/Sklearn/sklearn-doc-zh/master/img/c458cf14df5a22e0d44d7c4fa458361d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c458cf14df5a22e0d44d7c4fa458361d.jpg rename to Sklearn/sklearn-doc-zh/master/img/c458cf14df5a22e0d44d7c4fa458361d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c46633c42aaa3e030b14d90aadb323fc.jpg b/Sklearn/sklearn-doc-zh/master/img/c46633c42aaa3e030b14d90aadb323fc.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c46633c42aaa3e030b14d90aadb323fc.jpg rename to Sklearn/sklearn-doc-zh/master/img/c46633c42aaa3e030b14d90aadb323fc.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c471a6ca04f68a6d888d4c8ad95ba189.jpg b/Sklearn/sklearn-doc-zh/master/img/c471a6ca04f68a6d888d4c8ad95ba189.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c471a6ca04f68a6d888d4c8ad95ba189.jpg rename to Sklearn/sklearn-doc-zh/master/img/c471a6ca04f68a6d888d4c8ad95ba189.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c5396cf8ee04b897a37bc7cd54383eaa.jpg b/Sklearn/sklearn-doc-zh/master/img/c5396cf8ee04b897a37bc7cd54383eaa.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c5396cf8ee04b897a37bc7cd54383eaa.jpg rename to Sklearn/sklearn-doc-zh/master/img/c5396cf8ee04b897a37bc7cd54383eaa.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c57acf47ae694e71f55f0005d1e52c55.jpg b/Sklearn/sklearn-doc-zh/master/img/c57acf47ae694e71f55f0005d1e52c55.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c57acf47ae694e71f55f0005d1e52c55.jpg rename to Sklearn/sklearn-doc-zh/master/img/c57acf47ae694e71f55f0005d1e52c55.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c57c1c5b116586e218fdaa3d0696d246.jpg b/Sklearn/sklearn-doc-zh/master/img/c57c1c5b116586e218fdaa3d0696d246.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c57c1c5b116586e218fdaa3d0696d246.jpg rename to Sklearn/sklearn-doc-zh/master/img/c57c1c5b116586e218fdaa3d0696d246.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c593cc77e5133571028587b75182d3b3.jpg b/Sklearn/sklearn-doc-zh/master/img/c593cc77e5133571028587b75182d3b3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c593cc77e5133571028587b75182d3b3.jpg rename to Sklearn/sklearn-doc-zh/master/img/c593cc77e5133571028587b75182d3b3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c5b0e465d16add1d02594ec434515c04.jpg b/Sklearn/sklearn-doc-zh/master/img/c5b0e465d16add1d02594ec434515c04.jpg similarity index 100% rename from 
Python/sklearn/sklearn-doc-zh/master/img/c5b0e465d16add1d02594ec434515c04.jpg rename to Sklearn/sklearn-doc-zh/master/img/c5b0e465d16add1d02594ec434515c04.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c5f2af9df9f65f0e399542ecf7f40554.jpg b/Sklearn/sklearn-doc-zh/master/img/c5f2af9df9f65f0e399542ecf7f40554.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c5f2af9df9f65f0e399542ecf7f40554.jpg rename to Sklearn/sklearn-doc-zh/master/img/c5f2af9df9f65f0e399542ecf7f40554.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c5f49595b56010ad04fce358940848e5.jpg b/Sklearn/sklearn-doc-zh/master/img/c5f49595b56010ad04fce358940848e5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c5f49595b56010ad04fce358940848e5.jpg rename to Sklearn/sklearn-doc-zh/master/img/c5f49595b56010ad04fce358940848e5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c63c614232be2789284b906792195a15.jpg b/Sklearn/sklearn-doc-zh/master/img/c63c614232be2789284b906792195a15.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c63c614232be2789284b906792195a15.jpg rename to Sklearn/sklearn-doc-zh/master/img/c63c614232be2789284b906792195a15.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c63f13d52ac4f65cde6e5dfd9e941562.jpg b/Sklearn/sklearn-doc-zh/master/img/c63f13d52ac4f65cde6e5dfd9e941562.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c63f13d52ac4f65cde6e5dfd9e941562.jpg rename to Sklearn/sklearn-doc-zh/master/img/c63f13d52ac4f65cde6e5dfd9e941562.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c683d0fa5d21d783e383612dda8ecad3.jpg b/Sklearn/sklearn-doc-zh/master/img/c683d0fa5d21d783e383612dda8ecad3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c683d0fa5d21d783e383612dda8ecad3.jpg rename to Sklearn/sklearn-doc-zh/master/img/c683d0fa5d21d783e383612dda8ecad3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c6b27df44672e7fa50d1d81ffbbebfbd.jpg b/Sklearn/sklearn-doc-zh/master/img/c6b27df44672e7fa50d1d81ffbbebfbd.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c6b27df44672e7fa50d1d81ffbbebfbd.jpg rename to Sklearn/sklearn-doc-zh/master/img/c6b27df44672e7fa50d1d81ffbbebfbd.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c74889dd434ec9a5f4e1b57a549263e7.jpg b/Sklearn/sklearn-doc-zh/master/img/c74889dd434ec9a5f4e1b57a549263e7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c74889dd434ec9a5f4e1b57a549263e7.jpg rename to Sklearn/sklearn-doc-zh/master/img/c74889dd434ec9a5f4e1b57a549263e7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c7e49892dca2f0df35d1261a276693f2.jpg b/Sklearn/sklearn-doc-zh/master/img/c7e49892dca2f0df35d1261a276693f2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c7e49892dca2f0df35d1261a276693f2.jpg rename to Sklearn/sklearn-doc-zh/master/img/c7e49892dca2f0df35d1261a276693f2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c7fbb786df17a75de12baa56eb99a2c2.jpg b/Sklearn/sklearn-doc-zh/master/img/c7fbb786df17a75de12baa56eb99a2c2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c7fbb786df17a75de12baa56eb99a2c2.jpg rename to Sklearn/sklearn-doc-zh/master/img/c7fbb786df17a75de12baa56eb99a2c2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c82c4d24e15330860a4ca71a31ddd553.jpg b/Sklearn/sklearn-doc-zh/master/img/c82c4d24e15330860a4ca71a31ddd553.jpg similarity index 
100% rename from Python/sklearn/sklearn-doc-zh/master/img/c82c4d24e15330860a4ca71a31ddd553.jpg rename to Sklearn/sklearn-doc-zh/master/img/c82c4d24e15330860a4ca71a31ddd553.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c851b3cdef3493f17f70f7249928e34b.jpg b/Sklearn/sklearn-doc-zh/master/img/c851b3cdef3493f17f70f7249928e34b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c851b3cdef3493f17f70f7249928e34b.jpg rename to Sklearn/sklearn-doc-zh/master/img/c851b3cdef3493f17f70f7249928e34b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c87d9110f3d32ffa5fa08671e4af11fb.jpg b/Sklearn/sklearn-doc-zh/master/img/c87d9110f3d32ffa5fa08671e4af11fb.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c87d9110f3d32ffa5fa08671e4af11fb.jpg rename to Sklearn/sklearn-doc-zh/master/img/c87d9110f3d32ffa5fa08671e4af11fb.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c8b386f383c840e769d6dae0eeac73dd.jpg b/Sklearn/sklearn-doc-zh/master/img/c8b386f383c840e769d6dae0eeac73dd.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c8b386f383c840e769d6dae0eeac73dd.jpg rename to Sklearn/sklearn-doc-zh/master/img/c8b386f383c840e769d6dae0eeac73dd.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c95237387255f824359f6c772cbb1df0.jpg b/Sklearn/sklearn-doc-zh/master/img/c95237387255f824359f6c772cbb1df0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c95237387255f824359f6c772cbb1df0.jpg rename to Sklearn/sklearn-doc-zh/master/img/c95237387255f824359f6c772cbb1df0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c9a9a1cec953f556f78c5f400277b422.jpg b/Sklearn/sklearn-doc-zh/master/img/c9a9a1cec953f556f78c5f400277b422.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c9a9a1cec953f556f78c5f400277b422.jpg rename to Sklearn/sklearn-doc-zh/master/img/c9a9a1cec953f556f78c5f400277b422.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c9e8d19526fed76c122a77c246f59313.jpg b/Sklearn/sklearn-doc-zh/master/img/c9e8d19526fed76c122a77c246f59313.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c9e8d19526fed76c122a77c246f59313.jpg rename to Sklearn/sklearn-doc-zh/master/img/c9e8d19526fed76c122a77c246f59313.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/c9f28da3986a32d6c1421f357d52b9fa.jpg b/Sklearn/sklearn-doc-zh/master/img/c9f28da3986a32d6c1421f357d52b9fa.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/c9f28da3986a32d6c1421f357d52b9fa.jpg rename to Sklearn/sklearn-doc-zh/master/img/c9f28da3986a32d6c1421f357d52b9fa.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ca04f56b8f8c29e1eec03620f0f601b0.jpg b/Sklearn/sklearn-doc-zh/master/img/ca04f56b8f8c29e1eec03620f0f601b0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ca04f56b8f8c29e1eec03620f0f601b0.jpg rename to Sklearn/sklearn-doc-zh/master/img/ca04f56b8f8c29e1eec03620f0f601b0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ca22762150e0516b4847c03efd5ebf6d.jpg b/Sklearn/sklearn-doc-zh/master/img/ca22762150e0516b4847c03efd5ebf6d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ca22762150e0516b4847c03efd5ebf6d.jpg rename to Sklearn/sklearn-doc-zh/master/img/ca22762150e0516b4847c03efd5ebf6d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cb598ee06bc5060d2dabe4acba00faa7.jpg b/Sklearn/sklearn-doc-zh/master/img/cb598ee06bc5060d2dabe4acba00faa7.jpg 
similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cb598ee06bc5060d2dabe4acba00faa7.jpg rename to Sklearn/sklearn-doc-zh/master/img/cb598ee06bc5060d2dabe4acba00faa7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cb5a89fd247e8cbbea21b00a12d188e6.jpg b/Sklearn/sklearn-doc-zh/master/img/cb5a89fd247e8cbbea21b00a12d188e6.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cb5a89fd247e8cbbea21b00a12d188e6.jpg rename to Sklearn/sklearn-doc-zh/master/img/cb5a89fd247e8cbbea21b00a12d188e6.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cba233fc4178da6d3fe0b177cbbb6318.jpg b/Sklearn/sklearn-doc-zh/master/img/cba233fc4178da6d3fe0b177cbbb6318.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cba233fc4178da6d3fe0b177cbbb6318.jpg rename to Sklearn/sklearn-doc-zh/master/img/cba233fc4178da6d3fe0b177cbbb6318.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cc41a8d314f9b97ecdf236aa0c21d984.jpg b/Sklearn/sklearn-doc-zh/master/img/cc41a8d314f9b97ecdf236aa0c21d984.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cc41a8d314f9b97ecdf236aa0c21d984.jpg rename to Sklearn/sklearn-doc-zh/master/img/cc41a8d314f9b97ecdf236aa0c21d984.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cc9d324e8bc61a67cc1947f73bf5b618.jpg b/Sklearn/sklearn-doc-zh/master/img/cc9d324e8bc61a67cc1947f73bf5b618.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cc9d324e8bc61a67cc1947f73bf5b618.jpg rename to Sklearn/sklearn-doc-zh/master/img/cc9d324e8bc61a67cc1947f73bf5b618.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ccc8bedf9424617c5d6a61fbe9a1cc36.jpg b/Sklearn/sklearn-doc-zh/master/img/ccc8bedf9424617c5d6a61fbe9a1cc36.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ccc8bedf9424617c5d6a61fbe9a1cc36.jpg rename to Sklearn/sklearn-doc-zh/master/img/ccc8bedf9424617c5d6a61fbe9a1cc36.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ccd727d4b039d28f8146546bd5f614b3.jpg b/Sklearn/sklearn-doc-zh/master/img/ccd727d4b039d28f8146546bd5f614b3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ccd727d4b039d28f8146546bd5f614b3.jpg rename to Sklearn/sklearn-doc-zh/master/img/ccd727d4b039d28f8146546bd5f614b3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cd0c1a5b7ac7d76f9fe724cc7a723374.jpg b/Sklearn/sklearn-doc-zh/master/img/cd0c1a5b7ac7d76f9fe724cc7a723374.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cd0c1a5b7ac7d76f9fe724cc7a723374.jpg rename to Sklearn/sklearn-doc-zh/master/img/cd0c1a5b7ac7d76f9fe724cc7a723374.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cd345cf1e9e01448cd544361983ab95a.jpg b/Sklearn/sklearn-doc-zh/master/img/cd345cf1e9e01448cd544361983ab95a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cd345cf1e9e01448cd544361983ab95a.jpg rename to Sklearn/sklearn-doc-zh/master/img/cd345cf1e9e01448cd544361983ab95a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cd58ff0ab17f3ead1d5179426f2dae50.jpg b/Sklearn/sklearn-doc-zh/master/img/cd58ff0ab17f3ead1d5179426f2dae50.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cd58ff0ab17f3ead1d5179426f2dae50.jpg rename to Sklearn/sklearn-doc-zh/master/img/cd58ff0ab17f3ead1d5179426f2dae50.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cd8ca25fe0dc0cc43949bcaa5d2674c2.jpg 
b/Sklearn/sklearn-doc-zh/master/img/cd8ca25fe0dc0cc43949bcaa5d2674c2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cd8ca25fe0dc0cc43949bcaa5d2674c2.jpg rename to Sklearn/sklearn-doc-zh/master/img/cd8ca25fe0dc0cc43949bcaa5d2674c2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cda529a3abe8af421f1f218b1a390091.jpg b/Sklearn/sklearn-doc-zh/master/img/cda529a3abe8af421f1f218b1a390091.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cda529a3abe8af421f1f218b1a390091.jpg rename to Sklearn/sklearn-doc-zh/master/img/cda529a3abe8af421f1f218b1a390091.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cdaca3963c7797054326117c0bc0b368.jpg b/Sklearn/sklearn-doc-zh/master/img/cdaca3963c7797054326117c0bc0b368.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cdaca3963c7797054326117c0bc0b368.jpg rename to Sklearn/sklearn-doc-zh/master/img/cdaca3963c7797054326117c0bc0b368.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cdc5ef75d769259ef0537940296ab0b4.jpg b/Sklearn/sklearn-doc-zh/master/img/cdc5ef75d769259ef0537940296ab0b4.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cdc5ef75d769259ef0537940296ab0b4.jpg rename to Sklearn/sklearn-doc-zh/master/img/cdc5ef75d769259ef0537940296ab0b4.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cddb30ab60430b100271b055376e8363.jpg b/Sklearn/sklearn-doc-zh/master/img/cddb30ab60430b100271b055376e8363.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cddb30ab60430b100271b055376e8363.jpg rename to Sklearn/sklearn-doc-zh/master/img/cddb30ab60430b100271b055376e8363.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ce7fd91ddb116ab5d747de0316caf657.jpg b/Sklearn/sklearn-doc-zh/master/img/ce7fd91ddb116ab5d747de0316caf657.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ce7fd91ddb116ab5d747de0316caf657.jpg rename to Sklearn/sklearn-doc-zh/master/img/ce7fd91ddb116ab5d747de0316caf657.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cf52655ee609af9f3c27c06448a5bf67.jpg b/Sklearn/sklearn-doc-zh/master/img/cf52655ee609af9f3c27c06448a5bf67.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cf52655ee609af9f3c27c06448a5bf67.jpg rename to Sklearn/sklearn-doc-zh/master/img/cf52655ee609af9f3c27c06448a5bf67.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cf8cc964dfa6df1a7473fe033f9fb642.jpg b/Sklearn/sklearn-doc-zh/master/img/cf8cc964dfa6df1a7473fe033f9fb642.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cf8cc964dfa6df1a7473fe033f9fb642.jpg rename to Sklearn/sklearn-doc-zh/master/img/cf8cc964dfa6df1a7473fe033f9fb642.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cfe45a2d171ae9c5933cd6d48cd48cb0.jpg b/Sklearn/sklearn-doc-zh/master/img/cfe45a2d171ae9c5933cd6d48cd48cb0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cfe45a2d171ae9c5933cd6d48cd48cb0.jpg rename to Sklearn/sklearn-doc-zh/master/img/cfe45a2d171ae9c5933cd6d48cd48cb0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/cluster01.png b/Sklearn/sklearn-doc-zh/master/img/cluster01.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/cluster01.png rename to Sklearn/sklearn-doc-zh/master/img/cluster01.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d06a11a8d6ed2efac238ab0bdbd33326.jpg b/Sklearn/sklearn-doc-zh/master/img/d06a11a8d6ed2efac238ab0bdbd33326.jpg 
similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d06a11a8d6ed2efac238ab0bdbd33326.jpg rename to Sklearn/sklearn-doc-zh/master/img/d06a11a8d6ed2efac238ab0bdbd33326.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d11a3c7c81eef9f2c8ece06f91411e9e.jpg b/Sklearn/sklearn-doc-zh/master/img/d11a3c7c81eef9f2c8ece06f91411e9e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d11a3c7c81eef9f2c8ece06f91411e9e.jpg rename to Sklearn/sklearn-doc-zh/master/img/d11a3c7c81eef9f2c8ece06f91411e9e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d12d5f9823ac608127ac67df8cecff9d.jpg b/Sklearn/sklearn-doc-zh/master/img/d12d5f9823ac608127ac67df8cecff9d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d12d5f9823ac608127ac67df8cecff9d.jpg rename to Sklearn/sklearn-doc-zh/master/img/d12d5f9823ac608127ac67df8cecff9d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d1d5abd40c8364a2da2f181ede69fa56.jpg b/Sklearn/sklearn-doc-zh/master/img/d1d5abd40c8364a2da2f181ede69fa56.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d1d5abd40c8364a2da2f181ede69fa56.jpg rename to Sklearn/sklearn-doc-zh/master/img/d1d5abd40c8364a2da2f181ede69fa56.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d2b34f499ce1bbd28a276bc0d7afdde1.jpg b/Sklearn/sklearn-doc-zh/master/img/d2b34f499ce1bbd28a276bc0d7afdde1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d2b34f499ce1bbd28a276bc0d7afdde1.jpg rename to Sklearn/sklearn-doc-zh/master/img/d2b34f499ce1bbd28a276bc0d7afdde1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d2fed0ae8e2b987a781ee01a92c31dfb.jpg b/Sklearn/sklearn-doc-zh/master/img/d2fed0ae8e2b987a781ee01a92c31dfb.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d2fed0ae8e2b987a781ee01a92c31dfb.jpg rename to Sklearn/sklearn-doc-zh/master/img/d2fed0ae8e2b987a781ee01a92c31dfb.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d325b0db5d92ebf952f4b6d810fa43bd.jpg b/Sklearn/sklearn-doc-zh/master/img/d325b0db5d92ebf952f4b6d810fa43bd.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d325b0db5d92ebf952f4b6d810fa43bd.jpg rename to Sklearn/sklearn-doc-zh/master/img/d325b0db5d92ebf952f4b6d810fa43bd.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d35b85fc7ddd819b1fec30a6ef410fc9.jpg b/Sklearn/sklearn-doc-zh/master/img/d35b85fc7ddd819b1fec30a6ef410fc9.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d35b85fc7ddd819b1fec30a6ef410fc9.jpg rename to Sklearn/sklearn-doc-zh/master/img/d35b85fc7ddd819b1fec30a6ef410fc9.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d3c1bd7a2bac2e7cc22203e423d56e7e.jpg b/Sklearn/sklearn-doc-zh/master/img/d3c1bd7a2bac2e7cc22203e423d56e7e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d3c1bd7a2bac2e7cc22203e423d56e7e.jpg rename to Sklearn/sklearn-doc-zh/master/img/d3c1bd7a2bac2e7cc22203e423d56e7e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d3d363339c8708bf2058b128facd0aea.jpg b/Sklearn/sklearn-doc-zh/master/img/d3d363339c8708bf2058b128facd0aea.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d3d363339c8708bf2058b128facd0aea.jpg rename to Sklearn/sklearn-doc-zh/master/img/d3d363339c8708bf2058b128facd0aea.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d41288778c3d66bcae947c3078469126.jpg 
b/Sklearn/sklearn-doc-zh/master/img/d41288778c3d66bcae947c3078469126.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d41288778c3d66bcae947c3078469126.jpg rename to Sklearn/sklearn-doc-zh/master/img/d41288778c3d66bcae947c3078469126.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d459482314974b92f7f44cc36d6eae3e.jpg b/Sklearn/sklearn-doc-zh/master/img/d459482314974b92f7f44cc36d6eae3e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d459482314974b92f7f44cc36d6eae3e.jpg rename to Sklearn/sklearn-doc-zh/master/img/d459482314974b92f7f44cc36d6eae3e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d47bd99afb1d5dd3bff5b9809371c476.jpg b/Sklearn/sklearn-doc-zh/master/img/d47bd99afb1d5dd3bff5b9809371c476.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d47bd99afb1d5dd3bff5b9809371c476.jpg rename to Sklearn/sklearn-doc-zh/master/img/d47bd99afb1d5dd3bff5b9809371c476.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d49a868d9c8c944f42202a30647ff7cf.jpg b/Sklearn/sklearn-doc-zh/master/img/d49a868d9c8c944f42202a30647ff7cf.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d49a868d9c8c944f42202a30647ff7cf.jpg rename to Sklearn/sklearn-doc-zh/master/img/d49a868d9c8c944f42202a30647ff7cf.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d49c17958d641e42faa6fd3a46ac860a.jpg b/Sklearn/sklearn-doc-zh/master/img/d49c17958d641e42faa6fd3a46ac860a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d49c17958d641e42faa6fd3a46ac860a.jpg rename to Sklearn/sklearn-doc-zh/master/img/d49c17958d641e42faa6fd3a46ac860a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d5021b539c18587624a07ef6df00f585.jpg b/Sklearn/sklearn-doc-zh/master/img/d5021b539c18587624a07ef6df00f585.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d5021b539c18587624a07ef6df00f585.jpg rename to Sklearn/sklearn-doc-zh/master/img/d5021b539c18587624a07ef6df00f585.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d5a26fae0e652d4e951d9ec9ae1a01e5.jpg b/Sklearn/sklearn-doc-zh/master/img/d5a26fae0e652d4e951d9ec9ae1a01e5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d5a26fae0e652d4e951d9ec9ae1a01e5.jpg rename to Sklearn/sklearn-doc-zh/master/img/d5a26fae0e652d4e951d9ec9ae1a01e5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d5c9a11453ea30a1be50a1034052bd6b.jpg b/Sklearn/sklearn-doc-zh/master/img/d5c9a11453ea30a1be50a1034052bd6b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d5c9a11453ea30a1be50a1034052bd6b.jpg rename to Sklearn/sklearn-doc-zh/master/img/d5c9a11453ea30a1be50a1034052bd6b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d61124c62424b8a8d38adc3c41bb71f6.jpg b/Sklearn/sklearn-doc-zh/master/img/d61124c62424b8a8d38adc3c41bb71f6.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d61124c62424b8a8d38adc3c41bb71f6.jpg rename to Sklearn/sklearn-doc-zh/master/img/d61124c62424b8a8d38adc3c41bb71f6.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d6127761ddbd135a1317ec14f1ddfac4.jpg b/Sklearn/sklearn-doc-zh/master/img/d6127761ddbd135a1317ec14f1ddfac4.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d6127761ddbd135a1317ec14f1ddfac4.jpg rename to Sklearn/sklearn-doc-zh/master/img/d6127761ddbd135a1317ec14f1ddfac4.jpg diff --git 
a/Python/sklearn/sklearn-doc-zh/master/img/d6293957048ac05c3ae0dfac9949537c.jpg b/Sklearn/sklearn-doc-zh/master/img/d6293957048ac05c3ae0dfac9949537c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d6293957048ac05c3ae0dfac9949537c.jpg rename to Sklearn/sklearn-doc-zh/master/img/d6293957048ac05c3ae0dfac9949537c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d670eea3215462f64d74d9366622a490.jpg b/Sklearn/sklearn-doc-zh/master/img/d670eea3215462f64d74d9366622a490.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d670eea3215462f64d74d9366622a490.jpg rename to Sklearn/sklearn-doc-zh/master/img/d670eea3215462f64d74d9366622a490.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d69db8c22e9315a6fb454b276d5ce534.jpg b/Sklearn/sklearn-doc-zh/master/img/d69db8c22e9315a6fb454b276d5ce534.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d69db8c22e9315a6fb454b276d5ce534.jpg rename to Sklearn/sklearn-doc-zh/master/img/d69db8c22e9315a6fb454b276d5ce534.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d6d6e6638cd01ead4811579660e36b44.jpg b/Sklearn/sklearn-doc-zh/master/img/d6d6e6638cd01ead4811579660e36b44.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d6d6e6638cd01ead4811579660e36b44.jpg rename to Sklearn/sklearn-doc-zh/master/img/d6d6e6638cd01ead4811579660e36b44.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d6e5649338670ffec641327eeaa0521a.jpg b/Sklearn/sklearn-doc-zh/master/img/d6e5649338670ffec641327eeaa0521a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d6e5649338670ffec641327eeaa0521a.jpg rename to Sklearn/sklearn-doc-zh/master/img/d6e5649338670ffec641327eeaa0521a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d6f34fca0b5561181aa5263dbb97df74.jpg b/Sklearn/sklearn-doc-zh/master/img/d6f34fca0b5561181aa5263dbb97df74.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d6f34fca0b5561181aa5263dbb97df74.jpg rename to Sklearn/sklearn-doc-zh/master/img/d6f34fca0b5561181aa5263dbb97df74.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d7151d4911c077ded512eba1c6cd2bb0.jpg b/Sklearn/sklearn-doc-zh/master/img/d7151d4911c077ded512eba1c6cd2bb0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d7151d4911c077ded512eba1c6cd2bb0.jpg rename to Sklearn/sklearn-doc-zh/master/img/d7151d4911c077ded512eba1c6cd2bb0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d7228aff11bb03497e40badd984560a6.jpg b/Sklearn/sklearn-doc-zh/master/img/d7228aff11bb03497e40badd984560a6.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d7228aff11bb03497e40badd984560a6.jpg rename to Sklearn/sklearn-doc-zh/master/img/d7228aff11bb03497e40badd984560a6.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d7b279566c62332b11d20ca6ff026505.jpg b/Sklearn/sklearn-doc-zh/master/img/d7b279566c62332b11d20ca6ff026505.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d7b279566c62332b11d20ca6ff026505.jpg rename to Sklearn/sklearn-doc-zh/master/img/d7b279566c62332b11d20ca6ff026505.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d7e5ca264ffbd335db57ba194e2a5d0f.jpg b/Sklearn/sklearn-doc-zh/master/img/d7e5ca264ffbd335db57ba194e2a5d0f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d7e5ca264ffbd335db57ba194e2a5d0f.jpg rename to 
Sklearn/sklearn-doc-zh/master/img/d7e5ca264ffbd335db57ba194e2a5d0f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d7f26dee1f8849176f6438863fb775fb.jpg b/Sklearn/sklearn-doc-zh/master/img/d7f26dee1f8849176f6438863fb775fb.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d7f26dee1f8849176f6438863fb775fb.jpg rename to Sklearn/sklearn-doc-zh/master/img/d7f26dee1f8849176f6438863fb775fb.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d7ff3091308658ce388554d420581459.jpg b/Sklearn/sklearn-doc-zh/master/img/d7ff3091308658ce388554d420581459.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d7ff3091308658ce388554d420581459.jpg rename to Sklearn/sklearn-doc-zh/master/img/d7ff3091308658ce388554d420581459.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d822c46462e0ffda4dd99f74a070b6b3.jpg b/Sklearn/sklearn-doc-zh/master/img/d822c46462e0ffda4dd99f74a070b6b3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d822c46462e0ffda4dd99f74a070b6b3.jpg rename to Sklearn/sklearn-doc-zh/master/img/d822c46462e0ffda4dd99f74a070b6b3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d8b3d5242d513369a44f8bf0c6112744.jpg b/Sklearn/sklearn-doc-zh/master/img/d8b3d5242d513369a44f8bf0c6112744.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d8b3d5242d513369a44f8bf0c6112744.jpg rename to Sklearn/sklearn-doc-zh/master/img/d8b3d5242d513369a44f8bf0c6112744.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d90bb77f4f60c523c2bc041f768e8a49.jpg b/Sklearn/sklearn-doc-zh/master/img/d90bb77f4f60c523c2bc041f768e8a49.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d90bb77f4f60c523c2bc041f768e8a49.jpg rename to Sklearn/sklearn-doc-zh/master/img/d90bb77f4f60c523c2bc041f768e8a49.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d97ae32100e54dfed8139aef0fcc9b68.jpg b/Sklearn/sklearn-doc-zh/master/img/d97ae32100e54dfed8139aef0fcc9b68.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d97ae32100e54dfed8139aef0fcc9b68.jpg rename to Sklearn/sklearn-doc-zh/master/img/d97ae32100e54dfed8139aef0fcc9b68.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/d9ac7cfff134bd66e853020e32d76f5c.jpg b/Sklearn/sklearn-doc-zh/master/img/d9ac7cfff134bd66e853020e32d76f5c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/d9ac7cfff134bd66e853020e32d76f5c.jpg rename to Sklearn/sklearn-doc-zh/master/img/d9ac7cfff134bd66e853020e32d76f5c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/da2ce2d49bbab0c389600d1c82fccf9b.jpg b/Sklearn/sklearn-doc-zh/master/img/da2ce2d49bbab0c389600d1c82fccf9b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/da2ce2d49bbab0c389600d1c82fccf9b.jpg rename to Sklearn/sklearn-doc-zh/master/img/da2ce2d49bbab0c389600d1c82fccf9b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/da31be9c84ea7617ab534b511701d650.jpg b/Sklearn/sklearn-doc-zh/master/img/da31be9c84ea7617ab534b511701d650.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/da31be9c84ea7617ab534b511701d650.jpg rename to Sklearn/sklearn-doc-zh/master/img/da31be9c84ea7617ab534b511701d650.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/da928d409a2cf1615368a82030e09380.jpg b/Sklearn/sklearn-doc-zh/master/img/da928d409a2cf1615368a82030e09380.jpg similarity index 100% rename from 
Python/sklearn/sklearn-doc-zh/master/img/da928d409a2cf1615368a82030e09380.jpg rename to Sklearn/sklearn-doc-zh/master/img/da928d409a2cf1615368a82030e09380.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/db23fadfab6b660dbfa2934c4536beb1.jpg b/Sklearn/sklearn-doc-zh/master/img/db23fadfab6b660dbfa2934c4536beb1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/db23fadfab6b660dbfa2934c4536beb1.jpg rename to Sklearn/sklearn-doc-zh/master/img/db23fadfab6b660dbfa2934c4536beb1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/db24e5f707f974690c4334cfa218bbee.jpg b/Sklearn/sklearn-doc-zh/master/img/db24e5f707f974690c4334cfa218bbee.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/db24e5f707f974690c4334cfa218bbee.jpg rename to Sklearn/sklearn-doc-zh/master/img/db24e5f707f974690c4334cfa218bbee.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/db30d43fd890d5f28b84a667ddfbb39d.jpg b/Sklearn/sklearn-doc-zh/master/img/db30d43fd890d5f28b84a667ddfbb39d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/db30d43fd890d5f28b84a667ddfbb39d.jpg rename to Sklearn/sklearn-doc-zh/master/img/db30d43fd890d5f28b84a667ddfbb39d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/dc074c105944810a277030dfab298376.jpg b/Sklearn/sklearn-doc-zh/master/img/dc074c105944810a277030dfab298376.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/dc074c105944810a277030dfab298376.jpg rename to Sklearn/sklearn-doc-zh/master/img/dc074c105944810a277030dfab298376.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/dc219bfd2e157456e106676575955251.jpg b/Sklearn/sklearn-doc-zh/master/img/dc219bfd2e157456e106676575955251.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/dc219bfd2e157456e106676575955251.jpg rename to Sklearn/sklearn-doc-zh/master/img/dc219bfd2e157456e106676575955251.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/dc40e3a6d4022bfb488e0a4283b656b9.jpg b/Sklearn/sklearn-doc-zh/master/img/dc40e3a6d4022bfb488e0a4283b656b9.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/dc40e3a6d4022bfb488e0a4283b656b9.jpg rename to Sklearn/sklearn-doc-zh/master/img/dc40e3a6d4022bfb488e0a4283b656b9.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/dc8f095e63b3defdb85fcf54d7d2d8c2.jpg b/Sklearn/sklearn-doc-zh/master/img/dc8f095e63b3defdb85fcf54d7d2d8c2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/dc8f095e63b3defdb85fcf54d7d2d8c2.jpg rename to Sklearn/sklearn-doc-zh/master/img/dc8f095e63b3defdb85fcf54d7d2d8c2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/dd310c2fa94418ac4f4d12638444fd3b.jpg b/Sklearn/sklearn-doc-zh/master/img/dd310c2fa94418ac4f4d12638444fd3b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/dd310c2fa94418ac4f4d12638444fd3b.jpg rename to Sklearn/sklearn-doc-zh/master/img/dd310c2fa94418ac4f4d12638444fd3b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/dd9b7456f8406b6fac80cfd3ca11ccfc.jpg b/Sklearn/sklearn-doc-zh/master/img/dd9b7456f8406b6fac80cfd3ca11ccfc.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/dd9b7456f8406b6fac80cfd3ca11ccfc.jpg rename to Sklearn/sklearn-doc-zh/master/img/dd9b7456f8406b6fac80cfd3ca11ccfc.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/dda3ed622f104bd7e6abffbcb1691998.jpg b/Sklearn/sklearn-doc-zh/master/img/dda3ed622f104bd7e6abffbcb1691998.jpg similarity index 
100% rename from Python/sklearn/sklearn-doc-zh/master/img/dda3ed622f104bd7e6abffbcb1691998.jpg rename to Sklearn/sklearn-doc-zh/master/img/dda3ed622f104bd7e6abffbcb1691998.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ddb7802ca4af9cffa650eec942feb790.jpg b/Sklearn/sklearn-doc-zh/master/img/ddb7802ca4af9cffa650eec942feb790.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ddb7802ca4af9cffa650eec942feb790.jpg rename to Sklearn/sklearn-doc-zh/master/img/ddb7802ca4af9cffa650eec942feb790.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/dde55f7a409ac46b5694e689c4b151f5.jpg b/Sklearn/sklearn-doc-zh/master/img/dde55f7a409ac46b5694e689c4b151f5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/dde55f7a409ac46b5694e689c4b151f5.jpg rename to Sklearn/sklearn-doc-zh/master/img/dde55f7a409ac46b5694e689c4b151f5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/de55c53f911184b6ad3e562a4d694c01.jpg b/Sklearn/sklearn-doc-zh/master/img/de55c53f911184b6ad3e562a4d694c01.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/de55c53f911184b6ad3e562a4d694c01.jpg rename to Sklearn/sklearn-doc-zh/master/img/de55c53f911184b6ad3e562a4d694c01.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/de6303ad5b9808c7ae8c64ddc632d893.jpg b/Sklearn/sklearn-doc-zh/master/img/de6303ad5b9808c7ae8c64ddc632d893.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/de6303ad5b9808c7ae8c64ddc632d893.jpg rename to Sklearn/sklearn-doc-zh/master/img/de6303ad5b9808c7ae8c64ddc632d893.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/decc30cee202697370eb9e21062c54b7.jpg b/Sklearn/sklearn-doc-zh/master/img/decc30cee202697370eb9e21062c54b7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/decc30cee202697370eb9e21062c54b7.jpg rename to Sklearn/sklearn-doc-zh/master/img/decc30cee202697370eb9e21062c54b7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/dedef2ddd0f96df639d4c85fffb9bbd5.jpg b/Sklearn/sklearn-doc-zh/master/img/dedef2ddd0f96df639d4c85fffb9bbd5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/dedef2ddd0f96df639d4c85fffb9bbd5.jpg rename to Sklearn/sklearn-doc-zh/master/img/dedef2ddd0f96df639d4c85fffb9bbd5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/def4737951f9990642e65b2403941350.jpg b/Sklearn/sklearn-doc-zh/master/img/def4737951f9990642e65b2403941350.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/def4737951f9990642e65b2403941350.jpg rename to Sklearn/sklearn-doc-zh/master/img/def4737951f9990642e65b2403941350.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/df49142f65f7afa86c2e18f598f00729.jpg b/Sklearn/sklearn-doc-zh/master/img/df49142f65f7afa86c2e18f598f00729.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/df49142f65f7afa86c2e18f598f00729.jpg rename to Sklearn/sklearn-doc-zh/master/img/df49142f65f7afa86c2e18f598f00729.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/dfab82d3a21680e5b6d3898a02dc6e01.jpg b/Sklearn/sklearn-doc-zh/master/img/dfab82d3a21680e5b6d3898a02dc6e01.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/dfab82d3a21680e5b6d3898a02dc6e01.jpg rename to Sklearn/sklearn-doc-zh/master/img/dfab82d3a21680e5b6d3898a02dc6e01.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e02e680946360c19e1cee28c92173bc4.jpg b/Sklearn/sklearn-doc-zh/master/img/e02e680946360c19e1cee28c92173bc4.jpg 
similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e02e680946360c19e1cee28c92173bc4.jpg rename to Sklearn/sklearn-doc-zh/master/img/e02e680946360c19e1cee28c92173bc4.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e03066df748abd9273db055cb79f0f01.jpg b/Sklearn/sklearn-doc-zh/master/img/e03066df748abd9273db055cb79f0f01.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e03066df748abd9273db055cb79f0f01.jpg rename to Sklearn/sklearn-doc-zh/master/img/e03066df748abd9273db055cb79f0f01.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e0532dc18cc4c92c2b39f4b29d33cd13.jpg b/Sklearn/sklearn-doc-zh/master/img/e0532dc18cc4c92c2b39f4b29d33cd13.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e0532dc18cc4c92c2b39f4b29d33cd13.jpg rename to Sklearn/sklearn-doc-zh/master/img/e0532dc18cc4c92c2b39f4b29d33cd13.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e0d8dbb9574d5eb264279927dcf8baaf.jpg b/Sklearn/sklearn-doc-zh/master/img/e0d8dbb9574d5eb264279927dcf8baaf.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e0d8dbb9574d5eb264279927dcf8baaf.jpg rename to Sklearn/sklearn-doc-zh/master/img/e0d8dbb9574d5eb264279927dcf8baaf.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e18ade3134bef595ea6ddf488ff9557a.jpg b/Sklearn/sklearn-doc-zh/master/img/e18ade3134bef595ea6ddf488ff9557a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e18ade3134bef595ea6ddf488ff9557a.jpg rename to Sklearn/sklearn-doc-zh/master/img/e18ade3134bef595ea6ddf488ff9557a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e1b4d08b93d1d222e96d99475a766281.jpg b/Sklearn/sklearn-doc-zh/master/img/e1b4d08b93d1d222e96d99475a766281.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e1b4d08b93d1d222e96d99475a766281.jpg rename to Sklearn/sklearn-doc-zh/master/img/e1b4d08b93d1d222e96d99475a766281.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e24edaeb407b6a696ddb188697f0934d.jpg b/Sklearn/sklearn-doc-zh/master/img/e24edaeb407b6a696ddb188697f0934d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e24edaeb407b6a696ddb188697f0934d.jpg rename to Sklearn/sklearn-doc-zh/master/img/e24edaeb407b6a696ddb188697f0934d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e2632203a52191f8ba8e393e34545100.jpg b/Sklearn/sklearn-doc-zh/master/img/e2632203a52191f8ba8e393e34545100.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e2632203a52191f8ba8e393e34545100.jpg rename to Sklearn/sklearn-doc-zh/master/img/e2632203a52191f8ba8e393e34545100.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e2650a6b3222cf44bd311f7205909f06.jpg b/Sklearn/sklearn-doc-zh/master/img/e2650a6b3222cf44bd311f7205909f06.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e2650a6b3222cf44bd311f7205909f06.jpg rename to Sklearn/sklearn-doc-zh/master/img/e2650a6b3222cf44bd311f7205909f06.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e270fdc1fb7cabab295d31d189d77258.jpg b/Sklearn/sklearn-doc-zh/master/img/e270fdc1fb7cabab295d31d189d77258.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e270fdc1fb7cabab295d31d189d77258.jpg rename to Sklearn/sklearn-doc-zh/master/img/e270fdc1fb7cabab295d31d189d77258.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e279b8169ddd6581c5606c868ba52fae.jpg 
b/Sklearn/sklearn-doc-zh/master/img/e279b8169ddd6581c5606c868ba52fae.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e279b8169ddd6581c5606c868ba52fae.jpg rename to Sklearn/sklearn-doc-zh/master/img/e279b8169ddd6581c5606c868ba52fae.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e2bd3aaa1586d4d17301f7fe016eefd7.jpg b/Sklearn/sklearn-doc-zh/master/img/e2bd3aaa1586d4d17301f7fe016eefd7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e2bd3aaa1586d4d17301f7fe016eefd7.jpg rename to Sklearn/sklearn-doc-zh/master/img/e2bd3aaa1586d4d17301f7fe016eefd7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e2f9b08680b30cfb80102f69264fdd5c.jpg b/Sklearn/sklearn-doc-zh/master/img/e2f9b08680b30cfb80102f69264fdd5c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e2f9b08680b30cfb80102f69264fdd5c.jpg rename to Sklearn/sklearn-doc-zh/master/img/e2f9b08680b30cfb80102f69264fdd5c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e310c621bd78988800b952eb7542cd88.jpg b/Sklearn/sklearn-doc-zh/master/img/e310c621bd78988800b952eb7542cd88.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e310c621bd78988800b952eb7542cd88.jpg rename to Sklearn/sklearn-doc-zh/master/img/e310c621bd78988800b952eb7542cd88.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e3eebe26bb077cc3986d3beba9de4ac5.jpg b/Sklearn/sklearn-doc-zh/master/img/e3eebe26bb077cc3986d3beba9de4ac5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e3eebe26bb077cc3986d3beba9de4ac5.jpg rename to Sklearn/sklearn-doc-zh/master/img/e3eebe26bb077cc3986d3beba9de4ac5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e3ff277d54a34043adefa98a9e1a69d1.jpg b/Sklearn/sklearn-doc-zh/master/img/e3ff277d54a34043adefa98a9e1a69d1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e3ff277d54a34043adefa98a9e1a69d1.jpg rename to Sklearn/sklearn-doc-zh/master/img/e3ff277d54a34043adefa98a9e1a69d1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e473a2606f078eaa7b86800b11f4d62b.jpg b/Sklearn/sklearn-doc-zh/master/img/e473a2606f078eaa7b86800b11f4d62b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e473a2606f078eaa7b86800b11f4d62b.jpg rename to Sklearn/sklearn-doc-zh/master/img/e473a2606f078eaa7b86800b11f4d62b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e4e213d17043826a96dd917dcbdf5d85.jpg b/Sklearn/sklearn-doc-zh/master/img/e4e213d17043826a96dd917dcbdf5d85.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e4e213d17043826a96dd917dcbdf5d85.jpg rename to Sklearn/sklearn-doc-zh/master/img/e4e213d17043826a96dd917dcbdf5d85.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e5012484ffa6afb2c720d363b39a36b0.jpg b/Sklearn/sklearn-doc-zh/master/img/e5012484ffa6afb2c720d363b39a36b0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e5012484ffa6afb2c720d363b39a36b0.jpg rename to Sklearn/sklearn-doc-zh/master/img/e5012484ffa6afb2c720d363b39a36b0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e56abe6d36f21c0c6dd22d2a84535415.jpg b/Sklearn/sklearn-doc-zh/master/img/e56abe6d36f21c0c6dd22d2a84535415.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e56abe6d36f21c0c6dd22d2a84535415.jpg rename to Sklearn/sklearn-doc-zh/master/img/e56abe6d36f21c0c6dd22d2a84535415.jpg diff --git 
a/Python/sklearn/sklearn-doc-zh/master/img/e5ad06b17e1bacf475bf9247d93d1419.jpg b/Sklearn/sklearn-doc-zh/master/img/e5ad06b17e1bacf475bf9247d93d1419.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e5ad06b17e1bacf475bf9247d93d1419.jpg rename to Sklearn/sklearn-doc-zh/master/img/e5ad06b17e1bacf475bf9247d93d1419.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e5b3516a2cd7fbf2916643478e0bed70.jpg b/Sklearn/sklearn-doc-zh/master/img/e5b3516a2cd7fbf2916643478e0bed70.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e5b3516a2cd7fbf2916643478e0bed70.jpg rename to Sklearn/sklearn-doc-zh/master/img/e5b3516a2cd7fbf2916643478e0bed70.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e615670f11d811f002d1b58effe99dc0.jpg b/Sklearn/sklearn-doc-zh/master/img/e615670f11d811f002d1b58effe99dc0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e615670f11d811f002d1b58effe99dc0.jpg rename to Sklearn/sklearn-doc-zh/master/img/e615670f11d811f002d1b58effe99dc0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e64298b4d9439c3db54eeddbf3d92b4b.jpg b/Sklearn/sklearn-doc-zh/master/img/e64298b4d9439c3db54eeddbf3d92b4b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e64298b4d9439c3db54eeddbf3d92b4b.jpg rename to Sklearn/sklearn-doc-zh/master/img/e64298b4d9439c3db54eeddbf3d92b4b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e668ecc249e709e47f6955a74528bf7b.jpg b/Sklearn/sklearn-doc-zh/master/img/e668ecc249e709e47f6955a74528bf7b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e668ecc249e709e47f6955a74528bf7b.jpg rename to Sklearn/sklearn-doc-zh/master/img/e668ecc249e709e47f6955a74528bf7b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e6811d3f6333e9490d602db8dc1e3d96.jpg b/Sklearn/sklearn-doc-zh/master/img/e6811d3f6333e9490d602db8dc1e3d96.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e6811d3f6333e9490d602db8dc1e3d96.jpg rename to Sklearn/sklearn-doc-zh/master/img/e6811d3f6333e9490d602db8dc1e3d96.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e73c79ca71fe87074008fd5f464d686d.jpg b/Sklearn/sklearn-doc-zh/master/img/e73c79ca71fe87074008fd5f464d686d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e73c79ca71fe87074008fd5f464d686d.jpg rename to Sklearn/sklearn-doc-zh/master/img/e73c79ca71fe87074008fd5f464d686d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e7a07569c8c6af174aa061b9f8921065.jpg b/Sklearn/sklearn-doc-zh/master/img/e7a07569c8c6af174aa061b9f8921065.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e7a07569c8c6af174aa061b9f8921065.jpg rename to Sklearn/sklearn-doc-zh/master/img/e7a07569c8c6af174aa061b9f8921065.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e7a3ba1e52e7e8add5e2c14602a92e3a.jpg b/Sklearn/sklearn-doc-zh/master/img/e7a3ba1e52e7e8add5e2c14602a92e3a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e7a3ba1e52e7e8add5e2c14602a92e3a.jpg rename to Sklearn/sklearn-doc-zh/master/img/e7a3ba1e52e7e8add5e2c14602a92e3a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e83376afb9facc5992c857d659c7d812.jpg b/Sklearn/sklearn-doc-zh/master/img/e83376afb9facc5992c857d659c7d812.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e83376afb9facc5992c857d659c7d812.jpg rename to 
Sklearn/sklearn-doc-zh/master/img/e83376afb9facc5992c857d659c7d812.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e83fafb56db497bec93433bd0b0ee316.jpg b/Sklearn/sklearn-doc-zh/master/img/e83fafb56db497bec93433bd0b0ee316.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e83fafb56db497bec93433bd0b0ee316.jpg rename to Sklearn/sklearn-doc-zh/master/img/e83fafb56db497bec93433bd0b0ee316.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e84b0861711b65f28923da7c268645e7.jpg b/Sklearn/sklearn-doc-zh/master/img/e84b0861711b65f28923da7c268645e7.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e84b0861711b65f28923da7c268645e7.jpg rename to Sklearn/sklearn-doc-zh/master/img/e84b0861711b65f28923da7c268645e7.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/e996da94de858e5248f145e01733ed9d.jpg b/Sklearn/sklearn-doc-zh/master/img/e996da94de858e5248f145e01733ed9d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/e996da94de858e5248f145e01733ed9d.jpg rename to Sklearn/sklearn-doc-zh/master/img/e996da94de858e5248f145e01733ed9d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ea1fbc6466fa8a62aa1f55a550a686b3.jpg b/Sklearn/sklearn-doc-zh/master/img/ea1fbc6466fa8a62aa1f55a550a686b3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ea1fbc6466fa8a62aa1f55a550a686b3.jpg rename to Sklearn/sklearn-doc-zh/master/img/ea1fbc6466fa8a62aa1f55a550a686b3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/eaba00919535dcb10574f0228b451481.jpg b/Sklearn/sklearn-doc-zh/master/img/eaba00919535dcb10574f0228b451481.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/eaba00919535dcb10574f0228b451481.jpg rename to Sklearn/sklearn-doc-zh/master/img/eaba00919535dcb10574f0228b451481.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/eac4cdf0a783ddcd7098023e25bb16ef.jpg b/Sklearn/sklearn-doc-zh/master/img/eac4cdf0a783ddcd7098023e25bb16ef.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/eac4cdf0a783ddcd7098023e25bb16ef.jpg rename to Sklearn/sklearn-doc-zh/master/img/eac4cdf0a783ddcd7098023e25bb16ef.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/eada6f59eaee0a758bddb97b44835751.jpg b/Sklearn/sklearn-doc-zh/master/img/eada6f59eaee0a758bddb97b44835751.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/eada6f59eaee0a758bddb97b44835751.jpg rename to Sklearn/sklearn-doc-zh/master/img/eada6f59eaee0a758bddb97b44835751.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/eaf558e2c8d1fbd5426664c1698d80bd.jpg b/Sklearn/sklearn-doc-zh/master/img/eaf558e2c8d1fbd5426664c1698d80bd.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/eaf558e2c8d1fbd5426664c1698d80bd.jpg rename to Sklearn/sklearn-doc-zh/master/img/eaf558e2c8d1fbd5426664c1698d80bd.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/eb604628a01ce7d6db62d61eba6e2e2f.jpg b/Sklearn/sklearn-doc-zh/master/img/eb604628a01ce7d6db62d61eba6e2e2f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/eb604628a01ce7d6db62d61eba6e2e2f.jpg rename to Sklearn/sklearn-doc-zh/master/img/eb604628a01ce7d6db62d61eba6e2e2f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/eb89c7ce95ca7e68685f180915bf0511.jpg b/Sklearn/sklearn-doc-zh/master/img/eb89c7ce95ca7e68685f180915bf0511.jpg similarity index 100% rename from 
Python/sklearn/sklearn-doc-zh/master/img/eb89c7ce95ca7e68685f180915bf0511.jpg rename to Sklearn/sklearn-doc-zh/master/img/eb89c7ce95ca7e68685f180915bf0511.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/eba6c21adbb5d5905624446cc970a7d3.jpg b/Sklearn/sklearn-doc-zh/master/img/eba6c21adbb5d5905624446cc970a7d3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/eba6c21adbb5d5905624446cc970a7d3.jpg rename to Sklearn/sklearn-doc-zh/master/img/eba6c21adbb5d5905624446cc970a7d3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ebdfa216db7c73e9067f547e1e65ca02.jpg b/Sklearn/sklearn-doc-zh/master/img/ebdfa216db7c73e9067f547e1e65ca02.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ebdfa216db7c73e9067f547e1e65ca02.jpg rename to Sklearn/sklearn-doc-zh/master/img/ebdfa216db7c73e9067f547e1e65ca02.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ec29f705a6be2ce512a10c266dd755f0.jpg b/Sklearn/sklearn-doc-zh/master/img/ec29f705a6be2ce512a10c266dd755f0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ec29f705a6be2ce512a10c266dd755f0.jpg rename to Sklearn/sklearn-doc-zh/master/img/ec29f705a6be2ce512a10c266dd755f0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ed352c0fca166fdaa12d7da93676df50.jpg b/Sklearn/sklearn-doc-zh/master/img/ed352c0fca166fdaa12d7da93676df50.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ed352c0fca166fdaa12d7da93676df50.jpg rename to Sklearn/sklearn-doc-zh/master/img/ed352c0fca166fdaa12d7da93676df50.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ed407df5e0211da2859805a96e271751.jpg b/Sklearn/sklearn-doc-zh/master/img/ed407df5e0211da2859805a96e271751.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ed407df5e0211da2859805a96e271751.jpg rename to Sklearn/sklearn-doc-zh/master/img/ed407df5e0211da2859805a96e271751.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ed51906ca8fbc868248006c841aefa2b.jpg b/Sklearn/sklearn-doc-zh/master/img/ed51906ca8fbc868248006c841aefa2b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ed51906ca8fbc868248006c841aefa2b.jpg rename to Sklearn/sklearn-doc-zh/master/img/ed51906ca8fbc868248006c841aefa2b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ed6a1db8527fda759b14943c1b36d88e.jpg b/Sklearn/sklearn-doc-zh/master/img/ed6a1db8527fda759b14943c1b36d88e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ed6a1db8527fda759b14943c1b36d88e.jpg rename to Sklearn/sklearn-doc-zh/master/img/ed6a1db8527fda759b14943c1b36d88e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ed70b000f50fb169ffe20ca2979e4a75.jpg b/Sklearn/sklearn-doc-zh/master/img/ed70b000f50fb169ffe20ca2979e4a75.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ed70b000f50fb169ffe20ca2979e4a75.jpg rename to Sklearn/sklearn-doc-zh/master/img/ed70b000f50fb169ffe20ca2979e4a75.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/edbf7affc28abad1a964eeeeac3e711c.jpg b/Sklearn/sklearn-doc-zh/master/img/edbf7affc28abad1a964eeeeac3e711c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/edbf7affc28abad1a964eeeeac3e711c.jpg rename to Sklearn/sklearn-doc-zh/master/img/edbf7affc28abad1a964eeeeac3e711c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ede6a26a443c24b8cea943a4a6f144f0.jpg b/Sklearn/sklearn-doc-zh/master/img/ede6a26a443c24b8cea943a4a6f144f0.jpg similarity index 
100% rename from Python/sklearn/sklearn-doc-zh/master/img/ede6a26a443c24b8cea943a4a6f144f0.jpg rename to Sklearn/sklearn-doc-zh/master/img/ede6a26a443c24b8cea943a4a6f144f0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ee5d94bdc1dac94ab975f3db18552505.jpg b/Sklearn/sklearn-doc-zh/master/img/ee5d94bdc1dac94ab975f3db18552505.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ee5d94bdc1dac94ab975f3db18552505.jpg rename to Sklearn/sklearn-doc-zh/master/img/ee5d94bdc1dac94ab975f3db18552505.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ee68d82006856c6355aa0dca42cd5054.jpg b/Sklearn/sklearn-doc-zh/master/img/ee68d82006856c6355aa0dca42cd5054.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ee68d82006856c6355aa0dca42cd5054.jpg rename to Sklearn/sklearn-doc-zh/master/img/ee68d82006856c6355aa0dca42cd5054.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ee78ab463ea8dc72594f270f5193a7a6.jpg b/Sklearn/sklearn-doc-zh/master/img/ee78ab463ea8dc72594f270f5193a7a6.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ee78ab463ea8dc72594f270f5193a7a6.jpg rename to Sklearn/sklearn-doc-zh/master/img/ee78ab463ea8dc72594f270f5193a7a6.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/eeaf066f8cca5064b706ccfc4728323d.jpg b/Sklearn/sklearn-doc-zh/master/img/eeaf066f8cca5064b706ccfc4728323d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/eeaf066f8cca5064b706ccfc4728323d.jpg rename to Sklearn/sklearn-doc-zh/master/img/eeaf066f8cca5064b706ccfc4728323d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/eeb2bac86ebedef3d8d2dcbf5b8c735b.jpg b/Sklearn/sklearn-doc-zh/master/img/eeb2bac86ebedef3d8d2dcbf5b8c735b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/eeb2bac86ebedef3d8d2dcbf5b8c735b.jpg rename to Sklearn/sklearn-doc-zh/master/img/eeb2bac86ebedef3d8d2dcbf5b8c735b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ef59d065ac32862cd7527b0c3fcf7956.jpg b/Sklearn/sklearn-doc-zh/master/img/ef59d065ac32862cd7527b0c3fcf7956.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ef59d065ac32862cd7527b0c3fcf7956.jpg rename to Sklearn/sklearn-doc-zh/master/img/ef59d065ac32862cd7527b0c3fcf7956.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/efaeec5dadbe79caddb0f92abab55f5b.jpg b/Sklearn/sklearn-doc-zh/master/img/efaeec5dadbe79caddb0f92abab55f5b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/efaeec5dadbe79caddb0f92abab55f5b.jpg rename to Sklearn/sklearn-doc-zh/master/img/efaeec5dadbe79caddb0f92abab55f5b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/efb0c43ded3d4bdfb4b1d2092c8ee446.jpg b/Sklearn/sklearn-doc-zh/master/img/efb0c43ded3d4bdfb4b1d2092c8ee446.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/efb0c43ded3d4bdfb4b1d2092c8ee446.jpg rename to Sklearn/sklearn-doc-zh/master/img/efb0c43ded3d4bdfb4b1d2092c8ee446.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f0b72920659961ba27aec1da59f3019c.jpg b/Sklearn/sklearn-doc-zh/master/img/f0b72920659961ba27aec1da59f3019c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f0b72920659961ba27aec1da59f3019c.jpg rename to Sklearn/sklearn-doc-zh/master/img/f0b72920659961ba27aec1da59f3019c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f15b9946d9078749f894a78579dc6778.jpg b/Sklearn/sklearn-doc-zh/master/img/f15b9946d9078749f894a78579dc6778.jpg 
similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f15b9946d9078749f894a78579dc6778.jpg rename to Sklearn/sklearn-doc-zh/master/img/f15b9946d9078749f894a78579dc6778.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f19ea7e3ca675413d6ee0f2faff14b75.jpg b/Sklearn/sklearn-doc-zh/master/img/f19ea7e3ca675413d6ee0f2faff14b75.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f19ea7e3ca675413d6ee0f2faff14b75.jpg rename to Sklearn/sklearn-doc-zh/master/img/f19ea7e3ca675413d6ee0f2faff14b75.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f1c8c9b812ae1b6ec189eda900e47269.jpg b/Sklearn/sklearn-doc-zh/master/img/f1c8c9b812ae1b6ec189eda900e47269.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f1c8c9b812ae1b6ec189eda900e47269.jpg rename to Sklearn/sklearn-doc-zh/master/img/f1c8c9b812ae1b6ec189eda900e47269.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f1fa822436569807fdc9dca5d2879d99.jpg b/Sklearn/sklearn-doc-zh/master/img/f1fa822436569807fdc9dca5d2879d99.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f1fa822436569807fdc9dca5d2879d99.jpg rename to Sklearn/sklearn-doc-zh/master/img/f1fa822436569807fdc9dca5d2879d99.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f1fb5834480bfa9770be94da12bbd514.jpg b/Sklearn/sklearn-doc-zh/master/img/f1fb5834480bfa9770be94da12bbd514.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f1fb5834480bfa9770be94da12bbd514.jpg rename to Sklearn/sklearn-doc-zh/master/img/f1fb5834480bfa9770be94da12bbd514.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f1fc9ca63c663059f76c2af6729189d1.jpg b/Sklearn/sklearn-doc-zh/master/img/f1fc9ca63c663059f76c2af6729189d1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f1fc9ca63c663059f76c2af6729189d1.jpg rename to Sklearn/sklearn-doc-zh/master/img/f1fc9ca63c663059f76c2af6729189d1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f211ed45608192b0763ed51c85b60811.jpg b/Sklearn/sklearn-doc-zh/master/img/f211ed45608192b0763ed51c85b60811.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f211ed45608192b0763ed51c85b60811.jpg rename to Sklearn/sklearn-doc-zh/master/img/f211ed45608192b0763ed51c85b60811.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f28436a66fb892c9e8923e6649f19065.jpg b/Sklearn/sklearn-doc-zh/master/img/f28436a66fb892c9e8923e6649f19065.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f28436a66fb892c9e8923e6649f19065.jpg rename to Sklearn/sklearn-doc-zh/master/img/f28436a66fb892c9e8923e6649f19065.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f298c2b42dd32bed6f02df3c6d4f7cf9.jpg b/Sklearn/sklearn-doc-zh/master/img/f298c2b42dd32bed6f02df3c6d4f7cf9.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f298c2b42dd32bed6f02df3c6d4f7cf9.jpg rename to Sklearn/sklearn-doc-zh/master/img/f298c2b42dd32bed6f02df3c6d4f7cf9.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f3432e537038cc7e319db4ea1ff6a8e3.jpg b/Sklearn/sklearn-doc-zh/master/img/f3432e537038cc7e319db4ea1ff6a8e3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f3432e537038cc7e319db4ea1ff6a8e3.jpg rename to Sklearn/sklearn-doc-zh/master/img/f3432e537038cc7e319db4ea1ff6a8e3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f34cef4cb73bfa4cbe2f9b4300a8940c.jpg 
b/Sklearn/sklearn-doc-zh/master/img/f34cef4cb73bfa4cbe2f9b4300a8940c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f34cef4cb73bfa4cbe2f9b4300a8940c.jpg rename to Sklearn/sklearn-doc-zh/master/img/f34cef4cb73bfa4cbe2f9b4300a8940c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f35f174b5f70ab18c19107e3f0fbe889.jpg b/Sklearn/sklearn-doc-zh/master/img/f35f174b5f70ab18c19107e3f0fbe889.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f35f174b5f70ab18c19107e3f0fbe889.jpg rename to Sklearn/sklearn-doc-zh/master/img/f35f174b5f70ab18c19107e3f0fbe889.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f3893160388ee4203c313659d729cef0.jpg b/Sklearn/sklearn-doc-zh/master/img/f3893160388ee4203c313659d729cef0.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f3893160388ee4203c313659d729cef0.jpg rename to Sklearn/sklearn-doc-zh/master/img/f3893160388ee4203c313659d729cef0.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f38f84b73c7da2884c38bdfca1a38b1c.jpg b/Sklearn/sklearn-doc-zh/master/img/f38f84b73c7da2884c38bdfca1a38b1c.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f38f84b73c7da2884c38bdfca1a38b1c.jpg rename to Sklearn/sklearn-doc-zh/master/img/f38f84b73c7da2884c38bdfca1a38b1c.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f40416aceb254b77100eb361321c1804.jpg b/Sklearn/sklearn-doc-zh/master/img/f40416aceb254b77100eb361321c1804.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f40416aceb254b77100eb361321c1804.jpg rename to Sklearn/sklearn-doc-zh/master/img/f40416aceb254b77100eb361321c1804.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f43793e26fc93870e33eb062060e309a.jpg b/Sklearn/sklearn-doc-zh/master/img/f43793e26fc93870e33eb062060e309a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f43793e26fc93870e33eb062060e309a.jpg rename to Sklearn/sklearn-doc-zh/master/img/f43793e26fc93870e33eb062060e309a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f4ad81b759af5604d12ae25c4c541224.jpg b/Sklearn/sklearn-doc-zh/master/img/f4ad81b759af5604d12ae25c4c541224.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f4ad81b759af5604d12ae25c4c541224.jpg rename to Sklearn/sklearn-doc-zh/master/img/f4ad81b759af5604d12ae25c4c541224.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f4c7787828dd90f6b47e1677bbc806da.jpg b/Sklearn/sklearn-doc-zh/master/img/f4c7787828dd90f6b47e1677bbc806da.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f4c7787828dd90f6b47e1677bbc806da.jpg rename to Sklearn/sklearn-doc-zh/master/img/f4c7787828dd90f6b47e1677bbc806da.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f5291f866455b62cd6c68f419444e5cf.jpg b/Sklearn/sklearn-doc-zh/master/img/f5291f866455b62cd6c68f419444e5cf.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f5291f866455b62cd6c68f419444e5cf.jpg rename to Sklearn/sklearn-doc-zh/master/img/f5291f866455b62cd6c68f419444e5cf.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f60c0101ae8f649bb02ed8b24b30fd83.jpg b/Sklearn/sklearn-doc-zh/master/img/f60c0101ae8f649bb02ed8b24b30fd83.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f60c0101ae8f649bb02ed8b24b30fd83.jpg rename to Sklearn/sklearn-doc-zh/master/img/f60c0101ae8f649bb02ed8b24b30fd83.jpg diff --git 
a/Python/sklearn/sklearn-doc-zh/master/img/f6ce0899ba52f1169500b726ee9c8a92.jpg b/Sklearn/sklearn-doc-zh/master/img/f6ce0899ba52f1169500b726ee9c8a92.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f6ce0899ba52f1169500b726ee9c8a92.jpg rename to Sklearn/sklearn-doc-zh/master/img/f6ce0899ba52f1169500b726ee9c8a92.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f7129cf20abc58eaa0e261335a7606a6.jpg b/Sklearn/sklearn-doc-zh/master/img/f7129cf20abc58eaa0e261335a7606a6.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f7129cf20abc58eaa0e261335a7606a6.jpg rename to Sklearn/sklearn-doc-zh/master/img/f7129cf20abc58eaa0e261335a7606a6.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f72a2f9f160a11abc8568b72386776fe.jpg b/Sklearn/sklearn-doc-zh/master/img/f72a2f9f160a11abc8568b72386776fe.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f72a2f9f160a11abc8568b72386776fe.jpg rename to Sklearn/sklearn-doc-zh/master/img/f72a2f9f160a11abc8568b72386776fe.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f7dd5b16c1d8c3e278e9a1fa7f49dcd2.jpg b/Sklearn/sklearn-doc-zh/master/img/f7dd5b16c1d8c3e278e9a1fa7f49dcd2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f7dd5b16c1d8c3e278e9a1fa7f49dcd2.jpg rename to Sklearn/sklearn-doc-zh/master/img/f7dd5b16c1d8c3e278e9a1fa7f49dcd2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f7ee2b868860148ea59bc617d8ba7bb1.jpg b/Sklearn/sklearn-doc-zh/master/img/f7ee2b868860148ea59bc617d8ba7bb1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f7ee2b868860148ea59bc617d8ba7bb1.jpg rename to Sklearn/sklearn-doc-zh/master/img/f7ee2b868860148ea59bc617d8ba7bb1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f7f0b321634c8d80ceacdc75ee3c68b6.jpg b/Sklearn/sklearn-doc-zh/master/img/f7f0b321634c8d80ceacdc75ee3c68b6.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f7f0b321634c8d80ceacdc75ee3c68b6.jpg rename to Sklearn/sklearn-doc-zh/master/img/f7f0b321634c8d80ceacdc75ee3c68b6.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f8d66dde73704b8821db5322592a0cc2.jpg b/Sklearn/sklearn-doc-zh/master/img/f8d66dde73704b8821db5322592a0cc2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f8d66dde73704b8821db5322592a0cc2.jpg rename to Sklearn/sklearn-doc-zh/master/img/f8d66dde73704b8821db5322592a0cc2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f8e0c6c9a82bcbf369e2d0b7fc7aba8d.jpg b/Sklearn/sklearn-doc-zh/master/img/f8e0c6c9a82bcbf369e2d0b7fc7aba8d.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f8e0c6c9a82bcbf369e2d0b7fc7aba8d.jpg rename to Sklearn/sklearn-doc-zh/master/img/f8e0c6c9a82bcbf369e2d0b7fc7aba8d.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f8f807bd22e1f9f3c4271c78c8cb33fa.jpg b/Sklearn/sklearn-doc-zh/master/img/f8f807bd22e1f9f3c4271c78c8cb33fa.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f8f807bd22e1f9f3c4271c78c8cb33fa.jpg rename to Sklearn/sklearn-doc-zh/master/img/f8f807bd22e1f9f3c4271c78c8cb33fa.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f92e6fadff74949dcf9c70e40d1a7619.jpg b/Sklearn/sklearn-doc-zh/master/img/f92e6fadff74949dcf9c70e40d1a7619.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f92e6fadff74949dcf9c70e40d1a7619.jpg rename to 
Sklearn/sklearn-doc-zh/master/img/f92e6fadff74949dcf9c70e40d1a7619.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f93871977da52a6d11045d57c3e18728.jpg b/Sklearn/sklearn-doc-zh/master/img/f93871977da52a6d11045d57c3e18728.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f93871977da52a6d11045d57c3e18728.jpg rename to Sklearn/sklearn-doc-zh/master/img/f93871977da52a6d11045d57c3e18728.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f94e86205401e8da73308b60aa64b05b.jpg b/Sklearn/sklearn-doc-zh/master/img/f94e86205401e8da73308b60aa64b05b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f94e86205401e8da73308b60aa64b05b.jpg rename to Sklearn/sklearn-doc-zh/master/img/f94e86205401e8da73308b60aa64b05b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f996477bc9806499e6b6a1ea4d9ae8eb.jpg b/Sklearn/sklearn-doc-zh/master/img/f996477bc9806499e6b6a1ea4d9ae8eb.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f996477bc9806499e6b6a1ea4d9ae8eb.jpg rename to Sklearn/sklearn-doc-zh/master/img/f996477bc9806499e6b6a1ea4d9ae8eb.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f9d2fc91f381e1772999a738d3c8c32b.jpg b/Sklearn/sklearn-doc-zh/master/img/f9d2fc91f381e1772999a738d3c8c32b.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f9d2fc91f381e1772999a738d3c8c32b.jpg rename to Sklearn/sklearn-doc-zh/master/img/f9d2fc91f381e1772999a738d3c8c32b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/f9e7fc3940e2875bf542aeda657d0718.jpg b/Sklearn/sklearn-doc-zh/master/img/f9e7fc3940e2875bf542aeda657d0718.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/f9e7fc3940e2875bf542aeda657d0718.jpg rename to Sklearn/sklearn-doc-zh/master/img/f9e7fc3940e2875bf542aeda657d0718.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fa1895bee67b8c643cbaab1e8da8620f.jpg b/Sklearn/sklearn-doc-zh/master/img/fa1895bee67b8c643cbaab1e8da8620f.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fa1895bee67b8c643cbaab1e8da8620f.jpg rename to Sklearn/sklearn-doc-zh/master/img/fa1895bee67b8c643cbaab1e8da8620f.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fa48fa696e5242bb078fb786e6dc24c3.jpg b/Sklearn/sklearn-doc-zh/master/img/fa48fa696e5242bb078fb786e6dc24c3.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fa48fa696e5242bb078fb786e6dc24c3.jpg rename to Sklearn/sklearn-doc-zh/master/img/fa48fa696e5242bb078fb786e6dc24c3.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fae30a190cd9e5f5d06f534d956df5f5.jpg b/Sklearn/sklearn-doc-zh/master/img/fae30a190cd9e5f5d06f534d956df5f5.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fae30a190cd9e5f5d06f534d956df5f5.jpg rename to Sklearn/sklearn-doc-zh/master/img/fae30a190cd9e5f5d06f534d956df5f5.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fb8da9a6dd6e45015b629002d748d9b1.jpg b/Sklearn/sklearn-doc-zh/master/img/fb8da9a6dd6e45015b629002d748d9b1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fb8da9a6dd6e45015b629002d748d9b1.jpg rename to Sklearn/sklearn-doc-zh/master/img/fb8da9a6dd6e45015b629002d748d9b1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fb9cbfd2ff15ac51a36902f0a6037c28.jpg b/Sklearn/sklearn-doc-zh/master/img/fb9cbfd2ff15ac51a36902f0a6037c28.jpg similarity index 100% rename from 
Python/sklearn/sklearn-doc-zh/master/img/fb9cbfd2ff15ac51a36902f0a6037c28.jpg rename to Sklearn/sklearn-doc-zh/master/img/fb9cbfd2ff15ac51a36902f0a6037c28.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fc080793a40b71dc553fe8966ad7516a.jpg b/Sklearn/sklearn-doc-zh/master/img/fc080793a40b71dc553fe8966ad7516a.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fc080793a40b71dc553fe8966ad7516a.jpg rename to Sklearn/sklearn-doc-zh/master/img/fc080793a40b71dc553fe8966ad7516a.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fc333385a9012524b39bc23303de30d4.jpg b/Sklearn/sklearn-doc-zh/master/img/fc333385a9012524b39bc23303de30d4.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fc333385a9012524b39bc23303de30d4.jpg rename to Sklearn/sklearn-doc-zh/master/img/fc333385a9012524b39bc23303de30d4.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fcf31635bf1c46833111df71ab92b68e.jpg b/Sklearn/sklearn-doc-zh/master/img/fcf31635bf1c46833111df71ab92b68e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fcf31635bf1c46833111df71ab92b68e.jpg rename to Sklearn/sklearn-doc-zh/master/img/fcf31635bf1c46833111df71ab92b68e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fd132d0faf19fdc76254a6317ed1acfd.jpg b/Sklearn/sklearn-doc-zh/master/img/fd132d0faf19fdc76254a6317ed1acfd.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fd132d0faf19fdc76254a6317ed1acfd.jpg rename to Sklearn/sklearn-doc-zh/master/img/fd132d0faf19fdc76254a6317ed1acfd.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fd132d0faf19fdc76254a6317ed1acfd.png b/Sklearn/sklearn-doc-zh/master/img/fd132d0faf19fdc76254a6317ed1acfd.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fd132d0faf19fdc76254a6317ed1acfd.png rename to Sklearn/sklearn-doc-zh/master/img/fd132d0faf19fdc76254a6317ed1acfd.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fd6f65ce4fb7491d7628d1ce576c19d4.jpg b/Sklearn/sklearn-doc-zh/master/img/fd6f65ce4fb7491d7628d1ce576c19d4.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fd6f65ce4fb7491d7628d1ce576c19d4.jpg rename to Sklearn/sklearn-doc-zh/master/img/fd6f65ce4fb7491d7628d1ce576c19d4.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fd785ede0569b8be0bea11bc8eecd583.jpg b/Sklearn/sklearn-doc-zh/master/img/fd785ede0569b8be0bea11bc8eecd583.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fd785ede0569b8be0bea11bc8eecd583.jpg rename to Sklearn/sklearn-doc-zh/master/img/fd785ede0569b8be0bea11bc8eecd583.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fde3a2898bfc0ce1823058fe7b706b9e.jpg b/Sklearn/sklearn-doc-zh/master/img/fde3a2898bfc0ce1823058fe7b706b9e.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fde3a2898bfc0ce1823058fe7b706b9e.jpg rename to Sklearn/sklearn-doc-zh/master/img/fde3a2898bfc0ce1823058fe7b706b9e.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fdff527ccbac4fd87c2ca9c4bed5fce2.jpg b/Sklearn/sklearn-doc-zh/master/img/fdff527ccbac4fd87c2ca9c4bed5fce2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fdff527ccbac4fd87c2ca9c4bed5fce2.jpg rename to Sklearn/sklearn-doc-zh/master/img/fdff527ccbac4fd87c2ca9c4bed5fce2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fe1d79339349f9b6263e123094ffce7b.jpg b/Sklearn/sklearn-doc-zh/master/img/fe1d79339349f9b6263e123094ffce7b.jpg similarity index 
100% rename from Python/sklearn/sklearn-doc-zh/master/img/fe1d79339349f9b6263e123094ffce7b.jpg rename to Sklearn/sklearn-doc-zh/master/img/fe1d79339349f9b6263e123094ffce7b.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fe5ed835e0d3407e3f2d694d8bc049a1.jpg b/Sklearn/sklearn-doc-zh/master/img/fe5ed835e0d3407e3f2d694d8bc049a1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fe5ed835e0d3407e3f2d694d8bc049a1.jpg rename to Sklearn/sklearn-doc-zh/master/img/fe5ed835e0d3407e3f2d694d8bc049a1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fe62193b4391c9f60e373f03623696ac.jpg b/Sklearn/sklearn-doc-zh/master/img/fe62193b4391c9f60e373f03623696ac.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fe62193b4391c9f60e373f03623696ac.jpg rename to Sklearn/sklearn-doc-zh/master/img/fe62193b4391c9f60e373f03623696ac.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/fe9e5bb155154914f761d6497915e9cb.jpg b/Sklearn/sklearn-doc-zh/master/img/fe9e5bb155154914f761d6497915e9cb.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/fe9e5bb155154914f761d6497915e9cb.jpg rename to Sklearn/sklearn-doc-zh/master/img/fe9e5bb155154914f761d6497915e9cb.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ff5428ca3c50ed06f5162ad194377188.jpg b/Sklearn/sklearn-doc-zh/master/img/ff5428ca3c50ed06f5162ad194377188.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ff5428ca3c50ed06f5162ad194377188.jpg rename to Sklearn/sklearn-doc-zh/master/img/ff5428ca3c50ed06f5162ad194377188.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ff5e98366afa13070d3b410c55a80db1.jpg b/Sklearn/sklearn-doc-zh/master/img/ff5e98366afa13070d3b410c55a80db1.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ff5e98366afa13070d3b410c55a80db1.jpg rename to Sklearn/sklearn-doc-zh/master/img/ff5e98366afa13070d3b410c55a80db1.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ffecfca02992b6a85e966c9440cb40dd.jpg b/Sklearn/sklearn-doc-zh/master/img/ffecfca02992b6a85e966c9440cb40dd.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ffecfca02992b6a85e966c9440cb40dd.jpg rename to Sklearn/sklearn-doc-zh/master/img/ffecfca02992b6a85e966c9440cb40dd.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/ffecfca02992b6a85e966c9440cb40dd2.jpg b/Sklearn/sklearn-doc-zh/master/img/ffecfca02992b6a85e966c9440cb40dd2.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/ffecfca02992b6a85e966c9440cb40dd2.jpg rename to Sklearn/sklearn-doc-zh/master/img/ffecfca02992b6a85e966c9440cb40dd2.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/grid_search_cross_validation.png b/Sklearn/sklearn-doc-zh/master/img/grid_search_cross_validation.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/grid_search_cross_validation.png rename to Sklearn/sklearn-doc-zh/master/img/grid_search_cross_validation.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/grid_search_workflow.png b/Sklearn/sklearn-doc-zh/master/img/grid_search_workflow.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/grid_search_workflow.png rename to Sklearn/sklearn-doc-zh/master/img/grid_search_workflow.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/iris.jpg b/Sklearn/sklearn-doc-zh/master/img/iris.jpg similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/iris.jpg rename to 
Sklearn/sklearn-doc-zh/master/img/iris.jpg diff --git a/Python/sklearn/sklearn-doc-zh/master/img/knn01.png b/Sklearn/sklearn-doc-zh/master/img/knn01.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/knn01.png rename to Sklearn/sklearn-doc-zh/master/img/knn01.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/knn02.png b/Sklearn/sklearn-doc-zh/master/img/knn02.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/knn02.png rename to Sklearn/sklearn-doc-zh/master/img/knn02.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/knn03.png b/Sklearn/sklearn-doc-zh/master/img/knn03.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/knn03.png rename to Sklearn/sklearn-doc-zh/master/img/knn03.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/knn04.png b/Sklearn/sklearn-doc-zh/master/img/knn04.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/knn04.png rename to Sklearn/sklearn-doc-zh/master/img/knn04.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/knn05.png b/Sklearn/sklearn-doc-zh/master/img/knn05.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/knn05.png rename to Sklearn/sklearn-doc-zh/master/img/knn05.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/preprocessing001.png b/Sklearn/sklearn-doc-zh/master/img/preprocessing001.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/preprocessing001.png rename to Sklearn/sklearn-doc-zh/master/img/preprocessing001.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/preprocessing002.png b/Sklearn/sklearn-doc-zh/master/img/preprocessing002.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/preprocessing002.png rename to Sklearn/sklearn-doc-zh/master/img/preprocessing002.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/projection001.png b/Sklearn/sklearn-doc-zh/master/img/projection001.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/projection001.png rename to Sklearn/sklearn-doc-zh/master/img/projection001.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/projection002.png b/Sklearn/sklearn-doc-zh/master/img/projection002.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/projection002.png rename to Sklearn/sklearn-doc-zh/master/img/projection002.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/score001.png b/Sklearn/sklearn-doc-zh/master/img/score001.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/score001.png rename to Sklearn/sklearn-doc-zh/master/img/score001.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/score002.png b/Sklearn/sklearn-doc-zh/master/img/score002.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/score002.png rename to Sklearn/sklearn-doc-zh/master/img/score002.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/score003.png b/Sklearn/sklearn-doc-zh/master/img/score003.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/score003.png rename to Sklearn/sklearn-doc-zh/master/img/score003.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/score004.png b/Sklearn/sklearn-doc-zh/master/img/score004.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/score004.png rename to Sklearn/sklearn-doc-zh/master/img/score004.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/score005.png 
b/Sklearn/sklearn-doc-zh/master/img/score005.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/score005.png rename to Sklearn/sklearn-doc-zh/master/img/score005.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/score006.png b/Sklearn/sklearn-doc-zh/master/img/score006.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/score006.png rename to Sklearn/sklearn-doc-zh/master/img/score006.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/score007.png b/Sklearn/sklearn-doc-zh/master/img/score007.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/score007.png rename to Sklearn/sklearn-doc-zh/master/img/score007.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/score008.png b/Sklearn/sklearn-doc-zh/master/img/score008.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/score008.png rename to Sklearn/sklearn-doc-zh/master/img/score008.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/score009.png b/Sklearn/sklearn-doc-zh/master/img/score009.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/score009.png rename to Sklearn/sklearn-doc-zh/master/img/score009.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0021.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0021.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0021.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0021.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0031.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0031.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0031.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0031.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0041.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0041.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0041.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_agglomerative_clustering_0041.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_anomaly_comparison_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_anomaly_comparison_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_anomaly_comparison_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_anomaly_comparison_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_coin_ward_segmentation_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_coin_ward_segmentation_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_coin_ward_segmentation_0011.png rename to 
Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_coin_ward_segmentation_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0041.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0041.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0041.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0041.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0051.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0051.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0051.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0051.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0061.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0061.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0061.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0061.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0071.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0071.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0071.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0071.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0081.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0081.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0081.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0081.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0091.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0091.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0091.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_cv_indices_0091.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0111.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0111.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0111.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0111.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0121.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0121.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0121.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0121.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0131.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0131.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0131.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0131.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0141.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0141.png similarity index 100% rename from 
Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0141.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_faces_decomposition_0141.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lda_qda_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lda_qda_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lda_qda_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lda_qda_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_linkage_comparison_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_linkage_comparison_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_linkage_comparison_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_linkage_comparison_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lof_novelty_detection_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lof_novelty_detection_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lof_novelty_detection_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lof_novelty_detection_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lof_outlier_detection_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lof_outlier_detection_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lof_outlier_detection_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_lof_outlier_detection_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_map_data_to_normal_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_map_data_to_normal_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_map_data_to_normal_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_map_data_to_normal_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_mlp_alpha_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_mlp_alpha_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_mlp_alpha_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_mlp_alpha_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_multioutput_face_completion_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_multioutput_face_completion_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_multioutput_face_completion_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_multioutput_face_completion_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_classification_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_classification_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_classification_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_classification_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_classification_0021.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_classification_0021.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_classification_0021.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_classification_0021.png diff --git 
a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0021.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0021.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0021.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0021.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0031.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0031.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0031.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_nca_dim_reduction_0031.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_optics_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_optics_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_optics_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_optics_0011.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_partial_dependence_0021.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_partial_dependence_0021.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_partial_dependence_0021.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_partial_dependence_0021.png diff --git a/Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_voting_regressor_0011.png b/Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_voting_regressor_0011.png similarity index 100% rename from Python/sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_voting_regressor_0011.png rename to Sklearn/sklearn-doc-zh/master/img/sphx_glr_plot_voting_regressor_0011.png diff --git a/Tensorflow教程/TensorFlow-Code Framework.md b/Tensorflow/TensorFlow1.0/TensorFlow-Code Framework.md similarity index 100% rename from Tensorflow教程/TensorFlow-Code Framework.md rename to Tensorflow/TensorFlow1.0/TensorFlow-Code Framework.md diff --git a/Tensorflow教程/TensorFlow-Dateset.md b/Tensorflow/TensorFlow1.0/TensorFlow-Dateset.md similarity index 100% rename from Tensorflow教程/TensorFlow-Dateset.md rename to Tensorflow/TensorFlow1.0/TensorFlow-Dateset.md diff --git a/Tensorflow教程/TensorFlow-IO.md b/Tensorflow/TensorFlow1.0/TensorFlow-IO.md similarity index 100% rename from Tensorflow教程/TensorFlow-IO.md rename to Tensorflow/TensorFlow1.0/TensorFlow-IO.md diff --git a/Tensorflow教程/TensorFlow-OP(控制).md b/Tensorflow/TensorFlow1.0/TensorFlow-OP(控制).md similarity index 100% rename from Tensorflow教程/TensorFlow-OP(控制).md rename to Tensorflow/TensorFlow1.0/TensorFlow-OP(控制).md diff --git a/Tensorflow教程/TensorFlow-OP(计算).md b/Tensorflow/TensorFlow1.0/TensorFlow-OP(计算).md similarity index 100% rename from Tensorflow教程/TensorFlow-OP(计算).md rename to Tensorflow/TensorFlow1.0/TensorFlow-OP(计算).md diff --git a/Tensorflow教程/TensorFlow-Optimizer.md b/Tensorflow/TensorFlow1.0/TensorFlow-Optimizer.md similarity index 100% rename from Tensorflow教程/TensorFlow-Optimizer.md rename to Tensorflow/TensorFlow1.0/TensorFlow-Optimizer.md diff --git 
a/Tensorflow教程/TensorFlow-Queue & Thread.md b/Tensorflow/TensorFlow1.0/TensorFlow-Queue & Thread.md similarity index 100% rename from Tensorflow教程/TensorFlow-Queue & Thread.md rename to Tensorflow/TensorFlow1.0/TensorFlow-Queue & Thread.md diff --git a/Tensorflow教程/TensorFlow-Summary&Tensorboard.md b/Tensorflow/TensorFlow1.0/TensorFlow-Summary&Tensorboard.md similarity index 100% rename from Tensorflow教程/TensorFlow-Summary&Tensorboard.md rename to Tensorflow/TensorFlow1.0/TensorFlow-Summary&Tensorboard.md diff --git a/Tensorflow教程/TensorFlow-Variable &Constant & Random.md b/Tensorflow/TensorFlow1.0/TensorFlow-Variable &Constant & Random.md similarity index 100% rename from Tensorflow教程/TensorFlow-Variable &Constant & Random.md rename to Tensorflow/TensorFlow1.0/TensorFlow-Variable &Constant & Random.md diff --git a/Tensorflow教程/TensorFlow-code Bazel.md b/Tensorflow/TensorFlow1.0/TensorFlow-code Bazel.md similarity index 100% rename from Tensorflow教程/TensorFlow-code Bazel.md rename to Tensorflow/TensorFlow1.0/TensorFlow-code Bazel.md diff --git a/Tensorflow教程/TensorFlow-code IO.md b/Tensorflow/TensorFlow1.0/TensorFlow-code IO.md similarity index 100% rename from Tensorflow教程/TensorFlow-code IO.md rename to Tensorflow/TensorFlow1.0/TensorFlow-code IO.md diff --git a/Tensorflow教程/TensorFlow-code Swig.md b/Tensorflow/TensorFlow1.0/TensorFlow-code Swig.md similarity index 100% rename from Tensorflow教程/TensorFlow-code Swig.md rename to Tensorflow/TensorFlow1.0/TensorFlow-code Swig.md diff --git a/Tensorflow教程/TensorFlow-code architecture.md b/Tensorflow/TensorFlow1.0/TensorFlow-code architecture.md similarity index 100% rename from Tensorflow教程/TensorFlow-code architecture.md rename to Tensorflow/TensorFlow1.0/TensorFlow-code architecture.md diff --git a/Tensorflow教程/TensorFlow-code kernels.md b/Tensorflow/TensorFlow1.0/TensorFlow-code kernels.md similarity index 100% rename from Tensorflow教程/TensorFlow-code kernels.md rename to Tensorflow/TensorFlow1.0/TensorFlow-code kernels.md diff --git a/Tensorflow教程/TensorFlow-自定义IO.md b/Tensorflow/TensorFlow1.0/TensorFlow-自定义IO.md similarity index 100% rename from Tensorflow教程/TensorFlow-自定义IO.md rename to Tensorflow/TensorFlow1.0/TensorFlow-自定义IO.md diff --git a/Tensorflow教程/TensorFlow概述.md b/Tensorflow/TensorFlow1.0/TensorFlow概述.md similarity index 100% rename from Tensorflow教程/TensorFlow概述.md rename to Tensorflow/TensorFlow1.0/TensorFlow概述.md diff --git a/Tensorflow教程/tensorflow安装常见问题.md b/Tensorflow/TensorFlow1.0/tensorflow安装常见问题.md similarity index 100% rename from Tensorflow教程/tensorflow安装常见问题.md rename to Tensorflow/TensorFlow1.0/tensorflow安装常见问题.md diff --git a/Tensorflow教程/编译失败记录.md b/Tensorflow/TensorFlow1.0/编译失败记录.md similarity index 100% rename from Tensorflow教程/编译失败记录.md rename to Tensorflow/TensorFlow1.0/编译失败记录.md diff --git a/Tensorflow教程/说明.md b/Tensorflow/TensorFlow1.0/说明.md similarity index 100% rename from Tensorflow教程/说明.md rename to Tensorflow/TensorFlow1.0/说明.md diff --git a/Tensorflow/TensorFlow2.0/002.md b/Tensorflow/TensorFlow2.0/002.md new file mode 100644 index 00000000..194ca390 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/002.md @@ -0,0 +1,69 @@
+# TensorFlow 2.0 quickstart for beginners
+
+> Original: [https://tensorflow.google.cn/tutorials/quickstart/beginner](https://tensorflow.google.cn/tutorials/quickstart/beginner)
+
+**Note:** Our TensorFlow community has translated these documents. Because community translations are best-effort, there is no guarantee that they are accurate or that they reflect the latest [official English documentation](https://tensorflow.google.cn/?hl=en). If you have suggestions for improving this translation, please submit a pull request to the [tensorflow/docs](https://github.com/tensorflow/docs) GitHub repository. To volunteer to write or review translations, please join the [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn).
+
+This is a [Google Colaboratory](https://colab.research.google.com/notebooks/welcome.ipynb) notebook file. Python programs run directly in the browser, which is a great way to learn TensorFlow. To follow this tutorial, click the button at the top of this page and run the notebook in Google Colab.
+
+1. In Colab, connect to a Python runtime: at the top right of the menu bar, select *CONNECT*.
+2. Run all the code cells: select *Runtime* > *Run all*.
+
+Download and install the TensorFlow 2.0 beta package, then import TensorFlow into your program:
+
+```py
+# Import TensorFlow
+
+import tensorflow as tf
+```
+
+Load and prepare the [MNIST dataset](http://yann.lecun.com/exdb/mnist/), converting the samples from integers to floating-point numbers:
+
+```py
+mnist = tf.keras.datasets.mnist
+
+(x_train, y_train), (x_test, y_test) = mnist.load_data()
+x_train, x_test = x_train / 255.0, x_test / 255.0
+```
+
+Build the [`tf.keras.Sequential`](https://tensorflow.google.cn/api_docs/python/tf/keras/Sequential) model by stacking layers, and choose an optimizer and loss function for training:
+
+```py
+model = tf.keras.models.Sequential([
+    tf.keras.layers.Flatten(input_shape=(28, 28)),
+    tf.keras.layers.Dense(128, activation='relu'),
+    tf.keras.layers.Dropout(0.2),
+    tf.keras.layers.Dense(10, activation='softmax')
+])
+
+model.compile(optimizer='adam',
+              loss='sparse_categorical_crossentropy',
+              metrics=['accuracy'])
+```
+
+Train and evaluate the model:
+
+```py
+model.fit(x_train, y_train, epochs=5)
+
+model.evaluate(x_test, y_test, verbose=2)
+```
+
+```py
+Epoch 1/5
+1875/1875 [==============================] - 3s 2ms/step - loss: 0.2962 - accuracy: 0.9155
+Epoch 2/5
+1875/1875 [==============================] - 3s 2ms/step - loss: 0.1420 - accuracy: 0.9581
+Epoch 3/5
+1875/1875 [==============================] - 3s 2ms/step - loss: 0.1064 - accuracy: 0.9672
+Epoch 4/5
+1875/1875 [==============================] - 3s 2ms/step - loss: 0.0885 - accuracy: 0.9730
+Epoch 5/5
+1875/1875 [==============================] - 3s 2ms/step - loss: 0.0749 - accuracy: 0.9765
+313/313 - 0s - loss: 0.0748 - accuracy: 0.9778
+
+[0.07484959065914154, 0.9778000116348267]
+
+```
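+Not part of the original tutorial, but as a quick sanity check the trained model can also be queried directly. A minimal sketch, assuming only the `model`, `x_test`, and `y_test` objects defined above:
+
+```py
+import numpy as np
+
+# Minimal sketch (our addition): the last layer uses a softmax
+# activation, so model.predict already returns class probabilities.
+probs = model.predict(x_test[:5])
+
+# Compare the most probable digit for each image with its true label.
+print(np.argmax(probs, axis=1))
+print(y_test[:5])
+```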
+The image classifier is now trained to about 98% accuracy on this dataset. To learn more, read the [TensorFlow tutorials](https://tensorflow.google.cn/tutorials/).
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/003.md b/Tensorflow/TensorFlow2.0/003.md new file mode 100644 index 00000000..d52949a4 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/003.md @@ -0,0 +1,148 @@
+# TensorFlow 2.0 quickstart for experts
+
+> Original: [https://tensorflow.google.cn/tutorials/quickstart/advanced](https://tensorflow.google.cn/tutorials/quickstart/advanced)
+
+**Note:** Our TensorFlow community has translated these documents. Because community translations are best-effort, there is no guarantee that they are accurate or that they reflect the latest [official English documentation](https://tensorflow.google.cn/?hl=en). If you have suggestions for improving this translation, please submit a pull request to the [tensorflow/docs](https://github.com/tensorflow/docs) GitHub repository. To volunteer to write or review translations, please join the [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn).
+
+This is a [Google Colaboratory](https://colab.research.google.com/notebooks/welcome.ipynb) notebook file. Python programs run directly in the browser, which is a great way to learn and use TensorFlow. To follow this tutorial, click the button at the top of this page and run the notebook in Google Colab.
+
+1. In Colab, connect to a Python runtime: at the top right of the menu bar, select *CONNECT*.
+2. Run all the notebook code cells: select *Runtime* > *Run all*.
+
+Download and install the TensorFlow 2.0 beta package:
+
+Import TensorFlow into your program:
+
+```py
+import tensorflow as tf
+
+from tensorflow.keras.layers import Dense, Flatten, Conv2D
+from tensorflow.keras import Model
+```
+
+Load and prepare the [MNIST dataset](http://yann.lecun.com/exdb/mnist/).
+
+```py
+mnist = tf.keras.datasets.mnist
+
+(x_train, y_train), (x_test, y_test) = mnist.load_data()
+x_train, x_test = x_train / 255.0, x_test / 255.0
+
+# Add a channels dimension
+x_train = x_train[..., tf.newaxis]
+x_test = x_test[..., tf.newaxis]
+```
+
+Use [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) to batch and shuffle the dataset:
+
+```py
+train_ds = tf.data.Dataset.from_tensor_slices(
+    (x_train, y_train)).shuffle(10000).batch(32)
+test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
+```
+
+Build the [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) model using the Keras [model subclassing API](https://tensorflow.google.cn/guide/keras#model_subclassing):
+
+```py
+class MyModel(Model):
+    def __init__(self):
+        super(MyModel, self).__init__()
+        self.conv1 = Conv2D(32, 3, activation='relu')
+        self.flatten = Flatten()
+        self.d1 = Dense(128, activation='relu')
+        self.d2 = Dense(10, activation='softmax')
+
+    def call(self, x):
+        x = self.conv1(x)
+        x = self.flatten(x)
+        x = self.d1(x)
+        return self.d2(x)
+
+model = MyModel()
+```
+
+Choose an optimizer and loss function for training:
+
+```py
+loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
+
+optimizer = tf.keras.optimizers.Adam()
+```
+
+Select metrics to measure the loss and the accuracy of the model. These metrics accumulate values over the epochs and then print the overall result.
+
+```py
+train_loss = tf.keras.metrics.Mean(name='train_loss')
+train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
+
+test_loss = tf.keras.metrics.Mean(name='test_loss')
+test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
+```
+
+Use [`tf.GradientTape`](https://tensorflow.google.cn/api_docs/python/tf/GradientTape) to train the model:
+
+```py
+@tf.function
+def train_step(images, labels):
+    with tf.GradientTape() as tape:
+        predictions = model(images)
+        loss = loss_object(labels, predictions)
+    gradients = tape.gradient(loss, model.trainable_variables)
+    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
+
+    train_loss(loss)
+    train_accuracy(labels, predictions)
+```
+
+Test the model:
+
+```py
+@tf.function
+def test_step(images, labels):
+    predictions = model(images)
+    t_loss = loss_object(labels, predictions)
+
+    test_loss(t_loss)
+    test_accuracy(labels, predictions)
+```
+
+```py
+EPOCHS = 5
+
+for epoch in range(EPOCHS):
+    # Reset the metrics at the start of the next epoch
+    train_loss.reset_states()
+    train_accuracy.reset_states()
+    test_loss.reset_states()
+    test_accuracy.reset_states()
+
+    for images, labels in train_ds:
+        train_step(images, labels)
+
+    for test_images, test_labels in test_ds:
+        test_step(test_images, test_labels)
+
+    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
+    print(template.format(epoch+1,
+                          train_loss.result(),
+                          train_accuracy.result()*100,
+                          test_loss.result(),
+                          test_accuracy.result()*100))
+```
+
+```py
+WARNING:tensorflow:Layer my_model is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx.
+
+If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.
+
+To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.
+
+Epoch 1, Loss: 0.13825324177742004, Accuracy: 95.89166259765625, Test Loss: 0.07461485266685486, Test Accuracy: 97.47999572753906
+Epoch 2, Loss: 0.04554400220513344, Accuracy: 98.61666870117188, Test Loss: 0.05126383528113365, Test Accuracy: 98.29000091552734
+Epoch 3, Loss: 0.024927066639065742, Accuracy: 99.18500518798828, Test Loss: 0.05301696062088013, Test Accuracy: 98.30999755859375
+Epoch 4, Loss: 0.014068767428398132, Accuracy: 99.52832794189453, Test Loss: 0.051672786474227905, Test Accuracy: 98.58000183105469
+Epoch 5, Loss: 0.009344187565147877, Accuracy: 99.69166564941406, Test Loss: 0.06102905049920082, Test Accuracy: 98.25
+
+```
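+Not part of the original tutorial: once trained, the learned parameters can be checkpointed so training need not be repeated. A minimal sketch (the checkpoint path is an arbitrary example):
+
+```py
+# Minimal sketch (our addition): save the trained weights to disk.
+model.save_weights('./checkpoints/mnist_expert')
+
+# Restore them into a fresh instance of the same architecture. For a
+# subclassed model the restore completes once the variables exist,
+# e.g. after the first forward pass builds the layers.
+restored = MyModel()
+restored.load_weights('./checkpoints/mnist_expert')
+```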
+The image classifier is now trained to nearly 98% accuracy on this dataset. To learn more, read the [TensorFlow tutorials](https://tensorflow.google.cn/tutorials/keras).
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/004.md b/Tensorflow/TensorFlow2.0/004.md new file mode 100644 index 00000000..0d0b15fa --- /dev/null +++ b/Tensorflow/TensorFlow2.0/004.md @@ -0,0 +1 @@
+# Beginner
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/005.md b/Tensorflow/TensorFlow2.0/005.md new file mode 100644 index 00000000..fc51e00a --- /dev/null +++ b/Tensorflow/TensorFlow2.0/005.md @@ -0,0 +1 @@
+# Keras machine learning basics
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/006.md b/Tensorflow/TensorFlow2.0/006.md new file mode 100644 index 00000000..1c051ab2 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/006.md @@ -0,0 +1,489 @@
+# Basic classification: Classify images of clothing
+
+> Original: [https://tensorflow.google.cn/tutorials/keras/classification](https://tensorflow.google.cn/tutorials/keras/classification)
+
+This guide trains a neural network model to classify images of clothing, such as sneakers and shirts. It's okay if you don't understand all the details; this is a fast-paced overview of a complete TensorFlow program, with the details explained as you go.
+
+This guide uses [tf.keras](https://tensorflow.google.cn/guide/keras), a high-level API for building and training models in TensorFlow.
+
+```py
+# TensorFlow and tf.keras
+import tensorflow as tf
+from tensorflow import keras
+
+# Helper libraries
+import numpy as np
+import matplotlib.pyplot as plt
+
+print(tf.__version__)
+```
+
+```py
+2.3.0
+
+```
+
+## Import the Fashion MNIST dataset
+
+This guide uses the [Fashion MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset, which contains 70,000 grayscale images in 10 categories. The images show individual articles of clothing at low resolution (28x28 pixels), as seen here:
+
+| ![Fashion MNIST sprite](img/8a26efaab988f8c9054ea977baabb45a.png) |
+| **Figure 1.** [Fashion-MNIST samples](https://github.com/zalandoresearch/fashion-mnist) (by Zalando, MIT License).
+ |
+
+Fashion MNIST is intended as a drop-in replacement for the classic [MNIST](http://yann.lecun.com/exdb/mnist/) dataset, often used as the "Hello, World" of machine learning programs for computer vision. The MNIST dataset contains images of handwritten digits (0, 1, 2, etc.) in a format identical to that of the articles of clothing you'll use here.
+
+This guide uses Fashion MNIST for variety, because it's a slightly more challenging problem than regular MNIST. Both datasets are relatively small and are used to verify that an algorithm works as expected. They're good starting points to test and debug code.
+
+In this guide, 60,000 images are used to train the network and 10,000 images to evaluate how accurately the network learned to classify images. You can access Fashion MNIST directly from TensorFlow. Run the following code to import and load the Fashion MNIST data directly from TensorFlow:
+
+```py
+fashion_mnist = keras.datasets.fashion_mnist
+
+(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
+```
+
+Loading the dataset returns four NumPy arrays:
+
+* The `train_images` and `train_labels` arrays are the *training set*, the data the model uses to learn.
+* The model is tested against the *test set*: the `test_images` and `test_labels` arrays.
+
+The images are 28x28 NumPy arrays, with pixel values ranging from 0 to 255. The *labels* are an array of integers, ranging from 0 to 9. These correspond to the *class* of clothing the image represents:
+
+| Label | Class |
+| 0 | T-shirt/top |
+| 1 | Trouser |
+| 2 | Pullover |
+| 3 | Dress |
+| 4 | Coat |
+| 5 | Sandal |
+| 6 | Shirt |
+| 7 | Sneaker |
+| 8 | Bag |
+| 9 | Ankle boot |
+
+Each image is mapped to a single label. Since the *class names* are not included with the dataset, store them here to use later when plotting the images:
+
+```py
+class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
+               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
+```
+
+## Explore the data
+
+Before training the model, let's explore the format of the dataset. The following shows that there are 60,000 images in the training set, each represented as 28 x 28 pixels:
+
+```py
+train_images.shape
+```
+
+```py
+(60000, 28, 28)
+
+```
+
+Likewise, there are 60,000 labels in the training set:
+
+```py
+len(train_labels)
+```
+
+```py
+60000
+
+```
+
+Each label is an integer between 0 and 9:
+
+```py
+train_labels
+```
+
+```py
+array([9, 0, 0, ..., 3, 0, 5], dtype=uint8)
+
+```
+
+There are 10,000 images in the test set. Again, each image is represented as 28 x 28 pixels:
+
+```py
+test_images.shape
+```
+
+```py
+(10000, 28, 28)
+
+```
+
+And the test set contains 10,000 image labels:
+
+```py
+len(test_labels)
+```
+
+```py
+10000
+
+```
+
+## Preprocess the data
+
+The data must be preprocessed before training the network. If you inspect the first image in the training set, you will see that the pixel values fall in the range of 0 to 255:
+
+```py
+plt.figure()
+plt.imshow(train_images[0])
+plt.colorbar()
+plt.grid(False)
+plt.show()
+```
+
+![png](img/07fde30d678eaceba2bf9695ee89c403.png)
+
+Scale these values to a range of 0 to 1 before feeding them to the neural network model. To do so, divide the values by 255. It's important that the *training set* and the *test set* are preprocessed in the same way:
+
+```py
+train_images = train_images / 255.0
+
+test_images = test_images / 255.0
+```
+
+To verify that the data is in the correct format and that you're ready to build and train the network, let's display the first 25 images from the *training set* with the class name below each image.
+
+```py
+plt.figure(figsize=(10,10))
+for i in range(25):
+    plt.subplot(5,5,i+1)
+    plt.xticks([])
+    plt.yticks([])
+    plt.grid(False)
+    plt.imshow(train_images[i], cmap=plt.cm.binary)
+    plt.xlabel(class_names[train_labels[i]])
+plt.show()
+```
+
+![png](img/0fc5058e71e5828192048ef6a6b9a595.png)
+
+## Build the model
+
+Building the neural network requires configuring the layers of the model and then compiling the model.
+
+### Set up the layers
+
+The basic building block of a neural network is the *layer*. Layers extract representations from the data fed into them. Hopefully, these representations are meaningful for the problem at hand.
+
+Most of deep learning consists of chaining together simple layers. Most layers, such as [`tf.keras.layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense), have parameters that are learned during training.
+
+```py
+model = keras.Sequential([
+    keras.layers.Flatten(input_shape=(28, 28)),
+    keras.layers.Dense(128, activation='relu'),
+    keras.layers.Dense(10)
+])
+```
+
+The first layer in this network, [`tf.keras.layers.Flatten`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Flatten), transforms the format of the images from a two-dimensional array (28 x 28 pixels) to a one-dimensional array (28 x 28 = 784 pixels). Think of this layer as unstacking rows of pixels in the image and lining them up. This layer has no parameters to learn; it only reformats the data.
+
+After the pixels are flattened, the network consists of a sequence of two [`tf.keras.layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense) layers. These are densely connected, or fully connected, neural layers. The first `Dense` layer has 128 nodes (or neurons). The second (and last) layer returns a logits array with length 10. Each node contains a score indicating which of the 10 classes the current image belongs to.
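+To make those layer shapes concrete, a quick check (our addition, not part of the original guide) is to print the model summary; it shows `Flatten` mapping each 28 x 28 image to a 784-element vector, followed by the two `Dense` layers:
+
+```py
+# Minimal sketch (our addition): print each layer's output shape and
+# parameter count for the model defined above.
+model.summary()
+```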
| 包 | +| 9 | 短靴 | + +每个图像都会被映射到一个标签。由于数据集不包括*类名称*,请将它们存储在下方,供稍后绘制图像时使用: + +```py +class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', + 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'] +``` + +## 浏览数据 + +在训练模型之前,我们先浏览一下数据集的格式。以下代码显示训练集中有 60,000 个图像,每个图像由 28 x 28 的像素表示: + +```py +train_images.shape +``` + +```py +(60000, 28, 28) + +``` + +同样,训练集中有 60,000 个标签: + +```py +len(train_labels) +``` + +```py +60000 + +``` + +每个标签都是一个 0 到 9 之间的整数: + +```py +train_labels +``` + +```py +array([9, 0, 0, ..., 3, 0, 5], dtype=uint8) + +``` + +测试集中有 10,000 个图像。同样,每个图像都由 28x28 个像素表示: + +```py +test_images.shape +``` + +```py +(10000, 28, 28) + +``` + +测试集包含 10,000 个图像标签: + +```py +len(test_labels) +``` + +```py +10000 + +``` + +## 预处理数据 + +在训练网络之前,必须对数据进行预处理。如果您检查训练集中的第一个图像,您会看到像素值处于 0 到 255 之间: + +```py +plt.figure() +plt.imshow(train_images[0]) +plt.colorbar() +plt.grid(False) +plt.show() +``` + +![png](img/07fde30d678eaceba2bf9695ee89c403.png) + +将这些值缩小至 0 到 1 之间,然后将其馈送到神经网络模型。为此,请将这些值除以 255。请务必以相同的方式对*训练集*和*测试集*进行预处理: + +```py +train_images = train_images / 255.0 + +test_images = test_images / 255.0 +``` + +为了验证数据的格式是否正确,以及您是否已准备好构建和训练网络,让我们显示*训练集*中的前 25 个图像,并在每个图像下方显示类名称。 + +```py +plt.figure(figsize=(10,10)) +for i in range(25): + plt.subplot(5,5,i+1) + plt.xticks([]) + plt.yticks([]) + plt.grid(False) + plt.imshow(train_images[i], cmap=plt.cm.binary) + plt.xlabel(class_names[train_labels[i]]) +plt.show() +``` + +![png](img/0fc5058e71e5828192048ef6a6b9a595.png) + +## 构建模型 + +构建神经网络需要先配置模型的层,然后再编译模型。 + +### 设置层 + +神经网络的基本组成部分是*层*。层会从向其馈送的数据中提取表示形式。希望这些表示形式有助于解决手头上的问题。 + +大多数深度学习都包括将简单的层链接在一起。大多数层(如 [`tf.keras.layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense))都具有在训练期间才会学习的参数。 + +```py +model = keras.Sequential([ + keras.layers.Flatten(input_shape=(28, 28)), + keras.layers.Dense(128, activation='relu'), + keras.layers.Dense(10) +]) +``` + +该网络的第一层 [`tf.keras.layers.Flatten`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Flatten) 将图像格式从二维数组(28 x 28 像素)转换成一维数组(28 x 28 = 784 像素)。将该层视为图像中未堆叠的像素行并将其排列起来。该层没有要学习的参数,它只会重新格式化数据。 + +展平像素后,网络会包括两个 [`tf.keras.layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense) 层的序列。它们是密集连接或全连接神经层。第一个 `Dense` 层有 128 个节点(或神经元)。第二个(也是最后一个)层会返回一个长度为 10 的 logits 数组。每个节点都包含一个得分,用来表示当前图像属于 10 个类中的哪一类。 + +### 编译模型 + +在准备对模型进行训练之前,还需要再对其进行一些设置。以下内容是在模型的*编译*步骤中添加的: + +* *损失函数* - 用于测量模型在训练期间的准确率。您会希望最小化此函数,以便将模型“引导”到正确的方向上。 +* *优化器* - 决定模型如何根据其看到的数据和自身的损失函数进行更新。 +* *指标* - 用于监控训练和测试步骤。以下示例使用了*准确率*,即被正确分类的图像的比率。 + +```py +model.compile(optimizer='adam', + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=['accuracy']) +``` + +## 训练模型 + +训练神经网络模型需要执行以下步骤: + +1. 将训练数据馈送给模型。在本例中,训练数据位于 `train_images` 和 `train_labels` 数组中。 +2. 模型学习将图像和标签关联起来。 +3. 要求模型对测试集(在本例中为 `test_images` 数组)进行预测。 +4. 
验证预测是否与 `test_labels` 数组中的标签相匹配。 + +### 向模型馈送数据 + +要开始训练,请调用 `model.fit` 方法,这样命名是因为该方法会将模型与训练数据进行“拟合”: + +```py +model.fit(train_images, train_labels, epochs=10) +``` + +```py +Epoch 1/10 +1875/1875 [==============================] - 3s 1ms/step - loss: 0.4924 - accuracy: 0.8265 +Epoch 2/10 +1875/1875 [==============================] - 3s 1ms/step - loss: 0.3698 - accuracy: 0.8669 +Epoch 3/10 +1875/1875 [==============================] - 3s 1ms/step - loss: 0.3340 - accuracy: 0.8781 +Epoch 4/10 +1875/1875 [==============================] - 3s 1ms/step - loss: 0.3110 - accuracy: 0.8863 +Epoch 5/10 +1875/1875 [==============================] - 3s 1ms/step - loss: 0.2924 - accuracy: 0.8936 +Epoch 6/10 +1875/1875 [==============================] - 3s 1ms/step - loss: 0.2776 - accuracy: 0.8972 +Epoch 7/10 +1875/1875 [==============================] - 3s 1ms/step - loss: 0.2659 - accuracy: 0.9021 +Epoch 8/10 +1875/1875 [==============================] - 3s 1ms/step - loss: 0.2543 - accuracy: 0.9052 +Epoch 9/10 +1875/1875 [==============================] - 3s 1ms/step - loss: 0.2453 - accuracy: 0.9084 +Epoch 10/10 +1875/1875 [==============================] - 3s 1ms/step - loss: 0.2366 - accuracy: 0.9122 + + + +``` + +在模型训练期间,会显示损失和准确率指标。此模型在训练数据上的准确率达到了 0.91(或 91%)左右。 + +### 评估准确率 + +接下来,比较模型在测试数据集上的表现: + +```py +test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2) + +print('\nTest accuracy:', test_acc) +``` + +```py +313/313 - 0s - loss: 0.3726 - accuracy: 0.8635 + +Test accuracy: 0.8634999990463257 + +``` + +结果表明,模型在测试数据集上的准确率略低于训练数据集。训练准确率和测试准确率之间的差距代表*过拟合*。过拟合是指机器学习模型在新的、以前未曾见过的输入上的表现不如在训练数据上的表现。过拟合的模型会“记住”训练数据集中的噪声和细节,从而对模型在新数据上的表现产生负面影响。有关更多信息,请参阅以下内容: + +* [演示过拟合](https://tensorflow.google.cn/tutorials/keras/overfit_and_underfit#demonstrate_overfitting) +* [避免过拟合的策略](https://tensorflow.google.cn/tutorials/keras/overfit_and_underfit#strategies_to_prevent_overfitting) + +### 进行预测 + +在模型经过训练后,您可以使用它对一些图像进行预测。模型具有线性输出,即 [logits](https://developers.google.cn/machine-learning/glossary#logits)。您可以附加一个 softmax 层,将 logits 转换成更容易理解的概率。 + +```py +probability_model = tf.keras.Sequential([model, + tf.keras.layers.Softmax()]) +``` + +```py +predictions = probability_model.predict(test_images) +``` + +在上例中,模型预测了测试集中每个图像的标签。我们来看看第一个预测结果: + +```py +predictions[0] +``` + +```py +array([6.9982241e-07, 5.5403369e-08, 1.8353174e-07, 1.4761626e-07, + 2.4380807e-07, 1.9273469e-04, 1.8122660e-06, 6.5027133e-02, + 1.7891599e-06, 9.3477517e-01], dtype=float32) + +``` + +预测结果是一个包含 10 个数字的数组。它们代表模型对 10 种不同服装中每种服装的“置信度”。您可以看到哪个标签的置信度值最大: + +```py +np.argmax(predictions[0]) +``` + +```py +9 + +``` + +因此,该模型非常确信这个图像是短靴,或 `class_names[9]`。通过检查测试标签发现这个分类是正确的: + +```py +test_labels[0] +``` + +```py +9 + +``` + +您可以将其绘制成图表,看看模型对于全部 10 个类的预测。 + +```py +def plot_image(i, predictions_array, true_label, img): + predictions_array, true_label, img = predictions_array, true_label[i], img[i] + plt.grid(False) + plt.xticks([]) + plt.yticks([]) + + plt.imshow(img, cmap=plt.cm.binary) + + predicted_label = np.argmax(predictions_array) + if predicted_label == true_label: + color = 'blue' + else: + color = 'red' + + plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label], + 100*np.max(predictions_array), + class_names[true_label]), + color=color) + +def plot_value_array(i, predictions_array, true_label): + predictions_array, true_label = predictions_array, true_label[i] + plt.grid(False) + plt.xticks(range(10)) + plt.yticks([]) + thisplot = plt.bar(range(10), predictions_array, color="#777777") + 
plt.ylim([0, 1]) + predicted_label = np.argmax(predictions_array) + + thisplot[predicted_label].set_color('red') + thisplot[true_label].set_color('blue') +``` + +### 验证预测结果 + +在模型经过训练后,您可以使用它对一些图像进行预测。 + +我们来看看第 0 个图像、预测结果和预测数组。正确的预测标签为蓝色,错误的预测标签为红色。数字表示预测标签的百分比(总计为 100)。 + +```py +i = 0 +plt.figure(figsize=(6,3)) +plt.subplot(1,2,1) +plot_image(i, predictions[i], test_labels, test_images) +plt.subplot(1,2,2) +plot_value_array(i, predictions[i], test_labels) +plt.show() +``` + +![png](img/55d2924ed5a33ffad4b9f727cd335194.png) + +```py +i = 12 +plt.figure(figsize=(6,3)) +plt.subplot(1,2,1) +plot_image(i, predictions[i], test_labels, test_images) +plt.subplot(1,2,2) +plot_value_array(i, predictions[i], test_labels) +plt.show() +``` + +![png](img/0c7474d216a51a2b258a81a689920596.png) + +让我们用模型的预测绘制几张图像。请注意,即使置信度很高,模型也可能出错。 + +```py +# Plot the first X test images, their predicted labels, and the true labels. +# Color correct predictions in blue and incorrect predictions in red. +num_rows = 5 +num_cols = 3 +num_images = num_rows*num_cols +plt.figure(figsize=(2*2*num_cols, 2*num_rows)) +for i in range(num_images): + plt.subplot(num_rows, 2*num_cols, 2*i+1) + plot_image(i, predictions[i], test_labels, test_images) + plt.subplot(num_rows, 2*num_cols, 2*i+2) + plot_value_array(i, predictions[i], test_labels) +plt.tight_layout() +plt.show() +``` + +![png](img/8f40b70083328d6f68f1d2c5821927d1.png) + +## 使用训练好的模型 + +最后,使用训练好的模型对单个图像进行预测。 + +```py +# Grab an image from the test dataset. +img = test_images[1] + +print(img.shape) +``` + +```py +(28, 28) + +``` + +[`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) 模型经过了优化,可同时对一个*批*或一组样本进行预测。因此,即便您只使用一个图像,您也需要将其添加到列表中: + +```py +# Add the image to a batch where it's the only member. +img = (np.expand_dims(img,0)) + +print(img.shape) +``` + +```py +(1, 28, 28) + +``` + +现在预测这个图像的正确标签: + +```py +predictions_single = probability_model.predict(img) + +print(predictions_single) +``` + +```py +[[1.0675135e-05 2.4023437e-12 9.9772269e-01 1.3299730e-09 1.2968916e-03 + 8.7469149e-14 9.6970733e-04 5.4669354e-19 2.4514609e-11 1.8405429e-12]] + +``` + +```py +plot_value_array(1, predictions_single[0], test_labels) +_ = plt.xticks(range(10), class_names, rotation=45) +``` + +![png](img/35aea8e2802acf908920febe4776fbf0.png) + +[`keras.Model.predict`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#predict) 会返回一组列表,每个列表对应一批数据中的每个图像。在批次中获取对我们(唯一)图像的预测: + +```py +np.argmax(predictions_single[0]) +``` + +```py +2 + +``` + +该模型会按照预期预测标签。 + +```py +# MIT License +# +# Copyright (c) 2017 François Chollet +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/007.md b/Tensorflow/TensorFlow2.0/007.md new file mode 100644 index 00000000..a40bfe89 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/007.md @@ -0,0 +1,467 @@ +# 电影评论文本分类 + +> 原文:[https://tensorflow.google.cn/tutorials/keras/text_classification](https://tensorflow.google.cn/tutorials/keras/text_classification) + +**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。 + +此笔记本(notebook)使用评论文本将影评分为*积极(positive)*或*消极(nagetive)*两类。这是一个*二元(binary)*或者二分类问题,一种重要且应用广泛的机器学习问题。 + +我们将使用来源于[网络电影数据库(Internet Movie Database)](https://www.imdb.com/)的 [IMDB 数据集(IMDB dataset)](https://tensorflow.google.cn/api_docs/python/tf/keras/datasets/imdb),其包含 50,000 条影评文本。从该数据集切割出的 25,000 条评论用作训练,另外 25,000 条用作测试。训练集与测试集是*平衡的(balanced)*,意味着它们包含相等数量的积极和消极评论。 + +此笔记本(notebook)使用了 [tf.keras](https://tensorflow.google.cn/guide/keras),它是一个 Tensorflow 中用于构建和训练模型的高级 API。有关使用 [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) 进行文本分类的更高级教程,请参阅 [MLCC 文本分类指南(MLCC Text Classification Guide)](https://developers.google.cn/machine-learning/guides/text-classification/)。 + +```py +import tensorflow as tf +from tensorflow import keras + +import numpy as np + +print(tf.__version__) +``` + +```py +2.3.0 + +``` + +## 下载 IMDB 数据集 + +IMDB 数据集已经打包在 Tensorflow 中。该数据集已经经过预处理,评论(单词序列)已经被转换为整数序列,其中每个整数表示字典中的特定单词。 + +以下代码将下载 IMDB 数据集到您的机器上(如果您已经下载过将从缓存中复制): + +```py +imdb = keras.datasets.imdb + +(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000) +``` + +```py +Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz +17465344/17464789 [==============================] - 0s 0us/step + +``` + +参数 `num_words=10000` 保留了训练数据中最常出现的 10,000 个单词。为了保持数据规模的可管理性,低频词将被丢弃。 + +## 探索数据 + +让我们花一点时间来了解数据格式。该数据集是经过预处理的:每个样本都是一个表示影评中词汇的整数数组。每个标签都是一个值为 0 或 1 的整数值,其中 0 代表消极评论,1 代表积极评论。 + +```py +print("Training entries: {}, labels: {}".format(len(train_data), len(train_labels))) +``` + +```py +Training entries: 25000, labels: 25000 + +``` + +评论文本被转换为整数值,其中每个整数代表词典中的一个单词。首条评论是这样的: + +```py +print(train_data[0]) +``` + +```py +[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 
38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
+
+```
+
+电影评论可能具有不同的长度。以下代码显示了第一条和第二条评论中的单词数量。由于神经网络的输入必须是统一的长度,我们稍后需要解决这个问题。
+
+```py
+len(train_data[0]), len(train_data[1])
+```
+
+```py
+(218, 189)
+
+```
+
+### 将整数转换回单词
+
+了解如何将整数转换回文本对您可能是有帮助的。这里我们将创建一个辅助函数来查询一个包含了整数到字符串映射的字典对象:
+
+```py
+# 一个映射单词到整数索引的词典
+word_index = imdb.get_word_index()
+
+# 保留前几个索引作为特殊标记
+word_index = {k:(v+3) for k,v in word_index.items()}
+word_index["<PAD>"] = 0
+word_index["<START>"] = 1
+word_index["<UNK>"] = 2  # unknown
+word_index["<UNUSED>"] = 3
+
+reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
+
+def decode_review(text):
+    return ' '.join([reverse_word_index.get(i, '?') for i in text])
+```
+
+```py
+Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
+1646592/1641221 [==============================] - 0s 0us/step
+
+```
+
+现在我们可以使用 `decode_review` 函数来显示首条评论的文本:
+
+```py
+decode_review(train_data[0])
+```
+
+```py
+"<START> this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert <UNK> is an amazing actor and now the same being director <UNK> father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for <UNK> and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also <UNK> to the two little boy's that played the <UNK> of norman and paul they were just brilliant children are often left out of the <UNK> list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all"
+
+```
+
+## 准备数据
+
+影评(即整数数组)必须在输入神经网络之前转换为张量。这种转换可以通过以下两种方式来完成:
+
+* 将数组转换为表示单词出现与否的由 0 和 1 组成的向量,类似于 one-hot 编码。例如,序列 [3, 5] 将转换为一个 10,000 维的向量,该向量除了索引为 3 和 5 的位置是 1 以外,其他都为 0。然后,将其作为网络的首层,即一个可以处理浮点型向量数据的稠密层。不过,这种方法需要大量的内存,需要一个大小为 `num_words * num_reviews` 的矩阵。
+
+* 或者,我们可以填充数组来保证输入数据具有相同的长度,然后创建一个大小为 `max_length * num_reviews` 的整型张量。我们可以使用能够处理此形状数据的嵌入层作为网络中的第一层。
+
+在本教程中,我们将使用第二种方法。
+
+由于电影评论长度必须相同,我们将使用 [pad_sequences](https://tensorflow.google.cn/api_docs/python/tf/keras/preprocessing/sequence/pad_sequences) 函数来使长度标准化:
+
+```py
+train_data = keras.preprocessing.sequence.pad_sequences(train_data,
+                                                        value=word_index["<PAD>"],
+                                                        padding='post',
+                                                        maxlen=256)
+
+test_data = keras.preprocessing.sequence.pad_sequences(test_data,
+                                                       value=word_index["<PAD>"],
+                                                       padding='post',
+                                                       maxlen=256)
+```
+
+现在让我们看下样本的长度:
+
+```py
+len(train_data[0]), len(train_data[1])
+```
+
+```py
+(256, 256)
+
+```
+
+并检查一下首条评论(当前已经填充):
+
+```py
+print(train_data[0])
+```
+
+```py
+[   1   14   22   16   43  530  973 1622 1385   65  458 4468   66 3941
+    4  173   36  256    5   25  100   43  838  112   50  670    2    9
+   35  480  284    5  150    4  172  112  167    2  336  385   39    4
+  172 4536 1111   17  546   38   13  447    4  192   50   16    6  147
+ 2025   19   14   22    4 1920 4613  469    4   22   71   87   12   16
+   43  530   38   76   15   13 1247    4   22   17  515   17   12   16
+  626   18    2    5   62  386   12    8  316    8  106    5    4 2223
+ 5244   16  480   66 3785   33    4  130   12   16   38  619    5   25
+  124   51   36  135   48   25 1415   33    6   22   12  215   28   77
+   52    5   14  407   16   82    2    8    4  107  117 5952   15  256
+    4    2    7 
3766 5 723 36 71 43 530 476 26 400 317 + 46 7 4 2 1029 13 104 88 4 381 15 297 98 32 + 2071 56 26 141 6 194 7486 18 4 226 22 21 134 476 + 26 480 5 144 30 5535 18 51 36 28 224 92 25 104 + 4 226 65 16 38 1334 88 12 16 283 5 16 4472 113 + 103 32 15 16 5345 19 178 32 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0] + +``` + +## 构建模型 + +神经网络由堆叠的层来构建,这需要从两个主要方面来进行体系结构决策: + +* 模型里有多少层? +* 每个层里有多少*隐层单元(hidden units)*? + +在此样本中,输入数据包含一个单词索引的数组。要预测的标签为 0 或 1。让我们来为该问题构建一个模型: + +```py +# 输入形状是用于电影评论的词汇数目(10,000 词) +vocab_size = 10000 + +model = keras.Sequential() +model.add(keras.layers.Embedding(vocab_size, 16)) +model.add(keras.layers.GlobalAveragePooling1D()) +model.add(keras.layers.Dense(16, activation='relu')) +model.add(keras.layers.Dense(1, activation='sigmoid')) + +model.summary() +``` + +```py +Model: "sequential" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +embedding (Embedding) (None, None, 16) 160000 +_________________________________________________________________ +global_average_pooling1d (Gl (None, 16) 0 +_________________________________________________________________ +dense (Dense) (None, 16) 272 +_________________________________________________________________ +dense_1 (Dense) (None, 1) 17 +================================================================= +Total params: 160,289 +Trainable params: 160,289 +Non-trainable params: 0 +_________________________________________________________________ + +``` + +层按顺序堆叠以构建分类器: + +1. 第一层是`嵌入(Embedding)`层。该层采用整数编码的词汇表,并查找每个词索引的嵌入向量(embedding vector)。这些向量是通过模型训练学习到的。向量向输出数组增加了一个维度。得到的维度为:`(batch, sequence, embedding)`。 +2. 接下来,`GlobalAveragePooling1D` 将通过对序列维度求平均值来为每个样本返回一个定长输出向量。这允许模型以尽可能最简单的方式处理变长输入。 +3. 该定长输出向量通过一个有 16 个隐层单元的全连接(`Dense`)层传输。 +4. 
最后一层与单个输出结点密集连接。使用 `Sigmoid` 激活函数,其函数值为介于 0 与 1 之间的浮点数,表示概率或置信度。 + +### 隐层单元 + +上述模型在输入输出之间有两个中间层或“隐藏层”。输出(单元,结点或神经元)的数量即为层表示空间的维度。换句话说,是学习内部表示时网络所允许的自由度。 + +如果模型具有更多的隐层单元(更高维度的表示空间)和/或更多层,则可以学习到更复杂的表示。但是,这会使网络的计算成本更高,并且可能导致学习到不需要的模式——一些能够在训练数据上而不是测试数据上改善性能的模式。这被称为*过拟合(overfitting)*,我们稍后会对此进行探究。 + +### 损失函数与优化器 + +一个模型需要损失函数和优化器来进行训练。由于这是一个二分类问题且模型输出概率值(一个使用 sigmoid 激活函数的单一单元层),我们将使用 `binary_crossentropy` 损失函数。 + +这不是损失函数的唯一选择,例如,您可以选择 `mean_squared_error` 。但是,一般来说 `binary_crossentropy` 更适合处理概率——它能够度量概率分布之间的“距离”,或者在我们的示例中,指的是度量 ground-truth 分布与预测值之间的“距离”。 + +稍后,当我们研究回归问题(例如,预测房价)时,我们将介绍如何使用另一种叫做均方误差的损失函数。 + +现在,配置模型来使用优化器和损失函数: + +```py +model.compile(optimizer='adam', + loss='binary_crossentropy', + metrics=['accuracy']) +``` + +## 创建一个验证集 + +在训练时,我们想要检查模型在未见过的数据上的准确率(accuracy)。通过从原始训练数据中分离 10,000 个样本来创建一个*验证集*。(为什么现在不使用测试集?我们的目标是只使用训练数据来开发和调整模型,然后只使用一次测试数据来评估准确率(accuracy))。 + +```py +x_val = train_data[:10000] +partial_x_train = train_data[10000:] + +y_val = train_labels[:10000] +partial_y_train = train_labels[10000:] +``` + +## 训练模型 + +以 512 个样本的 mini-batch 大小迭代 40 个 epoch 来训练模型。这是指对 `x_train` 和 `y_train` 张量中所有样本的的 40 次迭代。在训练过程中,监测来自验证集的 10,000 个样本上的损失值(loss)和准确率(accuracy): + +```py +history = model.fit(partial_x_train, + partial_y_train, + epochs=40, + batch_size=512, + validation_data=(x_val, y_val), + verbose=1) +``` + +```py +Epoch 1/40 +30/30 [==============================] - 1s 18ms/step - loss: 0.6924 - accuracy: 0.5173 - val_loss: 0.6911 - val_accuracy: 0.5699 +Epoch 2/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.6886 - accuracy: 0.5734 - val_loss: 0.6863 - val_accuracy: 0.6309 +Epoch 3/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.6810 - accuracy: 0.6439 - val_loss: 0.6766 - val_accuracy: 0.7367 +Epoch 4/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.6667 - accuracy: 0.7411 - val_loss: 0.6595 - val_accuracy: 0.7328 +Epoch 5/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.6431 - accuracy: 0.7602 - val_loss: 0.6327 - val_accuracy: 0.7677 +Epoch 6/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.6086 - accuracy: 0.7896 - val_loss: 0.5968 - val_accuracy: 0.7894 +Epoch 7/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.5654 - accuracy: 0.8147 - val_loss: 0.5550 - val_accuracy: 0.8102 +Epoch 8/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.5180 - accuracy: 0.8337 - val_loss: 0.5115 - val_accuracy: 0.8230 +Epoch 9/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.4709 - accuracy: 0.8535 - val_loss: 0.4705 - val_accuracy: 0.8356 +Epoch 10/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.4269 - accuracy: 0.8655 - val_loss: 0.4342 - val_accuracy: 0.8454 +Epoch 11/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.3887 - accuracy: 0.8763 - val_loss: 0.4040 - val_accuracy: 0.8545 +Epoch 12/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.3566 - accuracy: 0.8843 - val_loss: 0.3799 - val_accuracy: 0.8598 +Epoch 13/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.3299 - accuracy: 0.8911 - val_loss: 0.3608 - val_accuracy: 0.8660 +Epoch 14/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.3070 - accuracy: 0.8975 - val_loss: 0.3458 - val_accuracy: 0.8702 +Epoch 15/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.2876 - accuracy: 0.9021 - val_loss: 0.3334 - val_accuracy: 0.8727 +Epoch 16/40 +30/30 
[==============================] - 0s 10ms/step - loss: 0.2708 - accuracy: 0.9073 - val_loss: 0.3234 - val_accuracy: 0.8753 +Epoch 17/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.2558 - accuracy: 0.9130 - val_loss: 0.3154 - val_accuracy: 0.8773 +Epoch 18/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.2428 - accuracy: 0.9175 - val_loss: 0.3102 - val_accuracy: 0.8782 +Epoch 19/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.2308 - accuracy: 0.9214 - val_loss: 0.3032 - val_accuracy: 0.8812 +Epoch 20/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.2194 - accuracy: 0.9246 - val_loss: 0.2988 - val_accuracy: 0.8818 +Epoch 21/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.2093 - accuracy: 0.9280 - val_loss: 0.2956 - val_accuracy: 0.8821 +Epoch 22/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.2000 - accuracy: 0.9321 - val_loss: 0.2921 - val_accuracy: 0.8838 +Epoch 23/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1912 - accuracy: 0.9357 - val_loss: 0.2901 - val_accuracy: 0.8846 +Epoch 24/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1829 - accuracy: 0.9396 - val_loss: 0.2885 - val_accuracy: 0.8847 +Epoch 25/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1756 - accuracy: 0.9439 - val_loss: 0.2874 - val_accuracy: 0.8844 +Epoch 26/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1681 - accuracy: 0.9465 - val_loss: 0.2864 - val_accuracy: 0.8855 +Epoch 27/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1617 - accuracy: 0.9481 - val_loss: 0.2867 - val_accuracy: 0.8844 +Epoch 28/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1548 - accuracy: 0.9519 - val_loss: 0.2865 - val_accuracy: 0.8861 +Epoch 29/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1485 - accuracy: 0.9543 - val_loss: 0.2872 - val_accuracy: 0.8849 +Epoch 30/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1426 - accuracy: 0.9561 - val_loss: 0.2881 - val_accuracy: 0.8854 +Epoch 31/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1372 - accuracy: 0.9587 - val_loss: 0.2895 - val_accuracy: 0.8851 +Epoch 32/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1320 - accuracy: 0.9609 - val_loss: 0.2899 - val_accuracy: 0.8856 +Epoch 33/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1267 - accuracy: 0.9625 - val_loss: 0.2911 - val_accuracy: 0.8851 +Epoch 34/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1219 - accuracy: 0.9649 - val_loss: 0.2931 - val_accuracy: 0.8851 +Epoch 35/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1173 - accuracy: 0.9666 - val_loss: 0.2948 - val_accuracy: 0.8863 +Epoch 36/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1127 - accuracy: 0.9685 - val_loss: 0.2985 - val_accuracy: 0.8851 +Epoch 37/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1086 - accuracy: 0.9688 - val_loss: 0.2998 - val_accuracy: 0.8860 +Epoch 38/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1045 - accuracy: 0.9716 - val_loss: 0.3033 - val_accuracy: 0.8839 +Epoch 39/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.1007 - accuracy: 0.9723 - val_loss: 0.3049 - val_accuracy: 0.8847 +Epoch 40/40 +30/30 [==============================] - 0s 10ms/step - loss: 0.0967 - accuracy: 0.9737 - val_loss: 
0.3087 - val_accuracy: 0.8832 + +``` + +## 评估模型 + +我们来看一下模型的性能如何。将返回两个值。损失值(loss)(一个表示误差的数字,值越低越好)与准确率(accuracy)。 + +```py +results = model.evaluate(test_data, test_labels, verbose=2) + +print(results) +``` + +```py +782/782 - 1s - loss: 0.3298 - accuracy: 0.8729 +[0.32977813482284546, 0.8728799819946289] + +``` + +这种十分朴素的方法得到了约 87% 的准确率(accuracy)。若采用更好的方法,模型的准确率应当接近 95%。 + +## 创建一个准确率(accuracy)和损失值(loss)随时间变化的图表 + +`model.fit()` 返回一个 `History` 对象,该对象包含一个字典,其中包含训练阶段所发生的一切事件: + +```py +history_dict = history.history +history_dict.keys() +``` + +```py +dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy']) + +``` + +有四个条目:在训练和验证期间,每个条目对应一个监控指标。我们可以使用这些条目来绘制训练与验证过程的损失值(loss)和准确率(accuracy),以便进行比较。 + +```py +import matplotlib.pyplot as plt + +acc = history_dict['accuracy'] +val_acc = history_dict['val_accuracy'] +loss = history_dict['loss'] +val_loss = history_dict['val_loss'] + +epochs = range(1, len(acc) + 1) + +# “bo”代表 "蓝点" +plt.plot(epochs, loss, 'bo', label='Training loss') +# b 代表“蓝色实线” +plt.plot(epochs, val_loss, 'b', label='Validation loss') +plt.title('Training and validation loss') +plt.xlabel('Epochs') +plt.ylabel('Loss') +plt.legend() + +plt.show() +``` + +![png](img/9c459926609b3f3452425d5e76209223.png) + +```py +plt.clf() # 清除数字 + +plt.plot(epochs, acc, 'bo', label='Training acc') +plt.plot(epochs, val_acc, 'b', label='Validation acc') +plt.title('Training and validation accuracy') +plt.xlabel('Epochs') +plt.ylabel('Accuracy') +plt.legend() + +plt.show() +``` + +![png](img/6cd4981eb3c80dc3045b45bd7fd0e7ea.png) + +在该图中,点代表训练损失值(loss)与准确率(accuracy),实线代表验证损失值(loss)与准确率(accuracy)。 + +注意训练损失值随每一个 epoch *下降*而训练准确率(accuracy)随每一个 epoch *上升*。这在使用梯度下降优化时是可预期的——理应在每次迭代中最小化期望值。 + +验证过程的损失值(loss)与准确率(accuracy)的情况却并非如此——它们似乎在 20 个 epoch 后达到峰值。这是过拟合的一个实例:模型在训练数据上的表现比在以前从未见过的数据上的表现要更好。在此之后,模型过度优化并学习*特定*于训练数据的表示,而不能够*泛化*到测试数据。 + +对于这种特殊情况,我们可以通过在 20 个左右的 epoch 后停止训练来避免过拟合。稍后,您将看到如何通过回调自动执行此操作。 + +```py +# MIT License +# +# Copyright (c) 2017 François Chollet +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
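+```
+
+与前文“稍后,您将看到如何通过回调自动执行此操作”相呼应,下面补充一个极简示意(假设沿用本教程已定义的 `model`、`partial_x_train`、`partial_y_train`、`x_val`、`y_val`;其中 `patience=2` 只是示例取值,并非教程正文的设定):
+
+```py
+# 示意:用 EarlyStopping 回调监控验证损失,连续 2 个 epoch 没有改善就停止训练,
+# 并恢复验证损失最低时的模型权重
+early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2,
+                                              restore_best_weights=True)
+
+history = model.fit(partial_x_train, partial_y_train,
+                    epochs=40, batch_size=512,
+                    validation_data=(x_val, y_val),
+                    callbacks=[early_stop], verbose=0)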
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/008.md b/Tensorflow/TensorFlow2.0/008.md
new file mode 100644
index 00000000..89602096
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/008.md
@@ -0,0 +1,308 @@
+# 使用 Keras 和 TensorFlow Hub 对电影评论进行文本分类
+
+> 原文:[https://tensorflow.google.cn/tutorials/keras/text_classification_with_hub](https://tensorflow.google.cn/tutorials/keras/text_classification_with_hub)
+
+**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。
+
+此笔记本(notebook)使用评论文本将影评分为*积极(positive)*或*消极(negative)*两类。这是一个*二元(binary)*分类问题,也是一种重要且应用广泛的机器学习问题。
+
+本教程演示了使用 TensorFlow Hub 和 Keras 进行迁移学习的基本应用。
+
+我们将使用来源于[网络电影数据库(Internet Movie Database)](https://www.imdb.com/)的 [IMDB 数据集(IMDB dataset)](https://tensorflow.google.cn/api_docs/python/tf/keras/datasets/imdb),其中包含 50,000 条影评文本。从该数据集划分出的 25,000 条评论用作训练,另外 25,000 条用作测试。训练集与测试集是*平衡的(balanced)*,意味着它们包含相等数量的积极和消极评论。
+
+此笔记本(notebook)使用了 [tf.keras](https://tensorflow.google.cn/guide/keras),它是 TensorFlow 中用于构建和训练模型的高级 API,此外还使用了 [TensorFlow Hub](https://tensorflow.google.cn/hub)(一个用于迁移学习的库和平台)。有关使用 [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) 进行文本分类的更高级教程,请参阅 [MLCC 文本分类指南(MLCC Text Classification Guide)](https://developers.google.cn/machine-learning/guides/text-classification/)。
+
+```py
+import numpy as np
+
+import tensorflow as tf
+
+!pip install -q tensorflow-hub
+!pip install -q tfds-nightly
+import tensorflow_hub as hub
+import tensorflow_datasets as tfds
+
+print("Version: ", tf.__version__)
+print("Eager mode: ", tf.executing_eagerly())
+print("Hub version: ", hub.__version__)
+print("GPU is", "available" if tf.config.experimental.list_physical_devices("GPU") else "NOT AVAILABLE")
+```
+
+```py
+WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+Version: 2.3.0
+Eager mode: True
+Hub version: 0.9.0
+GPU is available
+
+```
+
+## 下载 IMDB 数据集
+
+IMDB 数据集可以在 [TensorFlow 数据集](https://github.com/tensorflow/datasets)处获取。以下代码将 IMDB 数据集下载至您的机器(或 colab 运行时环境)中:
+
+```py
+# 将训练集分割成 60% 和 40%,从而最终我们将得到 15,000 个训练样本
+# 10,000 个验证样本以及 25,000 个测试样本。
+train_data, validation_data, test_data = tfds.load(
+    name="imdb_reviews",
+    split=('train[:60%]', 'train[60%:]', 'test'),
+    as_supervised=True)
+```
+
+```py
+Downloading and preparing dataset imdb_reviews/plain_text/1.0.0 (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /home/kbuilder/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteZDZ3AR/imdb_reviews-train.tfrecord +Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteZDZ3AR/imdb_reviews-test.tfrecord +Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteZDZ3AR/imdb_reviews-unsupervised.tfrecord +Dataset imdb_reviews downloaded and prepared to /home/kbuilder/tensorflow_datasets/imdb_reviews/plain_text/1.0.0\. Subsequent calls will reuse this data. + +``` + +## 探索数据 + +让我们花一点时间来了解数据的格式。每一个样本都是一个表示电影评论和相应标签的句子。该句子不以任何方式进行预处理。标签是一个值为 0 或 1 的整数,其中 0 代表消极评论,1 代表积极评论。 + +我们来打印下前十个样本。 + +```py +train_examples_batch, train_labels_batch = next(iter(train_data.batch(10))) +train_examples_batch +``` + +```py +
But come on Hollywood - a Mountie telling the people of Dawson City, Yukon to elect themselves a marshal (yes a marshal!) and to enforce the law themselves, then gunfighters battling it out on the streets for control of the town?

Nothing even remotely resembling that happened on the Canadian side of the border during the Klondike gold rush. Mr. Mann and company appear to have mistaken Dawson City for Deadwood, the Canadian North for the American Wild West.

Canadian viewers be prepared for a Reefer Madness type of enjoyable howl with this ludicrous plot, or, to shake your head in disgust.', + b'This is the kind of film for a snowy Sunday afternoon when the rest of the world can go ahead with its own business as you descend into a big arm-chair and mellow for a couple of hours. Wonderful performances from Cher and Nicolas Cage (as always) gently row the plot along. There are no rapids to cross, no dangerous waters, just a warm and witty paddle through New York life at its best. A family film in every sense and one that deserves the praise it received.', + b'As others have mentioned, all the women that go nude in this film are mostly absolutely gorgeous. The plot very ably shows the hypocrisy of the female libido. When men are around they want to be pursued, but when no "men" are around, they become the pursuers of a 14 year old boy. And the boy becomes a man really fast (we should all be so lucky at this age!). He then gets up the courage to pursue his true love.', + b"This is a film which should be seen by anybody interested in, effected by, or suffering from an eating disorder. It is an amazingly accurate and sensitive portrayal of bulimia in a teenage girl, its causes and its symptoms. The girl is played by one of the most brilliant young actresses working in cinema today, Alison Lohman, who was later so spectacular in 'Where the Truth Lies'. I would recommend that this film be shown in all schools, as you will never see a better on this subject. Alison Lohman is absolutely outstanding, and one marvels at her ability to convey the anguish of a girl suffering from this compulsive disorder. If barometers tell us the air pressure, Alison Lohman tells us the emotional pressure with the same degree of accuracy. Her emotional range is so precise, each scene could be measured microscopically for its gradations of trauma, on a scale of rising hysteria and desperation which reaches unbearable intensity. Mare Winningham is the perfect choice to play her mother, and does so with immense sympathy and a range of emotions just as finely tuned as Lohman's. Together, they make a pair of sensitive emotional oscillators vibrating in resonance with one another. This film is really an astonishing achievement, and director Katt Shea should be proud of it. The only reason for not seeing it is if you are not interested in people. But even if you like nature films best, this is after all animal behaviour at the sharp edge. Bulimia is an extreme version of how a tormented soul can destroy her own body in a frenzy of despair. And if we don't sympathise with people suffering from the depths of despair, then we are dead inside.", + b'Okay, you have:

Penelope Keith as Miss Herringbone-Tweed, B.B.E. (Backbone of England.) She\'s killed off in the first scene - that\'s right, folks; this show has no backbone!

Peter O\'Toole as Ol\' Colonel Cricket from The First War and now the emblazered Lord of the Manor.

Joanna Lumley as the ensweatered Lady of the Manor, 20 years younger than the colonel and 20 years past her own prime but still glamourous (Brit spelling, not mine) enough to have a toy-boy on the side. It\'s alright, they have Col. Cricket\'s full knowledge and consent (they guy even comes \'round for Christmas!) Still, she\'s considerate of the colonel enough to have said toy-boy her own age (what a gal!)

David McCallum as said toy-boy, equally as pointlessly glamourous as his squeeze. Pilcher couldn\'t come up with any cover for him within the story, so she gave him a hush-hush job at the Circus.

and finally:

Susan Hampshire as Miss Polonia Teacups, Venerable Headmistress of the Venerable Girls\' Boarding-School, serving tea in her office with a dash of deep, poignant advice for life in the outside world just before graduation. Her best bit of advice: "I\'ve only been to Nancherrow (the local Stately Home of England) once. I thought it was very beautiful but, somehow, not part of the real world." Well, we can\'t say they didn\'t warn us.

Ah, Susan - time was, your character would have been running the whole show. They don\'t write \'em like that any more. Our loss, not yours.

So - with a cast and setting like this, you have the re-makings of "Brideshead Revisited," right?

Wrong! They took these 1-dimensional supporting roles because they paid so well. After all, acting is one of the oldest temp-jobs there is (YOU name another!)

First warning sign: lots and lots of backlighting. They get around it by shooting outdoors - "hey, it\'s just the sunlight!"

Second warning sign: Leading Lady cries a lot. When not crying, her eyes are moist. That\'s the law of romance novels: Leading Lady is "dewy-eyed."

Henceforth, Leading Lady shall be known as L.L.

Third warning sign: L.L. actually has stars in her eyes when she\'s in love. Still, I\'ll give Emily Mortimer an award just for having to act with that spotlight in her eyes (I wonder . did they use contacts?)

And lastly, fourth warning sign: no on-screen female character is "Mrs." She\'s either "Miss" or "Lady."

When all was said and done, I still couldn\'t tell you who was pursuing whom and why. I couldn\'t even tell you what was said and done.

To sum up: they all live through World War II without anything happening to them at all.

OK, at the end, L.L. finds she\'s lost her parents to the Japanese prison camps and baby sis comes home catatonic. Meanwhile (there\'s always a "meanwhile,") some young guy L.L. had a crush on (when, I don\'t know) comes home from some wartime tough spot and is found living on the street by Lady of the Manor (must be some street if SHE\'s going to find him there.) Both war casualties are whisked away to recover at Nancherrow (SOMEBODY has to be "whisked away" SOMEWHERE in these romance stories!)

Great drama.', + b'The film is based on a genuine 1950s novel.

Journalist Colin McInnes wrote a set of three "London novels": "Absolute Beginners", "City of Spades" and "Mr Love and Justice". I have read all three. The first two are excellent. The last, perhaps an experiment that did not come off. But McInnes\'s work is highly acclaimed; and rightly so. This musical is the novelist\'s ultimate nightmare - to see the fruits of one\'s mind being turned into a glitzy, badly-acted, soporific one-dimensional apology of a film that says it captures the spirit of 1950s London, and does nothing of the sort.

Thank goodness Colin McInnes wasn\'t alive to witness it.', + b'I really love the sexy action and sci-fi films of the sixties and its because of the actress\'s that appeared in them. They found the sexiest women to be in these films and it didn\'t matter if they could act (Remember "Candy"?). The reason I was disappointed by this film was because it wasn\'t nostalgic enough. The story here has a European sci-fi film called "Dragonfly" being made and the director is fired. So the producers decide to let a young aspiring filmmaker (Jeremy Davies) to complete the picture. They\'re is one real beautiful woman in the film who plays Dragonfly but she\'s barely in it. Film is written and directed by Roman Coppola who uses some of his fathers exploits from his early days and puts it into the script. I wish the film could have been an homage to those early films. They could have lots of cameos by actors who appeared in them. There is one actor in this film who was popular from the sixties and its John Phillip Law (Barbarella). Gerard Depardieu, Giancarlo Giannini and Dean Stockwell appear as well. I guess I\'m going to have to continue waiting for a director to make a good homage to the films of the sixties. If any are reading this, "Make it as sexy as you can"! I\'ll be waiting!', + b'Sure, this one isn\'t really a blockbuster, nor does it target such a position. "Dieter" is the first name of a quite popular German musician, who is either loved or hated for his kind of acting and thats exactly what this movie is about. It is based on the autobiography "Dieter Bohlen" wrote a few years ago but isn\'t meant to be accurate on that. The movie is filled with some sexual offensive content (at least for American standard) which is either amusing (not for the other "actors" of course) or dumb - it depends on your individual kind of humor or on you being a "Bohlen"-Fan or not. Technically speaking there isn\'t much to criticize. Speaking of me I find this movie to be an OK-movie.'], + dtype=object)> + +``` + +我们再打印下前十个标签。 + +```py +train_labels_batch +``` + +```py + + +``` + +## 构建模型 + +神经网络由堆叠的层来构建,这需要从三个主要方面来进行体系结构决策: + +* 如何表示文本? +* 模型里有多少层? +* 每个层里有多少*隐层单元(hidden units)*? 
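+
+在继续之前,下面用一个极简的草图展示这三个决策如何映射到 Keras 代码。这只是帮助理解的示意,并非本教程的正式实现:其中 `make_text_model`、`num_hidden_layers`、`hidden_units` 等名称是为演示而假设的,`hub_layer` 假定为下文即将创建的预训练嵌入层:
+
+```py
+# 仅为示意:把三个体系结构决策写成可调参数(假设 hub_layer 已按下文方式创建)
+def make_text_model(hub_layer, num_hidden_layers=1, hidden_units=16):
+    model = tf.keras.Sequential([hub_layer])  # 决策一:如何表示文本(预训练嵌入)
+    for _ in range(num_hidden_layers):        # 决策二:模型里有多少层
+        model.add(tf.keras.layers.Dense(hidden_units, activation='relu'))  # 决策三:每层隐层单元数
+    model.add(tf.keras.layers.Dense(1))       # 单个 logit 输出,用于二分类
+    return model
+```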
+ +本示例中,输入数据由句子组成。预测的标签为 0 或 1。 + +表示文本的一种方式是将句子转换为嵌入向量(embeddings vectors)。我们可以使用一个预先训练好的文本嵌入(text embedding)作为首层,这将具有三个优点: + +* 我们不必担心文本预处理 +* 我们可以从迁移学习中受益 +* 嵌入具有固定长度,更易于处理 + +针对此示例我们将使用 [TensorFlow Hub](https://tensorflow.google.cn/hub) 中名为 [google/tf2-preview/gnews-swivel-20dim/1](https://hub.tensorflow.google.cn/google/tf2-preview/gnews-swivel-20dim/1) 的一种**预训练文本嵌入(text embedding)模型** 。 + +为了达到本教程的目的还有其他三种预训练模型可供测试: + +* [google/tf2-preview/gnews-swivel-20dim-with-oov/1](https://hub.tensorflow.google.cn/google/tf2-preview/gnews-swivel-20dim-with-oov/1) ——类似 [google/tf2-preview/gnews-swivel-20dim/1](https://hub.tensorflow.google.cn/google/tf2-preview/gnews-swivel-20dim/1),但 2.5%的词汇转换为未登录词桶(OOV buckets)。如果任务的词汇与模型的词汇没有完全重叠,这将会有所帮助。 +* [google/tf2-preview/nnlm-en-dim50/1](https://hub.tensorflow.google.cn/google/tf2-preview/nnlm-en-dim50/1) ——一个拥有约 1M 词汇量且维度为 50 的更大的模型。 +* [google/tf2-preview/nnlm-en-dim128/1](https://hub.tensorflow.google.cn/google/tf2-preview/nnlm-en-dim128/1) ——拥有约 1M 词汇量且维度为 128 的更大的模型。 + +让我们首先创建一个使用 Tensorflow Hub 模型嵌入(embed)语句的 Keras 层,并在几个输入样本中进行尝试。请注意无论输入文本的长度如何,嵌入(embeddings)输出的形状都是:`(num_examples, embedding_dimension)`。 + +```py +embedding = "https://hub.tensorflow.google.cn/google/tf2-preview/gnews-swivel-20dim/1" +hub_layer = hub.KerasLayer(embedding, input_shape=[], + dtype=tf.string, trainable=True) +hub_layer(train_examples_batch[:3]) +``` + +```py + + +``` + +现在让我们构建完整模型: + +```py +model = tf.keras.Sequential() +model.add(hub_layer) +model.add(tf.keras.layers.Dense(16, activation='relu')) +model.add(tf.keras.layers.Dense(1)) + +model.summary() +``` + +```py +Model: "sequential" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +keras_layer (KerasLayer) (None, 20) 400020 +_________________________________________________________________ +dense (Dense) (None, 16) 336 +_________________________________________________________________ +dense_1 (Dense) (None, 1) 17 +================================================================= +Total params: 400,373 +Trainable params: 400,373 +Non-trainable params: 0 +_________________________________________________________________ + +``` + +层按顺序堆叠以构建分类器: + +1. 第一层是 Tensorflow Hub 层。这一层使用一个预训练的保存好的模型来将句子映射为嵌入向量(embedding vector)。我们所使用的预训练文本嵌入(embedding)模型([google/tf2-preview/gnews-swivel-20dim/1](https://hub.tensorflow.google.cn/google/tf2-preview/gnews-swivel-20dim/1))将句子切割为符号,嵌入(embed)每个符号然后进行合并。最终得到的维度是:`(num_examples, embedding_dimension)`。 +2. 该定长输出向量通过一个有 16 个隐层单元的全连接层(`Dense`)进行管道传输。 +3. 
最后一层与单个输出结点紧密相连。使用 `Sigmoid` 激活函数,其函数值为介于 0 与 1 之间的浮点数,表示概率或置信水平。 + +让我们编译模型。 + +### 损失函数与优化器 + +一个模型需要损失函数和优化器来进行训练。由于这是一个二分类问题且模型输出概率值(一个使用 sigmoid 激活函数的单一单元层),我们将使用 `binary_crossentropy` 损失函数。 + +这不是损失函数的唯一选择,例如,您可以选择 `mean_squared_error` 。但是,一般来说 `binary_crossentropy` 更适合处理概率——它能够度量概率分布之间的“距离”,或者在我们的示例中,指的是度量 ground-truth 分布与预测值之间的“距离”。 + +稍后,当我们研究回归问题(例如,预测房价)时,我们将介绍如何使用另一种叫做均方误差的损失函数。 + +现在,配置模型来使用优化器和损失函数: + +```py +model.compile(optimizer='adam', + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=['accuracy']) +``` + +## 训练模型 + +以 512 个样本的 mini-batch 大小迭代 20 个 epoch 来训练模型。 这是指对 `x_train` 和 `y_train` 张量中所有样本的的 20 次迭代。在训练过程中,监测来自验证集的 10,000 个样本上的损失值(loss)和准确率(accuracy): + +```py +history = model.fit(train_data.shuffle(10000).batch(512), + epochs=20, + validation_data=validation_data.batch(512), + verbose=1) +``` + +```py +Epoch 1/20 +30/30 [==============================] - 2s 64ms/step - loss: 1.5444 - accuracy: 0.4965 - val_loss: 0.9259 - val_accuracy: 0.4705 +Epoch 2/20 +30/30 [==============================] - 2s 59ms/step - loss: 0.7667 - accuracy: 0.4990 - val_loss: 0.7017 - val_accuracy: 0.5327 +Epoch 3/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.6631 - accuracy: 0.5799 - val_loss: 0.6387 - val_accuracy: 0.6238 +Epoch 4/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.6156 - accuracy: 0.6327 - val_loss: 0.6051 - val_accuracy: 0.6390 +Epoch 5/20 +30/30 [==============================] - 2s 57ms/step - loss: 0.5819 - accuracy: 0.6623 - val_loss: 0.5761 - val_accuracy: 0.6639 +Epoch 6/20 +30/30 [==============================] - 2s 57ms/step - loss: 0.5492 - accuracy: 0.6983 - val_loss: 0.5475 - val_accuracy: 0.6873 +Epoch 7/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.5159 - accuracy: 0.7294 - val_loss: 0.5176 - val_accuracy: 0.7277 +Epoch 8/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.4813 - accuracy: 0.7609 - val_loss: 0.4884 - val_accuracy: 0.7490 +Epoch 9/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.4472 - accuracy: 0.7869 - val_loss: 0.4602 - val_accuracy: 0.7747 +Epoch 10/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.4141 - accuracy: 0.8113 - val_loss: 0.4352 - val_accuracy: 0.7983 +Epoch 11/20 +30/30 [==============================] - 2s 57ms/step - loss: 0.3837 - accuracy: 0.8312 - val_loss: 0.4113 - val_accuracy: 0.8074 +Epoch 12/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.3558 - accuracy: 0.8482 - val_loss: 0.3910 - val_accuracy: 0.8152 +Epoch 13/20 +30/30 [==============================] - 2s 57ms/step - loss: 0.3305 - accuracy: 0.8611 - val_loss: 0.3727 - val_accuracy: 0.8270 +Epoch 14/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.3071 - accuracy: 0.8746 - val_loss: 0.3602 - val_accuracy: 0.8455 +Epoch 15/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.2872 - accuracy: 0.8840 - val_loss: 0.3445 - val_accuracy: 0.8462 +Epoch 16/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.2678 - accuracy: 0.8942 - val_loss: 0.3333 - val_accuracy: 0.8538 +Epoch 17/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.2505 - accuracy: 0.9010 - val_loss: 0.3243 - val_accuracy: 0.8557 +Epoch 18/20 +30/30 [==============================] - 2s 57ms/step - loss: 0.2351 - accuracy: 0.9073 - val_loss: 0.3172 - val_accuracy: 0.8634 +Epoch 19/20 +30/30 [==============================] - 2s 58ms/step - loss: 0.2209 - accuracy: 0.9154 - 
val_loss: 0.3108 - val_accuracy: 0.8660
+Epoch 20/20
+30/30 [==============================] - 2s 57ms/step - loss: 0.2082 - accuracy: 0.9224 - val_loss: 0.3058 - val_accuracy: 0.8676
+
+```
+
+## 评估模型
+
+我们来看下模型的表现如何。评估会返回两个值:损失值(loss)(一个表示误差的数字,值越低越好)与准确率(accuracy)。
+
+```py
+results = model.evaluate(test_data.batch(512), verbose=2)
+
+for name, value in zip(model.metrics_names, results):
+  print("%s: %.3f" % (name, value))
+```
+
+```py
+49/49 - 1s - loss: 0.3208 - accuracy: 0.8546
+loss: 0.321
+accuracy: 0.855
+
+```
+
+这种十分朴素的方法得到了约 85% 的准确率(accuracy)。若采用更好的方法,模型的准确率应当接近 95%。
+
+## 进一步阅读
+
+有关使用字符串输入的更一般方法,以及对训练期间准确率(accuracy)和损失值(loss)更详细的分析,请参阅[此处](https://tensorflow.google.cn/tutorials/keras/basic_text_classification)。
+
+```py
+# MIT License
+#
+# Copyright (c) 2017 François Chollet
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/009.md b/Tensorflow/TensorFlow2.0/009.md
new file mode 100644
index 00000000..ef083943
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/009.md
@@ -0,0 +1,450 @@
+# Basic regression: Predict fuel efficiency
+
+> 原文:[https://tensorflow.google.cn/tutorials/keras/regression](https://tensorflow.google.cn/tutorials/keras/regression)
+
+**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。
+
+在*回归(regression)*问题中,我们的目的是预测像价格或概率这样的连续值输出;而*分类(classification)*问题的目的则是从一系列类别中选择出一个类别(如,给出一张包含苹果或橘子的图片,识别出图片中是哪种水果)。
+
+本 notebook 使用经典的 [Auto MPG](https://archive.ics.uci.edu/ml/datasets/auto+mpg) 数据集,构建了一个用来预测 70 年代末到 80 年代初汽车燃油效率的模型。为了做到这一点,我们将为该模型提供许多那个时期的汽车描述。这个描述包含:气缸数、排量、马力以及重量。
+
+本示例使用 [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) API,相关细节请参阅 [本指南](https://tensorflow.google.cn/guide/keras)。
+
+```py
+# 使用 seaborn 绘制矩阵图 (pairplot)
+pip install -q seaborn
+
+```
+
+```py
+WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+ +``` + +```py +import pathlib + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns + +import tensorflow as tf + +from tensorflow import keras +from tensorflow.keras import layers + +print(tf.__version__) +``` + +```py +2.3.0 + +``` + +## Auto MPG 数据集 + +该数据集可以从 [UCI 机器学习库](https://archive.ics.uci.edu/ml/) 中获取. + +### 获取数据 + +首先下载数据集。 + +```py +dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data") +dataset_path +``` + +```py +Downloading data from http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data +32768/30286 [================================] - 0s 1us/step + +'/home/kbuilder/.keras/datasets/auto-mpg.data' + +``` + +使用 pandas 导入数据集。 + +```py +column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight', + 'Acceleration', 'Model Year', 'Origin'] +raw_dataset = pd.read_csv(dataset_path, names=column_names, + na_values = "?", comment='\t', + sep=" ", skipinitialspace=True) + +dataset = raw_dataset.copy() +dataset.tail() +``` + + + +### 数据清洗 + +数据集中包括一些未知值。 + +```py +dataset.isna().sum() +``` + +```py +MPG 0 +Cylinders 0 +Displacement 0 +Horsepower 6 +Weight 0 +Acceleration 0 +Model Year 0 +Origin 0 +dtype: int64 + +``` + +为了保证这个初始示例的简单性,删除这些行。 + +```py +dataset = dataset.dropna() +``` + +`"Origin"` 列实际上代表分类,而不仅仅是一个数字。所以把它转换为独热码 (one-hot): + +```py +origin = dataset.pop('Origin') +``` + +```py +dataset['USA'] = (origin == 1)*1.0 +dataset['Europe'] = (origin == 2)*1.0 +dataset['Japan'] = (origin == 3)*1.0 +dataset.tail() +``` + + + +### 拆分训练数据集和测试数据集 + +现在需要将数据集拆分为一个训练数据集和一个测试数据集。 + +我们最后将使用测试数据集对模型进行评估。 + +```py +train_dataset = dataset.sample(frac=0.8,random_state=0) +test_dataset = dataset.drop(train_dataset.index) +``` + +### 数据检查 + +快速查看训练集中几对列的联合分布。 + +```py +sns.pairplot(train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]], diag_kind="kde") +``` + +```py + + +``` + +![png](img/4a4c68a2d8914e8b1b75bed4a9b81a5b.png) + +也可以查看总体的数据统计: + +```py +train_stats = train_dataset.describe() +train_stats.pop("MPG") +train_stats = train_stats.transpose() +train_stats +``` + + + +### 从标签中分离特征 + +将特征值从目标值或者"标签"中分离。 这个标签是你使用训练模型进行预测的值。 + +```py +train_labels = train_dataset.pop('MPG') +test_labels = test_dataset.pop('MPG') +``` + +### 数据规范化 + +再次审视下上面的 `train_stats` 部分,并注意每个特征的范围有什么不同。 + +使用不同的尺度和范围对特征归一化是好的实践。尽管模型*可能* 在没有特征归一化的情况下收敛,它会使得模型训练更加复杂,并会造成生成的模型依赖输入所使用的单位选择。 + +注意:尽管我们仅仅从训练集中有意生成这些统计数据,但是这些统计信息也会用于归一化的测试数据集。我们需要这样做,将测试数据集放入到与已经训练过的模型相同的分布中。 + +```py +def norm(x): + return (x - train_stats['mean']) / train_stats['std'] +normed_train_data = norm(train_dataset) +normed_test_data = norm(test_dataset) +``` + +我们将会使用这个已经归一化的数据来训练模型。 + +警告: 用于归一化输入的数据统计(均值和标准差)需要反馈给模型从而应用于任何其他数据,以及我们之前所获得独热码。这些数据包含测试数据集以及生产环境中所使用的实时数据。 + +## 模型 + +### 构建模型 + +让我们来构建我们自己的模型。这里,我们将会使用一个“顺序”模型,其中包含两个紧密相连的隐藏层,以及返回单个、连续值得输出层。模型的构建步骤包含于一个名叫 'build_model' 的函数中,稍后我们将会创建第二个模型。 两个密集连接的隐藏层。 + +```py +def build_model(): + model = keras.Sequential([ + layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]), + layers.Dense(64, activation='relu'), + layers.Dense(1) + ]) + + optimizer = tf.keras.optimizers.RMSprop(0.001) + + model.compile(loss='mse', + optimizer=optimizer, + metrics=['mae', 'mse']) + return model +``` + +```py +model = build_model() +``` + +### 检查模型 + +使用 `.summary` 方法来打印该模型的简单描述。 + +```py +model.summary() +``` + +```py +Model: "sequential" +_________________________________________________________________ +Layer (type) Output Shape Param # 
+================================================================= +dense (Dense) (None, 64) 640 +_________________________________________________________________ +dense_1 (Dense) (None, 64) 4160 +_________________________________________________________________ +dense_2 (Dense) (None, 1) 65 +================================================================= +Total params: 4,865 +Trainable params: 4,865 +Non-trainable params: 0 +_________________________________________________________________ + +``` + +现在试用下这个模型。从训练数据中批量获取‘10’条例子并对这些例子调用 `model.predict` 。 + +```py +example_batch = normed_train_data[:10] +example_result = model.predict(example_batch) +example_result +``` + +```py +array([[0.15074062], + [0.0973136 ], + [0.17310914], + [0.08873479], + [0.52456 ], + [0.05311462], + [0.49406645], + [0.04333409], + [0.12005241], + [0.6703117 ]], dtype=float32) + +``` + +它似乎在工作,并产生了预期的形状和类型的结果 + +### 训练模型 + +对模型进行 1000 个周期的训练,并在 `history` 对象中记录训练和验证的准确性。 + +```py +# 通过为每个完成的时期打印一个点来显示训练进度 +class PrintDot(keras.callbacks.Callback): + def on_epoch_end(self, epoch, logs): + if epoch % 100 == 0: print('') + print('.', end='') + +EPOCHS = 1000 + +history = model.fit( + normed_train_data, train_labels, + epochs=EPOCHS, validation_split = 0.2, verbose=0, + callbacks=[PrintDot()]) +``` + +```py + +.................................................................................................... +.................................................................................................... +.................................................................................................... +.................................................................................................... +.................................................................................................... +.................................................................................................... +.................................................................................................... +.................................................................................................... +.................................................................................................... +.................................................................................................... 
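+
+```
+
+在查看可视化之前,可以先确认 `history.history` 的结构:它是一个“指标名到逐个 epoch 取值列表”的字典。下面是一个快速检查的示意(沿用上文训练得到的 `history` 对象):
+
+```py
+# 示意:查看记录了哪些指标,以及验证集 MAE 在最后一个 epoch 的取值
+print(history.history.keys())
+print(history.history['val_mae'][-1])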
+
+```
+
+Visualize the model's training progress using the stats stored in the `history` object.
+
+```py
+hist = pd.DataFrame(history.history)
+hist['epoch'] = history.epoch
+hist.tail()
+```
+
+```py
+def plot_history(history):
+  hist = pd.DataFrame(history.history)
+  hist['epoch'] = history.epoch
+
+  plt.figure()
+  plt.xlabel('Epoch')
+  plt.ylabel('Mean Abs Error [MPG]')
+  plt.plot(hist['epoch'], hist['mae'],
+           label='Train Error')
+  plt.plot(hist['epoch'], hist['val_mae'],
+           label = 'Val Error')
+  plt.ylim([0,5])
+  plt.legend()
+
+  plt.figure()
+  plt.xlabel('Epoch')
+  plt.ylabel('Mean Square Error [$MPG^2$]')
+  plt.plot(hist['epoch'], hist['mse'],
+           label='Train Error')
+  plt.plot(hist['epoch'], hist['val_mse'],
+           label = 'Val Error')
+  plt.ylim([0,20])
+  plt.legend()
+  plt.show()
+
+plot_history(history)
+```
+
+![png](img/7fe4fe0b14735050369dc31f05672d65.png)
+
+![png](img/29af7886a5834acb3b056b86d97b4128.png)
+
+This graph shows little improvement, or even degradation, in the validation error after about 100 epochs. Let's update the `model.fit` call to automatically stop training when the validation score doesn't improve. We'll use an *EarlyStopping callback* that tests a training condition for every epoch. If a set number of epochs passes without improvement, the training stops automatically.
+
+You can learn more about this callback [here](https://tensorflow.google.cn/versions/master/api_docs/python/tf/keras/callbacks/EarlyStopping).
+
+```py
+model = build_model()
+
+# The patience parameter is the number of epochs to check for improvement
+early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
+
+history = model.fit(normed_train_data, train_labels, epochs=EPOCHS,
+                    validation_split = 0.2, verbose=0, callbacks=[early_stop, PrintDot()])
+
+plot_history(history)
+```
+
+```py
+
+....................................................................................................
+...........................
+
+```
+
+![png](img/253f679c0d56ad236d24246ddb70d466.png)
+
+![png](img/0f98889f249aed7e8f8f5e90e5432e08.png)
+
+The graph shows that on the validation set, the average error is usually around +/- 2 MPG. Is this good? We'll leave that decision up to you.
+
+Let's see how well the model generalizes by using the **test set**, which we did not use when training the model. This tells us how well we can expect the model to predict when we use it in the real world.
+
+```py
+loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=2)
+
+print("Testing set Mean Abs Error: {:5.2f} MPG".format(mae))
+```
+
+```py
+3/3 - 0s - loss: 5.9941 - mae: 1.8809 - mse: 5.9941
+Testing set Mean Abs Error:  1.88 MPG
+
+```
+
+### Make predictions
+
+Finally, predict MPG values using data from the test set:
+
+```py
+test_predictions = model.predict(normed_test_data).flatten()
+
+plt.scatter(test_labels, test_predictions)
+plt.xlabel('True Values [MPG]')
+plt.ylabel('Predictions [MPG]')
+plt.axis('equal')
+plt.axis('square')
+plt.xlim([0,plt.xlim()[1]])
+plt.ylim([0,plt.ylim()[1]])
+_ = plt.plot([-100, 100], [-100, 100])
+```
+
+![png](img/54c9e1f17ab75ca37c6360c3e5230475.png)
+
+It looks like our model predicts reasonably well. Let's take a look at the error distribution.
+
+```py
+error = test_predictions - test_labels
+plt.hist(error, bins = 25)
+plt.xlabel("Prediction Error [MPG]")
+_ = plt.ylabel("Count")
+```
+
+![png](img/25091cb1e90c92e9948c6c6cb9d0238b.png)
+
+It's not quite Gaussian, but we might expect that because the number of samples is very small.
+
+## Conclusion
+
+This notebook introduced a few techniques to handle a regression problem.
+
+* Mean squared error (MSE) is a common loss function used for regression problems (different loss functions are used for classification problems).
+* Similarly, evaluation metrics used for regression differ from classification. A common regression metric is mean absolute error (MAE).
+* When numeric input data features have values with different ranges, each feature should be scaled independently to the same range.
+* If there is not much training data, one technique is to prefer a small network with few hidden layers to avoid overfitting.
+* Early stopping is a useful technique to prevent overfitting.
+
+```py
+# MIT License
+#
+# Copyright (c) 2017 François Chollet
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/010.md b/Tensorflow/TensorFlow2.0/010.md
new file mode 100644
index 00000000..622a0587
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/010.md
@@ -0,0 +1,915 @@
+# Overfit and underfit
+
+> Original: [https://tensorflow.google.cn/tutorials/keras/overfit_and_underfit](https://tensorflow.google.cn/tutorials/keras/overfit_and_underfit)
+
+As always, the code in this example will use the [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) API, which you can learn more about in the TensorFlow [Keras guide](https://tensorflow.google.cn/guide/keras).
+
+In both of the previous examples, [classifying text](https://tensorflow.google.cn/tutorials/keras/text_classification_with_hub) and [predicting fuel efficiency](https://tensorflow.google.cn/tutorials/keras/regression), we saw that the accuracy of our model on the validation data would peak after training for a number of epochs, and would then stagnate or start decreasing.
+
+In other words, our model would *overfit* to the training data. Learning how to deal with overfitting is important. Although it's often possible to achieve high accuracy on the *training set*, what we really want is to develop models that generalize well to a *testing set* (or data they haven't seen before).
+
+The opposite of overfitting is *underfitting*. Underfitting occurs when there is still room for improvement on the training data. This can happen for a number of reasons: the model is not powerful enough, is over-regularized, or has simply not been trained long enough. This means the network has not learned the relevant patterns in the training data.
+
+If you train for too long though, the model will start to overfit and learn patterns from the training data that don't generalize to the test data. We need to strike a balance. Understanding how to train for an appropriate number of epochs, as we'll explore below, is a useful skill.
+
+To prevent overfitting, the best solution is to use more complete training data. The dataset should cover the full range of inputs that the model is expected to handle. Additional data may only be useful if it covers new and interesting cases.
+
+A model trained on more complete data will naturally generalize better. When that is no longer possible, the next best solution is to use techniques like regularization. These place constraints on the quantity and type of information your model can store. If a network can only afford to memorize a small number of patterns, the optimization process will force it to focus on the most prominent patterns, which have a better chance of generalizing well.
+
+In this notebook, we'll explore several common regularization techniques, and use them to improve on a classification model.
+
+## Setup
+
+Before getting started, import the necessary packages:
+
+```py
+import tensorflow as tf
+
+from tensorflow.keras import layers
+from tensorflow.keras import regularizers
+
+print(tf.__version__)
+```
+
+```py
+2.3.1
+
+```
+
+```py
+!pip install -q git+https://github.com/tensorflow/docs
+
+import tensorflow_docs as tfdocs
+import tensorflow_docs.modeling
+import tensorflow_docs.plots
+```
+
+```py
+from IPython import display
+from matplotlib import pyplot as plt
+
+import numpy as np
+
+import pathlib
+import shutil
+import tempfile
+```
+
+```py
+logdir = pathlib.Path(tempfile.mkdtemp())/"tensorboard_logs"
+shutil.rmtree(logdir, ignore_errors=True)
+```
+
+## The Higgs Dataset
+
+The goal of this tutorial is not to do particle physics, so don't dwell on the details of the dataset. It contains 11 000 000 examples, each with 28 features, and a binary class label.
+
+```py
+gz = tf.keras.utils.get_file('HIGGS.csv.gz', 'http://mlphysics.ics.uci.edu/data/higgs/HIGGS.csv.gz')
+```
+
+```py
+Downloading data from http://mlphysics.ics.uci.edu/data/higgs/HIGGS.csv.gz
+2816409600/2816407858 [==============================] - 230s 0us/step
+
+```
+
+```py
+FEATURES = 28
+```
+
+The [`tf.data.experimental.CsvDataset`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/CsvDataset) class can be used to read csv records directly from a gzip file with no intermediate decompression step.
+
+```py
+ds = tf.data.experimental.CsvDataset(gz,[float(),]*(FEATURES+1), compression_type="GZIP")
+```
+
+That csv reader class returns a list of scalars for each record. The following function repacks that list of scalars into a (feature_vector, label) pair.
+
+```py
+def pack_row(*row):
+  label = row[0]
+  features = tf.stack(row[1:],1)
+  return features, label
+```
+
+TensorFlow is most efficient when operating on large batches of data.
+
+So instead of repacking each row individually make a new `Dataset` that takes batches of 10000-examples, applies the `pack_row` function to each batch, and then splits the batches back up into individual records:
+
+```py
+packed_ds = ds.batch(10000).map(pack_row).unbatch()
+```
+
+Have a look at some of the records from this new `packed_ds`.
+
+The features are not perfectly normalized, but this is sufficient for this tutorial.
+
+```py
+for features,label in packed_ds.batch(1000).take(1):
+  print(features[0])
+  plt.hist(features.numpy().flatten(), bins = 101)
+```
+
+```py
+tf.Tensor(
+[ 0.8692932  -0.6350818   0.22569026  0.32747006 -0.6899932   0.75420225
+ -0.24857314 -1.0920639   0.          1.3749921  -0.6536742   0.9303491
+  1.1074361   1.1389043  -1.5781983  -1.0469854   0.          0.65792954
+ -0.01045457 -0.04576717  3.1019614   1.35376     0.9795631   0.97807616
+  0.92000484  0.72165745  0.98875093  0.87667835], shape=(28,), dtype=float32)
+
+```
+
+![png](img/b4bcda4ec74a98071e75941c07503a6c.png)
+
+To keep this tutorial relatively short use just the first 1000 samples for validation, and the next 10 000 for training:
+
+```py
+N_VALIDATION = int(1e3)
+N_TRAIN = int(1e4)
+BUFFER_SIZE = int(1e4)
+BATCH_SIZE = 500
+STEPS_PER_EPOCH = N_TRAIN//BATCH_SIZE
+```
+
+The [`Dataset.skip`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#skip) and [`Dataset.take`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#take) methods make this easy.
+
+At the same time, use the [`Dataset.cache`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#cache) method to ensure that the loader doesn't need to re-read the data from the file on each epoch:
+
+```py
+validate_ds = packed_ds.take(N_VALIDATION).cache()
+train_ds = packed_ds.skip(N_VALIDATION).take(N_TRAIN).cache()
+```
+
+```py
+train_ds
+```
+
+```py
+<CacheDataset shapes: ((28,), ()), types: (tf.float32, tf.float32)>
+
+```
+
+These datasets return individual examples. Use the `.batch` method to create batches of an appropriate size for training. Before batching also remember to `.shuffle` and `.repeat` the training set.
+
+```py
+validate_ds = validate_ds.batch(BATCH_SIZE)
+train_ds = train_ds.shuffle(BUFFER_SIZE).repeat().batch(BATCH_SIZE)
+```
+
+## Demonstrate overfitting
+
+The simplest way to prevent overfitting is to start with a small model: A model with a small number of learnable parameters (which is determined by the number of layers and the number of units per layer). In deep learning, the number of learnable parameters in a model is often referred to as the model's "capacity".
+
+Intuitively, a model with more parameters will have more "memorization capacity" and therefore will be able to easily learn a perfect dictionary-like mapping between training samples and their targets, a mapping without any generalization power, but this would be useless when making predictions on previously unseen data.
+
+Always keep this in mind: deep learning models tend to be good at fitting to the training data, but the real challenge is generalization, not fitting.
+
+On the other hand, if the network has limited memorization resources, it will not be able to learn the mapping as easily. To minimize its loss, it will have to learn compressed representations that have more predictive power. At the same time, if you make your model too small, it will have difficulty fitting to the training data. There is a balance between "too much capacity" and "not enough capacity".
+
+Unfortunately, there is no magical formula to determine the right size or architecture of your model (in terms of the number of layers, or the right size for each layer). You will have to experiment using a series of different architectures.
+
+To find an appropriate model size, it's best to start with relatively few layers and parameters, then begin increasing the size of the layers or adding new layers until you see diminishing returns on the validation loss.
+
+Start with a simple model using only [`layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense) as a baseline, then create larger versions, and compare them.
+
+### Training procedure
+
+Many models train better if you gradually reduce the learning rate during training. Use [`optimizers.schedules`](https://tensorflow.google.cn/api_docs/python/tf/keras/optimizers/schedules) to reduce the learning rate over time:
+
+```py
+lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
+  0.001,
+  decay_steps=STEPS_PER_EPOCH*1000,
+  decay_rate=1,
+  staircase=False)
+
+def get_optimizer():
+  return tf.keras.optimizers.Adam(lr_schedule)
+```
+
+The code above sets a [`schedules.InverseTimeDecay`](https://tensorflow.google.cn/api_docs/python/tf/keras/optimizers/schedules/InverseTimeDecay) to hyperbolically decrease the learning rate to 1/2 of the base rate at 1000 epochs, 1/3 at 2000 epochs and so on.
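+
+As a quick sanity check (not part of the original notebook), you can evaluate the schedule directly to confirm the hyperbolic decay described above; with this configuration the formula works out to `0.001 / (1 + step / (STEPS_PER_EPOCH * 1000))`:
+
+```py
+print(lr_schedule(0).numpy())                       # 0.001 at the start
+print(lr_schedule(STEPS_PER_EPOCH * 1000).numpy())  # ~0.0005 after 1000 epochs of steps
+print(lr_schedule(STEPS_PER_EPOCH * 2000).numpy())  # ~0.000333 after 2000
+```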
+ +```py +step = np.linspace(0,100000) +lr = lr_schedule(step) +plt.figure(figsize = (8,6)) +plt.plot(step/STEPS_PER_EPOCH, lr) +plt.ylim([0,max(plt.ylim())]) +plt.xlabel('Epoch') +_ = plt.ylabel('Learning Rate') +``` + +![png](img/1d906c8d5397ad3e918d2a91fcfbb78e.png) + +Each model in this tutorial will use the same training configuration. So set these up in a reusable way, starting with the list of callbacks. + +The training for this tutorial runs for many short epochs. To reduce the logging noise use the `tfdocs.EpochDots` which simply prints a `.` for each epoch, and a full set of metrics every 100 epochs. + +Next include [`callbacks.EarlyStopping`](https://tensorflow.google.cn/api_docs/python/tf/keras/callbacks/EarlyStopping) to avoid long and unnecessary training times. Note that this callback is set to monitor the `val_binary_crossentropy`, not the `val_loss`. This difference will be important later. + +Use [`callbacks.TensorBoard`](https://tensorflow.google.cn/api_docs/python/tf/keras/callbacks/TensorBoard) to generate TensorBoard logs for the training. + +```py +def get_callbacks(name): + return [ + tfdocs.modeling.EpochDots(), + tf.keras.callbacks.EarlyStopping(monitor='val_binary_crossentropy', patience=200), + tf.keras.callbacks.TensorBoard(logdir/name), + ] +``` + +Similarly each model will use the same [`Model.compile`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#compile) and [`Model.fit`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#fit) settings: + +```py +def compile_and_fit(model, name, optimizer=None, max_epochs=10000): + if optimizer is None: + optimizer = get_optimizer() + model.compile(optimizer=optimizer, + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=[ + tf.keras.losses.BinaryCrossentropy( + from_logits=True, name='binary_crossentropy'), + 'accuracy']) + + model.summary() + + history = model.fit( + train_ds, + steps_per_epoch = STEPS_PER_EPOCH, + epochs=max_epochs, + validation_data=validate_ds, + callbacks=get_callbacks(name), + verbose=0) + return history +``` + +### Tiny model + +Start by training a model: + +```py +tiny_model = tf.keras.Sequential([ + layers.Dense(16, activation='elu', input_shape=(FEATURES,)), + layers.Dense(1) +]) +``` + +```py +size_histories = {} +``` + +```py +size_histories['Tiny'] = compile_and_fit(tiny_model, 'sizes/Tiny') +``` + +```py +Model: "sequential" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +dense (Dense) (None, 16) 464 +_________________________________________________________________ +dense_1 (Dense) (None, 1) 17 +================================================================= +Total params: 481 +Trainable params: 481 +Non-trainable params: 0 +_________________________________________________________________ +WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/ops/summary_ops_v2.py:1277: stop (from tensorflow.python.eager.profiler) is deprecated and will be removed after 2020-07-01. +Instructions for updating: +use `tf.profiler.experimental.stop` instead. +WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0032s vs `on_train_batch_end` time: 0.0255s). Check your callbacks. 
+ +Epoch: 0, accuracy:0.5092, binary_crossentropy:0.7752, loss:0.7752, val_accuracy:0.5110, val_binary_crossentropy:0.7376, val_loss:0.7376, +.................................................................................................... +Epoch: 100, accuracy:0.6028, binary_crossentropy:0.6251, loss:0.6251, val_accuracy:0.5680, val_binary_crossentropy:0.6271, val_loss:0.6271, +.................................................................................................... +Epoch: 200, accuracy:0.6231, binary_crossentropy:0.6137, loss:0.6137, val_accuracy:0.5920, val_binary_crossentropy:0.6146, val_loss:0.6146, +.................................................................................................... +Epoch: 300, accuracy:0.6356, binary_crossentropy:0.6038, loss:0.6038, val_accuracy:0.6190, val_binary_crossentropy:0.6051, val_loss:0.6051, +.................................................................................................... +Epoch: 400, accuracy:0.6470, binary_crossentropy:0.5963, loss:0.5963, val_accuracy:0.6330, val_binary_crossentropy:0.5968, val_loss:0.5968, +.................................................................................................... +Epoch: 500, accuracy:0.6619, binary_crossentropy:0.5909, loss:0.5909, val_accuracy:0.6280, val_binary_crossentropy:0.5939, val_loss:0.5939, +.................................................................................................... +Epoch: 600, accuracy:0.6618, binary_crossentropy:0.5872, loss:0.5872, val_accuracy:0.6630, val_binary_crossentropy:0.5910, val_loss:0.5910, +.................................................................................................... +Epoch: 700, accuracy:0.6655, binary_crossentropy:0.5847, loss:0.5847, val_accuracy:0.6290, val_binary_crossentropy:0.5940, val_loss:0.5940, +.................................................................................................... +Epoch: 800, accuracy:0.6683, binary_crossentropy:0.5819, loss:0.5819, val_accuracy:0.6510, val_binary_crossentropy:0.5908, val_loss:0.5908, +.................................................................................................... +Epoch: 900, accuracy:0.6722, binary_crossentropy:0.5797, loss:0.5797, val_accuracy:0.6620, val_binary_crossentropy:0.5907, val_loss:0.5907, +.................................................................................................... +Epoch: 1000, accuracy:0.6761, binary_crossentropy:0.5779, loss:0.5779, val_accuracy:0.6470, val_binary_crossentropy:0.5910, val_loss:0.5910, +............................... + +``` + +Now check how the model did: + +```py +plotter = tfdocs.plots.HistoryPlotter(metric = 'binary_crossentropy', smoothing_std=10) +plotter.plot(size_histories) +plt.ylim([0.5, 0.7]) +``` + +```py +(0.5, 0.7) + +``` + +![png](img/f865018e54d4c67ed60313c72d71e99c.png) + +### Small model + +To see if you can beat the performance of the small model, progressively train some larger models. + +Try two hidden layers with 16 units each: + +```py +small_model = tf.keras.Sequential([ + # `input_shape` is only required here so that `.summary` works. 
+ layers.Dense(16, activation='elu', input_shape=(FEATURES,)), + layers.Dense(16, activation='elu'), + layers.Dense(1) +]) +``` + +```py +size_histories['Small'] = compile_and_fit(small_model, 'sizes/Small') +``` + +```py +Model: "sequential_1" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +dense_2 (Dense) (None, 16) 464 +_________________________________________________________________ +dense_3 (Dense) (None, 16) 272 +_________________________________________________________________ +dense_4 (Dense) (None, 1) 17 +================================================================= +Total params: 753 +Trainable params: 753 +Non-trainable params: 0 +_________________________________________________________________ +WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0037s vs `on_train_batch_end` time: 0.0530s). Check your callbacks. + +Epoch: 0, accuracy:0.5029, binary_crossentropy:0.7257, loss:0.7257, val_accuracy:0.4720, val_binary_crossentropy:0.6927, val_loss:0.6927, +.................................................................................................... +Epoch: 100, accuracy:0.6153, binary_crossentropy:0.6185, loss:0.6185, val_accuracy:0.6290, val_binary_crossentropy:0.6112, val_loss:0.6112, +.................................................................................................... +Epoch: 200, accuracy:0.6551, binary_crossentropy:0.5940, loss:0.5940, val_accuracy:0.6540, val_binary_crossentropy:0.5941, val_loss:0.5941, +.................................................................................................... +Epoch: 300, accuracy:0.6678, binary_crossentropy:0.5824, loss:0.5824, val_accuracy:0.6680, val_binary_crossentropy:0.5904, val_loss:0.5904, +.................................................................................................... +Epoch: 400, accuracy:0.6731, binary_crossentropy:0.5754, loss:0.5754, val_accuracy:0.6630, val_binary_crossentropy:0.5872, val_loss:0.5872, +.................................................................................................... +Epoch: 500, accuracy:0.6836, binary_crossentropy:0.5679, loss:0.5679, val_accuracy:0.6740, val_binary_crossentropy:0.5834, val_loss:0.5834, +.................................................................................................... +Epoch: 600, accuracy:0.6839, binary_crossentropy:0.5617, loss:0.5617, val_accuracy:0.6760, val_binary_crossentropy:0.5849, val_loss:0.5849, +.................................................................................................... 
+ +``` + +### Medium model + +Now try 3 hidden layers with 64 units each: + +```py +medium_model = tf.keras.Sequential([ + layers.Dense(64, activation='elu', input_shape=(FEATURES,)), + layers.Dense(64, activation='elu'), + layers.Dense(64, activation='elu'), + layers.Dense(1) +]) +``` + +And train the model using the same data: + +```py +size_histories['Medium'] = compile_and_fit(medium_model, "sizes/Medium") +``` + +```py +Model: "sequential_2" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +dense_5 (Dense) (None, 64) 1856 +_________________________________________________________________ +dense_6 (Dense) (None, 64) 4160 +_________________________________________________________________ +dense_7 (Dense) (None, 64) 4160 +_________________________________________________________________ +dense_8 (Dense) (None, 1) 65 +================================================================= +Total params: 10,241 +Trainable params: 10,241 +Non-trainable params: 0 +_________________________________________________________________ +WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0039s vs `on_train_batch_end` time: 0.0548s). Check your callbacks. + +Epoch: 0, accuracy:0.5027, binary_crossentropy:0.6936, loss:0.6936, val_accuracy:0.5150, val_binary_crossentropy:0.6758, val_loss:0.6758, +.................................................................................................... +Epoch: 100, accuracy:0.7075, binary_crossentropy:0.5382, loss:0.5382, val_accuracy:0.6670, val_binary_crossentropy:0.6027, val_loss:0.6027, +.................................................................................................... +Epoch: 200, accuracy:0.7705, binary_crossentropy:0.4498, loss:0.4498, val_accuracy:0.6200, val_binary_crossentropy:0.6833, val_loss:0.6833, +................................................................... + +``` + +### Large model + +As an exercise, you can create an even larger model, and see how quickly it begins overfitting. 
Next, let's add to this benchmark a network that has much more capacity, far more than the problem would warrant: + +```py +large_model = tf.keras.Sequential([ + layers.Dense(512, activation='elu', input_shape=(FEATURES,)), + layers.Dense(512, activation='elu'), + layers.Dense(512, activation='elu'), + layers.Dense(512, activation='elu'), + layers.Dense(1) +]) +``` + +And, again, train the model using the same data: + +```py +size_histories['large'] = compile_and_fit(large_model, "sizes/large") +``` + +```py +Model: "sequential_3" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +dense_9 (Dense) (None, 512) 14848 +_________________________________________________________________ +dense_10 (Dense) (None, 512) 262656 +_________________________________________________________________ +dense_11 (Dense) (None, 512) 262656 +_________________________________________________________________ +dense_12 (Dense) (None, 512) 262656 +_________________________________________________________________ +dense_13 (Dense) (None, 1) 513 +================================================================= +Total params: 803,329 +Trainable params: 803,329 +Non-trainable params: 0 +_________________________________________________________________ +WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0041s vs `on_train_batch_end` time: 0.0613s). Check your callbacks. + +Epoch: 0, accuracy:0.5072, binary_crossentropy:0.8249, loss:0.8249, val_accuracy:0.4810, val_binary_crossentropy:0.6884, val_loss:0.6884, +.................................................................................................... +Epoch: 100, accuracy:1.0000, binary_crossentropy:0.0025, loss:0.0025, val_accuracy:0.6590, val_binary_crossentropy:1.8242, val_loss:1.8242, +.................................................................................................... +Epoch: 200, accuracy:1.0000, binary_crossentropy:0.0001, loss:0.0001, val_accuracy:0.6590, val_binary_crossentropy:2.5014, val_loss:2.5014, +...................... + +``` + +### Plot the training and validation losses + +The solid lines show the training loss, and the dashed lines show the validation loss (remember: a lower validation loss indicates a better model). + +While building a larger model gives it more power, if this power is not constrained somehow it can easily overfit to the training set. + +In this example, typically, only the `"Tiny"` model manages to avoid overfitting altogether, and each of the larger models overfit the data more quickly. This becomes so severe for the `"large"` model that you need to switch the plot to a log-scale to really see what's happening. + +This is apparent if you plot and compare the validation metrics to the training metrics. + +* It's normal for there to be a small difference. +* If both metrics are moving in the same direction, everything is fine. +* If the validation metric begins to stagnate while the training metric continues to improve, you are probably close to overfitting. +* If the validation metric is going in the wrong direction, the model is clearly overfitting. 
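+
+To make the bullet points above concrete, here is a small sketch (not from the original notebook) that prints the final gap between validation and training cross-entropy for each run recorded in `size_histories`; the larger the gap, the more that model has overfit:
+
+```py
+for name, history in size_histories.items():
+    h = history.history
+    gap = h['val_binary_crossentropy'][-1] - h['binary_crossentropy'][-1]
+    print('{:>8}: final val-train gap = {:.3f}'.format(name, gap))
+```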
+
+```py
+plotter.plot(size_histories)
+a = plt.xscale('log')
+plt.xlim([5, max(plt.xlim())])
+plt.ylim([0.5, 0.7])
+plt.xlabel("Epochs [Log Scale]")
+```
+
+```py
+Text(0.5, 0, 'Epochs [Log Scale]')
+
+```
+
+![png](img/4c173dbd57644fa57c04cf1d62ca75e4.png)
+
+**Note:** All the above training runs used the [`callbacks.EarlyStopping`](https://tensorflow.google.cn/api_docs/python/tf/keras/callbacks/EarlyStopping) to end the training once it was clear the model was not making progress.
+
+### View in TensorBoard
+
+These models all wrote TensorBoard logs during training.
+
+Open an embedded TensorBoard viewer inside a notebook:
+
+```py
+# Load the TensorBoard notebook extension
+%load_ext tensorboard
+
+# Open an embedded TensorBoard viewer
+%tensorboard --logdir {logdir}/sizes
+```
+
+You can view the [results of a previous run](https://tensorboard.dev/experiment/vW7jmmF9TmKmy3rbheMQpw/#scalars&_smoothingWeight=0.97) of this notebook on [TensorBoard.dev](https://tensorboard.dev/).
+
+TensorBoard.dev is a managed experience for hosting, tracking, and sharing ML experiments with everyone.
+
+If you want to share TensorBoard results you can upload the logs to [TensorBoard.dev](https://tensorboard.dev/) by copying the following into a code-cell.
+
+**Note:** This step requires a Google account.
+
+```py
+tensorboard dev upload --logdir {logdir}/sizes
+
+```
+
+**Caution:** This command does not terminate. It's designed to continuously upload the results of long-running experiments. Once your data is uploaded you need to stop it using the "interrupt execution" option in your notebook tool.
+
+## Strategies to prevent overfitting
+
+Before getting into the content of this section copy the training logs from the `"Tiny"` model above, to use as a baseline for comparison.
+
+```py
+shutil.rmtree(logdir/'regularizers/Tiny', ignore_errors=True)
+shutil.copytree(logdir/'sizes/Tiny', logdir/'regularizers/Tiny')
+```
+
+```py
+PosixPath('/tmp/tmp9n203dpq/tensorboard_logs/regularizers/Tiny')
+
+```
+
+```py
+regularizer_histories = {}
+regularizer_histories['Tiny'] = size_histories['Tiny']
+```
+
+### Add weight regularization
+
+You may be familiar with Occam's Razor principle: given two explanations for something, the explanation most likely to be correct is the "simplest" one, the one that makes the least amount of assumptions. This also applies to the models learned by neural networks: given some training data and a network architecture, there are multiple sets of weights values (multiple models) that could explain the data, and simpler models are less likely to overfit than complex ones.
+
+A "simple model" in this context is a model where the distribution of parameter values has less entropy (or a model with fewer parameters altogether, as we saw in the section above). Thus a common way to mitigate overfitting is to put constraints on the complexity of a network by forcing its weights only to take small values, which makes the distribution of weight values more "regular". This is called "weight regularization", and it is done by adding to the loss function of the network a cost associated with having large weights. This cost comes in two flavors:
+
+* [L1 regularization](https://developers.google.cn/machine-learning/glossary/#L1_regularization), where the cost added is proportional to the absolute value of the weights coefficients (i.e. to what is called the "L1 norm" of the weights).
+
+* [L2 regularization](https://developers.google.cn/machine-learning/glossary/#L2_regularization), where the cost added is proportional to the square of the value of the weights coefficients (i.e. to what is called the squared "L2 norm" of the weights). L2 regularization is also called weight decay in the context of neural networks. Don't let the different name confuse you: weight decay is mathematically the exact same as L2 regularization.
+
+L1 regularization pushes weights towards exactly zero, encouraging a sparse model. L2 regularization will penalize the weights parameters without making them sparse, since the penalty goes to zero for small weights; this is one reason why L2 is more common.
+
+In [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras), weight regularization is added by passing weight regularizer instances to layers as keyword arguments. Let's add L2 weight regularization now.
+
+```py
+l2_model = tf.keras.Sequential([
+    layers.Dense(512, activation='elu',
+                 kernel_regularizer=regularizers.l2(0.001),
+                 input_shape=(FEATURES,)),
+    layers.Dense(512, activation='elu',
+                 kernel_regularizer=regularizers.l2(0.001)),
+    layers.Dense(512, activation='elu',
+                 kernel_regularizer=regularizers.l2(0.001)),
+    layers.Dense(512, activation='elu',
+                 kernel_regularizer=regularizers.l2(0.001)),
+    layers.Dense(1)
+])
+
+regularizer_histories['l2'] = compile_and_fit(l2_model, "regularizers/l2")
+```
+
+```py
+Model: "sequential_4"
+_________________________________________________________________
+Layer (type)                 Output Shape              Param #
+=================================================================
+dense_14 (Dense)             (None, 512)               14848
+_________________________________________________________________
+dense_15 (Dense)             (None, 512)               262656
+_________________________________________________________________
+dense_16 (Dense)             (None, 512)               262656
+_________________________________________________________________
+dense_17 (Dense)             (None, 512)               262656
+_________________________________________________________________
+dense_18 (Dense)             (None, 1)                 513
+=================================================================
+Total params: 803,329
+Trainable params: 803,329
+Non-trainable params: 0
+_________________________________________________________________
+WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0040s vs `on_train_batch_end` time: 0.0613s). Check your callbacks.
+
+Epoch: 0, accuracy:0.5087, binary_crossentropy:0.8160, loss:2.3363, val_accuracy:0.4770, val_binary_crossentropy:0.6979, val_loss:2.1441,
+....................................................................................................
+Epoch: 100, accuracy:0.6607, binary_crossentropy:0.5920, loss:0.6163, val_accuracy:0.6530, val_binary_crossentropy:0.5831, val_loss:0.6076,
+....................................................................................................
+Epoch: 200, accuracy:0.6820, binary_crossentropy:0.5789, loss:0.6033, val_accuracy:0.6690, val_binary_crossentropy:0.5799, val_loss:0.6044,
+....................................................................................................
+Epoch: 300, accuracy:0.6865, binary_crossentropy:0.5696, loss:0.5947, val_accuracy:0.6360, val_binary_crossentropy:0.5839, val_loss:0.6088,
+....................................................................................................
+Epoch: 400, accuracy:0.6908, binary_crossentropy:0.5639, loss:0.5908, val_accuracy:0.6840, val_binary_crossentropy:0.5898, val_loss:0.6167,
+..........................................
+
+```
+
+`l2(0.001)` means that every coefficient in the weight matrix of the layer will add `0.001 * weight_coefficient_value**2` to the total **loss** of the network.
+
+That is why we're monitoring the `binary_crossentropy` directly: it doesn't have this regularization component mixed in.
+
+So, that same `"Large"` model with an `L2` regularization penalty performs much better:
+
+```py
+plotter.plot(regularizer_histories)
+plt.ylim([0.5, 0.7])
+```
+
+```py
+(0.5, 0.7)
+
+```
+
+![png](img/87e59b9663f1f875cba8bbc04b3ec8d7.png)
+
+As you can see, the `"L2"` regularized model is now much more competitive with the `"Tiny"` model. This `"L2"` model is also much more resistant to overfitting than the `"Large"` model it was based on, despite having the same number of parameters.
+
+#### More info
+
+There are two important things to note about this sort of regularization.
+
+**First:** if you are writing your own training loop, then you need to be sure to ask the model for its regularization losses.
+
+```py
+result = l2_model(features)
+regularization_loss=tf.add_n(l2_model.losses)
+```
+
+**Second:** This implementation works by adding the weight penalties to the model's loss, and then applying a standard optimization procedure after that.
+
+There is a second approach that instead only runs the optimizer on the raw loss, and then while applying the calculated step the optimizer also applies some weight decay. This "Decoupled Weight Decay" is seen in optimizers like `optimizers.FTRL` and [`optimizers.AdamW`](https://tensorflow.google.cn/addons/api_docs/python/tfa/optimizers/AdamW).
+
+### Add dropout
+
+Dropout is one of the most effective and most commonly used regularization techniques for neural networks, developed by Hinton and his students at the University of Toronto.
+
+The intuitive explanation for dropout is that because individual nodes in the network cannot rely on the output of the others, each node must output features that are useful on their own.
+
+Dropout, applied to a layer, consists of randomly "dropping out" (i.e. setting to zero) a number of output features of the layer during training. Let's say a given layer would normally have returned a vector [0.2, 0.5, 1.3, 0.8, 1.1] for a given input sample during training; after applying dropout, this vector will have a few zero entries distributed at random, e.g. [0, 0.5, 1.3, 0, 1.1].
+
+The "dropout rate" is the fraction of the features that are being zeroed-out; it is usually set between 0.2 and 0.5. At test time, no units are dropped out, and instead the layer's output values are scaled down by a factor equal to the dropout rate, so as to balance for the fact that more units are active than at training time.
+
+In [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) you can introduce dropout in a network via the Dropout layer, which gets applied to the output of the layer right before.
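+
+A quick demonstration (not in the original text): note that Keras actually implements "inverted dropout", scaling the surviving activations *up* by `1/(1 - rate)` during training so that nothing needs to be rescaled at inference time; the net effect is equivalent to the description above.
+
+```py
+demo_layer = tf.keras.layers.Dropout(0.5)
+x = tf.ones([1, 8])
+print(demo_layer(x, training=True))   # roughly half zeros, survivors scaled to 2.0
+print(demo_layer(x, training=False))  # identity: all ones
+```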
+ +Let's add two Dropout layers in our network to see how well they do at reducing overfitting: + +```py +dropout_model = tf.keras.Sequential([ + layers.Dense(512, activation='elu', input_shape=(FEATURES,)), + layers.Dropout(0.5), + layers.Dense(512, activation='elu'), + layers.Dropout(0.5), + layers.Dense(512, activation='elu'), + layers.Dropout(0.5), + layers.Dense(512, activation='elu'), + layers.Dropout(0.5), + layers.Dense(1) +]) + +regularizer_histories['dropout'] = compile_and_fit(dropout_model, "regularizers/dropout") +``` + +```py +Model: "sequential_5" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +dense_19 (Dense) (None, 512) 14848 +_________________________________________________________________ +dropout (Dropout) (None, 512) 0 +_________________________________________________________________ +dense_20 (Dense) (None, 512) 262656 +_________________________________________________________________ +dropout_1 (Dropout) (None, 512) 0 +_________________________________________________________________ +dense_21 (Dense) (None, 512) 262656 +_________________________________________________________________ +dropout_2 (Dropout) (None, 512) 0 +_________________________________________________________________ +dense_22 (Dense) (None, 512) 262656 +_________________________________________________________________ +dropout_3 (Dropout) (None, 512) 0 +_________________________________________________________________ +dense_23 (Dense) (None, 1) 513 +================================================================= +Total params: 803,329 +Trainable params: 803,329 +Non-trainable params: 0 +_________________________________________________________________ +WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0040s vs `on_train_batch_end` time: 0.0632s). Check your callbacks. + +Epoch: 0, accuracy:0.5073, binary_crossentropy:0.7984, loss:0.7984, val_accuracy:0.5200, val_binary_crossentropy:0.6761, val_loss:0.6761, +.................................................................................................... +Epoch: 100, accuracy:0.6576, binary_crossentropy:0.5965, loss:0.5965, val_accuracy:0.6730, val_binary_crossentropy:0.5833, val_loss:0.5833, +.................................................................................................... +Epoch: 200, accuracy:0.6861, binary_crossentropy:0.5554, loss:0.5554, val_accuracy:0.6790, val_binary_crossentropy:0.5830, val_loss:0.5830, +.................................................................................................... +Epoch: 300, accuracy:0.7280, binary_crossentropy:0.5102, loss:0.5102, val_accuracy:0.6860, val_binary_crossentropy:0.6088, val_loss:0.6088, +................ + +``` + +```py +plotter.plot(regularizer_histories) +plt.ylim([0.5, 0.7]) +``` + +```py +(0.5, 0.7) + +``` + +![png](img/b5a9ca25aab20c2b09a25fdab4c2b92b.png) + +It's clear from this plot that both of these regularization approaches improve the behavior of the `"Large"` model. But this still doesn't beat even the `"Tiny"` baseline. + +Next try them both, together, and see if that does better. 
+ +### Combined L2 + dropout + +```py +combined_model = tf.keras.Sequential([ + layers.Dense(512, kernel_regularizer=regularizers.l2(0.0001), + activation='elu', input_shape=(FEATURES,)), + layers.Dropout(0.5), + layers.Dense(512, kernel_regularizer=regularizers.l2(0.0001), + activation='elu'), + layers.Dropout(0.5), + layers.Dense(512, kernel_regularizer=regularizers.l2(0.0001), + activation='elu'), + layers.Dropout(0.5), + layers.Dense(512, kernel_regularizer=regularizers.l2(0.0001), + activation='elu'), + layers.Dropout(0.5), + layers.Dense(1) +]) + +regularizer_histories['combined'] = compile_and_fit(combined_model, "regularizers/combined") +``` + +```py +Model: "sequential_6" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +dense_24 (Dense) (None, 512) 14848 +_________________________________________________________________ +dropout_4 (Dropout) (None, 512) 0 +_________________________________________________________________ +dense_25 (Dense) (None, 512) 262656 +_________________________________________________________________ +dropout_5 (Dropout) (None, 512) 0 +_________________________________________________________________ +dense_26 (Dense) (None, 512) 262656 +_________________________________________________________________ +dropout_6 (Dropout) (None, 512) 0 +_________________________________________________________________ +dense_27 (Dense) (None, 512) 262656 +_________________________________________________________________ +dropout_7 (Dropout) (None, 512) 0 +_________________________________________________________________ +dense_28 (Dense) (None, 1) 513 +================================================================= +Total params: 803,329 +Trainable params: 803,329 +Non-trainable params: 0 +_________________________________________________________________ +WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0046s vs `on_train_batch_end` time: 0.0686s). Check your callbacks. + +Epoch: 0, accuracy:0.5034, binary_crossentropy:0.8003, loss:0.9588, val_accuracy:0.5040, val_binary_crossentropy:0.6752, val_loss:0.8330, +.................................................................................................... +Epoch: 100, accuracy:0.6514, binary_crossentropy:0.6067, loss:0.6373, val_accuracy:0.6470, val_binary_crossentropy:0.5868, val_loss:0.6173, +.................................................................................................... +Epoch: 200, accuracy:0.6664, binary_crossentropy:0.5900, loss:0.6158, val_accuracy:0.6510, val_binary_crossentropy:0.5795, val_loss:0.6053, +.................................................................................................... +Epoch: 300, accuracy:0.6690, binary_crossentropy:0.5822, loss:0.6104, val_accuracy:0.6940, val_binary_crossentropy:0.5611, val_loss:0.5892, +.................................................................................................... +Epoch: 400, accuracy:0.6773, binary_crossentropy:0.5764, loss:0.6063, val_accuracy:0.6820, val_binary_crossentropy:0.5539, val_loss:0.5839, +.................................................................................................... 
+Epoch: 500, accuracy:0.6840, binary_crossentropy:0.5695, loss:0.6012, val_accuracy:0.6870, val_binary_crossentropy:0.5500, val_loss:0.5818,
+....................................................................................................
+Epoch: 600, accuracy:0.6821, binary_crossentropy:0.5692, loss:0.6023, val_accuracy:0.6850, val_binary_crossentropy:0.5456, val_loss:0.5787,
+....................................................................................................
+Epoch: 700, accuracy:0.6836, binary_crossentropy:0.5678, loss:0.6021, val_accuracy:0.6870, val_binary_crossentropy:0.5502, val_loss:0.5846,
+....................................................................................................
+Epoch: 800, accuracy:0.6908, binary_crossentropy:0.5585, loss:0.5940, val_accuracy:0.7000, val_binary_crossentropy:0.5424, val_loss:0.5780,
+....................................................................................................
+Epoch: 900, accuracy:0.6931, binary_crossentropy:0.5583, loss:0.5948, val_accuracy:0.6860, val_binary_crossentropy:0.5447, val_loss:0.5813,
+....................................................................................................
+Epoch: 1000, accuracy:0.6919, binary_crossentropy:0.5563, loss:0.5940, val_accuracy:0.7100, val_binary_crossentropy:0.5422, val_loss:0.5799,
+....................................................................................................
+Epoch: 1100, accuracy:0.6914, binary_crossentropy:0.5545, loss:0.5935, val_accuracy:0.6940, val_binary_crossentropy:0.5375, val_loss:0.5765,
+....................................................................................................
+Epoch: 1200, accuracy:0.7012, binary_crossentropy:0.5466, loss:0.5867, val_accuracy:0.6970, val_binary_crossentropy:0.5429, val_loss:0.5831,
+....................................................................................................
+Epoch: 1300, accuracy:0.6939, binary_crossentropy:0.5491, loss:0.5903, val_accuracy:0.6950, val_binary_crossentropy:0.5477, val_loss:0.5890,
+..
+
+```
+
+```py
+plotter.plot(regularizer_histories)
+plt.ylim([0.5, 0.7])
+```
+
+```py
+(0.5, 0.7)
+
+```
+
+![png](img/77a7189086e1a02a870dbf630c311e5d.png)
+
+This model with the `"Combined"` regularization is obviously the best one so far.
+
+### View in TensorBoard
+
+These models also recorded TensorBoard logs.
+
+To open an embedded tensorboard viewer inside a notebook, copy the following into a code-cell:
+
+```py
+%tensorboard --logdir {logdir}/regularizers
+```
+
+You can view the [results of a previous run](https://tensorboard.dev/experiment/fGInKDo8TXes1z7HQku9mw/#scalars&_smoothingWeight=0.97) of this notebook on [TensorBoard.dev](https://tensorboard.dev/).
+
+This was uploaded with:
+
+```py
+tensorboard dev upload --logdir {logdir}/regularizers
+
+```
+
+## Conclusions
+
+To recap: here are the most common ways to prevent overfitting in neural networks:
+
+* Get more training data.
+* Reduce the capacity of the network.
+* Add weight regularization.
+* Add dropout.
+
+Two important approaches not covered in this guide are:
+
+* data-augmentation
+* batch normalization
+
+Remember that each method can help on its own, but often combining them can be even more effective.
+
+```py
+# MIT License
+#
+# Copyright (c) 2017 François Chollet
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/011.md b/Tensorflow/TensorFlow2.0/011.md
new file mode 100644
index 00000000..616ca1de
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/011.md
@@ -0,0 +1,610 @@
+# Save and restore models
+
+> Original: [https://tensorflow.google.cn/tutorials/keras/save_and_load](https://tensorflow.google.cn/tutorials/keras/save_and_load)
+
+**Note:** This document was translated by the TensorFlow community. Because community translations are best-effort, there is no guarantee that they are accurate or reflect the latest [official English documentation](https://tensorflow.google.cn/?hl=en). If you have suggestions to improve this translation, please submit a pull request to the [tensorflow/docs](https://github.com/tensorflow/docs) GitHub repository. To volunteer to write or review translations, join the [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn).
+
+Model progress can be saved during and after training. This means a model can resume where it left off and avoid long training times. Saving also means you can share your model, and others can recreate your work. When publishing research models and techniques, most machine learning practitioners share:
+
+* the code used to create the model, and
+* the trained weights and parameters for the model.
+
+Sharing this data helps others understand how the model works and lets them try it themselves on new data.
+
+Caution: Be careful with untrusted code; TensorFlow models are code. See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md) for details.
+
+### Options
+
+There are many ways to save TensorFlow models, depending on the API you are using. This guide uses [tf.keras](https://tensorflow.google.cn/guide/keras), a high-level API for building and training models in TensorFlow. For other approaches, see the TensorFlow [Save and Restore](https://tensorflow.google.cn/guide/saved_model) guide or [Saving in eager](https://tensorflow.google.cn/guide/eager#object-based_saving).
+
+## Setup
+
+### Install and import
+
+Install and import TensorFlow and its dependencies:
+
+```py
+pip install -q pyyaml h5py  # Required to save models in HDF5 format
+
+```
+
+```py
+WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+
+```
+
+```py
+import os
+
+import tensorflow as tf
+from tensorflow import keras
+
+print(tf.version.VERSION)
+```
+
+```py
+2.3.0
+
+```
+
+### Get an example dataset
+
+To demonstrate how to save and load weights, you'll use the [MNIST dataset](http://yann.lecun.com/exdb/mnist/).
To speed up these runs, use only the first 1000 examples:
+
+```py
+(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
+
+train_labels = train_labels[:1000]
+test_labels = test_labels[:1000]
+
+train_images = train_images[:1000].reshape(-1, 28 * 28) / 255.0
+test_images = test_images[:1000].reshape(-1, 28 * 28) / 255.0
+```
+
+### Define a model
+
+Start by building a simple sequential model:
+
+```py
+# Define a simple sequential model
+def create_model():
+  model = tf.keras.models.Sequential([
+    keras.layers.Dense(512, activation='relu', input_shape=(784,)),
+    keras.layers.Dropout(0.2),
+    keras.layers.Dense(10)
+  ])
+
+  model.compile(optimizer='adam',
+                loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
+                metrics=['accuracy'])
+
+  return model
+
+# Create a basic model instance
+model = create_model()
+
+# Display the model's architecture
+model.summary()
+```
+
+```py
+Model: "sequential"
+_________________________________________________________________
+Layer (type)                 Output Shape              Param #
+=================================================================
+dense (Dense)                (None, 512)               401920
+_________________________________________________________________
+dropout (Dropout)            (None, 512)               0
+_________________________________________________________________
+dense_1 (Dense)              (None, 10)                5130
+=================================================================
+Total params: 407,050
+Trainable params: 407,050
+Non-trainable params: 0
+_________________________________________________________________
+
+```
+
+## Save checkpoints during training
+
+You can use a trained model without having to retrain it, or pick up training where you left off in case the training process was interrupted. The [`tf.keras.callbacks.ModelCheckpoint`](https://tensorflow.google.cn/api_docs/python/tf/keras/callbacks/ModelCheckpoint) callback allows you to continually save the model both *during* training and at its *end*.
+
+### Checkpoint callback usage
+
+Create a [`tf.keras.callbacks.ModelCheckpoint`](https://tensorflow.google.cn/api_docs/python/tf/keras/callbacks/ModelCheckpoint) callback that saves weights only during training:
+
+```py
+checkpoint_path = "training_1/cp.ckpt"
+checkpoint_dir = os.path.dirname(checkpoint_path)
+
+# Create a callback that saves the model's weights
+cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
+                                                 save_weights_only=True,
+                                                 verbose=1)
+
+# Train the model with the new callback
+model.fit(train_images,
+          train_labels,
+          epochs=10,
+          validation_data=(test_images,test_labels),
+          callbacks=[cp_callback])  # Pass callback to training
+
+# This may generate warnings related to saving the state of the optimizer.
+# These warnings (and similar warnings throughout this notebook)
+# are in place to discourage outdated usage, and can be ignored.
+```
+
+```py
+Epoch 1/10
+29/32 [==========================>...] - ETA: 0s - loss: 1.1844 - accuracy: 0.6595
+Epoch 00001: saving model to training_1/cp.ckpt
+32/32 [==============================] - 0s 8ms/step - loss: 1.1300 - accuracy: 0.6770 - val_loss: 0.7189 - val_accuracy: 0.7780
+Epoch 2/10
+30/32 [===========================>..] - ETA: 0s - loss: 0.4232 - accuracy: 0.8792
+Epoch 00002: saving model to training_1/cp.ckpt
+32/32 [==============================] - 0s 5ms/step - loss: 0.4216 - accuracy: 0.8800 - val_loss: 0.5160 - val_accuracy: 0.8470
+Epoch 3/10
+29/32 [==========================>...] - ETA: 0s - loss: 0.2964 - accuracy: 0.9149
+Epoch 00003: saving model to training_1/cp.ckpt
+32/32 [==============================] - 0s 4ms/step - loss: 0.2988 - accuracy: 0.9170 - val_loss: 0.4753 - val_accuracy: 0.8560
+Epoch 4/10
+29/32 [==========================>...] - ETA: 0s - loss: 0.2057 - accuracy: 0.9494
+Epoch 00004: saving model to training_1/cp.ckpt
+32/32 [==============================] - 0s 4ms/step - loss: 0.2086 - accuracy: 0.9500 - val_loss: 0.4375 - val_accuracy: 0.8600
+Epoch 5/10
+29/32 [==========================>...] - ETA: 0s - loss: 0.1512 - accuracy: 0.9666
+Epoch 00005: saving model to training_1/cp.ckpt
+32/32 [==============================] - 0s 4ms/step - loss: 0.1488 - accuracy: 0.9680 - val_loss: 0.4275 - val_accuracy: 0.8660
+Epoch 6/10
+30/32 [===========================>..] - ETA: 0s - loss: 0.1130 - accuracy: 0.9823
+Epoch 00006: saving model to training_1/cp.ckpt
+32/32 [==============================] - 0s 4ms/step - loss: 0.1134 - accuracy: 0.9820 - val_loss: 0.4309 - val_accuracy: 0.8630
+Epoch 7/10
+29/32 [==========================>...] - ETA: 0s - loss: 0.0829 - accuracy: 0.9925
+Epoch 00007: saving model to training_1/cp.ckpt
+32/32 [==============================] - 0s 4ms/step - loss: 0.0838 - accuracy: 0.9920 - val_loss: 0.4079 - val_accuracy: 0.8680
+Epoch 8/10
+29/32 [==========================>...] - ETA: 0s - loss: 0.0624 - accuracy: 0.9946
+Epoch 00008: saving model to training_1/cp.ckpt
+32/32 [==============================] - 0s 4ms/step - loss: 0.0627 - accuracy: 0.9950 - val_loss: 0.4176 - val_accuracy: 0.8690
+Epoch 9/10
+29/32 [==========================>...] - ETA: 0s - loss: 0.0520 - accuracy: 0.9946
+Epoch 00009: saving model to training_1/cp.ckpt
+32/32 [==============================] - 0s 4ms/step - loss: 0.0508 - accuracy: 0.9950 - val_loss: 0.4600 - val_accuracy: 0.8450
+Epoch 10/10
+29/32 [==========================>...] - ETA: 0s - loss: 0.0462 - accuracy: 0.9968
+Epoch 00010: saving model to training_1/cp.ckpt
+32/32 [==============================] - 0s 4ms/step - loss: 0.0459 - accuracy: 0.9970 - val_loss: 0.4378 - val_accuracy: 0.8660
+
+```
+
+This creates a single collection of TensorFlow checkpoint files that are updated at the end of each epoch:
+
+```py
+ls {checkpoint_dir}
+
+```
+
+```py
+checkpoint  cp.ckpt.data-00000-of-00001  cp.ckpt.index
+
+```
+
+Create a new, untrained model. When restoring a model from weights only, you must have a model with the same architecture as the original. Since the architecture is the same, you can share weights even though it is a different *instance* of the model. Now rebuild a fresh, untrained model and evaluate it on the test set. An untrained model will perform at chance level (about 10% accuracy):
+
+```py
+# Create a basic model instance
+model = create_model()
+
+# Evaluate the model
+loss, acc = model.evaluate(test_images, test_labels, verbose=2)
+print("Untrained model, accuracy: {:5.2f}%".format(100*acc))
+```
+
+```py
+32/32 - 0s - loss: 2.3734 - accuracy: 0.0990
+Untrained model, accuracy:  9.90%
+
+```
+
+Then load the weights from the checkpoint and re-evaluate:
+
+```py
+# Load the weights
+model.load_weights(checkpoint_path)
+
+# Re-evaluate the model
+loss,acc = model.evaluate(test_images, test_labels, verbose=2)
+print("Restored model, accuracy: {:5.2f}%".format(100*acc))
+```
+
+```py
+32/32 - 0s - loss: 0.4378 - accuracy: 0.8660
+Restored model, accuracy: 86.60%
+
+```
+
+### Checkpoint callback options
+
+The callback provides several options to give the checkpoints unique names and to adjust the checkpointing frequency.
+
+Train a new model, and save uniquely named checkpoints once every five epochs:
+
+```py
+# Include the epoch in the file name (uses `str.format`)
+checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"
+checkpoint_dir = os.path.dirname(checkpoint_path)
+
+# Create a callback that saves the model's weights every 5 epochs
+cp_callback = tf.keras.callbacks.ModelCheckpoint(
+    filepath=checkpoint_path,
+    verbose=1,
+    save_weights_only=True,
+    period=5)
+
+# Create a new model instance
+model = create_model()
+
+# Save the weights using the `checkpoint_path` format
+model.save_weights(checkpoint_path.format(epoch=0))
+
+# Train the model with the new callback
+model.fit(train_images,
+          train_labels,
+          epochs=50,
+          callbacks=[cp_callback],
+          validation_data=(test_images,test_labels),
+          verbose=0)
+```
+
+```py
+WARNING:tensorflow:`period` argument is deprecated. Please use `save_freq` to specify the frequency in number of batches seen.
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.iter
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_1
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_2
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.decay
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.learning_rate
+WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.
+
+Epoch 00005: saving model to training_2/cp-0005.ckpt
+
+Epoch 00010: saving model to training_2/cp-0010.ckpt
+
+Epoch 00015: saving model to training_2/cp-0015.ckpt
+
+Epoch 00020: saving model to training_2/cp-0020.ckpt
+
+Epoch 00025: saving model to training_2/cp-0025.ckpt
+
+Epoch 00030: saving model to training_2/cp-0030.ckpt
+
+Epoch 00035: saving model to training_2/cp-0035.ckpt
+
+Epoch 00040: saving model to training_2/cp-0040.ckpt
+
+Epoch 00045: saving model to training_2/cp-0045.ckpt
+
+Epoch 00050: saving model to training_2/cp-0050.ckpt
+
+```
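+
+The warning above notes that `period` is deprecated. As a sketch of the equivalent using `save_freq` (an assumption tied to this setup: 1000 training examples at the default batch size of 32 gives 32 batches per epoch, and `save_freq` counts *batches*, not epochs):
+
+```py
+# Save every 5 epochs, expressed in batches: 5 epochs * 32 batches per epoch
+cp_callback = tf.keras.callbacks.ModelCheckpoint(
+    filepath=checkpoint_path,
+    verbose=1,
+    save_weights_only=True,
+    save_freq=5 * 32)
+```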
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.iter +WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_1 +WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_2 +WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.decay +WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.learning_rate +WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details. + +Epoch 00005: saving model to training_2/cp-0005.ckpt + +Epoch 00010: saving model to training_2/cp-0010.ckpt + +Epoch 00015: saving model to training_2/cp-0015.ckpt + +Epoch 00020: saving model to training_2/cp-0020.ckpt + +Epoch 00025: saving model to training_2/cp-0025.ckpt + +Epoch 00030: saving model to training_2/cp-0030.ckpt + +Epoch 00035: saving model to training_2/cp-0035.ckpt + +Epoch 00040: saving model to training_2/cp-0040.ckpt + +Epoch 00045: saving model to training_2/cp-0045.ckpt + +Epoch 00050: saving model to training_2/cp-0050.ckpt + + + +``` + +现在查看生成的 checkpoint 并选择最新的 checkpoint : + +```py +ls {checkpoint_dir} + +``` + +```py +checkpoint cp-0025.ckpt.index +cp-0000.ckpt.data-00000-of-00001 cp-0030.ckpt.data-00000-of-00001 +cp-0000.ckpt.index cp-0030.ckpt.index +cp-0005.ckpt.data-00000-of-00001 cp-0035.ckpt.data-00000-of-00001 +cp-0005.ckpt.index cp-0035.ckpt.index +cp-0010.ckpt.data-00000-of-00001 cp-0040.ckpt.data-00000-of-00001 +cp-0010.ckpt.index cp-0040.ckpt.index +cp-0015.ckpt.data-00000-of-00001 cp-0045.ckpt.data-00000-of-00001 +cp-0015.ckpt.index cp-0045.ckpt.index +cp-0020.ckpt.data-00000-of-00001 cp-0050.ckpt.data-00000-of-00001 +cp-0020.ckpt.index cp-0050.ckpt.index +cp-0025.ckpt.data-00000-of-00001 + +``` + +```py +latest = tf.train.latest_checkpoint(checkpoint_dir) +latest +``` + +```py +'training_2/cp-0050.ckpt' + +``` + +注意: 默认的 tensorflow 格式仅保存最近的 5 个 checkpoint 。 + +如果要进行测试,请重置模型并加载最新的 checkpoint : + +```py +# 创建一个新的模型实例 +model = create_model() + +# 加载以前保存的权重 +model.load_weights(latest) + +# 重新评估模型 +loss, acc = model.evaluate(test_images, test_labels, verbose=2) +print("Restored model, accuracy: {:5.2f}%".format(100*acc)) +``` + +```py +32/32 - 0s - loss: 0.4836 - accuracy: 0.8750 +Restored model, accuracy: 87.50% + +``` + +## 这些文件是什么? 
+ +上述代码将权重存储到一个 [checkpoint](https://tensorflow.google.cn/guide/saved_model#save_and_restore_variables) 格式文件的集合中,这些文件仅包含二进制格式的训练权重。Checkpoints 包含: + +* 一个或多个包含模型权重的分片。 +* 一个索引文件,指示哪些权重存储在哪个分片中。 + +如果你只在一台机器上训练一个模型,你将得到一个带有如下后缀的分片:`.data-00000-of-00001` + +## 手动保存权重 + +上文展示了如何将权重加载到模型中。使用 [`Model.save_weights`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#save_weights) 方法手动保存权重同样简单。默认情况下,[`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras)(特别是 `save_weights`)使用扩展名为 `.ckpt` 的 TensorFlow [checkpoint](https://tensorflow.google.cn/guide/keras/checkpoints) 格式(以扩展名为 `.h5` 的 [HDF5](https://js.tensorflow.org/tutorials/import-keras.html) 格式保存权重的方法,请参阅[保存并序列化模型](https://tensorflow.google.cn/guide/keras/save_and_serialize#weights_only_saving_in_savedmodel_format)指南): + +```py +# 保存权重 +model.save_weights('./checkpoints/my_checkpoint') + +# 创建模型实例 +model = create_model() + +# 恢复权重 +model.load_weights('./checkpoints/my_checkpoint') + +# 评估模型 +loss,acc = model.evaluate(test_images, test_labels, verbose=2) +print("Restored model, accuracy: {:5.2f}%".format(100*acc)) +``` + +```py +WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.iter +WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_1 +WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_2 +WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.decay +WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.learning_rate +WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details. +32/32 - 0s - loss: 0.4836 - accuracy: 0.8750 +Restored model, accuracy: 87.50% + +``` + +## 保存整个模型 + +调用 [`model.save`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#save) 会将模型的结构、权重和训练配置保存在单个文件/文件夹中。这可以让您导出模型,以便在不访问原始 Python 代码的情况下使用它。由于优化器状态(optimizer-state)也会被恢复,您可以从中断的位置恢复训练。 + +整个模型可以以两种不同的文件格式(`SavedModel` 和 `HDF5`)进行保存。需要注意的是 TensorFlow 的 `SavedModel` 格式是 TF2.x.
中的默认文件格式。但是,模型仍可以以 `HDF5` 格式保存。下面介绍了以两种文件格式保存整个模型的更多详细信息。 + +保存完整模型会非常有用——您可以在 TensorFlow.js([Saved Model](https://tensorflow.google.cn/js/tutorials/conversion/import_saved_model), [HDF5](https://tensorflow.google.cn/js/tutorials/conversion/import_keras))加载它们,然后在 web 浏览器中训练和运行它们,或者使用 TensorFlow Lite 将它们转换为在移动设备上运行([Saved Model](https://tensorflow.google.cn/lite/convert/python_api#converting_a_savedmodel_), [HDF5](https://tensorflow.google.cn/lite/convert/python_api#converting_a_keras_model_)) + +*自定义对象(例如,子类化模型或层)在保存和加载时需要特别注意。请参阅下面的**保存自定义对象**部分 + +### SavedModel 格式 + +SavedModel 格式是序列化模型的另一种方法。以这种格式保存的模型,可以使用 [`tf.keras.models.load_model`](https://tensorflow.google.cn/api_docs/python/tf/keras/models/load_model) 还原,并且模型与 TensorFlow Serving 兼容。[SavedModel 指南](https://tensorflow.google.cn/guide/saved_model)详细介绍了如何提供/检查 SavedModel。以下部分说明了保存和还原模型的步骤。 + +```py +# 创建并训练一个新的模型实例。 +model = create_model() +model.fit(train_images, train_labels, epochs=5) + +# 将整个模型另存为 SavedModel。 +!mkdir -p saved_model +model.save('saved_model/my_model') +``` + +```py +Epoch 1/5 +32/32 [==============================] - 0s 2ms/step - loss: 1.1705 - accuracy: 0.6690 +Epoch 2/5 +32/32 [==============================] - 0s 2ms/step - loss: 0.4326 - accuracy: 0.8780 +Epoch 3/5 +32/32 [==============================] - 0s 2ms/step - loss: 0.2910 - accuracy: 0.9190 +Epoch 4/5 +32/32 [==============================] - 0s 2ms/step - loss: 0.2045 - accuracy: 0.9520 +Epoch 5/5 +32/32 [==============================] - 0s 2ms/step - loss: 0.1538 - accuracy: 0.9650 +WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/training/tracking/tracking.py:111: Model.state_updates (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version. +Instructions for updating: +This property should not be used in TensorFlow 2.0, as updates are applied automatically. +WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/training/tracking/tracking.py:111: Layer.updates (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. +Instructions for updating: +This property should not be used in TensorFlow 2.0, as updates are applied automatically. 
+INFO:tensorflow:Assets written to: saved_model/my_model/assets + +``` + +SavedModel 格式是一个包含 protobuf 二进制文件和 Tensorflow 检查点(checkpoint)的目录。检查保存的模型目录: + +```py +# my_model 文件夹 +ls saved_model + +# 包含一个 assets 文件夹,saved_model.pb,和变量文件夹。 +ls saved_model/my_model + +``` + +```py +my_model +assets saved_model.pb variables + +``` + +从保存的模型重新加载新的 Keras 模型: + +```py +new_model = tf.keras.models.load_model('saved_model/my_model') + +# 检查其架构 +new_model.summary() +``` + +```py +Model: "sequential_5" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +dense_10 (Dense) (None, 512) 401920 +_________________________________________________________________ +dropout_5 (Dropout) (None, 512) 0 +_________________________________________________________________ +dense_11 (Dense) (None, 10) 5130 +================================================================= +Total params: 407,050 +Trainable params: 407,050 +Non-trainable params: 0 +_________________________________________________________________ + +``` + +还原的模型使用与原始模型相同的参数进行编译。 尝试使用加载的模型运行评估和预测: + +```py +# 评估还原的模型 +loss, acc = new_model.evaluate(test_images, test_labels, verbose=2) +print('Restored model, accuracy: {:5.2f}%'.format(100*acc)) + +print(new_model.predict(test_images).shape) +``` + +```py +32/32 - 0s - loss: 0.4630 - accuracy: 0.0890 +Restored model, accuracy: 8.90% +(1000, 10) + +``` + +### HDF5 格式 + +Keras 使用 [HDF5](https://en.wikipedia.org/wiki/Hierarchical_Data_Format) 标准提供了一种基本的保存格式。 + +```py +# 创建并训练一个新的模型实例 +model = create_model() +model.fit(train_images, train_labels, epochs=5) + +# 将整个模型保存为 HDF5 文件。 +# '.h5' 扩展名指示应将模型保存到 HDF5。 +model.save('my_model.h5') +``` + +```py +Epoch 1/5 +32/32 [==============================] - 0s 2ms/step - loss: 1.1465 - accuracy: 0.6560 +Epoch 2/5 +32/32 [==============================] - 0s 2ms/step - loss: 0.4152 - accuracy: 0.8850 +Epoch 3/5 +32/32 [==============================] - 0s 2ms/step - loss: 0.2801 - accuracy: 0.9280 +Epoch 4/5 +32/32 [==============================] - 0s 2ms/step - loss: 0.2108 - accuracy: 0.9480 +Epoch 5/5 +32/32 [==============================] - 0s 2ms/step - loss: 0.1520 - accuracy: 0.9660 + +``` + +现在,从该文件重新创建模型: + +```py +# 重新创建完全相同的模型,包括其权重和优化程序 +new_model = tf.keras.models.load_model('my_model.h5') + +# 显示网络结构 +new_model.summary() +``` + +```py +Model: "sequential_6" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +dense_12 (Dense) (None, 512) 401920 +_________________________________________________________________ +dropout_6 (Dropout) (None, 512) 0 +_________________________________________________________________ +dense_13 (Dense) (None, 10) 5130 +================================================================= +Total params: 407,050 +Trainable params: 407,050 +Non-trainable params: 0 +_________________________________________________________________ + +``` + +检查其准确率(accuracy): + +```py +loss, acc = new_model.evaluate(test_images, test_labels, verbose=2) +print('Restored model, accuracy: {:5.2f}%'.format(100*acc)) +``` + +```py +32/32 - 0s - loss: 0.4639 - accuracy: 0.0840 +Restored model, accuracy: 8.40% + +``` + +Keras 通过检查网络结构来保存模型。这项技术可以保存一切: + +* 权重值 +* 模型的架构 +* 模型的训练配置(您传递给编译的内容) +* 优化器及其状态(如果有的话)(这使您可以在中断的地方重新开始训练) + +Keras 无法保存 `v1.x` 优化器(来自 
[`tf.compat.v1.train`](https://tensorflow.google.cn/api_docs/python/tf/compat/v1/train)),因为它们与检查点不兼容。对于 v1.x 优化器,您需要在加载后重新编译模型,而这会丢失优化器的状态。 + +### 保存自定义对象 + +如果使用的是 SavedModel 格式,则可以跳过此部分。HDF5 和 SavedModel 之间的主要区别在于,HDF5 使用对象配置保存模型结构,而 SavedModel 保存执行图。因此,SavedModel 能够保存自定义对象,例如子类化模型和自定义层,而无需原始代码。 + +要将自定义对象保存到 HDF5,必须执行以下操作: + +1. 在对象中定义一个 `get_config` 方法,以及可选的 `from_config` 类方法。 + * `get_config(self)` 返回重新创建对象所需的参数的 JSON 可序列化字典。 + * `from_config(cls, config)` 使用从 get_config 返回的 config 来创建一个新对象。默认情况下,此函数将使用 config 作为初始化 kwargs(`return cls(**config)`)。 +2. 加载模型时,将对象传递给 `custom_objects` 参数。参数必须是将字符串类名称映射到 Python 类的字典。例如,`tf.keras.models.load_model(path, custom_objects={'CustomLayer': CustomLayer})` + +有关自定义对象和 `get_config` 的示例,请参见[从头开始编写层和模型](https://tensorflow.google.cn/guide/keras/custom_layers_and_models)教程。 + +```py +# MIT License +# +# Copyright (c) 2017 François Chollet +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/012.md b/Tensorflow/TensorFlow2.0/012.md new file mode 100644 index 00000000..466f1be0 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/012.md @@ -0,0 +1,180 @@ +# Introduction to the Keras Tuner + +> 原文:[https://tensorflow.google.cn/tutorials/keras/keras_tuner](https://tensorflow.google.cn/tutorials/keras/keras_tuner) + +## Overview + +The Keras Tuner is a library that helps you pick the optimal set of hyperparameters for your TensorFlow program. The process of selecting the right set of hyperparameters for your machine learning (ML) application is called *hyperparameter tuning* or *hypertuning*. + +Hyperparameters are the variables that govern the training process and the topology of an ML model. These variables remain constant over the training process and directly impact the performance of your ML program. Hyperparameters are of two types: + +1. **Model hyperparameters** which influence model selection such as the number and width of hidden layers +2. **Algorithm hyperparameters** which influence the speed and quality of the learning algorithm such as the learning rate for Stochastic Gradient Descent (SGD) and the number of nearest neighbors for a k Nearest Neighbors (KNN) classifier + +In this tutorial, you will use the Keras Tuner to perform hypertuning for an image classification application. + +## Setup + +```py +import tensorflow as tf +from tensorflow import keras + +import IPython +``` + +Install and import the Keras Tuner.
+ +```py +!pip install -q -U keras-tuner +import kerastuner as kt +``` + +```py +WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available. +You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command. + +``` + +## Download and prepare the dataset + +In this tutorial, you will use the Keras Tuner to find the best hyperparameters for a machine learning model that classifies images of clothing from the [Fashion MNIST dataset](https://github.com/zalandoresearch/fashion-mnist). + +Load the data. + +```py +(img_train, label_train), (img_test, label_test) = keras.datasets.fashion_mnist.load_data() +``` + +```py +# Normalize pixel values between 0 and 1 +img_train = img_train.astype('float32') / 255.0 +img_test = img_test.astype('float32') / 255.0 +``` + +## Define the model + +When you build a model for hypertuning, you also define the hyperparameter search space in addition to the model architecture. The model you set up for hypertuning is called a *hypermodel*. + +You can define a hypermodel through two approaches: + +* By using a model builder function +* By subclassing the `HyperModel` class of the Keras Tuner API + +You can also use two pre-defined `HyperModel` classes - [HyperXception](https://keras-team.github.io/keras-tuner/documentation/hypermodels/#hyperxception-class) and [HyperResNet](https://keras-team.github.io/keras-tuner/documentation/hypermodels/#hyperresnet-class) for computer vision applications. + +In this tutorial, you use a model builder function to define the image classification model. The model builder function returns a compiled model and uses hyperparameters you define inline to hypertune the model. + +```py +def model_builder(hp): + model = keras.Sequential() + model.add(keras.layers.Flatten(input_shape=(28, 28))) + + # Tune the number of units in the first Dense layer + # Choose an optimal value between 32-512 + hp_units = hp.Int('units', min_value = 32, max_value = 512, step = 32) + model.add(keras.layers.Dense(units = hp_units, activation = 'relu')) + model.add(keras.layers.Dense(10)) + + # Tune the learning rate for the optimizer + # Choose an optimal value from 0.01, 0.001, or 0.0001 + hp_learning_rate = hp.Choice('learning_rate', values = [1e-2, 1e-3, 1e-4]) + + model.compile(optimizer = keras.optimizers.Adam(learning_rate = hp_learning_rate), + loss = keras.losses.SparseCategoricalCrossentropy(from_logits = True), + metrics = ['accuracy']) + + return model +``` + +## Instantiate the tuner and perform hypertuning + +Instantiate the tuner to perform the hypertuning. The Keras Tuner has four tuners available - `RandomSearch`, `Hyperband`, `BayesianOptimization`, and `Sklearn`. In this tutorial, you use the [Hyperband](https://arxiv.org/pdf/1603.06560.pdf) tuner. + +To instantiate the Hyperband tuner, you must specify the hypermodel, the `objective` to optimize and the maximum number of epochs to train (`max_epochs`). + +```py +tuner = kt.Hyperband(model_builder, + objective = 'val_accuracy', + max_epochs = 10, + factor = 3, + directory = 'my_dir', + project_name = 'intro_to_kt') +``` + +The Hyperband tuning algorithm uses adaptive resource allocation and early-stopping to quickly converge on a high-performing model. This is done using a sports championship style bracket. The algorithm trains a large number of models for a few epochs and carries forward only the top-performing half of models to the next round. 
Hyperband determines the number of models to train in a bracket by computing `1 + log_factor(max_epochs)` (the logarithm of `max_epochs` to base `factor`) and rounding it up to the nearest integer. + +Before running the hyperparameter search, define a callback to clear the training outputs at the end of every training run. + +```py +class ClearTrainingOutput(tf.keras.callbacks.Callback): + def on_train_end(*args, **kwargs): + IPython.display.clear_output(wait = True) +``` + +Run the hyperparameter search. The arguments for the search method are the same as those used for `tf.keras.Model.fit` in addition to the callback above. + +```py +tuner.search(img_train, label_train, epochs = 10, validation_data = (img_test, label_test), callbacks = [ClearTrainingOutput()]) + +# Get the optimal hyperparameters +best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0] + +print(f""" +The hyperparameter search is complete. The optimal number of units in the first densely-connected +layer is {best_hps.get('units')} and the optimal learning rate for the optimizer +is {best_hps.get('learning_rate')}. +""") +``` + + + +```py +Epoch 3/4 + 911/1875 [=============>................] - ETA: 1s - loss: 0.5757 - accuracy: 0.8040 + +``` + +To finish this tutorial, retrain the model with the optimal hyperparameters from the search. + +```py +# Build the model with the optimal hyperparameters and train it on the data +model = tuner.hypermodel.build(best_hps) +model.fit(img_train, label_train, epochs = 10, validation_data = (img_test, label_test)) +``` + +```py +Epoch 1/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.4787 - accuracy: 0.8303 - val_loss: 0.4199 - val_accuracy: 0.8509 +Epoch 2/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.3600 - accuracy: 0.8684 - val_loss: 0.3902 - val_accuracy: 0.8570 +Epoch 3/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.3253 - accuracy: 0.8794 - val_loss: 0.3670 - val_accuracy: 0.8689 +Epoch 4/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.3038 - accuracy: 0.8874 - val_loss: 0.3714 - val_accuracy: 0.8684 +Epoch 5/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.2842 - accuracy: 0.8939 - val_loss: 0.3527 - val_accuracy: 0.8758 +Epoch 6/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.2678 - accuracy: 0.9005 - val_loss: 0.3334 - val_accuracy: 0.8785 +Epoch 7/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.2533 - accuracy: 0.9055 - val_loss: 0.3277 - val_accuracy: 0.8834 +Epoch 8/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.2445 - accuracy: 0.9089 - val_loss: 0.3487 - val_accuracy: 0.8768 +Epoch 9/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.2352 - accuracy: 0.9116 - val_loss: 0.3352 - val_accuracy: 0.8843 +Epoch 10/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.2260 - accuracy: 0.9145 - val_loss: 0.3457 - val_accuracy: 0.8814 + + + +``` + +The `my_dir/intro_to_kt` directory contains detailed logs and checkpoints for every trial (model configuration) run during the hyperparameter search. If you re-run the hyperparameter search, the Keras Tuner uses the existing state from these logs to resume the search. To disable this behavior, pass an additional `overwrite = True` argument while instantiating the tuner. + +## Summary + +In this tutorial, you learned how to use the Keras Tuner to tune hyperparameters for a model.
To learn more about the Keras Tuner, check out these additional resources: + +* [Keras Tuner on the TensorFlow blog](https://blog.tensorflow.org/2020/01/hyperparameter-tuning-with-keras-tuner.html) +* [Keras Tuner website](https://keras-team.github.io/keras-tuner/) + +Also check out the [HParams Dashboard](https://tensorflow.google.cn/tensorboard/hyperparameter_tuning_with_hparams) in TensorBoard to interactively tune your model hyperparameters. \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/013.md b/Tensorflow/TensorFlow2.0/013.md new file mode 100644 index 00000000..e2a98697 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/013.md @@ -0,0 +1 @@ +# 加载和预处理数据 \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/014.md b/Tensorflow/TensorFlow2.0/014.md new file mode 100644 index 00000000..77fcec85 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/014.md @@ -0,0 +1,872 @@ +# 用 tf.data 加载图片 + +> 原文:[https://tensorflow.google.cn/tutorials/load_data/images](https://tensorflow.google.cn/tutorials/load_data/images) + +**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。 + +本教程提供一个如何使用 [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) 加载图片的简单例子。 + +本例中使用的数据集分布在图片文件夹中,一个文件夹含有一类图片。 + +## 配置 + +```py +import tensorflow as tf +``` + +```py +AUTOTUNE = tf.data.experimental.AUTOTUNE +``` + +## 下载并检查数据集 + +### 检索图片 + +在你开始任何训练之前,你将需要一组图片来教会网络你想要训练的新类别。你已经创建了一个文件夹,存储了最初使用的拥有创作共用许可的花卉照片。 + +```py +import pathlib +data_root_orig = tf.keras.utils.get_file(origin='https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz', + fname='flower_photos', untar=True) +data_root = pathlib.Path(data_root_orig) +print(data_root) +``` + +```py +Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz +228818944/228813984 [==============================] - 2s 0us/step +/home/kbuilder/.keras/datasets/flower_photos + +``` + +下载了 218 MB 之后,你现在应该有花卉照片副本: + +```py +for item in data_root.iterdir(): + print(item) +``` + +```py +/home/kbuilder/.keras/datasets/flower_photos/sunflowers +/home/kbuilder/.keras/datasets/flower_photos/daisy +/home/kbuilder/.keras/datasets/flower_photos/LICENSE.txt +/home/kbuilder/.keras/datasets/flower_photos/roses +/home/kbuilder/.keras/datasets/flower_photos/tulips +/home/kbuilder/.keras/datasets/flower_photos/dandelion + +``` + +```py +import random +all_image_paths = list(data_root.glob('*/*')) +all_image_paths = [str(path) for path in all_image_paths] +random.shuffle(all_image_paths) + +image_count = len(all_image_paths) +image_count +``` + +```py +3670 + +``` + +```py +all_image_paths[:10] +``` + +```py +['/home/kbuilder/.keras/datasets/flower_photos/daisy/4820415253_15bc3b6833_n.jpg', + '/home/kbuilder/.keras/datasets/flower_photos/roses/14172324538_2147808483_n.jpg', + '/home/kbuilder/.keras/datasets/flower_photos/sunflowers/15054866658_c1a6223403_m.jpg', + '/home/kbuilder/.keras/datasets/flower_photos/daisy/422094774_28acc69a8b_n.jpg', + '/home/kbuilder/.keras/datasets/flower_photos/roses/22982871191_ec61e36939_n.jpg', + '/home/kbuilder/.keras/datasets/flower_photos/tulips/8673416166_620fc18e2f_n.jpg', + '/home/kbuilder/.keras/datasets/flower_photos/tulips/16582481123_06e8e6b966_n.jpg', + 
'/home/kbuilder/.keras/datasets/flower_photos/daisy/5434914569_e9b982fde0_n.jpg', + '/home/kbuilder/.keras/datasets/flower_photos/sunflowers/184682652_c927a49226_m.jpg', + '/home/kbuilder/.keras/datasets/flower_photos/dandelion/3021333497_b927cd8596.jpg'] + +``` + +### 检查图片 + +现在让我们快速浏览几张图片,这样你知道你在处理什么: + +```py +import os +attributions = (data_root/"LICENSE.txt").open(encoding='utf-8').readlines()[4:] +attributions = [line.split(' CC-BY') for line in attributions] +attributions = dict(attributions) +``` + +```py +import IPython.display as display + +def caption_image(image_path): + image_rel = pathlib.Path(image_path).relative_to(data_root) + return "Image (CC BY 2.0) " + ' - '.join(attributions[str(image_rel)].split(' - ')[:-1]) +``` + +```py +for n in range(3): + image_path = random.choice(all_image_paths) + display.display(display.Image(image_path)) + print(caption_image(image_path)) + print() +``` + +![jpeg](img/e954331a93f7da6b3ebeb6d2c90586f4.png) + +```py +Image (CC BY 2.0) by Pavlina Jane + +``` + +![jpeg](img/82eeef92c3c39a6fc38d679c9e4c37fa.png) + +```py +Image (CC BY 2.0) by Samantha Forsberg + +``` + +![jpeg](img/13fa130027f8343fe8d952fec8dd0555.png) + +```py +Image (CC BY 2.0) by Manu + +``` + +### 确定每张图片的标签 + +列出可用的标签: + +```py +label_names = sorted(item.name for item in data_root.glob('*/') if item.is_dir()) +label_names +``` + +```py +['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips'] + +``` + +为每个标签分配索引: + +```py +label_to_index = dict((name, index) for index, name in enumerate(label_names)) +label_to_index +``` + +```py +{'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflowers': 3, 'tulips': 4} + +``` + +创建一个列表,包含每个文件的标签索引: + +```py +all_image_labels = [label_to_index[pathlib.Path(path).parent.name] + for path in all_image_paths] + +print("First 10 labels indices: ", all_image_labels[:10]) +``` + +```py +First 10 labels indices: [0, 2, 3, 0, 2, 4, 4, 0, 3, 1] + +``` + +### 加载和格式化图片 + +TensorFlow 包含加载和处理图片时你需要的所有工具: + +```py +img_path = all_image_paths[0] +img_path +``` + +```py +'/home/kbuilder/.keras/datasets/flower_photos/daisy/4820415253_15bc3b6833_n.jpg' + +``` + +以下是原始数据: + +```py +img_raw = tf.io.read_file(img_path) +print(repr(img_raw)[:100]+"...") +``` + +```py + + +``` + +根据你的模型调整其大小: + +```py +img_final = tf.image.resize(img_tensor, [192, 192]) +img_final = img_final/255.0 +print(img_final.shape) +print(img_final.numpy().min()) +print(img_final.numpy().max()) +``` + +```py +(192, 192, 3) +0.0 +1.0 + +``` + +将这些包装在一个简单的函数里,以备后用。 + +```py +def preprocess_image(image): + image = tf.image.decode_jpeg(image, channels=3) + image = tf.image.resize(image, [192, 192]) + image /= 255.0 # normalize to [0,1] range + + return image +``` + +```py +def load_and_preprocess_image(path): + image = tf.io.read_file(path) + return preprocess_image(image) +``` + +```py +import matplotlib.pyplot as plt + +image_path = all_image_paths[0] +label = all_image_labels[0] + +plt.imshow(load_and_preprocess_image(img_path)) +plt.grid(False) +plt.xlabel(caption_image(img_path)) +plt.title(label_names[label].title()) +print() +``` + +![png](img/d99736f992ec3e1883b57ef705221367.png) + +## 构建一个 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) + +### 一个图片数据集 + +构建 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) 最简单的方法就是使用 `from_tensor_slices` 方法。 + +将字符串数组切片,得到一个字符串数据集: + +```py +path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths) +``` + +`shapes(维数)` 和 `types(类型)` 描述数据集里每个数据项的内容。在这里是一组标量二进制字符串。 + +```py +print(path_ds) 
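+# 预期输出形如:<TensorSliceDataset shapes: (), types: tf.string>,即一个元素为字符串标量的数据集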
+``` + +```py + + +``` + +现在创建一个新的数据集,通过在路径数据集上映射 `preprocess_image` 来动态加载和格式化图片。 + +```py +image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE) +``` + +```py +import matplotlib.pyplot as plt + +plt.figure(figsize=(8,8)) +for n, image in enumerate(image_ds.take(4)): + plt.subplot(2,2,n+1) + plt.imshow(image) + plt.grid(False) + plt.xticks([]) + plt.yticks([]) + plt.xlabel(caption_image(all_image_paths[n])) + plt.show() +``` + +![png](img/87f405a26e039fc527ac7f2dd59de28d.png) + +![png](img/309f23cd3db44be87a1c9d9d25619301.png) + +![png](img/461f849577ccb00ee49683e824e095cf.png) + +![png](img/187f414e1afde064024f6898871831da.png) + +### 一个`(图片, 标签)`对数据集 + +使用同样的 `from_tensor_slices` 方法你可以创建一个标签数据集: + +```py +label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(all_image_labels, tf.int64)) +``` + +```py +for label in label_ds.take(10): + print(label_names[label.numpy()]) +``` + +```py +daisy +roses +sunflowers +daisy +roses +tulips +tulips +daisy +sunflowers +dandelion + +``` + +由于这些数据集顺序相同,你可以将他们打包在一起得到一个`(图片, 标签)`对数据集: + +```py +image_label_ds = tf.data.Dataset.zip((image_ds, label_ds)) +``` + +这个新数据集的 `shapes(维数)` 和 `types(类型)` 也是维数和类型的元组,用来描述每个字段: + +```py +print(image_label_ds) +``` + +```py + + +``` + +注意:当你拥有形似 `all_image_labels` 和 `all_image_paths` 的数组,`tf.data.dataset.Dataset.zip` 的替代方法是将这对数组切片。 + +```py +ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels)) + +# 元组被解压缩到映射函数的位置参数中 +def load_and_preprocess_from_path_label(path, label): + return load_and_preprocess_image(path), label + +image_label_ds = ds.map(load_and_preprocess_from_path_label) +image_label_ds +``` + +```py + + +``` + +### 训练的基本方法 + +要使用此数据集训练模型,你将会想要数据: + +* 被充分打乱。 +* 被分割为 batch。 +* 永远重复。 +* 尽快提供 batch。 + +使用 [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) api 可以轻松添加这些功能。 + +```py +BATCH_SIZE = 32 + +# 设置一个和数据集大小一致的 shuffle buffer size(随机缓冲区大小)以保证数据 +# 被充分打乱。 +ds = image_label_ds.shuffle(buffer_size=image_count) +ds = ds.repeat() +ds = ds.batch(BATCH_SIZE) +# 当模型在训练的时候,`prefetch` 使数据集在后台取得 batch。 +ds = ds.prefetch(buffer_size=AUTOTUNE) +ds +``` + +```py + + +``` + +这里有一些注意事项: + +1. 顺序很重要。 + + * 在 `.repeat` 之后 `.shuffle`,会在 epoch 之间打乱数据(当有些数据出现两次的时候,其他数据还没有出现过)。 + + * 在 `.batch` 之后 `.shuffle`,会打乱 batch 的顺序,但是不会在 batch 之间打乱数据。 + +2. 你在完全打乱中使用和数据集大小一样的 `buffer_size(缓冲区大小)`。较大的缓冲区大小提供更好的随机化,但使用更多的内存,直到超过数据集大小。 + +3. 在从随机缓冲区中拉取任何元素前,要先填满它。所以当你的 `Dataset(数据集)`启动的时候一个大的 `buffer_size(缓冲区大小)`可能会引起延迟。 + +4. 在随机缓冲区完全为空之前,被打乱的数据集不会报告数据集的结尾。`Dataset(数据集)`由 `.repeat` 重新启动,导致需要再次等待随机缓冲区被填满。 + +最后一点可以通过使用 [`tf.data.Dataset.apply`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#apply) 方法和融合过的 [`tf.data.experimental.shuffle_and_repeat`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/shuffle_and_repeat) 函数来解决: + +```py +ds = image_label_ds.apply( + tf.data.experimental.shuffle_and_repeat(buffer_size=image_count)) +ds = ds.batch(BATCH_SIZE) +ds = ds.prefetch(buffer_size=AUTOTUNE) +ds +``` + +```py +WARNING:tensorflow:From :2: shuffle_and_repeat (from tensorflow.python.data.experimental.ops.shuffle_ops) is deprecated and will be removed in a future version. +Instructions for updating: +Use `tf.data.Dataset.shuffle(buffer_size, seed)` followed by `tf.data.Dataset.repeat(count)`. Static tf.data optimizations will take care of using the fused implementation. 
+ + +``` + +### 传递数据集至模型 + +从 [`tf.keras.applications`](https://tensorflow.google.cn/api_docs/python/tf/keras/applications) 取得 MobileNet v2 副本。 + +该模型副本会被用于一个简单的迁移学习例子。 + +设置 MobileNet 的权重为不可训练: + +```py +mobile_net = tf.keras.applications.MobileNetV2(input_shape=(192, 192, 3), include_top=False) +mobile_net.trainable=False +``` + +```py +Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_192_no_top.h5 +9412608/9406464 [==============================] - 0s 0us/step + +``` + +该模型期望它的输入被标准化至 `[-1,1]` 范围内: + +```py +help(tf.keras.applications.mobilenet_v2.preprocess_input) +``` + +```py +…… +该函数使用“Inception”预处理,将 +RGB 值从 [0, 255] 转化为 [-1, 1] +…… + +``` + +在你将数据传递给 MobileNet 模型之前,你需要将其范围从 `[0,1]` 转化为 `[-1,1]`: + +```py +def change_range(image,label): + return 2*image-1, label + +keras_ds = ds.map(change_range) +``` + +MobileNet 为每张图片的特征返回一个 `6x6` 的空间网格。 + +传递一个 batch 的图片给它,查看结果: + +```py +# 数据集可能需要几秒来启动,因为要填满其随机缓冲区。 +image_batch, label_batch = next(iter(keras_ds)) +``` + +```py +feature_map_batch = mobile_net(image_batch) +print(feature_map_batch.shape) +``` + +```py +(32, 6, 6, 1280) + +``` + +构建一个包装了 MobileNet 的模型并在 [`tf.keras.layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense) 输出层之前使用 [`tf.keras.layers.GlobalAveragePooling2D`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/GlobalAveragePooling2D) 来平均那些空间向量: + +```py +model = tf.keras.Sequential([ + mobile_net, + tf.keras.layers.GlobalAveragePooling2D(), + tf.keras.layers.Dense(len(label_names), activation = 'softmax')]) +``` + +现在它产出符合预期 shape(维数)的输出: + +```py +logit_batch = model(image_batch).numpy() + +print("min logit:", logit_batch.min()) +print("max logit:", logit_batch.max()) +print() + +print("Shape:", logit_batch.shape) +``` + +```py +min logit: 0.0039403443 +max logit: 0.82328725 + +Shape: (32, 5) + +``` + +编译模型以描述训练过程: + +```py +model.compile(optimizer=tf.keras.optimizers.Adam(), + loss='sparse_categorical_crossentropy', + metrics=["accuracy"]) +``` + +此处有两个可训练的变量 —— Dense 层中的 `weights(权重)` 和 `bias(偏差)`: + +```py +len(model.trainable_variables) +``` + +```py +2 + +``` + +```py +model.summary() +``` + +```py +Model: "sequential" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +mobilenetv2_1.00_192 (Functi (None, 6, 6, 1280) 2257984 +_________________________________________________________________ +global_average_pooling2d (Gl (None, 1280) 0 +_________________________________________________________________ +dense (Dense) (None, 5) 6405 +================================================================= +Total params: 2,264,389 +Trainable params: 6,405 +Non-trainable params: 2,257,984 +_________________________________________________________________ + +``` + +你已经准备好来训练模型了。 + +注意,出于演示目的,每个 epoch 你将只运行 3 个 step,但一般来说在传递给 `model.fit()` 之前你会指定 step 的真实数量,如下所示: + +```py +steps_per_epoch=tf.math.ceil(len(all_image_paths)/BATCH_SIZE).numpy() +steps_per_epoch +``` + +```py +115.0 + +``` + +```py +model.fit(ds, epochs=1, steps_per_epoch=3) +``` + +```py +3/3 [==============================] - 0s 31ms/step - loss: 1.8837 - accuracy: 0.2812 + + + +``` + +## 性能 + +注意:这部分只是展示一些可能帮助提升性能的简单技巧。深入指南,请看:[输入 pipeline(管道)的性能](https://tensorflow.google.cn/guide/performance/datasets)。 + +上面使用的简单 pipeline(管道)在每个 epoch 中单独读取每个文件。在本地使用 CPU 训练时这个方法是可行的,但是可能不足以进行 GPU 训练,并且完全不适合任何形式的分布式训练。 +
+要研究这点,首先构建一个简单的函数来检查数据集的性能: + +```py +import time +default_timeit_steps = 2*steps_per_epoch+1 + +def timeit(ds, steps=default_timeit_steps): + overall_start = time.time() + # 在开始计时之前 + # 取得单个 batch 来填充 pipeline(管道)(填充随机缓冲区) + it = iter(ds.take(steps+1)) + next(it) + + start = time.time() + for i,(images,labels) in enumerate(it): + if i%10 == 0: + print('.',end='') + print() + end = time.time() + + duration = end-start + print("{} batches: {} s".format(steps, duration)) + print("{:0.5f} Images/s".format(BATCH_SIZE*steps/duration)) + print("Total time: {}s".format(end-overall_start)) +``` + +当前数据集的性能是: + +```py +ds = image_label_ds.apply( + tf.data.experimental.shuffle_and_repeat(buffer_size=image_count)) +ds = ds.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE) +ds +``` + +```py + + +``` + +```py +timeit(ds) +``` + +```py +........................ +231.0 batches: 14.869637966156006 s +497.12037 Images/s +Total time: 21.789817333221436s + +``` + +### 缓存 + +使用 [`tf.data.Dataset.cache`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#cache) 在 epoch 之间轻松缓存计算结果。这是非常高效的,特别是当内存能容纳全部数据时。 + +在被预处理之后(解码和调整大小),图片在此被缓存了: + +```py +ds = image_label_ds.cache() +ds = ds.apply( + tf.data.experimental.shuffle_and_repeat(buffer_size=image_count)) +ds = ds.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE) +ds +``` + +```py + + +``` + +```py +timeit(ds) +``` + +```py +........................ +231.0 batches: 0.5994970798492432 s +12330.33529 Images/s +Total time: 7.475242614746094s + +``` + +使用内存缓存的一个缺点是必须在每次运行时重建缓存,这使得每次启动数据集时有相同的启动延迟: + +```py +timeit(ds) +``` + +```py +........................ +231.0 batches: 0.6120779514312744 s +12076.89312 Images/s +Total time: 0.6253445148468018s + +``` + +如果内存不够容纳数据,使用一个缓存文件: + +```py +ds = image_label_ds.cache(filename='./cache.tf-data') +ds = ds.apply( + tf.data.experimental.shuffle_and_repeat(buffer_size=image_count)) +ds = ds.batch(BATCH_SIZE).prefetch(1) +ds +``` + +```py + + +``` + +```py +timeit(ds) +``` + +```py +........................ +231.0 batches: 3.0341720581054688 s +2436.24945 Images/s +Total time: 12.044088363647461s + +``` + +这个缓存文件也有可快速重启数据集而无需重建缓存的优点。注意第二次快了多少: + +```py +timeit(ds) +``` + +```py +........................ +231.0 batches: 2.358055353164673 s +3134.78646 Images/s +Total time: 3.105525493621826s + +``` + +### TFRecord 文件 + +#### 原始图片数据 + +TFRecord 文件是一种用来存储一串二进制 blob 的简单格式。通过将多个示例打包进同一个文件内,TensorFlow 能够一次性读取多个示例,当使用一个远程存储服务,如 GCS 时,这对性能来说尤其重要。 + +首先,从原始图片数据中构建出一个 TFRecord 文件: + +```py +image_ds = tf.data.Dataset.from_tensor_slices(all_image_paths).map(tf.io.read_file) +tfrec = tf.data.experimental.TFRecordWriter('images.tfrec') +tfrec.write(image_ds) +``` + +接着,构建一个从 TFRecord 文件读取的数据集,并使用你之前定义的 `preprocess_image` 函数对图像进行解码/重新格式化: + +```py +image_ds = tf.data.TFRecordDataset('images.tfrec').map(preprocess_image) +``` + +压缩该数据集和你之前定义的标签数据集以得到期望的 `(图片,标签)` 对: + +```py +ds = tf.data.Dataset.zip((image_ds, label_ds)) +ds = ds.apply( + tf.data.experimental.shuffle_and_repeat(buffer_size=image_count)) +ds=ds.batch(BATCH_SIZE).prefetch(AUTOTUNE) +ds +``` + +```py + + +``` + +```py +timeit(ds) +``` + +```py +........................ 
+231.0 batches: 14.661343574523926 s +504.18299 Images/s +Total time: 21.57948637008667s + +``` + +这比 `缓存` 版本慢,因为你还没有缓存预处理。 + +#### 序列化的 Tensor(张量) + +要为 TFRecord 文件省去一些预处理过程,首先像之前一样制作一个处理过的图片数据集: + +```py +paths_ds = tf.data.Dataset.from_tensor_slices(all_image_paths) +image_ds = paths_ds.map(load_and_preprocess_image) +image_ds +``` + +```py + + +``` + +现在你有一个 tensor(张量)数据集,而不是一个 `.jpeg` 字符串数据集。 + +要将此序列化至一个 TFRecord 文件你首先将该 tensor(张量)数据集转化为一个字符串数据集: + +```py +ds = image_ds.map(tf.io.serialize_tensor) +ds +``` + +```py + + +``` + +```py +tfrec = tf.data.experimental.TFRecordWriter('images.tfrec') +tfrec.write(ds) +``` + +有了被缓存的预处理,就能从 TFrecord 文件高效地加载数据——只需记得在使用它之前反序列化: + +```py +ds = tf.data.TFRecordDataset('images.tfrec') + +def parse(x): + result = tf.io.parse_tensor(x, out_type=tf.float32) + result = tf.reshape(result, [192, 192, 3]) + return result + +ds = ds.map(parse, num_parallel_calls=AUTOTUNE) +ds +``` + +```py + + +``` + +现在,像之前一样添加标签和进行相同的标准操作: + +```py +ds = tf.data.Dataset.zip((ds, label_ds)) +ds = ds.apply( + tf.data.experimental.shuffle_and_repeat(buffer_size=image_count)) +ds=ds.batch(BATCH_SIZE).prefetch(AUTOTUNE) +ds +``` + +```py + + +``` + +```py +timeit(ds) +``` + +```py +........................ +231.0 batches: 1.8890972137451172 s +3912.98020 Images/s +Total time: 2.7021732330322266s + +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/015.md b/Tensorflow/TensorFlow2.0/015.md new file mode 100644 index 00000000..27a9fade --- /dev/null +++ b/Tensorflow/TensorFlow2.0/015.md @@ -0,0 +1,291 @@ +# 使用 tf.data 加载文本数据 + +> 原文:[https://tensorflow.google.cn/tutorials/load_data/text](https://tensorflow.google.cn/tutorials/load_data/text) + +**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。 + +本教程为你提供了一个如何使用 [`tf.data.TextLineDataset`](https://tensorflow.google.cn/api_docs/python/tf/data/TextLineDataset) 来加载文本文件的示例。`TextLineDataset` 通常被用来以文本文件构建数据集(原文件中的一行为一个样本) 。这适用于大多数的基于行的文本数据(例如,诗歌或错误日志) 。下面我们将使用相同作品(荷马的伊利亚特)三个不同版本的英文翻译,然后训练一个模型来通过单行文本确定译者。 + +## 环境搭建 + +```py +import tensorflow as tf + +import tensorflow_datasets as tfds +import os +``` + +三个版本的翻译分别来自于: + +* [William Cowper](https://en.wikipedia.org/wiki/William_Cowper) — [text](https://storage.googleapis.com/download.tensorflow.org/data/illiad/cowper.txt) + +* [Edward, Earl of Derby](https://en.wikipedia.org/wiki/Edward_Smith-Stanley,_14th_Earl_of_Derby) — [text](https://storage.googleapis.com/download.tensorflow.org/data/illiad/derby.txt) + +* [Samuel Butler](https://en.wikipedia.org/wiki/Samuel_Butler_%28novelist%29) — [text](https://storage.googleapis.com/download.tensorflow.org/data/illiad/butler.txt) + +本教程中使用的文本文件已经进行过一些典型的预处理,主要包括删除了文档页眉和页脚,行号,章节标题。请下载这些已经被局部改动过的文件。 + +```py +DIRECTORY_URL = 'https://storage.googleapis.com/download.tensorflow.org/data/illiad/' +FILE_NAMES = ['cowper.txt', 'derby.txt', 'butler.txt'] + +for name in FILE_NAMES: + text_dir = tf.keras.utils.get_file(name, origin=DIRECTORY_URL+name) + +parent_dir = os.path.dirname(text_dir) + +parent_dir +``` + +```py +Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/cowper.txt +819200/815980 [==============================] - 0s 0us/step +Downloading data from 
https://storage.googleapis.com/download.tensorflow.org/data/illiad/derby.txt +811008/809730 [==============================] - 0s 0us/step +Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/butler.txt +811008/807992 [==============================] - 0s 0us/step + +'/home/kbuilder/.keras/datasets' + +``` + +## 将文本加载到数据集中 + +迭代整个文件,将整个文件加载到自己的数据集中。 + +每个样本都需要单独标记,所以请使用 [`tf.data.Dataset.map`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#map) 来为每个样本设定标签。这将迭代数据集中的每一个样本并且返回( `example, label` )对。 + +```py +def labeler(example, index): + return example, tf.cast(index, tf.int64) + +labeled_data_sets = [] + +for i, file_name in enumerate(FILE_NAMES): + lines_dataset = tf.data.TextLineDataset(os.path.join(parent_dir, file_name)) + labeled_dataset = lines_dataset.map(lambda ex: labeler(ex, i)) + labeled_data_sets.append(labeled_dataset) +``` + +将这些标记的数据集合并到一个数据集中,然后对其进行随机化操作。 + +```py +BUFFER_SIZE = 50000 +BATCH_SIZE = 64 +TAKE_SIZE = 5000 +``` + +```py +all_labeled_data = labeled_data_sets[0] +for labeled_dataset in labeled_data_sets[1:]: + all_labeled_data = all_labeled_data.concatenate(labeled_dataset) + +all_labeled_data = all_labeled_data.shuffle( + BUFFER_SIZE, reshuffle_each_iteration=False) +``` + +你可以使用 [`tf.data.Dataset.take`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#take) 与 `print` 来查看 `(example, label)` 对的外观。`numpy` 属性显示每个 Tensor 的值。 + +```py +for ex in all_labeled_data.take(5): + print(ex) +``` + +```py +(, ) +(, ) +(, ) +(, ) +(, ) + +``` + +## 将文本编码成数字 + +机器学习基于的是数字而非文本,所以字符串需要被转化成数字列表。 为了达到此目的,我们需要构建文本与整数的一一映射。 + +### 建立词汇表 + +首先,通过将文本标记为单独的单词集合来构建词汇表。在 TensorFlow 和 Python 中均有很多方法来达成这一目的。在本教程中: + +1. 迭代每个样本的 `numpy` 值。 +2. 使用 `tfds.features.text.Tokenizer` 来将其分割成 `token`。 +3. 将这些 `token` 放入一个 Python 集合中,借此来清除重复项。 +4. 获取该词汇表的大小以便于以后使用。 + +```py +tokenizer = tfds.features.text.Tokenizer() + +vocabulary_set = set() +for text_tensor, _ in all_labeled_data: + some_tokens = tokenizer.tokenize(text_tensor.numpy()) + vocabulary_set.update(some_tokens) + +vocab_size = len(vocabulary_set) +vocab_size +``` + +```py +17178 + +``` + +### 样本编码 + +通过传递 `vocabulary_set` 到 `tfds.features.text.TokenTextEncoder` 来构建一个编码器。编码器的 `encode` 方法传入一行文本,返回一个整数列表。 + +```py +encoder = tfds.features.text.TokenTextEncoder(vocabulary_set) +``` + +你可以尝试运行这一行代码并查看输出的样式。 + +```py +example_text = next(iter(all_labeled_data))[0].numpy() +print(example_text) +``` + +```py +b'To Ida; in his presence once arrived,' + +``` + +```py +encoded_example = encoder.encode(example_text) +print(encoded_example) +``` + +```py +[15746, 11433, 8394, 9006, 379, 3463, 17072] + +``` + +现在,在数据集上运行编码器(通过将编码器打包到 [`tf.py_function`](https://tensorflow.google.cn/api_docs/python/tf/py_function) 并且传参至数据集的 `map` 方法的方式来运行)。 + +```py +def encode(text_tensor, label): + encoded_text = encoder.encode(text_tensor.numpy()) + return encoded_text, label + +def encode_map_fn(text, label): + # py_func doesn't set the shape of the returned tensors. 
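+  # tf.py_function wraps the eager `encode` function so it can run inside the dataset's map.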
+ encoded_text, label = tf.py_function(encode, + inp=[text, label], + Tout=(tf.int64, tf.int64)) + + # `tf.data.Datasets` work best if all components have a shape set + # so set the shapes manually: + encoded_text.set_shape([None]) + label.set_shape([]) + + return encoded_text, label + +all_encoded_data = all_labeled_data.map(encode_map_fn) +``` + +## 将数据集分割为测试集和训练集且进行分支 + +使用 [`tf.data.Dataset.take`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#take) 和 [`tf.data.Dataset.skip`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#skip) 来建立一个小一些的测试数据集和稍大一些的训练数据集。 + +在数据集被传入模型之前,数据集需要被分批。最典型的是,每个分支中的样本大小与格式需要一致。但是数据集中样本并不全是相同大小的(每行文本字数并不相同)。因此,使用 [`tf.data.Dataset.padded_batch`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#padded_batch)(而不是 `batch` )将样本填充到相同的大小。 + +```py +train_data = all_encoded_data.skip(TAKE_SIZE).shuffle(BUFFER_SIZE) +train_data = train_data.padded_batch(BATCH_SIZE) + +test_data = all_encoded_data.take(TAKE_SIZE) +test_data = test_data.padded_batch(BATCH_SIZE) +``` + +现在,test_data 和 train_data 不是( `example, label` )对的集合,而是批次的集合。每个批次都是一对(*多样本*, *多标签* ),表示为数组。 + +```py +sample_text, sample_labels = next(iter(test_data)) + +sample_text[0], sample_labels[0] +``` + +```py +(, + ) + +``` + +由于我们引入了一个新的 token 来编码(填充零),因此词汇表大小增加了一个。 + +```py +vocab_size += 1 +``` + +## 建立模型 + +```py +model = tf.keras.Sequential() +``` + +第一层将整数表示转换为密集矢量嵌入。更多内容请查阅 [Word Embeddings](https://tensorflow.google.cn/tutorials/sequences/word_embeddings) 教程。 + +```py +model.add(tf.keras.layers.Embedding(vocab_size, 64)) +``` + +下一层是 [LSTM](http://colah.github.io/posts/2015-08-Understanding-LSTMs/) 层,它允许模型利用上下文中理解单词含义。 LSTM 上的双向包装器有助于模型理解当前数据点与其之前和之后的数据点的关系。 + +```py +model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))) +``` + +最后,我们将获得一个或多个紧密连接的层,其中最后一层是输出层。输出层输出样本属于各个标签的概率,最后具有最高概率的分类标签即为最终预测结果。 + +```py +# 一个或多个紧密连接的层 +# 编辑 `for` 行的列表去检测层的大小 +for units in [64, 64]: + model.add(tf.keras.layers.Dense(units, activation='relu')) + +# 输出层。第一个参数是标签个数。 +model.add(tf.keras.layers.Dense(3, activation='softmax')) +``` + +最后,编译这个模型。对于一个 softmax 分类模型来说,通常使用 `sparse_categorical_crossentropy` 作为其损失函数。你可以尝试其他的优化器,但是 `adam` 是最常用的。 + +```py +model.compile(optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) +``` + +## 训练模型 + +利用提供的数据训练出的模型有着不错的精度(大约 83% )。 + +```py +model.fit(train_data, epochs=3, validation_data=test_data) +``` + +```py +Epoch 1/3 +697/697 [==============================] - 10s 14ms/step - loss: 0.5181 - accuracy: 0.7457 - val_loss: 0.3855 - val_accuracy: 0.8222 +Epoch 2/3 +697/697 [==============================] - 9s 13ms/step - loss: 0.2985 - accuracy: 0.8685 - val_loss: 0.3635 - val_accuracy: 0.8350 +Epoch 3/3 +697/697 [==============================] - 9s 13ms/step - loss: 0.2242 - accuracy: 0.9027 - val_loss: 0.3794 - val_accuracy: 0.8246 + + + +``` + +```py +eval_loss, eval_acc = model.evaluate(test_data) + +print('\nEval loss: {}, Eval accuracy: {}'.format(eval_loss, eval_acc)) +``` + +```py +79/79 [==============================] - 1s 18ms/step - loss: 0.3794 - accuracy: 0.8246 + +Eval loss: 0.3794495761394501, Eval accuracy: 0.8245999813079834 + +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/016.md b/Tensorflow/TensorFlow2.0/016.md new file mode 100644 index 00000000..093ccb8e --- /dev/null +++ b/Tensorflow/TensorFlow2.0/016.md @@ -0,0 +1,367 @@ +# 用 tf.data 加载 CSV 数据 + +> 原文:[https://tensorflow.google.cn/tutorials/load_data/csv](https://tensorflow.google.cn/tutorials/load_data/csv) + 
+**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。 + +这篇教程通过一个示例展示了怎样将 CSV 格式的数据加载进 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset)。 + +这篇教程使用的是泰坦尼克号乘客的数据。模型会根据乘客的年龄、性别、票务舱和是否独自旅行等特征来预测乘客生还的可能性。 + +## 设置 + +```py +import functools + +import numpy as np +import tensorflow as tf +import tensorflow_datasets as tfds +``` + +```py +TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv" +TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv" + +train_file_path = tf.keras.utils.get_file("train.csv", TRAIN_DATA_URL) +test_file_path = tf.keras.utils.get_file("eval.csv", TEST_DATA_URL) +``` + +```py +Downloading data from https://storage.googleapis.com/tf-datasets/titanic/train.csv +32768/30874 [===============================] - 0s 0us/step +Downloading data from https://storage.googleapis.com/tf-datasets/titanic/eval.csv +16384/13049 [=====================================] - 0s 0us/step + +``` + +```py +# 让 numpy 数据更易读。 +np.set_printoptions(precision=3, suppress=True) +``` + +## 加载数据 + +开始的时候,我们通过打印 CSV 文件的前几行来了解文件的格式。 + +```py +head {train_file_path} + +``` + +```py +survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone +0,male,22.0,1,0,7.25,Third,unknown,Southampton,n +1,female,38.0,1,0,71.2833,First,C,Cherbourg,n +1,female,26.0,0,0,7.925,Third,unknown,Southampton,y +1,female,35.0,1,0,53.1,First,C,Southampton,n +0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y +0,male,2.0,3,1,21.075,Third,unknown,Southampton,n +1,female,27.0,0,2,11.1333,Third,unknown,Southampton,n +1,female,14.0,1,0,30.0708,Second,unknown,Cherbourg,n +1,female,4.0,1,1,16.7,Third,G,Southampton,n + +``` + +正如你看到的那样,CSV 文件的每列都会有一个列名。dataset 的构造函数会自动识别这些列名。如果你使用的文件的第一行不包含列名,那么需要将列名通过字符串列表传给 `make_csv_dataset` 函数的 `column_names` 参数。 + +```py + CSV_COLUMNS = ['survived', 'sex', 'age', 'n_siblings_spouses', 'parch', 'fare', 'class', 'deck', 'embark_town', 'alone'] + +dataset = tf.data.experimental.make_csv_dataset( + ..., + column_names=CSV_COLUMNS, + ...) +``` + +这个示例使用了所有的列。如果你需要忽略数据集中的某些列,创建一个包含你需要使用的列的列表,然后传给构造器的(可选)参数 `select_columns`。 + +```py + dataset = tf.data.experimental.make_csv_dataset( + ..., + select_columns = columns_to_use, + ...) 
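+    # 注:`columns_to_use` 是需要保留的列名列表,例如 ['survived', 'age', 'fare'](列名仅为示意)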
+``` + +对于包含模型需要预测的值的列是你需要显式指定的。 + +```py +LABEL_COLUMN = 'survived' +LABELS = [0, 1] +``` + +现在从文件中读取 CSV 数据并且创建 dataset。 + +(完整的文档,参考 [`tf.data.experimental.make_csv_dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/make_csv_dataset)) + +```py +def get_dataset(file_path): + dataset = tf.data.experimental.make_csv_dataset( + file_path, + batch_size=12, # 为了示例更容易展示,手动设置较小的值 + label_name=LABEL_COLUMN, + na_value="?", + num_epochs=1, + ignore_errors=True) + return dataset + +raw_train_data = get_dataset(train_file_path) +raw_test_data = get_dataset(test_file_path) +``` + +dataset 中的每个条目都是一个批次,用一个元组(*多个样本*,*多个标签*)表示。样本中的数据组织形式是以列为主的张量(而不是以行为主的张量),每条数据中包含的元素个数就是批次大小(这个示例中是 12)。 + +阅读下面的示例有助于你的理解。 + +```py +examples, labels = next(iter(raw_train_data)) # 第一个批次 +print("EXAMPLES: \n", examples, "\n") +print("LABELS: \n", labels) +``` + +```py +EXAMPLES: + OrderedDict([('sex', ), ('age', ), ('n_siblings_spouses', ), ('parch', ), ('fare', ), ('class', ), ('deck', ), ('embark_town', ), ('alone', )]) + +LABELS: + tf.Tensor([0 0 0 0 0 1 0 1 0 0 0 1], shape=(12,), dtype=int32) + +``` + +## 数据预处理 + +### 分类数据 + +CSV 数据中的有些列是分类的列。也就是说,这些列只能在有限的集合中取值。 + +使用 [`tf.feature_column`](https://tensorflow.google.cn/api_docs/python/tf/feature_column) API 创建一个 [`tf.feature_column.indicator_column`](https://tensorflow.google.cn/api_docs/python/tf/feature_column/indicator_column) 集合,每个 [`tf.feature_column.indicator_column`](https://tensorflow.google.cn/api_docs/python/tf/feature_column/indicator_column) 对应一个分类的列。 + +```py +CATEGORIES = { + 'sex': ['male', 'female'], + 'class' : ['First', 'Second', 'Third'], + 'deck' : ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'], + 'embark_town' : ['Cherbourg', 'Southhampton', 'Queenstown'], + 'alone' : ['y', 'n'] +} +``` + +```py +categorical_columns = [] +for feature, vocab in CATEGORIES.items(): + cat_col = tf.feature_column.categorical_column_with_vocabulary_list( + key=feature, vocabulary_list=vocab) + categorical_columns.append(tf.feature_column.indicator_column(cat_col)) +``` + +```py +# 你刚才创建的内容 +categorical_columns +``` + +```py +[IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0)), + IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='class', vocabulary_list=('First', 'Second', 'Third'), dtype=tf.string, default_value=-1, num_oov_buckets=0)), + IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='deck', vocabulary_list=('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'), dtype=tf.string, default_value=-1, num_oov_buckets=0)), + IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Cherbourg', 'Southhampton', 'Queenstown'), dtype=tf.string, default_value=-1, num_oov_buckets=0)), + IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='alone', vocabulary_list=('y', 'n'), dtype=tf.string, default_value=-1, num_oov_buckets=0))] + +``` + +这将是后续构建模型时处理输入数据的一部分。 + +### 连续数据 + +连续数据需要标准化。 + +写一个函数标准化这些值,然后将这些值改造成 2 维的张量。 + +```py +def process_continuous_data(mean, data): + # 标准化数据 + data = tf.cast(data, tf.float32) * 1/(2*mean) + return tf.reshape(data, [-1, 1]) +``` + +现在创建一个数值列的集合。`tf.feature_columns.numeric_column` API 会使用 `normalizer_fn` 参数。在传参的时候使用 [`functools.partial`](https://docs.python.org/3/library/functools.html#functools.partial),`functools.partial` 由使用每个列的均值进行标准化的函数构成。 + +```py +MEANS = { + 'age' : 29.631308, + 
'n_siblings_spouses' : 0.545455, + 'parch' : 0.379585, + 'fare' : 34.385399 +} + +numerical_columns = [] + +for feature in MEANS.keys(): + num_col = tf.feature_column.numeric_column(feature, normalizer_fn=functools.partial(process_continuous_data, MEANS[feature])) + numerical_columns.append(num_col) +``` + +```py +# 你刚才创建的内容。 +numerical_columns +``` + +```py +[NumericColumn(key='age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=functools.partial(, 29.631308)), + NumericColumn(key='n_siblings_spouses', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=functools.partial(, 0.545455)), + NumericColumn(key='parch', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=functools.partial(, 0.379585)), + NumericColumn(key='fare', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=functools.partial(, 34.385399))] + +``` + +这里使用标准化的方法需要提前知道每列的均值。如果需要计算连续的数据流的标准化的值可以使用 [TensorFlow Transform](https://tensorflow.google.cn/tfx/transform/get_started)。 + +### 创建预处理层 + +将这两个特征列的集合相加,并且传给 [`tf.keras.layers.DenseFeatures`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/DenseFeatures) 从而创建一个进行预处理的输入层。 + +```py +preprocessing_layer = tf.keras.layers.DenseFeatures(categorical_columns+numerical_columns) +``` + +## 构建模型 + +从 `preprocessing_layer` 开始构建 [`tf.keras.Sequential`](https://tensorflow.google.cn/api_docs/python/tf/keras/Sequential)。 + +```py +model = tf.keras.Sequential([ + preprocessing_layer, + tf.keras.layers.Dense(128, activation='relu'), + tf.keras.layers.Dense(128, activation='relu'), + tf.keras.layers.Dense(1, activation='sigmoid'), +]) + +model.compile( + loss='binary_crossentropy', + optimizer='adam', + metrics=['accuracy']) +``` + +## 训练、评估和预测 + +现在可以实例化和训练模型。 + +```py +train_data = raw_train_data.shuffle(500) +test_data = raw_test_data +``` + +```py +model.fit(train_data, epochs=20) +``` + +```py +Epoch 1/20 +WARNING:tensorflow:Layers in a Sequential model should only have a single input tensor, but we receive a input: OrderedDict([('sex', ), ('age', ), ('n_siblings_spouses', ), ('parch', ), ('fare', ), ('class', ), ('deck', ), ('embark_town', ), ('alone', )]) +Consider rewriting this model with the Functional API. +WARNING:tensorflow:Layers in a Sequential model should only have a single input tensor, but we receive a input: OrderedDict([('sex', ), ('age', ), ('n_siblings_spouses', ), ('parch', ), ('fare', ), ('class', ), ('deck', ), ('embark_town', ), ('alone', )]) +Consider rewriting this model with the Functional API. 
+53/53 [==============================] - 0s 4ms/step - loss: 0.5501 - accuracy: 0.7225 +Epoch 2/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.4399 - accuracy: 0.8102 +Epoch 3/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.4158 - accuracy: 0.8150 +Epoch 4/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.4137 - accuracy: 0.8118 +Epoch 5/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.4011 - accuracy: 0.8278 +Epoch 6/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3953 - accuracy: 0.8198 +Epoch 7/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3834 - accuracy: 0.8325 +Epoch 8/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3831 - accuracy: 0.8309 +Epoch 9/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3768 - accuracy: 0.8453 +Epoch 10/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3710 - accuracy: 0.8437 +Epoch 11/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3704 - accuracy: 0.8389 +Epoch 12/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3670 - accuracy: 0.8325 +Epoch 13/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3603 - accuracy: 0.8517 +Epoch 14/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3548 - accuracy: 0.8501 +Epoch 15/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3554 - accuracy: 0.8469 +Epoch 16/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3519 - accuracy: 0.8453 +Epoch 17/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3472 - accuracy: 0.8596 +Epoch 18/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3513 - accuracy: 0.8581 +Epoch 19/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3448 - accuracy: 0.8469 +Epoch 20/20 +53/53 [==============================] - 0s 3ms/step - loss: 0.3390 - accuracy: 0.8581 + + + +``` + +当模型训练完成的时候,你可以在测试集 `test_data` 上检查准确性。 + +```py +test_loss, test_accuracy = model.evaluate(test_data) + +print('\n\nTest Loss {}, Test Accuracy {}'.format(test_loss, test_accuracy)) +``` + +```py +WARNING:tensorflow:Layers in a Sequential model should only have a single input tensor, but we receive a input: OrderedDict([('sex', ), ('age', ), ('n_siblings_spouses', ), ('parch', ), ('fare', ), ('class', ), ('deck', ), ('embark_town', ), ('alone', )]) +Consider rewriting this model with the Functional API. +22/22 [==============================] - 0s 3ms/step - loss: 0.4596 - accuracy: 0.7992 + +Test Loss 0.45956382155418396, Test Accuracy 0.7992424368858337 + +``` + +使用 [`tf.keras.Model.predict`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#predict) 推断一个批次或多个批次的标签。 + +```py +predictions = model.predict(test_data) + +# 显示部分结果 +for prediction, survived in zip(predictions[:10], list(test_data)[0][1][:10]): + print("Predicted survival: {:.2%}".format(prediction[0]), + " | Actual outcome: ", + ("SURVIVED" if bool(survived) else "DIED")) +``` + +```py +WARNING:tensorflow:Layers in a Sequential model should only have a single input tensor, but we receive a input: OrderedDict([('sex', ), ('age', ), ('n_siblings_spouses', ), ('parch', ), ('fare', ), ('class', ), ('deck', ), ('embark_town', ), ('alone', )]) +Consider rewriting this model with the Functional API. 
+Predicted survival: 99.81% | Actual outcome: DIED +Predicted survival: 14.77% | Actual outcome: SURVIVED +Predicted survival: 11.87% | Actual outcome: DIED +Predicted survival: 6.05% | Actual outcome: DIED +Predicted survival: 10.83% | Actual outcome: DIED +Predicted survival: 29.45% | Actual outcome: SURVIVED +Predicted survival: 92.37% | Actual outcome: SURVIVED +Predicted survival: 4.18% | Actual outcome: SURVIVED +Predicted survival: 14.32% | Actual outcome: DIED +Predicted survival: 4.36% | Actual outcome: SURVIVED + +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/017.md b/Tensorflow/TensorFlow2.0/017.md new file mode 100644 index 00000000..b7f8d227 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/017.md @@ -0,0 +1,104 @@ +# 使用 tf.data 加载 NumPy 数据 + +> 原文:[https://tensorflow.google.cn/tutorials/load_data/numpy](https://tensorflow.google.cn/tutorials/load_data/numpy) + +**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。 + +本教程提供了将数据从 NumPy 数组加载到 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) 的示例 本示例从一个 `.npz` 文件中加载 MNIST 数据集。但是,本实例中 NumPy 数据的来源并不重要。 + +## 安装 + +```py + import numpy as np +import tensorflow as tf +import tensorflow_datasets as tfds +``` + +### 从 `.npz` 文件中加载 + +```py +DATA_URL = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz' + +path = tf.keras.utils.get_file('mnist.npz', DATA_URL) +with np.load(path) as data: + train_examples = data['x_train'] + train_labels = data['y_train'] + test_examples = data['x_test'] + test_labels = data['y_test'] +``` + +## 使用 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) 加载 NumPy 数组 + +假设您有一个示例数组和相应的标签数组,请将两个数组作为元组传递给 [`tf.data.Dataset.from_tensor_slices`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#from_tensor_slices) 以创建 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) 。 + +```py +train_dataset = tf.data.Dataset.from_tensor_slices((train_examples, train_labels)) +test_dataset = tf.data.Dataset.from_tensor_slices((test_examples, test_labels)) +``` + +## 使用该数据集 + +### 打乱和批次化数据集 + +```py +BATCH_SIZE = 64 +SHUFFLE_BUFFER_SIZE = 100 + +train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE) +test_dataset = test_dataset.batch(BATCH_SIZE) +``` + +### 建立和训练模型 + +```py +model = tf.keras.Sequential([ + tf.keras.layers.Flatten(input_shape=(28, 28)), + tf.keras.layers.Dense(128, activation='relu'), + tf.keras.layers.Dense(10, activation='softmax') +]) + +model.compile(optimizer=tf.keras.optimizers.RMSprop(), + loss=tf.keras.losses.SparseCategoricalCrossentropy(), + metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]) +``` + +```py +model.fit(train_dataset, epochs=10) +``` + +```py +Epoch 1/10 +938/938 [==============================] - 2s 2ms/step - loss: 3.1713 - sparse_categorical_accuracy: 0.8769 +Epoch 2/10 +938/938 [==============================] - 2s 2ms/step - loss: 0.5085 - sparse_categorical_accuracy: 0.9271 +Epoch 3/10 +938/938 [==============================] - 2s 2ms/step - loss: 0.3764 - sparse_categorical_accuracy: 0.9466 +Epoch 4/10 +938/938 [==============================] - 2s 2ms/step - loss: 0.3165 - sparse_categorical_accuracy: 0.9550 +Epoch 5/10 +938/938 
[==============================] - 2s 2ms/step - loss: 0.2812 - sparse_categorical_accuracy: 0.9599 +Epoch 6/10 +938/938 [==============================] - 2s 2ms/step - loss: 0.2587 - sparse_categorical_accuracy: 0.9645 +Epoch 7/10 +938/938 [==============================] - 2s 2ms/step - loss: 0.2530 - sparse_categorical_accuracy: 0.9674 +Epoch 8/10 +938/938 [==============================] - 2s 2ms/step - loss: 0.2192 - sparse_categorical_accuracy: 0.9707 +Epoch 9/10 +938/938 [==============================] - 2s 2ms/step - loss: 0.2116 - sparse_categorical_accuracy: 0.9721 +Epoch 10/10 +938/938 [==============================] - 2s 2ms/step - loss: 0.2014 - sparse_categorical_accuracy: 0.9747 + + + +``` + +```py +model.evaluate(test_dataset) +``` + +```py +157/157 [==============================] - 0s 2ms/step - loss: 0.5586 - sparse_categorical_accuracy: 0.9568 + +[0.5586389303207397, 0.9567999839782715] + +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/018.md b/Tensorflow/TensorFlow2.0/018.md new file mode 100644 index 00000000..3e25bc3e --- /dev/null +++ b/Tensorflow/TensorFlow2.0/018.md @@ -0,0 +1,297 @@ +# 使用 tf.data 加载 pandas dataframes + +> 原文:[https://tensorflow.google.cn/tutorials/load_data/pandas_dataframe](https://tensorflow.google.cn/tutorials/load_data/pandas_dataframe) + +**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。 + +本教程提供了如何将 pandas dataframes 加载到 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset)。 + +本教程使用了一个小型[数据集](https://archive.ics.uci.edu/ml/datasets/heart+Disease),由克利夫兰诊所心脏病基金会(Cleveland Clinic Foundation for Heart Disease)提供. 此数据集中有几百行 CSV。每行表示一个患者,每列表示一个属性(describe)。我们将使用这些信息来预测患者是否患有心脏病,这是一个二分类问题。 + +## 使用 pandas 读取数据 + +```py +!pip install -q tensorflow-gpu==2.0.0-rc1 +import pandas as pd +import tensorflow as tf +``` + +```py +WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available. +You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command. 
+ +``` + +下载包含心脏数据集的 csv 文件。 + +```py +csv_file = tf.keras.utils.get_file('heart.csv', 'https://storage.googleapis.com/applied-dl/heart.csv') +``` + +使用 pandas 读取 csv 文件。 + +```py +df = pd.read_csv(csv_file) +``` + +```py +df.head() +``` + + + +```py +df.dtypes +``` + +```py +age int64 +sex int64 +cp int64 +trestbps int64 +chol int64 +fbs int64 +restecg int64 +thalach int64 +exang int64 +oldpeak float64 +slope int64 +ca int64 +thal object +target int64 +dtype: object + +``` + +将 `thal` 列(数据帧(dataframe)中的 `object` )转换为离散数值。 + +```py +df['thal'] = pd.Categorical(df['thal']) +df['thal'] = df.thal.cat.codes +``` + +```py +df.head() +``` + + + +## 使用 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) 读取数据 + +使用 [`tf.data.Dataset.from_tensor_slices`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#from_tensor_slices) 从 pandas dataframe 中读取数值。 + +使用 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) 的其中一个优势是可以允许您写一些简单而又高效的数据管道(data pipelines)。从 [loading data guide](https://tensorflow.google.cn/guide/data) 可以了解更多。 + +```py +target = df.pop('target') +``` + +```py +dataset = tf.data.Dataset.from_tensor_slices((df.values, target.values)) +``` + +```py +for feat, targ in dataset.take(5): + print ('Features: {}, Target: {}'.format(feat, targ)) +``` + +```py +Features: [ 63\. 1\. 1\. 145\. 233\. 1\. 2\. 150\. 0\. 2.3 3\. 0. + + 2\. ], Target: 0 +Features: [ 67\. 1\. 4\. 160\. 286\. 0\. 2\. 108\. 1\. 1.5 2\. 3. + 3\. ], Target: 1 +Features: [ 67\. 1\. 4\. 120\. 229\. 0\. 2\. 129\. 1\. 2.6 2\. 2. + 4\. ], Target: 0 +Features: [ 37\. 1\. 3\. 130\. 250\. 0\. 0\. 187\. 0\. 3.5 3\. 0. + 3\. ], Target: 0 +Features: [ 41\. 0\. 2\. 130\. 204\. 0\. 2\. 172\. 0\. 1.4 1\. 0. + 3\. ], Target: 0 + +``` + +由于 `pd.Series` 实现了 `__array__` 协议,因此几乎可以在任何使用 `np.array` 或 [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor) 的地方透明地使用它。 + +```py +tf.constant(df['thal']) +``` + +```py + + +``` + +随机读取(shuffle)并批量处理数据集。 + +```py +train_dataset = dataset.shuffle(len(df)).batch(1) +``` + +## 创建并训练模型 + +```py +def get_compiled_model(): + model = tf.keras.Sequential([ + tf.keras.layers.Dense(10, activation='relu'), + tf.keras.layers.Dense(10, activation='relu'), + tf.keras.layers.Dense(1, activation='sigmoid') + ]) + + model.compile(optimizer='adam', + loss='binary_crossentropy', + metrics=['accuracy']) + return model +``` + +```py +model = get_compiled_model() +model.fit(train_dataset, epochs=15) +``` + +```py +WARNING:tensorflow:Layer sequential is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because it's dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +Epoch 1/15 +WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow_core/python/ops/nn_impl.py:183: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version. 
+Instructions for updating: +Use tf.where in 2.0, which has the same broadcast rule as np.where +WARNING:tensorflow:Entity .initialize_variables at 0x7f3d7029f620> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num' +WARNING: Entity .initialize_variables at 0x7f3d7029f620> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num' +303/303 [==============================] - 1s 4ms/step - loss: 3.8214 - accuracy: 0.5149 +Epoch 2/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.9302 - accuracy: 0.6766 +Epoch 3/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.8203 - accuracy: 0.6964 +Epoch 4/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.7565 - accuracy: 0.7162 +Epoch 5/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.6607 - accuracy: 0.7162 +Epoch 6/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.6804 - accuracy: 0.6931 +Epoch 7/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.5967 - accuracy: 0.7525 +Epoch 8/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.6198 - accuracy: 0.7228 +Epoch 9/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.5584 - accuracy: 0.7624 +Epoch 10/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.5611 - accuracy: 0.7756 +Epoch 11/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.5364 - accuracy: 0.7492 +Epoch 12/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.5042 - accuracy: 0.7822 +Epoch 13/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.5168 - accuracy: 0.7624 +Epoch 14/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.4560 - accuracy: 0.8053 +Epoch 15/15 +303/303 [==============================] - 0s 1ms/step - loss: 0.4350 - accuracy: 0.7987 + + + +``` + +## 代替特征列 + +将字典作为输入传输给模型就像创建 [`tf.keras.layers.Input`](https://tensorflow.google.cn/api_docs/python/tf/keras/Input) 层的匹配字典一样简单,应用任何预处理并使用 [functional api](https://tensorflow.google.cn/guide/keras/functional)。 您可以使用它作为 [feature columns](https://tensorflow.google.cn/tutorials/keras/feature_columns) 的替代方法。 + +```py +inputs = {key: tf.keras.layers.Input(shape=(), name=key) for key in df.keys()} +x = tf.stack(list(inputs.values()), axis=-1) + +x = tf.keras.layers.Dense(10, activation='relu')(x) +output = tf.keras.layers.Dense(1, activation='sigmoid')(x) + +model_func = tf.keras.Model(inputs=inputs, outputs=output) + +model_func.compile(optimizer='adam', + loss='binary_crossentropy', + metrics=['accuracy']) +``` + +与 [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) 一起使用时,保存 `pd.DataFrame` 列结构的最简单方法是将 `pd.DataFrame` 转换为 `dict` ,并对该字典进行切片。 + +```py +dict_slices = tf.data.Dataset.from_tensor_slices((df.to_dict('list'), target.values)).batch(16) +``` + +```py +for dict_slice in dict_slices.take(1): + print (dict_slice) +``` + +```py +({'age': , 'sex': , 'cp': , 'trestbps': , 'chol': , 'fbs': , 'restecg': , 'thalach': , 'exang': , 'oldpeak': , 'slope': , 'ca': , 'thal': }, ) + +``` + +```py +model_func.fit(dict_slices, epochs=15) +``` + +```py +Epoch 1/15 
+WARNING:tensorflow:Entity .initialize_variables at 0x7f3d2c33a510> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num'
+WARNING: Entity .initialize_variables at 0x7f3d2c33a510> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: module 'gast' has no attribute 'Num'
+19/19 [==============================] - 1s 30ms/step - loss: 17.3744 - accuracy: 0.7261
+Epoch 2/15
+19/19 [==============================] - 0s 3ms/step - loss: 9.7210 - accuracy: 0.7261
+Epoch 3/15
+19/19 [==============================] - 0s 3ms/step - loss: 5.0425 - accuracy: 0.6106
+Epoch 4/15
+19/19 [==============================] - 0s 3ms/step - loss: 4.8356 - accuracy: 0.5182
+Epoch 5/15
+19/19 [==============================] - 0s 3ms/step - loss: 4.4312 - accuracy: 0.5743
+Epoch 6/15
+19/19 [==============================] - 0s 3ms/step - loss: 4.2668 - accuracy: 0.5644
+Epoch 7/15
+19/19 [==============================] - 0s 3ms/step - loss: 4.1296 - accuracy: 0.5776
+Epoch 8/15
+19/19 [==============================] - 0s 3ms/step - loss: 4.0027 - accuracy: 0.5776
+Epoch 9/15
+19/19 [==============================] - 0s 3ms/step - loss: 3.8945 - accuracy: 0.5776
+Epoch 10/15
+19/19 [==============================] - 0s 3ms/step - loss: 3.7877 - accuracy: 0.5776
+Epoch 11/15
+19/19 [==============================] - 0s 3ms/step - loss: 3.6851 - accuracy: 0.5776
+Epoch 12/15
+19/19 [==============================] - 0s 3ms/step - loss: 3.5828 - accuracy: 0.5743
+Epoch 13/15
+19/19 [==============================] - 0s 3ms/step - loss: 3.4813 - accuracy: 0.5776
+Epoch 14/15
+19/19 [==============================] - 0s 3ms/step - loss: 3.3808 - accuracy: 0.5842
+Epoch 15/15
+19/19 [==============================] - 0s 3ms/step - loss: 3.2814 - accuracy: 0.5842
+
+
+
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/019.md b/Tensorflow/TensorFlow2.0/019.md
new file mode 100644
index 00000000..87f6fb23
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/019.md
@@ -0,0 +1,444 @@
+# Unicode 字符串
+
+> 原文:[https://tensorflow.google.cn/tutorials/load_data/unicode](https://tensorflow.google.cn/tutorials/load_data/unicode)
+
+## 简介
+
+处理自然语言的模型通常使用不同的字符集来处理不同的语言。*Unicode* 是一种标准的编码系统,用于表示几乎所有语言的字符。每个字符使用 `0` 和 `0x10FFFF` 之间的唯一整数[码位](https://en.wikipedia.org/wiki/Code_point)进行编码。*Unicode 字符串*是由零个或更多码位组成的序列。
+
+本教程介绍了如何在 TensorFlow 中表示 Unicode 字符串,以及如何使用标准字符串运算的 Unicode 等效项对其进行操作。它会根据字符体系检测将 Unicode 字符串划分为不同词例。
+
+```py
+import tensorflow as tf
+```
+
+## [`tf.string`](https://tensorflow.google.cn/api_docs/python/tf#string) 数据类型
+
+您可以使用基本的 TensorFlow [`tf.string`](https://tensorflow.google.cn/api_docs/python/tf#string) `dtype` 构建字节字符串张量。Unicode 字符串默认使用 UTF-8 编码。
+
+```py
+tf.constant(u"Thanks 😊")
+```
+
+```py
+
+```
+
+[`tf.string`](https://tensorflow.google.cn/api_docs/python/tf#string) 张量可以容纳不同长度的字节字符串,因为字节字符串会被视为原子单元。字符串长度不包括在张量维度中。
+
+```py
+tf.constant([u"You're", u"welcome!"]).shape
+```
+
+```py
+TensorShape([2])
+
+```
+
+注:使用 Python 构造字符串时,v2 和 v3 对 Unicode 的处理方式有所不同。在 v2 中,Unicode 字符串用前缀“u”表示(如上所示)。在 v3 中,字符串默认使用 Unicode 编码。
+
+## 表示 Unicode
+
+在 TensorFlow 中有两种表示 Unicode 字符串的标准方式:
+
+* `string` 标量 - 
使用已知[字符编码](https://en.wikipedia.org/wiki/Character_encoding)对码位序列进行编码。
+* `int32` 向量 - 每个位置包含单个码位。
+
+例如,以下三个值均表示 Unicode 字符串 `"语言处理"`:
+
+```py
+# Unicode string, represented as a UTF-8 encoded string scalar.
+text_utf8 = tf.constant(u"语言处理")
+text_utf8
+```
+
+```py
+
+```
+
+```py
+# Unicode string, represented as a UTF-16-BE encoded string scalar.
+text_utf16be = tf.constant(u"语言处理".encode("UTF-16-BE"))
+text_utf16be
+```
+
+```py
+
+```
+
+```py
+# Unicode string, represented as a vector of Unicode code points.
+text_chars = tf.constant([ord(char) for char in u"语言处理"])
+text_chars
+```
+
+```py
+
+```
+
+### 在不同表示之间进行转换
+
+TensorFlow 提供了在下列不同表示之间进行转换的运算:
+
+* [`tf.strings.unicode_decode`](https://tensorflow.google.cn/api_docs/python/tf/strings/unicode_decode):将编码的字符串标量转换为码位的向量。
+* [`tf.strings.unicode_encode`](https://tensorflow.google.cn/api_docs/python/tf/strings/unicode_encode):将码位的向量转换为编码的字符串标量。
+* [`tf.strings.unicode_transcode`](https://tensorflow.google.cn/api_docs/python/tf/strings/unicode_transcode):将编码的字符串标量转换为其他编码。
+
+```py
+tf.strings.unicode_decode(text_utf8,
+                          input_encoding='UTF-8')
+```
+
+```py
+
+```
+
+```py
+tf.strings.unicode_encode(text_chars,
+                          output_encoding='UTF-8')
+```
+
+```py
+
+```
+
+```py
+tf.strings.unicode_transcode(text_utf8,
+                             input_encoding='UTF8',
+                             output_encoding='UTF-16-BE')
+```
+
+```py
+
+```
+
+### 批次维度
+
+解码多个字符串时,每个字符串中的字符数可能不相等。返回结果是 [`tf.RaggedTensor`](https://tensorflow.google.cn/guide/ragged_tensor),其中最里面的维度的长度会根据每个字符串中的字符数而变化:
+
+```py
+# A batch of Unicode strings, each represented as a UTF8-encoded string.
+batch_utf8 = [s.encode('UTF-8') for s in
+              [u'hÃllo', u'What is the weather tomorrow', u'Göödnight', u'😊']]
+batch_chars_ragged = tf.strings.unicode_decode(batch_utf8,
+                                               input_encoding='UTF-8')
+for sentence_chars in batch_chars_ragged.to_list():
+  print(sentence_chars)
+```
+
+```py
+[104, 195, 108, 108, 111]
+[87, 104, 97, 116, 32, 105, 115, 32, 116, 104, 101, 32, 119, 101, 97, 116, 104, 101, 114, 32, 116, 111, 109, 111, 114, 114, 111, 119]
+[71, 246, 246, 100, 110, 105, 103, 104, 116]
+[128522]
+
+```
+
+您可以直接使用此 [`tf.RaggedTensor`](https://tensorflow.google.cn/api_docs/python/tf/RaggedTensor),也可以使用 [`tf.RaggedTensor.to_tensor`](https://tensorflow.google.cn/api_docs/python/tf/RaggedTensor#to_tensor) 和 [`tf.RaggedTensor.to_sparse`](https://tensorflow.google.cn/api_docs/python/tf/RaggedTensor#to_sparse) 方法将其转换为带有填充的密集 [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor) 或 [`tf.SparseTensor`](https://tensorflow.google.cn/api_docs/python/tf/sparse/SparseTensor)。
+
+```py
+batch_chars_padded = batch_chars_ragged.to_tensor(default_value=-1)
+print(batch_chars_padded.numpy())
+```
+
+```py
+[[   104    195    108    108    111     -1     -1     -1     -1     -1
+      -1     -1     -1     -1     -1     -1     -1     -1     -1     -1
+      -1     -1     -1     -1     -1     -1     -1     -1]
+ [    87    104     97    116     32    105    115     32    116    104
+     101     32    119    101     97    116    104    101    114     32
+     116    111    109    111    114    114    111    119]
+ [    71    246    246    100    110    105    103    104    116     -1
+      -1     -1     -1     -1     -1     -1     -1     -1     -1     -1
+      -1     -1     -1     -1     -1     -1     -1     -1]
+ [128522     -1     -1     -1     -1     -1     -1     -1     -1     -1
+      -1     -1     -1     -1     -1     -1     -1     -1     -1     -1
+      -1     -1     -1     -1     -1     -1     -1     -1]]
+
+```
+
+```py
+batch_chars_sparse = batch_chars_ragged.to_sparse()
+```
+
+在对多个具有相同长度的字符串进行编码时,可以将 [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor) 用作输入:
+
+```py
+tf.strings.unicode_encode([[99, 97, 116], [100, 111, 103], [ 99, 111, 119]],
+                          output_encoding='UTF-8')
+```
+
+```py
+
+```
+
+当对多个具有不同长度的字符串进行编码时,应将
[`tf.RaggedTensor`](https://tensorflow.google.cn/api_docs/python/tf/RaggedTensor) 用作输入:
+
+```py
+tf.strings.unicode_encode(batch_chars_ragged, output_encoding='UTF-8')
+```
+
+```py
+
+```
+
+如果您的张量具有填充或稀疏格式的多个字符串,请在调用 `unicode_encode` 之前将其转换为 [`tf.RaggedTensor`](https://tensorflow.google.cn/api_docs/python/tf/RaggedTensor):
+
+```py
+tf.strings.unicode_encode(
+    tf.RaggedTensor.from_sparse(batch_chars_sparse),
+    output_encoding='UTF-8')
+```
+
+```py
+
+```
+
+```py
+tf.strings.unicode_encode(
+    tf.RaggedTensor.from_tensor(batch_chars_padded, padding=-1),
+    output_encoding='UTF-8')
+```
+
+```py
+
+```
+
+## Unicode 运算
+
+### 字符长度
+
+[`tf.strings.length`](https://tensorflow.google.cn/api_docs/python/tf/strings/length) 运算具有 `unit` 参数,该参数表示计算长度的方式。`unit` 默认为 `"BYTE"`,但也可以将其设置为其他值(例如 `"UTF8_CHAR"` 或 `"UTF16_CHAR"`),以确定每个已编码 `string` 中的 Unicode 码位数量。
+
+```py
+# Note that the final character takes up 4 bytes in UTF8.
+thanks = u'Thanks 😊'.encode('UTF-8')
+num_bytes = tf.strings.length(thanks).numpy()
+num_chars = tf.strings.length(thanks, unit='UTF8_CHAR').numpy()
+print('{} bytes; {} UTF-8 characters'.format(num_bytes, num_chars))
+```
+
+```py
+11 bytes; 8 UTF-8 characters
+
+```
+
+### 字符子字符串
+
+类似地,[`tf.strings.substr`](https://tensorflow.google.cn/api_docs/python/tf/strings/substr) 运算会接受 "`unit`" 参数,并用它来确定 "`pos`" 和 "`len`" 参数包含的偏移类型。
+
+```py
+# default: unit='BYTE'. With len=1, we return a single byte.
+tf.strings.substr(thanks, pos=7, len=1).numpy()
+```
+
+```py
+b'\xf0'
+
+```
+
+```py
+# Specifying unit='UTF8_CHAR', we return a single character, which in this case
+# is 4 bytes.
+print(tf.strings.substr(thanks, pos=7, len=1, unit='UTF8_CHAR').numpy())
+```
+
+```py
+b'\xf0\x9f\x98\x8a'
+
+```
+
+### 拆分 Unicode 字符串
+
+[`tf.strings.unicode_split`](https://tensorflow.google.cn/api_docs/python/tf/strings/unicode_split) 运算会将 Unicode 字符串拆分为单个字符的子字符串:
+
+```py
+tf.strings.unicode_split(thanks, 'UTF-8').numpy()
+```
+
+```py
+array([b'T', b'h', b'a', b'n', b'k', b's', b' ', b'\xf0\x9f\x98\x8a'],
+      dtype=object)
+
+```
+
+### 字符的字节偏移量
+
+为了将 [`tf.strings.unicode_decode`](https://tensorflow.google.cn/api_docs/python/tf/strings/unicode_decode) 生成的字符张量与原始字符串对齐,了解每个字符开始位置的偏移量很有用。方法 [`tf.strings.unicode_decode_with_offsets`](https://tensorflow.google.cn/api_docs/python/tf/strings/unicode_decode_with_offsets) 与 `unicode_decode` 类似,不同的是它会返回包含每个字符起始偏移量的第二个张量。
+
+```py
+codepoints, offsets = tf.strings.unicode_decode_with_offsets(u"🎈🎉🎊", 'UTF-8')
+
+for (codepoint, offset) in zip(codepoints.numpy(), offsets.numpy()):
+  print("At byte offset {}: codepoint {}".format(offset, codepoint))
+```
+
+```py
+At byte offset 0: codepoint 127880
+At byte offset 4: codepoint 127881
+At byte offset 8: codepoint 127882
+
+```
+
+## Unicode 字符体系
+
+每个 Unicode 码位都属于某个码位集合,这些集合被称作[字符体系](https://en.wikipedia.org/wiki/Script_%28Unicode%29)。某个字符的字符体系有助于确定该字符可能所属的语言。例如,已知 'Б' 属于西里尔字符体系,表明包含该字符的现代文本很可能来自某个斯拉夫语种(如俄语或乌克兰语)。
+
+TensorFlow 提供了 [`tf.strings.unicode_script`](https://tensorflow.google.cn/api_docs/python/tf/strings/unicode_script) 运算来确定某一给定码位使用的是哪个字符体系。字符体系代码是对应于[国际 Unicode 组件](http://site.icu-project.org/home) (ICU) [`UScriptCode`](http://icu-project.org/apiref/icu4c/uscript_8h.html) 值的 `int32` 值。
+
+```py
+uscript = tf.strings.unicode_script([33464, 1041])  # ['芸', 'Б']
+
+print(uscript.numpy())  # [17, 8] == [USCRIPT_HAN, USCRIPT_CYRILLIC]
+```
+
+```py
+[17 8]
+
+```
+
+[`tf.strings.unicode_script`](https://tensorflow.google.cn/api_docs/python/tf/strings/unicode_script) 运算还可以应用于码位的多维
[`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor) 或 [`tf.RaggedTensor`](https://tensorflow.google.cn/api_docs/python/tf/RaggedTensor):
+
+```py
+print(tf.strings.unicode_script(batch_chars_ragged))
+```
+
+```py
+
+```
+
+## 示例:简单分词
+
+分词是将文本拆分为类似单词的单元的任务。当使用空格字符分隔单词时,这通常很容易,但是某些语言(如中文和日语)不使用空格,而某些语言(如德语)中存在长复合词,必须进行拆分才能分析其含义。在网页文本中,不同语言和字符体系常常混合在一起,例如“NY 株価”(纽约证券交易所)。
+
+我们可以利用字符体系的变化进行粗略分词(不实现任何 ML 模型),从而估算词边界。这对类似上面“NY 株価”示例的字符串都有效。这种方法对大多数使用空格的语言也都有效,因为各种字符体系中的空格字符都归类为 USCRIPT_COMMON,这是一种特殊的字符体系代码,不同于任何实际文本。
+
+```py
+# dtype: string; shape: [num_sentences]
+#
+# The sentences to process.  Edit this line to try out different inputs!
+sentence_texts = [u'Hello, world.', u'世界こんにちは']
+```
+
+首先,我们将句子解码为字符码位,然后查找每个字符的字符体系标识符。
+
+```py
+# dtype: int32; shape: [num_sentences, (num_chars_per_sentence)]
+#
+# sentence_char_codepoint[i, j] is the codepoint for the j'th character in
+# the i'th sentence.
+sentence_char_codepoint = tf.strings.unicode_decode(sentence_texts, 'UTF-8')
+print(sentence_char_codepoint)
+
+# dtype: int32; shape: [num_sentences, (num_chars_per_sentence)]
+#
+# sentence_char_scripts[i, j] is the unicode script of the j'th character in
+# the i'th sentence.
+sentence_char_script = tf.strings.unicode_script(sentence_char_codepoint)
+print(sentence_char_script)
+```
+
+```py
+
+
+
+```
+
+接下来,我们使用这些字符体系标识符来确定添加词边界的位置。我们在每个句子的开头添加一个词边界;如果某个字符与前一个字符属于不同的字符体系,也为该字符添加词边界。
+
+```py
+# dtype: bool; shape: [num_sentences, (num_chars_per_sentence)]
+#
+# sentence_char_starts_word[i, j] is True if the j'th character in the i'th
+# sentence is the start of a word.
+sentence_char_starts_word = tf.concat(
+    [tf.fill([sentence_char_script.nrows(), 1], True),
+     tf.not_equal(sentence_char_script[:, 1:], sentence_char_script[:, :-1])],
+    axis=1)
+
+# dtype: int64; shape: [num_words]
+#
+# word_starts[i] is the index of the character that starts the i'th word (in
+# the flattened list of characters from all sentences).
+word_starts = tf.squeeze(tf.where(sentence_char_starts_word.values), axis=1)
+print(word_starts)
+```
+
+```py
+tf.Tensor([ 0  5  7 12 13 15], shape=(6,), dtype=int64)
+
+```
+
+然后,我们可以使用这些起始偏移量来构建 `RaggedTensor`,它包含了所有批次的单词列表:
+
+```py
+# dtype: int32; shape: [num_words, (num_chars_per_word)]
+#
+# word_char_codepoint[i, j] is the codepoint for the j'th character in the
+# i'th word.
+word_char_codepoint = tf.RaggedTensor.from_row_starts(
+    values=sentence_char_codepoint.values,
+    row_starts=word_starts)
+print(word_char_codepoint)
+```
+
+```py
+
+```
+
+最后,我们可以将词码位 `RaggedTensor` 划分回句子中:
+
+```py
+# dtype: int64; shape: [num_sentences]
+#
+# sentence_num_words[i] is the number of words in the i'th sentence.
+sentence_num_words = tf.reduce_sum(
+    tf.cast(sentence_char_starts_word, tf.int64),
+    axis=1)
+
+# dtype: int32; shape: [num_sentences, (num_words_per_sentence), (num_chars_per_word)]
+#
+# sentence_word_char_codepoint[i, j, k] is the codepoint for the k'th character
+# in the j'th word in the i'th sentence.
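+#
+# (Editorial note: from_row_lengths below groups the flat word list into
+# per-sentence sublists, using sentence_num_words as the row lengths.)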
+sentence_word_char_codepoint = tf.RaggedTensor.from_row_lengths(
+    values=word_char_codepoint,
+    row_lengths=sentence_num_words)
+print(sentence_word_char_codepoint)
+```
+
+```py
+
+```
+
+为了使最终结果更易于阅读,我们可以将其重新编码为 UTF-8 字符串:
+
+```py
+tf.strings.unicode_encode(sentence_word_char_codepoint, 'UTF-8').to_list()
+```
+
+```py
+[[b'Hello', b', ', b'world', b'.'],
+ [b'\xe4\xb8\x96\xe7\x95\x8c',
+  b'\xe3\x81\x93\xe3\x82\x93\xe3\x81\xab\xe3\x81\xa1\xe3\x81\xaf']]
+
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/020.md b/Tensorflow/TensorFlow2.0/020.md
new file mode 100644
index 00000000..d3859380
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/020.md
@@ -0,0 +1,198 @@
+# TF.Text
+
+> 原文:[https://tensorflow.google.cn/tutorials/tensorflow_text/intro](https://tensorflow.google.cn/tutorials/tensorflow_text/intro)
+
+## Introduction
+
+TensorFlow Text provides a collection of text related classes and ops ready to use with TensorFlow 2.0. The library can perform the preprocessing regularly required by text-based models, and includes other features useful for sequence modeling not provided by core TensorFlow.
+
+The benefit of using these ops in your text preprocessing is that they are done in the TensorFlow graph. You do not need to worry about tokenization in training being different than the tokenization at inference, or managing preprocessing scripts.
+
+## Eager Execution
+
+TensorFlow Text requires TensorFlow 2.0, and is fully compatible with eager mode and graph mode.
+
+* * *
+
+**Note:** On rare occasions, this import may fail looking for the TF library. Please reset the runtime and rerun the pip install -q above.
+
+```py
+!pip install -q tensorflow-text
+```
+
+```py
+DEPRECATION: Python 3.4 support has been deprecated. pip 19.1 will be the last one supporting it. Please upgrade your Python as Python 3.4 won't be maintained after March 2019 (cf PEP 429).
+
+```
+
+```py
+import tensorflow as tf
+import tensorflow_text as text
+```
+
+## Unicode
+
+Most ops expect that the strings are in UTF-8. If you're using a different encoding, you can use the core tensorflow transcode op to transcode into UTF-8. You can also use the same op to coerce your string to structurally valid UTF-8 if your input could be invalid.
+
+```py
+docs = tf.constant([u'Everything not saved will be lost.'.encode('UTF-16-BE'), u'Sad☹'.encode('UTF-16-BE')])
+utf8_docs = tf.strings.unicode_transcode(docs, input_encoding='UTF-16-BE', output_encoding='UTF-8')
+```
+
+## Tokenization
+
+Tokenization is the process of breaking up a string into tokens. Commonly, these tokens are words, numbers, and/or punctuation.
+
+The main interfaces are `Tokenizer` and `TokenizerWithOffsets` which each have a single method `tokenize` and `tokenize_with_offsets` respectively. There are multiple tokenizers available now. Each of these implements `TokenizerWithOffsets` (which extends `Tokenizer`) and includes an option for getting byte offsets into the original string. This allows the caller to know the bytes in the original string the token was created from.
+
+All of the tokenizers return RaggedTensors with the inner-most dimension of tokens mapping to the original individual strings. As a result, the resulting shape's rank is increased by one. Please review the ragged tensor guide if you are unfamiliar with them. https://www.tensorflow.org/guide/ragged_tensors
+
+### WhitespaceTokenizer
+
+This is a basic tokenizer that splits UTF-8 strings on ICU defined whitespace characters (e.g. space, tab, new line).
+ +```py +tokenizer = text.WhitespaceTokenizer() +tokens = tokenizer.tokenize(['everything not saved will be lost.', u'Sad☹'.encode('UTF-8')]) +print(tokens.to_list()) +``` + +```py +WARNING: Logging before flag parsing goes to stderr. +W0701 13:16:14.667488 140633166759744 deprecation.py:323] From /tmpfs/src/tf_docs_env/lib/python3.4/site-packages/tensorflow/python/util/dispatch.py:180: batch_gather (from tensorflow.python.ops.array_ops) is deprecated and will be removed after 2017-10-25. +Instructions for updating: +`tf.batch_gather` is deprecated, please use `tf.gather` with `batch_dims` instead. +W0701 13:16:14.671800 140633166759744 deprecation.py:323] From /tmpfs/src/tf_docs_env/lib/python3.4/site-packages/tensorflow/python/ops/array_ops.py:1340: add_dispatch_support..wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version. +Instructions for updating: +Use tf.where in 2.0, which has the same broadcast rule as np.where + +[[b'everything', b'not', b'saved', b'will', b'be', b'lost.'], [b'Sad\xe2\x98\xb9']] + +``` + +### UnicodeScriptTokenizer + +This tokenizer splits UTF-8 strings based on Unicode script boundaries. The script codes used correspond to International Components for Unicode (ICU) UScriptCode values. See: http://icu-project.org/apiref/icu4c/uscript_8h.html + +In practice, this is similar to the `WhitespaceTokenizer` with the most apparent difference being that it will split punctuation (USCRIPT_COMMON) from language texts (eg. USCRIPT_LATIN, USCRIPT_CYRILLIC, etc) while also separating language texts from each other. + +```py +tokenizer = text.UnicodeScriptTokenizer() +tokens = tokenizer.tokenize(['everything not saved will be lost.', u'Sad☹'.encode('UTF-8')]) +print(tokens.to_list()) +``` + +```py +[[b'everything', b'not', b'saved', b'will', b'be', b'lost', b'.'], [b'Sad', b'\xe2\x98\xb9']] + +``` + +### Unicode split + +When tokenizing languages without whitespace to segment words, it is common to just split by character, which can be accomplished using the [unicode_split](https://tensorflow.google.cn/api_docs/python/tf/strings/unicode_split) op found in core. + +```py +tokens = tf.strings.unicode_split([u"仅今年前".encode('UTF-8')], 'UTF-8') +print(tokens.to_list()) +``` + +```py +[[b'\xe4\xbb\x85', b'\xe4\xbb\x8a', b'\xe5\xb9\xb4', b'\xe5\x89\x8d']] + +``` + +### Offsets + +When tokenizing strings, it is often desired to know where in the original string the token originated from. For this reason, each tokenizer which implements `TokenizerWithOffsets` has a *tokenize_with_offsets* method that will return the byte offsets along with the tokens. The offset_starts lists the bytes in the original string each token starts at, and the offset_limits lists the bytes where each token ends. + +```py +tokenizer = text.UnicodeScriptTokenizer() +(tokens, offset_starts, offset_limits) = tokenizer.tokenize_with_offsets(['everything not saved will be lost.', u'Sad☹'.encode('UTF-8')]) +print(tokens.to_list()) +print(offset_starts.to_list()) +print(offset_limits.to_list()) +``` + +```py +[[b'everything', b'not', b'saved', b'will', b'be', b'lost', b'.'], [b'Sad', b'\xe2\x98\xb9']] +[[0, 11, 15, 21, 26, 29, 33], [0, 3]] +[[10, 14, 20, 25, 28, 33, 34], [3, 6]] + +``` + +### TF.Data Example + +Tokenizers work as expected with the tf.data API. A simple example is provided below. 
+
+```py
+docs = tf.data.Dataset.from_tensor_slices([['Never tell me the odds.'], ["It's a trap!"]])
+tokenizer = text.WhitespaceTokenizer()
+tokenized_docs = docs.map(lambda x: tokenizer.tokenize(x))
+iterator = iter(tokenized_docs)
+print(next(iterator).to_list())
+print(next(iterator).to_list())
+```
+
+```py
+[[b'Never', b'tell', b'me', b'the', b'odds.']]
+[[b"It's", b'a', b'trap!']]
+
+```
+
+## Other Text Ops
+
+TF.Text packages other useful preprocessing ops. We will review a couple below.
+
+### Wordshape
+
+A common feature used in some natural language understanding models is to see if the text string has a certain property. For example, a sentence breaking model might contain features which check for word capitalization or if a punctuation character is at the end of a string.
+
+Wordshape defines a variety of useful regular expression based helper functions for matching various relevant patterns in your input text. Here are a few examples.
+
+```py
+tokenizer = text.WhitespaceTokenizer()
+tokens = tokenizer.tokenize(['Everything not saved will be lost.', u'Sad☹'.encode('UTF-8')])
+
+# Is capitalized?
+f1 = text.wordshape(tokens, text.WordShape.HAS_TITLE_CASE)
+# Are all letters uppercased?
+f2 = text.wordshape(tokens, text.WordShape.IS_UPPERCASE)
+# Does the token contain punctuation?
+f3 = text.wordshape(tokens, text.WordShape.HAS_SOME_PUNCT_OR_SYMBOL)
+# Is the token a number?
+f4 = text.wordshape(tokens, text.WordShape.IS_NUMERIC_VALUE)
+
+print(f1.to_list())
+print(f2.to_list())
+print(f3.to_list())
+print(f4.to_list())
+```
+
+```py
+[[True, False, False, False, False, False], [True]]
+[[False, False, False, False, False, False], [False]]
+[[False, False, False, False, False, True], [True]]
+[[False, False, False, False, False, False], [False]]
+
+```
+
+### N-grams & Sliding Window
+
+N-grams are sequential words given a sliding window size of *n*. When combining the tokens, there are three reduction mechanisms supported. For text, you would want to use `Reduction.STRING_JOIN` which appends the strings to each other. The default separator character is a space, but this can be changed with the `string_separator` argument.
+
+The other two reduction methods are most often used with numerical values, and these are `Reduction.SUM` and `Reduction.MEAN`.
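+
+As a minimal sketch of the numeric reductions (the `scores` values here are hypothetical, made up purely for illustration), a sliding-window sum over a ragged batch might look like this:
+
+```py
+import tensorflow as tf
+import tensorflow_text as text
+
+# Hypothetical per-token scores; a ragged batch with rows of different lengths.
+scores = tf.ragged.constant([[1.0, 2.0, 3.0], [10.0, 20.0]])
+
+# Sum each window of 2 adjacent values (bi-gram style).
+sums = text.ngrams(scores, 2, reduction_type=text.Reduction.SUM)
+
+print(sums.to_list())  # [[3.0, 5.0], [30.0]]
+```
+
+The example below returns to text, joining adjacent tokens with `Reduction.STRING_JOIN`: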
+ +```py +tokenizer = text.WhitespaceTokenizer() +tokens = tokenizer.tokenize(['Everything not saved will be lost.', u'Sad☹'.encode('UTF-8')]) + +# Ngrams, in this case bi-gram (n = 2) +bigrams = text.ngrams(tokens, 2, reduction_type=text.Reduction.STRING_JOIN) + +print(bigrams.to_list()) +``` + +```py +[[b'Everything not', b'not saved', b'saved will', b'will be', b'be lost.'], []] + +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/021.md b/Tensorflow/TensorFlow2.0/021.md new file mode 100644 index 00000000..973362e4 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/021.md @@ -0,0 +1,721 @@ +# TFRecord 和 tf.Example + +> 原文:[https://tensorflow.google.cn/tutorials/load_data/tfrecord](https://tensorflow.google.cn/tutorials/load_data/tfrecord) + +为了高效地读取数据,比较有帮助的一种做法是对数据进行序列化并将其存储在一组可线性读取的文件(每个文件 100-200MB)中。这尤其适用于通过网络进行流式传输的数据。这种做法对缓冲任何数据预处理也十分有用。 + +TFRecord 格式是一种用于存储二进制记录序列的简单格式。 + +[协议缓冲区](https://developers.google.cn/protocol-buffers/)是一个跨平台、跨语言的库,用于高效地序列化结构化数据。 + +协议消息由 `.proto` 文件定义,这通常是了解消息类型最简单的方法。 + +`tf.Example` 消息(或 protobuf)是一种灵活的消息类型,表示 `{"string": value}` 映射。它专为 TensorFlow 而设计,并被用于 [TFX](https://tensorflow.google.cn/tfx/) 等高级 API。 + +本笔记本将演示如何创建、解析和使用 `tf.Example` 消息,以及如何在 `.tfrecord` 文件之间对 `tf.Example` 消息进行序列化、写入和读取。 + +注:这些结构虽然有用,但并不是强制的。您无需转换现有代码即可使用 TFRecord,除非您正在使用 [tf.data](https://tensorflow.google.cn/guide/datasets) 且读取数据仍是训练的瓶颈。有关数据集性能的提示,请参阅[数据输入流水线性能](https://tensorflow.google.cn/guide/performance/datasets)。 + +## 设置 + +```py +import tensorflow as tf + +import numpy as np +import IPython.display as display +``` + +## `tf.Example` + +### `tf.Example` 的数据类型 + +从根本上讲,`tf.Example` 是 `{"string": tf.train.Feature}` 映射。 + +[`tf.train.Feature`](https://tensorflow.google.cn/api_docs/python/tf/train/Feature) 消息类型可以接受以下三种类型(请参阅 [`.proto` 文件](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto))。大多数其他通用类型也可以强制转换成下面的其中一种: + +1. [`tf.train.BytesList`](https://tensorflow.google.cn/api_docs/python/tf/train/BytesList)(可强制转换自以下类型) + +* `string` +* `byte` + +1. [`tf.train.FloatList`](https://tensorflow.google.cn/api_docs/python/tf/train/FloatList)(可强制转换自以下类型) + +* `float` (`float32`) +* `double` (`float64`) + +1. [`tf.train.Int64List`](https://tensorflow.google.cn/api_docs/python/tf/train/Int64List)(可强制转换自以下类型) + +* `bool` +* `enum` +* `int32` +* `uint32` +* `int64` +* `uint64` + +为了将标准 TensorFlow 类型转换为兼容 `tf.Example` 的 [`tf.train.Feature`](https://tensorflow.google.cn/api_docs/python/tf/train/Feature),可以使用下面的快捷函数。请注意,每个函数会接受标量输入值并返回包含上述三种 `list` 类型之一的 [`tf.train.Feature`](https://tensorflow.google.cn/api_docs/python/tf/train/Feature): + +```py +# The following functions can be used to convert a value to a type compatible +# with tf.Example. + +def _bytes_feature(value): + """Returns a bytes_list from a string / byte.""" + if isinstance(value, type(tf.constant(0))): + value = value.numpy() # BytesList won't unpack a string from an EagerTensor. 
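+  # At this point `value` is a plain Python byte string, which BytesList accepts.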
+ return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + +def _float_feature(value): + """Returns a float_list from a float / double.""" + return tf.train.Feature(float_list=tf.train.FloatList(value=[value])) + +def _int64_feature(value): + """Returns an int64_list from a bool / enum / int / uint.""" + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) +``` + +注:为了简单起见,本示例仅使用标量输入。要处理非标量特征,最简单的方法是使用 [`tf.io.serialize_tensor`](https://tensorflow.google.cn/api_docs/python/tf/io/serialize_tensor) 将张量转换为二进制字符串。在 TensorFlow 中,字符串是标量。使用 [`tf.io.parse_tensor`](https://tensorflow.google.cn/api_docs/python/tf/io/parse_tensor) 可将二进制字符串转换回张量。 + +下面是有关这些函数如何工作的一些示例。请注意不同的输入类型和标准化的输出类型。如果函数的输入类型与上述可强制转换的类型均不匹配,则该函数将引发异常(例如,`_int64_feature(1.0)` 将出错,因为 `1.0` 是浮点数,应该用于 `_float_feature` 函数): + +```py +print(_bytes_feature(b'test_string')) +print(_bytes_feature(u'test_bytes'.encode('utf-8'))) + +print(_float_feature(np.exp(1))) + +print(_int64_feature(True)) +print(_int64_feature(1)) +``` + +```py +bytes_list { + value: "test_string" +} + +bytes_list { + value: "test_bytes" +} + +float_list { + value: 2.7182817459106445 +} + +int64_list { + value: 1 +} + +int64_list { + value: 1 +} + +``` + +可以使用 `.SerializeToString` 方法将所有协议消息序列化为二进制字符串: + +```py +feature = _float_feature(np.exp(1)) + +feature.SerializeToString() +``` + +```py +b'\x12\x06\n\x04T\xf8-@' + +``` + +### 创建 `tf.Example` 消息 + +假设您要根据现有数据创建 `tf.Example` 消息。在实践中,数据集可能来自任何地方,但是从单个观测值创建 `tf.Example` 消息的过程相同: + +1. 在每个观测结果中,需要使用上述其中一种函数,将每个值转换为包含三种兼容类型之一的 [`tf.train.Feature`](https://tensorflow.google.cn/api_docs/python/tf/train/Feature)。 + +2. 创建一个从特征名称字符串到第 1 步中生成的编码特征值的映射(字典)。 + +3. 将第 2 步中生成的映射转换为 [`Features` 消息](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto#L85)。 + +在此笔记本中,您将使用 NumPy 创建一个数据集。 + +此数据集将具有 4 个特征: + +* 具有相等 `False` 或 `True` 概率的布尔特征 +* 从 `[0, 5]` 均匀随机选择的整数特征 +* 通过将整数特征作为索引从字符串表生成的字符串特征 +* 来自标准正态分布的浮点特征 + +请思考一个样本,其中包含来自上述每个分布的 10,000 个独立且分布相同的观测值: + +```py +# The number of observations in the dataset. +n_observations = int(1e4) + +# Boolean feature, encoded as False or True. +feature0 = np.random.choice([False, True], n_observations) + +# Integer feature, random from 0 to 4. +feature1 = np.random.randint(0, 5, n_observations) + +# String feature +strings = np.array([b'cat', b'dog', b'chicken', b'horse', b'goat']) +feature2 = strings[feature1] + +# Float feature, from a standard normal distribution +feature3 = np.random.randn(n_observations) +``` + +您可以使用 `_bytes_feature`、`_float_feature` 或 `_int64_feature` 将下面的每个特征强制转换为兼容 `tf.Example` 的类型。然后,可以通过下面的已编码特征创建 `tf.Example` 消息: + +```py +def serialize_example(feature0, feature1, feature2, feature3): + """ + Creates a tf.Example message ready to be written to a file. + """ + # Create a dictionary mapping the feature name to the tf.Example-compatible + # data type. + feature = { + 'feature0': _int64_feature(feature0), + 'feature1': _int64_feature(feature1), + 'feature2': _bytes_feature(feature2), + 'feature3': _float_feature(feature3), + } + + # Create a Features message using tf.train.Example. 
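+  # (tf.train.Features wraps the {name: Feature} dict; calling
+  # SerializeToString() on the Example yields its binary protobuf encoding.)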
+ + example_proto = tf.train.Example(features=tf.train.Features(feature=feature)) + return example_proto.SerializeToString() +``` + +例如,假设您从数据集中获得了一个观测值 `[False, 4, bytes('goat'), 0.9876]`。您可以使用 `create_message()` 创建和打印此观测值的 `tf.Example` 消息。如上所述,每个观测值将被写为一条 `Features` 消息。请注意,`tf.Example` [消息](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/example.proto#L88)只是 `Features` 消息外围的包装器: + +```py +# This is an example observation from the dataset. + +example_observation = [] + +serialized_example = serialize_example(False, 4, b'goat', 0.9876) +serialized_example +``` + +```py +b'\nR\n\x14\n\x08feature3\x12\x08\x12\x06\n\x04[\xd3|?\n\x14\n\x08feature2\x12\x08\n\x06\n\x04goat\n\x11\n\x08feature0\x12\x05\x1a\x03\n\x01\x00\n\x11\n\x08feature1\x12\x05\x1a\x03\n\x01\x04' + +``` + +要解码消息,请使用 [`tf.train.Example.FromString`](https://tensorflow.google.cn/api_docs/python/tf/train/Example#FromString) 方法。 + +```py +example_proto = tf.train.Example.FromString(serialized_example) +example_proto +``` + +```py +features { + feature { + key: "feature0" + value { + int64_list { + value: 0 + } + } + } + feature { + key: "feature1" + value { + int64_list { + value: 4 + } + } + } + feature { + key: "feature2" + value { + bytes_list { + value: "goat" + } + } + } + feature { + key: "feature3" + value { + float_list { + value: 0.9876000285148621 + } + } + } +} + +``` + +## TFRecords 格式详细信息 + +TFRecord 文件包含一系列记录。该文件只能按顺序读取。 + +每条记录包含一个字节字符串(用于数据有效负载),外加数据长度,以及用于完整性检查的 CRC32C(使用 Castagnoli 多项式的 32 位 CRC)哈希值。 + +每条记录会存储为以下格式: + +```py +uint64 length uint32 masked_crc32_of_length byte data[length] uint32 masked_crc32_of_data +``` + +将记录连接起来以生成文件。[此处](https://en.wikipedia.org/wiki/Cyclic_redundancy_check)对 CRC 进行了说明,且 CRC 的掩码为: + +```py +masked_crc = ((crc >> 15) | (crc << 17)) + 0xa282ead8ul +``` + +注:不需要在 TFRecord 文件中使用 `tf.Example`。`tf.Example` 只是将字典序列化为字节字符串的一种方法。文本行、编码的图像数据,或序列化的张量(使用 [`tf.io.serialize_tensor`](https://tensorflow.google.cn/api_docs/python/tf/io/serialize_tensor),或在加载时使用 [`tf.io.parse_tensor`](https://tensorflow.google.cn/api_docs/python/tf/io/parse_tensor))。有关更多选项,请参阅 [`tf.io`](https://tensorflow.google.cn/api_docs/python/tf/io) 模块。 + +## 使用 [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) 的 TFRecord 文件 + +[`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) 模块还提供用于在 TensorFlow 中读取和写入数据的工具。 + +### 写入 TFRecord 文件 + +要将数据放入数据集中,最简单的方式是使用 `from_tensor_slices` 方法。 + +若应用于数组,将返回标量数据集: + +```py +tf.data.Dataset.from_tensor_slices(feature1) +``` + +```py + + +``` + +若应用于数组的元组,将返回元组的数据集: + +```py +features_dataset = tf.data.Dataset.from_tensor_slices((feature0, feature1, feature2, feature3)) +features_dataset +``` + +```py + + +``` + +```py +# Use `take(1)` to only pull one example from the dataset. 
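+# (Each element is a tuple of four scalar tensors, one per feature array above.)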
+for f0,f1,f2,f3 in features_dataset.take(1): + print(f0) + print(f1) + print(f2) + print(f3) +``` + +```py +tf.Tensor(False, shape=(), dtype=bool) +tf.Tensor(1, shape=(), dtype=int64) +tf.Tensor(b'dog', shape=(), dtype=string) +tf.Tensor(-0.07658295354196158, shape=(), dtype=float64) + +``` + +使用 [`tf.data.Dataset.map`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#map) 方法可将函数应用于 `Dataset` 的每个元素。 + +映射函数必须在 TensorFlow 计算图模式下进行运算(它必须在 `tf.Tensors` 上运算并返回)。可以使用 [`tf.py_function`](https://tensorflow.google.cn/api_docs/python/tf/py_function) 包装非张量函数(如 `serialize_example`)以使其兼容。 + +使用 [`tf.py_function`](https://tensorflow.google.cn/api_docs/python/tf/py_function) 需要指定形状和类型信息,否则它将不可用: + +```py +def tf_serialize_example(f0,f1,f2,f3): + tf_string = tf.py_function( + serialize_example, + (f0,f1,f2,f3), # pass these args to the above function. + tf.string) # the return type is `tf.string`. + return tf.reshape(tf_string, ()) # The result is a scalar +``` + +```py +tf_serialize_example(f0,f1,f2,f3) +``` + +```py + + +``` + +将此函数应用于数据集中的每个元素: + +```py +serialized_features_dataset = features_dataset.map(tf_serialize_example) +serialized_features_dataset +``` + +```py + + +``` + +```py +def generator(): + for features in features_dataset: + yield serialize_example(*features) +``` + +```py +serialized_features_dataset = tf.data.Dataset.from_generator( + generator, output_types=tf.string, output_shapes=()) +``` + +```py +serialized_features_dataset +``` + +```py + + +``` + +并将它们写入 TFRecord 文件: + +```py +filename = 'test.tfrecord' +writer = tf.data.experimental.TFRecordWriter(filename) +writer.write(serialized_features_dataset) +``` + +### 读取 TFRecord 文件 + +您还可以使用 [`tf.data.TFRecordDataset`](https://tensorflow.google.cn/api_docs/python/tf/data/TFRecordDataset) 类来读取 TFRecord 文件。 + +有关通过 [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) 使用 TFRecord 文件的详细信息,请参见[此处](https://tensorflow.google.cn/guide/datasets#consuming_tfrecord_data)。 + +使用 `TFRecordDataset` 对于标准化输入数据和优化性能十分有用。 + +```py +filenames = [filename] +raw_dataset = tf.data.TFRecordDataset(filenames) +raw_dataset +``` + +```py + + +``` + +此时,数据集包含序列化的 [`tf.train.Example`](https://tensorflow.google.cn/api_docs/python/tf/train/Example) 消息。迭代时,它会将其作为标量字符串张量返回。 + +使用 `.take` 方法仅显示前 10 条记录。 + +注:在 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) 上进行迭代仅在启用了 Eager Execution 时有效。 + +```py +for raw_record in raw_dataset.take(10): + print(repr(raw_record)) +``` + +```py + + + + + + +\n\x11\n\x08feature0\x12\x05\x1a\x03\n\x01\x00\n\x11\n\x08feature1\x12\x05\x1a\x03\n\x01\x00'> + + + + +``` + +可以使用以下函数对这些张量进行解析。请注意,这里的 `feature_description` 是必需的,因为数据集使用计算图执行,并且需要以下描述来构建它们的形状和类型签名: + +```py +# Create a description of the features. +feature_description = { + 'feature0': tf.io.FixedLenFeature([], tf.int64, default_value=0), + 'feature1': tf.io.FixedLenFeature([], tf.int64, default_value=0), + 'feature2': tf.io.FixedLenFeature([], tf.string, default_value=''), + 'feature3': tf.io.FixedLenFeature([], tf.float32, default_value=0.0), +} + +def _parse_function(example_proto): + # Parse the input `tf.Example` proto using the dictionary above. 
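+  # (tf.io.parse_single_example returns a dict mapping feature names to tensors.)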
+ return tf.io.parse_single_example(example_proto, feature_description) +``` + +或者,使用 `tf.parse example` 一次解析整个批次。使用 [`tf.data.Dataset.map`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#map) 方法将此函数应用于数据集中的每一项: + +```py +parsed_dataset = raw_dataset.map(_parse_function) +parsed_dataset +``` + +```py + + +``` + +使用 Eager Execution 在数据集中显示观测值。此数据集中有 10,000 个观测值,但只会显示前 10 个。数据会作为特征字典进行显示。每一项都是一个 [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor),此张量的 `numpy` 元素会显示特征的值: + +```py +for parsed_record in parsed_dataset.take(10): + print(repr(parsed_record)) +``` + +```py +{'feature0': , 'feature1': , 'feature2': , 'feature3': } +{'feature0': , 'feature1': , 'feature2': , 'feature3': } +{'feature0': , 'feature1': , 'feature2': , 'feature3': } +{'feature0': , 'feature1': , 'feature2': , 'feature3': } +{'feature0': , 'feature1': , 'feature2': , 'feature3': } +{'feature0': , 'feature1': , 'feature2': , 'feature3': } +{'feature0': , 'feature1': , 'feature2': , 'feature3': } +{'feature0': , 'feature1': , 'feature2': , 'feature3': } +{'feature0': , 'feature1': , 'feature2': , 'feature3': } +{'feature0': , 'feature1': , 'feature2': , 'feature3': } + +``` + +在这里,`tf.parse_example` 函数会将 `tf.Example` 字段解压缩为标准张量。 + +## Python 中的 TFRecord 文件 + +[`tf.io`](https://tensorflow.google.cn/api_docs/python/tf/io) 模块还包含用于读取和写入 TFRecord 文件的纯 Python 函数。 + +### 写入 TFRecord 文件 + +接下来,将 10,000 个观测值写入文件 `test.tfrecord`。每个观测值都将转换为一条 `tf.Example` 消息,然后被写入文件。随后,您可以验证是否已创建 `test.tfrecord` 文件: + +```py +# Write the `tf.Example` observations to the file. +with tf.io.TFRecordWriter(filename) as writer: + for i in range(n_observations): + example = serialize_example(feature0[i], feature1[i], feature2[i], feature3[i]) + writer.write(example) +``` + +```py +du -sh {filename} + +``` + +```py +984K test.tfrecord + +``` + +### 读取 TFRecord 文件 + +您可以使用 [`tf.train.Example.ParseFromString`](https://tensorflow.google.cn/api_docs/python/tf/train/Example#ParseFromString) 轻松解析以下序列化张量: + +```py +filenames = [filename] +raw_dataset = tf.data.TFRecordDataset(filenames) +raw_dataset +``` + +```py + + +``` + +```py +for raw_record in raw_dataset.take(1): + example = tf.train.Example() + example.ParseFromString(raw_record.numpy()) + print(example) +``` + +```py +features { + feature { + key: "feature0" + value { + int64_list { + value: 0 + } + } + } + feature { + key: "feature1" + value { + int64_list { + value: 1 + } + } + } + feature { + key: "feature2" + value { + bytes_list { + value: "dog" + } + } + } + feature { + key: "feature3" + value { + float_list { + value: -0.07658295333385468 + } + } + } +} + +``` + +## 演练:读取和写入图像数据 + +下面是关于如何使用 TFRecord 读取和写入图像数据的端到端示例。您将使用图像作为输入数据,将数据写入 TFRecord 文件,然后将文件读取回来并显示图像。 + +如果您想在同一个输入数据集上使用多个模型,这种做法会很有用。您可以不以原始格式存储图像,而是将图像预处理为 TFRecord 格式,然后将其用于所有后续的处理和建模中。 + +首先,让我们下载雪中的猫的[图像](https://commons.wikimedia.org/wiki/File:Felis_catus-cat_on_snow.jpg),以及施工中的纽约威廉斯堡大桥的[照片](https://upload.wikimedia.org/wikipedia/commons/f/fe/New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg)。 + +### 提取图像 + +```py +cat_in_snow = tf.keras.utils.get_file('320px-Felis_catus-cat_on_snow.jpg', 'https://storage.googleapis.com/download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg') +williamsburg_bridge = tf.keras.utils.get_file('194px-New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg','https://storage.googleapis.com/download.tensorflow.org/example_images/194px-New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg') +``` + +```py +Downloading data from 
https://storage.googleapis.com/download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg +24576/17858 [=========================================] - 0s 0us/step +Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/194px-New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg +16384/15477 [===============================] - 0s 0us/step + +``` + +```py +display.display(display.Image(filename=cat_in_snow)) +display.display(display.HTML('Image cc-by: <a "href=https://commons.wikimedia.org/wiki/File:Felis_catus-cat_on_snow.jpg">Von.grzanka</a>')) +``` + +![jpeg](img/e8d23da7a633c8eaa5878bca988b63f3.png) + + + +```py +display.display(display.Image(filename=williamsburg_bridge)) +display.display(display.HTML('<a "href=https://commons.wikimedia.org/wiki/File:New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg">From Wikimedia</a>')) +``` + +![jpeg](img/47e128c5852147da0f7b0158465fe752.png) + + + +### 写入 TFRecord 文件 + +和以前一样,将特征编码为与 `tf.Example` 兼容的类型。这将存储原始图像字符串特征,以及高度、宽度、深度和任意 `label` 特征。后者会在您写入文件以区分猫和桥的图像时使用。将 `0` 用于猫的图像,将 `1` 用于桥的图像: + +```py +image_labels = { + cat_in_snow : 0, + williamsburg_bridge : 1, +} +``` + +```py +# This is an example, just using the cat image. +image_string = open(cat_in_snow, 'rb').read() + +label = image_labels[cat_in_snow] + +# Create a dictionary with features that may be relevant. +def image_example(image_string, label): + image_shape = tf.image.decode_jpeg(image_string).shape + + feature = { + 'height': _int64_feature(image_shape[0]), + 'width': _int64_feature(image_shape[1]), + 'depth': _int64_feature(image_shape[2]), + 'label': _int64_feature(label), + 'image_raw': _bytes_feature(image_string), + } + + return tf.train.Example(features=tf.train.Features(feature=feature)) + +for line in str(image_example(image_string, label)).split('\n')[:15]: + print(line) +print('...') +``` + +```py +features { + feature { + key: "depth" + value { + int64_list { + value: 3 + } + } + } + feature { + key: "height" + value { + int64_list { + value: 213 + } +... + +``` + +请注意,所有特征现在都存储在 `tf.Example` 消息中。接下来,函数化上面的代码,并将示例消息写入名为 `images.tfrecords` 的文件: + +```py +# Write the raw image files to `images.tfrecords`. +# First, process the two images into `tf.Example` messages. +# Then, write to a `.tfrecords` file. +record_file = 'images.tfrecords' +with tf.io.TFRecordWriter(record_file) as writer: + for filename, label in image_labels.items(): + image_string = open(filename, 'rb').read() + tf_example = image_example(image_string, label) + writer.write(tf_example.SerializeToString()) +``` + +```py +du -sh {record_file} + +``` + +```py +36K images.tfrecords + +``` + +### 读取 TFRecord 文件 + +现在,您有文件 `images.tfrecords`,并可以迭代其中的记录以将您写入的内容读取回来。因为在此示例中您只需重新生成图像,所以您只需要原始图像字符串这一个特征。使用上面描述的 getter 方法(即 `example.features.feature['image_raw'].bytes_list.value[0]`)提取该特征。您还可以使用标签来确定哪个记录是猫,哪个记录是桥: + +```py +raw_image_dataset = tf.data.TFRecordDataset('images.tfrecords') + +# Create a dictionary describing the features. +image_feature_description = { + 'height': tf.io.FixedLenFeature([], tf.int64), + 'width': tf.io.FixedLenFeature([], tf.int64), + 'depth': tf.io.FixedLenFeature([], tf.int64), + 'label': tf.io.FixedLenFeature([], tf.int64), + 'image_raw': tf.io.FixedLenFeature([], tf.string), +} + +def _parse_image_function(example_proto): + # Parse the input tf.Example proto using the dictionary above. 
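+  # (The parsed result is a dict of scalar tensors; 'image_raw' holds the raw JPEG bytes.)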
  return tf.io.parse_single_example(example_proto, image_feature_description)

parsed_image_dataset = raw_image_dataset.map(_parse_image_function)
parsed_image_dataset
```

```py
<MapDataset shapes: {depth: (), height: (), image_raw: (), label: (), width: ()}, types: {depth: tf.int64, height: tf.int64, image_raw: tf.string, label: tf.int64, width: tf.int64}>

```

从 TFRecord 文件中恢复图像:

```py
for image_features in parsed_image_dataset:
  image_raw = image_features['image_raw'].numpy()
  display.display(display.Image(data=image_raw))
```

![jpeg](img/36943305bc87e9d7bacdd3122d2620ca.png)

![jpeg](img/9a244f6224055e7727787fe289c2ca7c.png)
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/022.md b/Tensorflow/TensorFlow2.0/022.md
new file mode 100644
index 00000000..943515fa
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/022.md
@@ -0,0 +1 @@
+# Estimator
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/023.md b/Tensorflow/TensorFlow2.0/023.md
new file mode 100644
index 00000000..2f6d7866
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/023.md
@@ -0,0 +1,410 @@
# 预创建的 Estimators

> 原文:[https://tensorflow.google.cn/tutorials/estimator/premade](https://tensorflow.google.cn/tutorials/estimator/premade)

**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为,所以无法保证它们是最准确的,并且反映了最新的[官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议,请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。

本教程将向您展示如何使用 Estimators 解决 Tensorflow 中的鸢尾花(Iris)分类问题。Estimator 是 Tensorflow 完整模型的高级表示,它被设计用于轻松扩展和异步训练。更多细节请参阅 [Estimators](https://tensorflow.google.cn/guide/estimator)。

请注意,在 Tensorflow 2.0 中,[Keras API](https://tensorflow.google.cn/guide/keras) 可以完成许多相同的任务,而且被认为是一个更易学习的 API。如果您刚刚开始入门,我们建议您从 Keras 开始。有关 Tensorflow 2.0 中可用高级 API 的更多信息,请参阅 [Keras 标准化](https://medium.com/tensorflow/standardizing-on-keras-guidance-on-high-level-apis-in-tensorflow-2-0-bad2b04c819a)。

## 首先要做的事

为了开始,您将首先导入 Tensorflow 和一系列您需要的库。

```py
import tensorflow as tf

import numpy as np  # 下文的示例输入函数会用到
import pandas as pd
```

## 数据集

本文档中的示例程序构建并测试了一个模型,该模型根据[花萼](https://en.wikipedia.org/wiki/Sepal)和[花瓣](https://en.wikipedia.org/wiki/Petal)的大小将鸢尾花分成三种物种。

您将使用鸢尾花数据集训练模型。该数据集包括四个特征和一个[标签](https://developers.google.cn/machine-learning/glossary/#label)。这四个特征确定了单个鸢尾花的以下植物学特征:

* 花萼长度
* 花萼宽度
* 花瓣长度
* 花瓣宽度

根据这些信息,您可以定义一些有用的常量来解析数据:

```py
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']
```

接下来,使用 Keras 与 Pandas 下载并解析鸢尾花数据集。注意为训练和测试保留不同的数据集。

```py
train_path = tf.keras.utils.get_file(
    "iris_training.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv")
test_path = tf.keras.utils.get_file(
    "iris_test.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv")

train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)
test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
```

通过检查数据您可以发现有四列浮点型特征和一列 int32 型标签。

```py
train.head()
```

对于每个数据集都分割出标签,模型将被训练来预测这些标签。

```py
train_y = train.pop('Species')
test_y = test.pop('Species')

# 标签列现已从数据中删除
train.head()
```

## Estimator 编程概述

现在您已经设定好了数据,您可以使用 Tensorflow Estimator 定义模型。Estimator 是从 [`tf.estimator.Estimator`](https://tensorflow.google.cn/api_docs/python/tf/estimator/Estimator) 中派生的任何类。Tensorflow 提供了一组 [`tf.estimator`](https://tensorflow.google.cn/api_docs/python/tf/estimator)(例如,`LinearRegressor`)来实现常见的机器学习算法。此外,您可以编写您自己的[自定义
Estimator](https://tensorflow.google.cn/guide/custom_estimators)。入门阶段我们建议使用预创建的 Estimator。

为了编写基于预创建的 Estimator 的 Tensorflow 项目,您必须完成以下工作:

* 创建一个或多个输入函数
* 定义模型的特征列
* 实例化一个 Estimator,指定特征列和各种超参数
* 在 Estimator 对象上调用一个或多个方法,传递合适的输入函数以作为数据源

我们来看看这些任务是如何在鸢尾花分类中实现的。

## 创建输入函数

您必须创建输入函数来提供用于训练、评估和预测的数据。

**输入函数**是一个返回 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) 对象的函数,此对象会输出下列含两个元素的元组:

* [`features`](https://developers.google.cn/machine-learning/glossary/#feature)——Python 字典,其中:
    * 每个键都是特征名称
    * 每个值都是包含此特征所有值的数组
* `label`——包含每个样本[标签](https://developers.google.cn/machine-learning/glossary/#label)值的数组

为了向您展示输入函数的格式,请查看下面这个简单的实现:

```py
def input_evaluation_set():
    features = {'SepalLength': np.array([6.4, 5.0]),
                'SepalWidth': np.array([2.8, 2.3]),
                'PetalLength': np.array([5.6, 3.3]),
                'PetalWidth': np.array([2.2, 1.0])}
    labels = np.array([2, 1])
    return features, labels
```

您的输入函数可以以您喜欢的方式生成 `features` 字典与 `label` 列表。但是,我们建议使用 Tensorflow 的 [Dataset API](https://tensorflow.google.cn/guide/datasets),该 API 可以用来解析各种类型的数据。

Dataset API 可以为您处理很多常见情况。例如,使用 Dataset API,您可以轻松地从大量文件中并行读取记录,并将它们合并为单个数据流。

为了简化此示例,我们将使用 [pandas](https://pandas.pydata.org/) 加载数据,并利用此内存数据构建输入管道。

```py
def input_fn(features, labels, training=True, batch_size=256):
    """An input function for training or evaluating"""
    # 将输入转换为数据集。
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # 如果处于训练模式,则打乱并重复数据。
    if training:
        dataset = dataset.shuffle(1000).repeat()

    return dataset.batch(batch_size)
```

## 定义特征列(feature columns)

[**特征列(feature columns)**](https://developers.google.cn/machine-learning/glossary/#feature_columns)是一个对象,用于描述模型应该如何使用特征字典中的原始输入数据。当您构建一个 Estimator 模型的时候,您会向其传递一个特征列的列表,其中包含您希望模型使用的每个特征。[`tf.feature_column`](https://tensorflow.google.cn/api_docs/python/tf/feature_column) 模块提供了许多为模型表示数据的选项。

对于鸢尾花问题,4 个原始特征是数值,因此我们将构建一个特征列的列表,以告知 Estimator 模型将 4 个特征都表示为 32 位浮点值。故创建特征列的代码如下所示:

```py
# 特征列描述了如何使用输入。
my_feature_columns = []
for key in train.keys():
    my_feature_columns.append(tf.feature_column.numeric_column(key=key))
```

特征列可能比上述示例复杂得多。您可以从[指南](https://tensorflow.google.cn/guide/feature_columns)获取更多关于特征列的信息。

我们已经介绍了如何使模型表示原始特征,现在您可以构建 Estimator 了。

## 实例化 Estimator

鸢尾花问题是一个经典的分类问题。幸运的是,Tensorflow 提供了几个预创建的 Estimator 分类器,其中包括:

* [`tf.estimator.DNNClassifier`](https://tensorflow.google.cn/api_docs/python/tf/estimator/DNNClassifier) 用于多类别分类的深度模型
* [`tf.estimator.DNNLinearCombinedClassifier`](https://tensorflow.google.cn/api_docs/python/tf/estimator/DNNLinearCombinedClassifier) 用于广度与深度模型
* [`tf.estimator.LinearClassifier`](https://tensorflow.google.cn/api_docs/python/tf/estimator/LinearClassifier) 用于基于线性模型的分类器

对于鸢尾花问题,[`tf.estimator.DNNClassifier`](https://tensorflow.google.cn/api_docs/python/tf/estimator/DNNClassifier) 似乎是最好的选择。您可以这样实例化该 Estimator:

```py
# 构建一个拥有两个隐层的深度神经网络。
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    # 两个隐层所含结点数量分别为 30 和 10。
    hidden_units=[30, 10],
    # 模型必须从三个类别中做出选择。
    n_classes=3)
```

```py
INFO:tensorflow:Using default config.
+WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpkhwws8ja +INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpkhwws8ja', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true +graph_options { + rewrite_options { + meta_optimizer_iterations: ONE + } +} +, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1} + +``` + +## 训练、评估和预测 + +我们已经有一个 Estimator 对象,现在可以调用方法来执行下列操作: + +* 训练模型。 +* 评估经过训练的模型。 +* 使用经过训练的模型进行预测。 + +### 训练模型 + +通过调用 Estimator 的 `Train` 方法来训练模型,如下所示: + +```py +# 训练模型。 +classifier.train( + input_fn=lambda: input_fn(train, train_y, training=True), + steps=5000) +``` + +```py +WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/training/training_util.py:236: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version. +Instructions for updating: +Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts. +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Layer dnn is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/keras/optimizer_v2/adagrad.py:83: calling Constant.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version. +Instructions for updating: +Call initializer instance with the dtype argument instead of passing it to the constructor +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Create CheckpointSaverHook. +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. +INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0... +INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpkhwws8ja/model.ckpt. +INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0... 
+INFO:tensorflow:loss = 1.6968713, step = 0 +INFO:tensorflow:global_step/sec: 308.34 +INFO:tensorflow:loss = 1.1691835, step = 100 (0.325 sec) +INFO:tensorflow:global_step/sec: 365.112 +INFO:tensorflow:loss = 1.0332501, step = 200 (0.274 sec) +INFO:tensorflow:global_step/sec: 365.44 +INFO:tensorflow:loss = 0.9807229, step = 300 (0.274 sec) +INFO:tensorflow:global_step/sec: 364.789 +INFO:tensorflow:loss = 0.9437329, step = 400 (0.274 sec) +INFO:tensorflow:global_step/sec: 368.124 +INFO:tensorflow:loss = 0.94162637, step = 500 (0.272 sec) +INFO:tensorflow:global_step/sec: 366.689 +INFO:tensorflow:loss = 0.9129944, step = 600 (0.273 sec) +INFO:tensorflow:global_step/sec: 368.813 +INFO:tensorflow:loss = 0.91519016, step = 700 (0.271 sec) +INFO:tensorflow:global_step/sec: 369.377 +INFO:tensorflow:loss = 0.8866866, step = 800 (0.271 sec) +INFO:tensorflow:global_step/sec: 371.999 +INFO:tensorflow:loss = 0.88594323, step = 900 (0.269 sec) +INFO:tensorflow:global_step/sec: 372.481 +INFO:tensorflow:loss = 0.8859284, step = 1000 (0.269 sec) +INFO:tensorflow:global_step/sec: 369.793 +INFO:tensorflow:loss = 0.87800217, step = 1100 (0.270 sec) +INFO:tensorflow:global_step/sec: 364.966 +INFO:tensorflow:loss = 0.8652306, step = 1200 (0.274 sec) +INFO:tensorflow:global_step/sec: 368.742 +INFO:tensorflow:loss = 0.8569569, step = 1300 (0.271 sec) +INFO:tensorflow:global_step/sec: 368.955 +INFO:tensorflow:loss = 0.8538004, step = 1400 (0.271 sec) +INFO:tensorflow:global_step/sec: 371.44 +INFO:tensorflow:loss = 0.8501439, step = 1500 (0.269 sec) +INFO:tensorflow:global_step/sec: 369.55 +INFO:tensorflow:loss = 0.8453819, step = 1600 (0.271 sec) +INFO:tensorflow:global_step/sec: 366 +INFO:tensorflow:loss = 0.83854586, step = 1700 (0.273 sec) +INFO:tensorflow:global_step/sec: 370.695 +INFO:tensorflow:loss = 0.81984085, step = 1800 (0.270 sec) +INFO:tensorflow:global_step/sec: 371.791 +INFO:tensorflow:loss = 0.8254725, step = 1900 (0.271 sec) +INFO:tensorflow:global_step/sec: 363.724 +INFO:tensorflow:loss = 0.839285, step = 2000 (0.273 sec) +INFO:tensorflow:global_step/sec: 366.998 +INFO:tensorflow:loss = 0.81192434, step = 2100 (0.273 sec) +INFO:tensorflow:global_step/sec: 362.578 +INFO:tensorflow:loss = 0.80626756, step = 2200 (0.276 sec) +INFO:tensorflow:global_step/sec: 370.678 +INFO:tensorflow:loss = 0.8144733, step = 2300 (0.270 sec) +INFO:tensorflow:global_step/sec: 367.415 +INFO:tensorflow:loss = 0.80486006, step = 2400 (0.272 sec) +INFO:tensorflow:global_step/sec: 363.869 +INFO:tensorflow:loss = 0.7996403, step = 2500 (0.275 sec) +INFO:tensorflow:global_step/sec: 366.247 +INFO:tensorflow:loss = 0.78972137, step = 2600 (0.273 sec) +INFO:tensorflow:global_step/sec: 366.514 +INFO:tensorflow:loss = 0.7898851, step = 2700 (0.273 sec) +INFO:tensorflow:global_step/sec: 363.635 +INFO:tensorflow:loss = 0.7798088, step = 2800 (0.275 sec) +INFO:tensorflow:global_step/sec: 371.201 +INFO:tensorflow:loss = 0.7830296, step = 2900 (0.269 sec) +INFO:tensorflow:global_step/sec: 372.843 +INFO:tensorflow:loss = 0.78415155, step = 3000 (0.268 sec) +INFO:tensorflow:global_step/sec: 370.754 +INFO:tensorflow:loss = 0.7710204, step = 3100 (0.270 sec) +INFO:tensorflow:global_step/sec: 373.092 +INFO:tensorflow:loss = 0.7817295, step = 3200 (0.268 sec) +INFO:tensorflow:global_step/sec: 369.337 +INFO:tensorflow:loss = 0.78129435, step = 3300 (0.271 sec) +INFO:tensorflow:global_step/sec: 368.646 +INFO:tensorflow:loss = 0.78726315, step = 3400 (0.271 sec) +INFO:tensorflow:global_step/sec: 367.989 +INFO:tensorflow:loss = 0.76692796, 
step = 3500 (0.273 sec) +INFO:tensorflow:global_step/sec: 365.108 +INFO:tensorflow:loss = 0.7719732, step = 3600 (0.272 sec) +INFO:tensorflow:global_step/sec: 370.532 +INFO:tensorflow:loss = 0.76764953, step = 3700 (0.270 sec) +INFO:tensorflow:global_step/sec: 362.993 +INFO:tensorflow:loss = 0.75807786, step = 3800 (0.277 sec) +INFO:tensorflow:global_step/sec: 365.707 +INFO:tensorflow:loss = 0.7590251, step = 3900 (0.272 sec) +INFO:tensorflow:global_step/sec: 368.977 +INFO:tensorflow:loss = 0.7478892, step = 4000 (0.271 sec) +INFO:tensorflow:global_step/sec: 370.263 +INFO:tensorflow:loss = 0.74537545, step = 4100 (0.270 sec) +INFO:tensorflow:global_step/sec: 370.648 +INFO:tensorflow:loss = 0.7506561, step = 4200 (0.270 sec) +INFO:tensorflow:global_step/sec: 372.419 +INFO:tensorflow:loss = 0.74983096, step = 4300 (0.268 sec) +INFO:tensorflow:global_step/sec: 370.771 +INFO:tensorflow:loss = 0.74485517, step = 4400 (0.270 sec) +INFO:tensorflow:global_step/sec: 371.489 +INFO:tensorflow:loss = 0.74746263, step = 4500 (0.269 sec) +INFO:tensorflow:global_step/sec: 370.063 +INFO:tensorflow:loss = 0.7356381, step = 4600 (0.270 sec) +INFO:tensorflow:global_step/sec: 370.305 +INFO:tensorflow:loss = 0.74623525, step = 4700 (0.270 sec) +INFO:tensorflow:global_step/sec: 365.488 +INFO:tensorflow:loss = 0.7425093, step = 4800 (0.274 sec) +INFO:tensorflow:global_step/sec: 370.235 +INFO:tensorflow:loss = 0.7342787, step = 4900 (0.270 sec) +INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 5000... +INFO:tensorflow:Saving checkpoints for 5000 into /tmp/tmpkhwws8ja/model.ckpt. +INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 5000... +INFO:tensorflow:Loss for final step: 0.7211363. + + + +``` + +注意将 `input_fn` 调用封装在 [`lambda`](https://docs.python.org/3/tutorial/controlflow.html) 中以获取参数,同时提供不带参数的输入函数,如 Estimator 所预期的那样。`step` 参数告知该方法在训练多少步后停止训练。 + +### 评估经过训练的模型 + +现在模型已经经过训练,您可以获取一些关于模型性能的统计信息。代码块将在测试数据上对经过训练的模型的准确率(accuracy)进行评估: + +```py +eval_result = classifier.evaluate( + input_fn=lambda: input_fn(test, test_y, training=False)) + +print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result)) +``` + +```py +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Layer dnn is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Starting evaluation at 2020-09-22T19:58:23Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpkhwws8ja/model.ckpt-5000 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. 
+INFO:tensorflow:Inference Time : 0.20579s +INFO:tensorflow:Finished evaluation at 2020-09-22-19:58:23 +INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.53333336, average_loss = 0.760622, global_step = 5000, loss = 0.760622 +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: /tmp/tmpkhwws8ja/model.ckpt-5000 + +Test set accuracy: 0.533 + +``` + +与对 `train` 方法的调用不同,我们没有传递 `steps` 参数来进行评估。用于评估的 `input_fn` 只生成一个 [epoch](https://developers.google.cn/machine-learning/glossary/#epoch) 的数据。 + +`eval_result` 字典亦包含 `average_loss`(每个样本的平均误差),`loss`(每个 mini-batch 的平均误差)与 Estimator 的 `global_step`(经历的训练迭代次数)值。 + +### 利用经过训练的模型进行预测(推理) + +我们已经有一个经过训练的模型,可以生成准确的评估结果。我们现在可以使用经过训练的模型,根据一些无标签测量结果预测鸢尾花的品种。与训练和评估一样,我们使用单个函数调用进行预测: + +```py +# 由模型生成预测 +expected = ['Setosa', 'Versicolor', 'Virginica'] +predict_x = { + 'SepalLength': [5.1, 5.9, 6.9], + 'SepalWidth': [3.3, 3.0, 3.1], + 'PetalLength': [1.7, 4.2, 5.4], + 'PetalWidth': [0.5, 1.5, 2.1], +} + +def input_fn(features, batch_size=256): + """An input function for prediction.""" + # 将输入转换为无标签数据集。 + return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batch_size) + +predictions = classifier.predict( + input_fn=lambda: input_fn(predict_x)) +``` + +`predict` 方法返回一个 Python 可迭代对象,为每个样本生成一个预测结果字典。以下代码输出了一些预测及其概率: + +```py +for pred_dict, expec in zip(predictions, expected): + class_id = pred_dict['class_ids'][0] + probability = pred_dict['probabilities'][class_id] + + print('Prediction is "{}" ({:.1f}%), expected "{}"'.format( + SPECIES[class_id], 100 * probability, expec)) +``` + +```py +INFO:tensorflow:Calling model_fn. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpkhwws8ja/model.ckpt-5000 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. +Prediction is "Versicolor" (36.6%), expected "Setosa" +Prediction is "Virginica" (50.9%), expected "Versicolor" +Prediction is "Virginica" (62.6%), expected "Virginica" + +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/024.md b/Tensorflow/TensorFlow2.0/024.md new file mode 100644 index 00000000..185efaeb --- /dev/null +++ b/Tensorflow/TensorFlow2.0/024.md @@ -0,0 +1,340 @@ +# Build a linear model with Estimators + +> 原文:[https://tensorflow.google.cn/tutorials/estimator/linear](https://tensorflow.google.cn/tutorials/estimator/linear) + +## Overview + +This end-to-end walkthrough trains a logistic regression model using the [`tf.estimator`](https://tensorflow.google.cn/api_docs/python/tf/estimator) API. The model is often used as a baseline for other, more complex, algorithms. + +## Setup + +```py +pip install -q sklearn + +``` + +```py +WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available. +You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command. + +``` + +```py +import os +import sys + +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from IPython.display import clear_output +from six.moves import urllib +``` + +## Load the titanic dataset + +You will use the Titanic dataset with the (rather morbid) goal of predicting passenger survival, given characteristics such as gender, age, class, etc. + +```py +import tensorflow.compat.v2.feature_column as fc + +import tensorflow as tf +``` + +```py +# Load dataset. 
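# The train/eval CSVs are hosted on Google Cloud Storage; pandas reads them
# directly over HTTPS, and `survived` is popped off below as the 0/1 label.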
+dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv') +dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv') +y_train = dftrain.pop('survived') +y_eval = dfeval.pop('survived') +``` + +## Explore the data + +The dataset contains the following features + +```py +dftrain.head() +``` + + + +```py +dftrain.describe() +``` + + + +There are 627 and 264 examples in the training and evaluation sets, respectively. + +```py +dftrain.shape[0], dfeval.shape[0] +``` + +```py +(627, 264) + +``` + +The majority of passengers are in their 20's and 30's. + +```py +dftrain.age.hist(bins=20) +``` + +```py + + +``` + +![png](img/7d1de3cd2c94ab5fb2b9e44445a2fa6b.png) + +There are approximately twice as many male passengers as female passengers aboard. + +```py +dftrain.sex.value_counts().plot(kind='barh') +``` + +```py + + +``` + +![png](img/2ab61e10f9f53c1738f397150ea65f3d.png) + +The majority of passengers were in the "third" class. + +```py +dftrain['class'].value_counts().plot(kind='barh') +``` + +```py + + +``` + +![png](img/90c153ba31f6c32d7d760bc031b5d956.png) + +Females have a much higher chance of surviving versus males. This is clearly a predictive feature for the model. + +```py +pd.concat([dftrain, y_train], axis=1).groupby('sex').survived.mean().plot(kind='barh').set_xlabel('% survive') +``` + +```py +Text(0.5, 0, '% survive') + +``` + +![png](img/aaf0cfc73c7f275786e66d759ad26df6.png) + +## Feature Engineering for the Model + +Estimators use a system called [feature columns](https://tensorflow.google.cn/guide/feature_columns) to describe how the model should interpret each of the raw input features. An Estimator expects a vector of numeric inputs, and *feature columns* describe how the model should convert each feature. + +Selecting and crafting the right set of feature columns is key to learning an effective model. A feature column can be either one of the raw inputs in the original features `dict` (a *base feature column*), or any new columns created using transformations defined over one or multiple base columns (a *derived feature columns*). + +The linear estimator uses both numeric and categorical features. Feature columns work with all TensorFlow estimators and their purpose is to define the features used for modeling. Additionally, they provide some feature engineering capabilities like one-hot-encoding, normalization, and bucketization. + +### Base Feature Columns + +```py +CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', + 'embark_town', 'alone'] +NUMERIC_COLUMNS = ['age', 'fare'] + +feature_columns = [] +for feature_name in CATEGORICAL_COLUMNS: + vocabulary = dftrain[feature_name].unique() + feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary)) + +for feature_name in NUMERIC_COLUMNS: + feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32)) +``` + +The `input_function` specifies how data is converted to a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) that feeds the input pipeline in a streaming fashion. [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) can take in multiple sources such as a dataframe, a csv-formatted file, and more. 
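For instance, here is a minimal sketch, not part of the original walkthrough, of feeding the same Titanic CSV to the input pipeline directly from the file with [`tf.data.experimental.make_csv_dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/make_csv_dataset), skipping pandas entirely (the local file name passed to `get_file` is an assumption for illustration):

```py
import tensorflow as tf

# Download the same Titanic training CSV used above.
train_file = tf.keras.utils.get_file(
    'titanic_train.csv',
    'https://storage.googleapis.com/tf-datasets/titanic/train.csv')

# make_csv_dataset parses the CSV and yields (features_dict, label) batches,
# the same contract the estimator input functions in this tutorial provide.
csv_ds = tf.data.experimental.make_csv_dataset(
    train_file,
    batch_size=32,
    label_name='survived',
    num_epochs=1,
    shuffle=True)

for feature_batch, label_batch in csv_ds.take(1):
  print('Feature keys:', sorted(feature_batch.keys()))
  print('A batch of labels:', label_batch.numpy()[:5])
```

The pandas route used below is convenient here because the dataset is tiny and fits in memory; a file-based reader like this scales to data that does not.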
+ +```py +def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32): + def input_function(): + ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df)) + if shuffle: + ds = ds.shuffle(1000) + ds = ds.batch(batch_size).repeat(num_epochs) + return ds + return input_function + +train_input_fn = make_input_fn(dftrain, y_train) +eval_input_fn = make_input_fn(dfeval, y_eval, num_epochs=1, shuffle=False) +``` + +You can inspect the dataset: + +```py +ds = make_input_fn(dftrain, y_train, batch_size=10)() +for feature_batch, label_batch in ds.take(1): + print('Some feature keys:', list(feature_batch.keys())) + print() + print('A batch of class:', feature_batch['class'].numpy()) + print() + print('A batch of Labels:', label_batch.numpy()) +``` + +```py +Some feature keys: ['sex', 'age', 'n_siblings_spouses', 'parch', 'fare', 'class', 'deck', 'embark_town', 'alone'] + +A batch of class: [b'Third' b'Third' b'Third' b'Third' b'First' b'Third' b'Third' b'First' + b'Third' b'Third'] + +A batch of Labels: [1 0 0 0 1 0 0 0 0 0] + +``` + +You can also inspect the result of a specific feature column using the [`tf.keras.layers.DenseFeatures`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/DenseFeatures) layer: + +```py +age_column = feature_columns[7] +tf.keras.layers.DenseFeatures([age_column])(feature_batch).numpy() +``` + +```py +WARNING:tensorflow:Layer dense_features is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +array([[27.], + [28.], + [30.], + [18.], + [32.], + [26.], + [61.], + [37.], + [28.], + [40.]], dtype=float32) + +``` + +`DenseFeatures` only accepts dense tensors, to inspect a categorical column you need to transform that to a indicator column first: + +```py +gender_column = feature_columns[0] +tf.keras.layers.DenseFeatures([tf.feature_column.indicator_column(gender_column)])(feature_batch).numpy() +``` + +```py +WARNING:tensorflow:Layer dense_features_1 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +array([[1., 0.], + [1., 0.], + [1., 0.], + [0., 1.], + [1., 0.], + [1., 0.], + [1., 0.], + [1., 0.], + [1., 0.], + [1., 0.]], dtype=float32) + +``` + +After adding all the base features to the model, let's train the model. 
Training a model is just a single command using the [`tf.estimator`](https://tensorflow.google.cn/api_docs/python/tf/estimator) API: + +```py +linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns) +linear_est.train(train_input_fn) +result = linear_est.evaluate(eval_input_fn) + +clear_output() +print(result) +``` + +```py +{'accuracy': 0.7613636, 'accuracy_baseline': 0.625, 'auc': 0.809244, 'auc_precision_recall': 0.75609726, 'average_loss': 0.5452906, 'label/mean': 0.375, 'loss': 0.5347039, 'precision': 0.75, 'prediction/mean': 0.27201703, 'recall': 0.54545456, 'global_step': 200} + +``` + +### Derived Feature Columns + +Now you reached an accuracy of 75%. Using each base feature column separately may not be enough to explain the data. For example, the correlation between gender and the label may be different for different gender. Therefore, if you only learn a single model weight for `gender="Male"` and `gender="Female"`, you won't capture every age-gender combination (e.g. distinguishing between `gender="Male"` AND `age="30"` AND `gender="Male"` AND `age="40"`). + +To learn the differences between different feature combinations, you can add *crossed feature columns* to the model (you can also bucketize age column before the cross column): + +```py +age_x_gender = tf.feature_column.crossed_column(['age', 'sex'], hash_bucket_size=100) +``` + +After adding the combination feature to the model, let's train the model again: + +```py +derived_feature_columns = [age_x_gender] +linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns+derived_feature_columns) +linear_est.train(train_input_fn) +result = linear_est.evaluate(eval_input_fn) + +clear_output() +print(result) +``` + +```py +{'accuracy': 0.7613636, 'accuracy_baseline': 0.625, 'auc': 0.84352624, 'auc_precision_recall': 0.78346276, 'average_loss': 0.48114488, 'label/mean': 0.375, 'loss': 0.4756022, 'precision': 0.65789473, 'prediction/mean': 0.4285249, 'recall': 0.75757575, 'global_step': 200} + +``` + +It now achieves an accuracy of 77.6%, which is slightly better than only trained in base features. You can try using more features and transformations to see if you can do better! + +Now you can use the train model to make predictions on a passenger from the evaluation set. TensorFlow models are optimized to make predictions on a batch, or collection, of examples at once. Earlier, the `eval_input_fn` was defined using the entire evaluation set. + +```py +pred_dicts = list(linear_est.predict(eval_input_fn)) +probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts]) + +probs.plot(kind='hist', bins=20, title='predicted probabilities') +``` + +```py +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Layer linear/linear_model is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Graph was finalized. 
+INFO:tensorflow:Restoring parameters from /tmp/tmpg17o3o7e/model.ckpt-200 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. + + + +``` + +![png](img/5fcd4749c7b37cf8714bd83753d1da5b.png) + +Finally, look at the receiver operating characteristic (ROC) of the results, which will give us a better idea of the tradeoff between the true positive rate and false positive rate. + +```py +from sklearn.metrics import roc_curve +from matplotlib import pyplot as plt + +fpr, tpr, _ = roc_curve(y_eval, probs) +plt.plot(fpr, tpr) +plt.title('ROC curve') +plt.xlabel('false positive rate') +plt.ylabel('true positive rate') +plt.xlim(0,) +plt.ylim(0,) +``` + +```py +(0.0, 1.05) + +``` + +![png](img/2230343d999d9f0dd8b71b8bf390e82f.png) \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/025.md b/Tensorflow/TensorFlow2.0/025.md new file mode 100644 index 00000000..cfeba579 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/025.md @@ -0,0 +1,302 @@ +# 在 Tensorflow 中训练提升树(Boosted Trees)模型 + +> 原文:[https://tensorflow.google.cn/tutorials/estimator/boosted_trees](https://tensorflow.google.cn/tutorials/estimator/boosted_trees) + +**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。 + +本教程是使用基于 [`tf.estimator`](https://tensorflow.google.cn/api_docs/python/tf/estimator) API 的决策树来训练梯度提升模型的端到端演示。提升树(Boosted Trees)模型是回归和分类问题中最受欢迎并最有效的机器学习方法之一。这是一种融合技术,它结合了几个(10 个,100 个或者甚至 1000 个)树模型的预测值。 + +提升树(Boosted Trees)模型受到许多机器学习从业者的欢迎,因为它们可以通过最小化的超参数调整获得令人印象深刻的性能。 + +## 加载泰坦尼克数据集 + +您将使用泰坦尼克数据集,该数据集的目标是在给出性别、年龄、阶级等特征的条件下预测乘客幸存与否。 + +```py +import numpy as np +import pandas as pd +from IPython.display import clear_output +from matplotlib import pyplot as plt + +# 加载数据集。 +dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv') +dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv') +y_train = dftrain.pop('survived') +y_eval = dfeval.pop('survived') +``` + +```py +import tensorflow as tf +tf.random.set_seed(123) +``` + +数据集由训练集和验证集组成: + +* `dftrain` 与 `y_train` 是*训练集*——模型用来学习的数据。 +* 模型根据*评估集*,`dfeval` 和 `y_eval` 进行测试。 + +您将使用以下特征来进行训练: + +| 特征名称 | 描述 | +| sex | 乘客的性别 | +| age | 乘客的年龄 | +| n_siblings_spouses | 船上的兄弟姐妹与伙伴 | +| parch | 船上的父母与孩子 | +| fare | 乘客所支付的票价 | +| class | 乘客在船上的舱室等级 | +| deck | 哪个甲板上的乘客 | +| embark_town | 乘客是从哪个城镇上船的 | +| alone | 是否乘客独自一人 | + +## 探索数据 + +让我们首先预览一些数据,并在训练集上创建摘要统计。 + +```py +dftrain.head() +``` + + + +```py +dftrain.describe() +``` + + + +训练集和评估集分别有 627 和 264 个样本。 + +```py +dftrain.shape[0], dfeval.shape[0] +``` + +```py +(627, 264) + +``` + +大多数乘客在 20 岁或 30 岁。 + +```py +dftrain.age.hist(bins=20) +plt.show() +``` + +![png](img/58d9d20121aa86120aded9afa9cfff6d.png) + +男乘客大约是女乘客的两倍。 + +```py +dftrain.sex.value_counts().plot(kind='barh') +plt.show() +``` + +![png](img/3c3d7b5efcc814913b1fdc4d8ab17c2c.png) + +大多数乘客都在“三等”舱。 + +```py +dftrain['class'].value_counts().plot(kind='barh') +plt.show() +``` + +![png](img/4630405ff1451bfc3979433eb4bb7a43.png) + +大多数乘客从南安普顿出发。 + +```py +dftrain['embark_town'].value_counts().plot(kind='barh') +plt.show() +``` + +![png](img/a3920eb34218a65a21b046a30c7d3808.png) + +与男性相比,女性存活的几率要高得多。这显然是该模型的预测特征。 + +```py +pd.concat([dftrain, y_train], 
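          # survived 为 0/1 标签,按 sex 分组取均值即得到各性别的幸存率。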
axis=1).groupby('sex').survived.mean().plot(kind='barh').set_xlabel('% survive') +plt.show() +``` + +![png](img/2c848f6027c084a244c86c336c02ce35.png) + +## 创建特征列与输入函数 + +梯度提升(Gradient Boosting) Estimator 可以利用数值和分类特征。特征列适用于所有的 Tensorflow estimator,其目的是定义用于建模的特征。此外,它们还提供一些特征工程功能,如独热编码(one-hot-encoding)、标准化(normalization)和桶化(bucketization)。在本教程中,`CATEGORICAL_COLUMNS` 中的字段从分类列转换为独热编码列([指标列](https://tensorflow.google.cn/api_docs/python/tf/feature_column/indicator_column)): + +```py +fc = tf.feature_column +CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', + 'embark_town', 'alone'] +NUMERIC_COLUMNS = ['age', 'fare'] + +def one_hot_cat_column(feature_name, vocab): + return tf.feature_column.indicator_column( + tf.feature_column.categorical_column_with_vocabulary_list(feature_name, + vocab)) +feature_columns = [] +for feature_name in CATEGORICAL_COLUMNS: + # Need to one-hot encode categorical features. + vocabulary = dftrain[feature_name].unique() + feature_columns.append(one_hot_cat_column(feature_name, vocabulary)) + +for feature_name in NUMERIC_COLUMNS: + feature_columns.append(tf.feature_column.numeric_column(feature_name, + dtype=tf.float32)) +``` + +您可以查看特征列生成的转换。例如,以下是在单个样本中使用 `indicator_column` 的输出: + +```py +example = dict(dftrain.head(1)) +class_fc = tf.feature_column.indicator_column(tf.feature_column.categorical_column_with_vocabulary_list('class', ('First', 'Second', 'Third'))) +print('Feature value: "{}"'.format(example['class'].iloc[0])) +print('One-hot encoded: ', tf.keras.layers.DenseFeatures([class_fc])(example).numpy()) +``` + +```py +Feature value: "Third" +One-hot encoded: [[ 0\. 0\. 1.]] + +``` + +此外,您还可以一起查看所有特征列的转换: + +```py +tf.keras.layers.DenseFeatures(feature_columns)(example).numpy() +``` + +```py +array([[ 22\. , 1\. , 0\. , 1\. , 0\. , 0\. , 1\. , 0\. , + + 0\. , 0\. , 0\. , 0\. , 0\. , 0\. , 1\. , 0\. , + 0\. , 0\. , 7.25, 1\. , 0\. , 0\. , 0\. , 0\. , + 0\. , 0\. , 1\. , 0\. , 0\. , 0\. , 0\. , 0\. , + 1\. , 0\. ]], dtype=float32) + +``` + +接下来,您需要创建输入函数。这些将指定如何将数据读入到我们的模型中以供训练与推理。您将使用 [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data)API 中的 `from_tensor_slices` 方法直接从 Pandas 中读取数据。这适用于较小的内存数据集。对于较大的数据集,tf.data API 支持各种文件格式(包括 [csv](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/make_csv_dataset)),以便您能处理那些不适合放入内存中的数据集。 + +```py +# 使用大小为全部数据的 batch ,因为数据规模非常小. +NUM_EXAMPLES = len(y_train) + +def make_input_fn(X, y, n_epochs=None, shuffle=True): + def input_fn(): + dataset = tf.data.Dataset.from_tensor_slices((dict(X), y)) + if shuffle: + dataset = dataset.shuffle(NUM_EXAMPLES) + # 对于训练,可以按需多次循环数据集(n_epochs=None)。 + dataset = dataset.repeat(n_epochs) + # 在内存中训练不使用 batch。 + dataset = dataset.batch(NUM_EXAMPLES) + return dataset + return input_fn + +# 训练与评估的输入函数。 +train_input_fn = make_input_fn(dftrain, y_train) +eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1) +``` + +## 训练与评估模型 + +您将执行以下步骤: + +1. 初始化模型,指定特征和超参数。 +2. 使用 `train_input_fn` 将训练数据输入模型,使用 `train` 函数训练模型。 +3. 
您将使用此示例中的评估集评估模型性能,即 `dfeval` DataFrame。您将验证预测是否与 `y_eval` 数组中的标签匹配。 + +在训练提升树(Boosted Trees)模型之前,让我们先训练一个线性分类器(逻辑回归模型)。最好的做法是从更简单的模型开始建立基准。 + +```py +linear_est = tf.estimator.LinearClassifier(feature_columns) + +# 训练模型。 +linear_est.train(train_input_fn, max_steps=100) + +# 评估。 +result = linear_est.evaluate(eval_input_fn) +clear_output() +print(pd.Series(result)) +``` + +```py +accuracy 0.765152 +accuracy_baseline 0.625000 +auc 0.832844 +auc_precision_recall 0.789631 +average_loss 0.478908 +global_step 100.000000 +label/mean 0.375000 +loss 0.478908 +precision 0.703297 +prediction/mean 0.350790 +recall 0.646465 +dtype: float64 + +``` + +下面让我们训练提升树(Boosted Trees)模型。提升树(Boosted Trees)是支持回归(`BoostedTreesRegressor`)和分类(`BoostedTreesClassifier`)的。由于目标是预测一个生存与否的标签,您将使用 `BoostedTreesClassifier`。 + +```py +# 由于数据存入内存中,在每层使用全部数据会更快。 +# 上面一个 batch 定义为整个数据集。 +n_batches = 1 +est = tf.estimator.BoostedTreesClassifier(feature_columns, + n_batches_per_layer=n_batches) + +# 一旦建立了指定数量的树,模型将停止训练, +# 而不是基于训练步数。 +est.train(train_input_fn, max_steps=100) + +# 评估。 +result = est.evaluate(eval_input_fn) +clear_output() +print(pd.Series(result)) +``` + +```py +accuracy 0.829545 +accuracy_baseline 0.625000 +auc 0.872788 +auc_precision_recall 0.857807 +average_loss 0.411839 +global_step 100.000000 +label/mean 0.375000 +loss 0.411839 +precision 0.793478 +prediction/mean 0.381942 +recall 0.737374 +dtype: float64 + +``` + +现在您可以使用训练的模型从评估集上对乘客进行预测了。Tensorflow 模型经过优化,可以同时在一个 batch 或一个集合的样本上进行预测。之前,`eval_inout_fn` 是使用整个评估集定义的。 + +```py +pred_dicts = list(est.predict(eval_input_fn)) +probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts]) + +probs.plot(kind='hist', bins=20, title='predicted probabilities') +plt.show() +``` + +![png](img/56a137f761015af5a025d2d0cc2a9985.png) + +最后,您还可以查看结果的受试者工作特征曲线(ROC),这将使我们更好地了解真阳性率与假阴性率之间的权衡。 + +```py +from sklearn.metrics import roc_curve + +fpr, tpr, _ = roc_curve(y_eval, probs) +plt.plot(fpr, tpr) +plt.title('ROC curve') +plt.xlabel('false positive rate') +plt.ylabel('true positive rate') +plt.xlim(0,) +plt.ylim(0,) +plt.show() +``` + +![png](img/bf058b152584cc8e8c3987a57eb7331f.png) \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/026.md b/Tensorflow/TensorFlow2.0/026.md new file mode 100644 index 00000000..58a949e7 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/026.md @@ -0,0 +1,801 @@ +# 梯度提升树(Gradient Boosted Trees):模型理解 + +> 原文:[https://tensorflow.google.cn/tutorials/estimator/boosted_trees_model_understanding](https://tensorflow.google.cn/tutorials/estimator/boosted_trees_model_understanding) + + + +**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。 + +对于梯度提升模型(Gradient Boosting model)的端到端演示(end-to-end walkthrough),请查阅[在 Tensorflow 中训练提升树(Boosted Trees)模型](https://tensorflow.google.cn/tutorials/estimator/boosted_trees)。在本教程中,您将: + +* 学习到如何对提升树模型(Boosted Trees model)进行*局部解释*和*全局解释* +* 了解到提升树模型在数据集上的表现。 + +## 如何对提升树模型(Boosted Trees model)进行局部解释和全局解释 + +局部可解释性指模型的预测在单一样例层面上的理解程度,而全局可解释性指模型作为一个整体的理解能力。这种技术可以帮助使用机器学习的人在模型开发阶段检测偏差(bias)和 bug。 + +对于局部可解释性,您将了解到如何创造并可视化每个实例(per-instance)的贡献度。区别于特征重要性,这种贡献被称为 DFCs(定向特征贡献,directional feature contributions)。 + +对于全局可解释性,您将学习并可视化基于增益的特征重要性(gain-based feature importances),排列特征重要性([permutation feature 
importances](https://www.stat.berkeley.edu/%7Ebreiman/randomforest2001.pdf))和总 DFCs。 + +## 加载泰坦尼克数据集(titanic) + +本教程使用泰坦尼克数据集,旨在已知乘客的性别,年龄和客舱等级等特征的情况下预测的存活率。 + +```py +import numpy as np +import pandas as pd +from IPython.display import clear_output + +# 加载数据集。 +dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv') +dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv') +y_train = dftrain.pop('survived') +y_eval = dfeval.pop('survived') +``` + +```py +import tensorflow as tf +tf.random.set_seed(123) +``` + +```py +TensorFlow 2.x selected. + +``` + +有关特征的描述,请参阅之前的教程。 + +## 创建特征列, 输入函数并训练 estimator + +### 数据预处理 + +特征处理,使用原始的数值特征和独热编码(one-hot-encoding)处理过的非数值特征(如性别,舱位)别建立数据集。 + +```py +fc = tf.feature_column +CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', + 'embark_town', 'alone'] +NUMERIC_COLUMNS = ['age', 'fare'] + +def one_hot_cat_column(feature_name, vocab): + return fc.indicator_column( + fc.categorical_column_with_vocabulary_list(feature_name, + vocab)) +feature_columns = [] +for feature_name in CATEGORICAL_COLUMNS: + # 需要使用独热编码(one-hot-encoding)处理非数值特征。 + vocabulary = dftrain[feature_name].unique() + feature_columns.append(one_hot_cat_column(feature_name, vocabulary)) + +for feature_name in NUMERIC_COLUMNS: + feature_columns.append(fc.numeric_column(feature_name, + dtype=tf.float32)) +``` + +### 构建输入 pipeline + +使用 API [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) 中的 `from_tensor_slices` 方法建立输入方程来从 Pandas 中直接读取数据。 + +```py +# 当数据集小的时候,将整个数据集作为一个 batch。 +NUM_EXAMPLES = len(y_train) + +def make_input_fn(X, y, n_epochs=None, shuffle=True): + def input_fn(): + dataset = tf.data.Dataset.from_tensor_slices((X.to_dict(orient='list'), y)) + if shuffle: + dataset = dataset.shuffle(NUM_EXAMPLES) + # 训练时让数据迭代尽可能多次 (n_epochs=None)。 + dataset = (dataset + .repeat(n_epochs) + .batch(NUM_EXAMPLES)) + return dataset + return input_fn + +# 训练并评估输入函数。 +train_input_fn = make_input_fn(dftrain, y_train) +eval_input_fn = make_input_fn(dfeval, y_eval, shuffle=False, n_epochs=1) +``` + +### 训练模型 + +```py +params = { + 'n_trees': 50, + 'max_depth': 3, + 'n_batches_per_layer': 1, + # 为了得到 DFCs,请设置 center_bias = True。这将强制 + # 模型在使用特征(例如:回归中训练集标签的均值,分类中使 + # 用交叉熵损失函数时的对数几率)前做一个初始预测。 + 'center_bias': True +} + +est = tf.estimator.BoostedTreesClassifier(feature_columns, **params) +# 训练模型。 +est.train(train_input_fn, max_steps=100) + +# 评估。 +results = est.evaluate(eval_input_fn) +clear_output() +pd.Series(results).to_frame() +``` + + + +出于性能方面的原因,当您的数据是内存数据集时,我们推荐您使用 `boosted_trees_classifier_train_in_memory` 函数。此外,如果您对训练时间没有要求抑或是您的数据集很大且不愿做分布式训练,请使用上面显示的 `tf.estimator.BoostedTrees` API。 + +当您使用此方法时,请不要对数据分批(batch),而是对整个数据集进行操作。 + +```py +in_memory_params = dict(params) +in_memory_params['n_batches_per_layer'] = 1 +# 在内存中的输入方程请不要对数据分批。 +def make_inmemory_train_input_fn(X, y): + y = np.expand_dims(y, axis=1) + def input_fn(): + return dict(X), y + return input_fn +train_input_fn = make_inmemory_train_input_fn(dftrain, y_train) + +# 训练模型。 +est = tf.estimator.BoostedTreesClassifier( + feature_columns, + train_in_memory=True, + **in_memory_params) + +est.train(train_input_fn) +print(est.evaluate(eval_input_fn)) +``` + +```py +INFO:tensorflow:Using default config. 
+WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpec8e696f +INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpec8e696f', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true +graph_options { + rewrite_options { + meta_optimizer_iterations: ONE + } +} +, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1} +INFO:tensorflow:Calling model_fn. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Create CheckpointSaverHook. +WARNING:tensorflow:Issue encountered when serializing resources. +Type is unsupported, or the types of the items don't match field type in CollectionDef. Note this is a warning and probably safe to ignore. +'_Resource' object has no attribute 'name' +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. +WARNING:tensorflow:Issue encountered when serializing resources. +Type is unsupported, or the types of the items don't match field type in CollectionDef. Note this is a warning and probably safe to ignore. +'_Resource' object has no attribute 'name' +INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpec8e696f/model.ckpt. +WARNING:tensorflow:Issue encountered when serializing resources. +Type is unsupported, or the types of the items don't match field type in CollectionDef. Note this is a warning and probably safe to ignore. +'_Resource' object has no attribute 'name' +INFO:tensorflow:loss = 0.6931472, step = 0 +WARNING:tensorflow:It seems that global step (tf.train.get_global_step) has not been increased. Current value (could be stable): 0 vs previous value: 0\. You could increase the global step by passing tf.train.get_global_step() to Optimizer.apply_gradients or Optimizer.minimize. +INFO:tensorflow:global_step/sec: 80.2732 +INFO:tensorflow:loss = 0.34654337, step = 99 (1.249 sec) +INFO:tensorflow:Saving checkpoints for 153 into /tmp/tmpec8e696f/model.ckpt. +WARNING:tensorflow:Issue encountered when serializing resources. +Type is unsupported, or the types of the items don't match field type in CollectionDef. Note this is a warning and probably safe to ignore. +'_Resource' object has no attribute 'name' +INFO:tensorflow:Loss for final step: 0.31796658. +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Starting evaluation at 2020-03-09T21:21:14Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. 
+INFO:tensorflow:Inference Time : 0.55945s +INFO:tensorflow:Finished evaluation at 2020-03-09-21:21:15 +INFO:tensorflow:Saving dict for global step 153: accuracy = 0.8030303, accuracy_baseline = 0.625, auc = 0.8679216, auc_precision_recall = 0.8527449, average_loss = 0.4203342, global_step = 153, label/mean = 0.375, loss = 0.4203342, precision = 0.7473684, prediction/mean = 0.38673538, recall = 0.7171717 +WARNING:tensorflow:Issue encountered when serializing resources. +Type is unsupported, or the types of the items don't match field type in CollectionDef. Note this is a warning and probably safe to ignore. +'_Resource' object has no attribute 'name' +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 153: /tmp/tmpec8e696f/model.ckpt-153 +{'accuracy': 0.8030303, 'accuracy_baseline': 0.625, 'auc': 0.8679216, 'auc_precision_recall': 0.8527449, 'average_loss': 0.4203342, 'label/mean': 0.375, 'loss': 0.4203342, 'precision': 0.7473684, 'prediction/mean': 0.38673538, 'recall': 0.7171717, 'global_step': 153} + +``` + +## 模型说明与绘制 + +```py +import matplotlib.pyplot as plt +import seaborn as sns +sns_colors = sns.color_palette('colorblind') +``` + +## 局部可解释性(Local interpretability) + +接下来,您将输出定向特征贡献(DFCs)来解释单个预测。输出依据 [Palczewska et al](https://arxiv.org/pdf/1312.1121.pdf) 和 Saabas 在 [解释随机森林(Interpreting Random Forests)](http://blog.datadive.net/interpreting-random-forests/) 中提出的方法产生(scikit-learn 中随机森林相关的包 [`treeinterpreter`](https://github.com/andosa/treeinterpreter) 使用原理相同的远离). 使用以下语句输出 DFCs: + +`pred_dicts = list(est.experimental_predict_with_explanations(pred_input_fn))` + +(注意:带 “experimental” 前缀为实验版本(开发中),在正式版发布前可能对其修改。) + +```py +pred_dicts = list(est.experimental_predict_with_explanations(eval_input_fn)) +``` + +```py +INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpec8e696f', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true +graph_options { + rewrite_options { + meta_optimizer_iterations: ONE + } +} +, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1} +INFO:tensorflow:Calling model_fn. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. 
+ +``` + +```py +# 创建 DFCs 的 DataFrame。 +labels = y_eval.values +probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts]) +df_dfc = pd.DataFrame([pred['dfc'] for pred in pred_dicts]) +df_dfc.describe().T +``` + + + +DFCs 有个不错的特性:贡献和 + 偏差(bias) = 给出样例的预测值。 + +```py +# DFCs 的和 + 偏差(bias) == 可能性 +bias = pred_dicts[0]['bias'] +dfc_prob = df_dfc.sum(axis=1) + bias +np.testing.assert_almost_equal(dfc_prob.values, + probs.values) +``` + +为单个乘客绘制 DFCs,绘图时按贡献的方向性对其进行涂色并添加特征的值。 + +```py +# 绘制模版 :) +def _get_color(value): + """正的 DFCs 标为绿色,负的为红色。""" + green, red = sns.color_palette()[2:4] + if value >= 0: return green + return red + +def _add_feature_values(feature_values, ax): + """在图的左侧显示特征的值""" + x_coord = ax.get_xlim()[0] + OFFSET = 0.15 + for y_coord, (feat_name, feat_val) in enumerate(feature_values.items()): + t = plt.text(x_coord, y_coord - OFFSET, '{}'.format(feat_val), size=12) + t.set_bbox(dict(facecolor='white', alpha=0.5)) + from matplotlib.font_manager import FontProperties + font = FontProperties() + font.set_weight('bold') + t = plt.text(x_coord, y_coord + 1 - OFFSET, 'feature\nvalue', + fontproperties=font, size=12) + +def plot_example(example): + TOP_N = 8 # 显示前 8 个特征。 + sorted_ix = example.abs().sort_values()[-TOP_N:].index # 按值排序。 + example = example[sorted_ix] + colors = example.map(_get_color).tolist() + ax = example.to_frame().plot(kind='barh', + color=[colors], + legend=None, + alpha=0.75, + figsize=(10,6)) + ax.grid(False, axis='y') + ax.set_yticklabels(ax.get_yticklabels(), size=14) + + # 添加特征的值。 + _add_feature_values(dfeval.iloc[ID][sorted_ix], ax) + return ax +``` + +```py +# 绘制结果。 +ID = 182 +example = df_dfc.iloc[ID] # 从评估集中选择第 i 个样例。 +TOP_N = 8 # 显示前 8 个特征。 +sorted_ix = example.abs().sort_values()[-TOP_N:].index +ax = plot_example(example) +ax.set_title('Feature contributions for example {}\n pred: {:1.2f}; label: {}'.format(ID, probs[ID], labels[ID])) +ax.set_xlabel('Contribution to predicted probability', size=14) +plt.show() +``` + +![png](img/982e1307bbc8145644b791d775fcc2c7.png) + +更大的贡献值意味着对模型的预测有更大的影响。负的贡献表示此样例该特征的值减小了减小了模型的预测,正贡献值表示增加了模型的预测。 + +您也可以使用小提琴图(violin plot)来绘制该样例的 DFCs 并与整体分布比较。 + +```py +# 绘制代码模版。 +def dist_violin_plot(df_dfc, ID): + # 初始化画布。 + fig, ax = plt.subplots(1, 1, figsize=(10, 6)) + + # 创建样例 DataFrame。 + TOP_N = 8 # 显示前 8 个特征。 + example = df_dfc.iloc[ID] + ix = example.abs().sort_values()[-TOP_N:].index + example = example[ix] + example_df = example.to_frame(name='dfc') + + # 添加整个分布的贡献。 + parts=ax.violinplot([df_dfc[w] for w in ix], + vert=False, + showextrema=False, + widths=0.7, + positions=np.arange(len(ix))) + face_color = sns_colors[0] + alpha = 0.15 + for pc in parts['bodies']: + pc.set_facecolor(face_color) + pc.set_alpha(alpha) + + # 添加特征的值。 + _add_feature_values(dfeval.iloc[ID][sorted_ix], ax) + + # 添加局部贡献。 + ax.scatter(example, + np.arange(example.shape[0]), + color=sns.color_palette()[2], + s=100, + marker="s", + label='contributions for example') + + # 图例。 + # 生成小提琴图的详细图例。 + ax.plot([0,0], [1,1], label='eval set contributions\ndistributions', + color=face_color, alpha=alpha, linewidth=10) + legend = ax.legend(loc='lower right', shadow=True, fontsize='x-large', + frameon=True) + legend.get_frame().set_facecolor('white') + + # 调整格式。 + ax.set_yticks(np.arange(example.shape[0])) + ax.set_yticklabels(example.index) + ax.grid(False, axis='y') + ax.set_xlabel('Contribution to predicted probability', size=14) +``` + +绘制此样例。 + +```py +dist_violin_plot(df_dfc, ID) +plt.title('Feature contributions for example {}\n pred: {:1.2f}; 
label: {}'.format(ID, probs[ID], labels[ID])) +plt.show() +``` + +![png](img/c91d625a0312bd25acf8dab10ecb51ed.png) + +最后,第三方的工具,如:[LIME](https://github.com/marcotcr/lime) 和 [shap](https://github.com/slundberg/shap) 也可以帮助理解模型的各个预测。 + +## 全局特征重要性(Global feature importances) + +此外,您或许想了解模型这个整体而不是单个预测。接下来,您将计算并使用: + +* 通过 `est.experimental_feature_importances` 得到基于增益的特征重要性(Gain-based feature importances) +* 排列特征重要性(Permutation feature importances) +* 使用 `est.experimental_predict_with_explanations` 得到总 DFCs。 + +基于增益的特征重要性在分离特定特征时测量损失的变化。而排列特征重要性是在评估集上通过每次打乱一个特征后观察模型性能的变化计算而出。 + +一般来说,排列特征重要性要优于基于增益的特征重要性,尽管这两种方法在潜在预测变量的测量范围或类别数量不确定时和特征相关联时不可信([来源](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-307))。 对不同种类特征重要性的更透彻概括和更翔实讨论请参考 [这篇文章](http://explained.ai/rf-importance/index.html) 。 + +### 基于增益的特征重要性(Gain-based feature importances) + +TensorFlow 的提升树估算器(estimator)内置了函数 `est.experimental_feature_importances` 用于计算基于增益的特征重要性。 + +```py +importances = est.experimental_feature_importances(normalize=True) +df_imp = pd.Series(importances) + +# 可视化重要性。 +N = 8 +ax = (df_imp.iloc[0:N][::-1] + .plot(kind='barh', + color=sns_colors[0], + title='Gain feature importances', + figsize=(10, 6))) +ax.grid(False, axis='y') +``` + +![png](img/11c5fe9ef9f8ed2389fe40e5fa1ccbb7.png) + +### 平均绝对 DFCs + +您还可以得到绝对 DFCs 的平均值来从全局的角度分析影响。 + +```py +# 绘图。 +dfc_mean = df_dfc.abs().mean() +N = 8 +sorted_ix = dfc_mean.abs().sort_values()[-N:].index # 求平均并按绝对值排序。 +ax = dfc_mean[sorted_ix].plot(kind='barh', + color=sns_colors[1], + title='Mean |directional feature contributions|', + figsize=(10, 6)) +ax.grid(False, axis='y') +``` + +![png](img/edb8cf06303c60cf812dce4865e8d331.png) + +您可以看到 DFCs 如何随特征的值变化而变化。 + +```py +FEATURE = 'fare' +feature = pd.Series(df_dfc[FEATURE].values, index=dfeval[FEATURE].values).sort_index() +ax = sns.regplot(feature.index.values, feature.values, lowess=True) +ax.set_ylabel('contribution') +ax.set_xlabel(FEATURE) +ax.set_xlim(0, 100) +plt.show() +``` + +![png](img/dbd4a3a9bd5a14a61bcaf558a2231993.png) + +### 排列特征重要性(Permutation feature importances) + +```py +def permutation_importances(est, X_eval, y_eval, metric, features): + """ + 分别对每列,打散列中的值并观察其对评估集的影响。 + + 在训练过程中,有一种类似的方法,请参阅文章(来源:http://explained.ai/rf-importance/index.html) + 中有关 “Drop-column importance” 的部分。 + """ + baseline = metric(est, X_eval, y_eval) + imp = [] + for col in features: + save = X_eval[col].copy() + X_eval[col] = np.random.permutation(X_eval[col]) + m = metric(est, X_eval, y_eval) + X_eval[col] = save + imp.append(baseline - m) + return np.array(imp) + +def accuracy_metric(est, X, y): + """TensorFlow 估算器精度""" + eval_input_fn = make_input_fn(X, + y=y, + shuffle=False, + n_epochs=1) + return est.evaluate(input_fn=eval_input_fn)['accuracy'] +features = CATEGORICAL_COLUMNS + NUMERIC_COLUMNS +importances = permutation_importances(est, dfeval, y_eval, accuracy_metric, + features) +df_imp = pd.Series(importances, index=features) + +sorted_ix = df_imp.abs().sort_values().index +ax = df_imp[sorted_ix][-5:].plot(kind='barh', color=sns_colors[2], figsize=(10, 6)) +ax.grid(False, axis='y') +ax.set_title('Permutation feature importance') +plt.show() +``` + +```py +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +INFO:tensorflow:Done calling model_fn. 
+INFO:tensorflow:Starting evaluation at 2020-03-09T21:21:18Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. +INFO:tensorflow:Inference Time : 0.56113s +INFO:tensorflow:Finished evaluation at 2020-03-09-21:21:18 +INFO:tensorflow:Saving dict for global step 153: accuracy = 0.8030303, accuracy_baseline = 0.625, auc = 0.8679216, auc_precision_recall = 0.8527449, average_loss = 0.4203342, global_step = 153, label/mean = 0.375, loss = 0.4203342, precision = 0.7473684, prediction/mean = 0.38673538, recall = 0.7171717 +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 153: /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Starting evaluation at 2020-03-09T21:21:19Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. +INFO:tensorflow:Inference Time : 0.57949s +INFO:tensorflow:Finished evaluation at 2020-03-09-21:21:19 +INFO:tensorflow:Saving dict for global step 153: accuracy = 0.6060606, accuracy_baseline = 0.625, auc = 0.64355683, auc_precision_recall = 0.5400543, average_loss = 0.74337494, global_step = 153, label/mean = 0.375, loss = 0.74337494, precision = 0.47524753, prediction/mean = 0.39103043, recall = 0.4848485 +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 153: /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Starting evaluation at 2020-03-09T21:21:20Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. +INFO:tensorflow:Inference Time : 0.58528s +INFO:tensorflow:Finished evaluation at 2020-03-09-21:21:21 +INFO:tensorflow:Saving dict for global step 153: accuracy = 0.7916667, accuracy_baseline = 0.625, auc = 0.8624732, auc_precision_recall = 0.8392693, average_loss = 0.43363357, global_step = 153, label/mean = 0.375, loss = 0.43363357, precision = 0.7244898, prediction/mean = 0.38975066, recall = 0.7171717 +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 153: /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Starting evaluation at 2020-03-09T21:21:21Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. 
+INFO:tensorflow:Done running local_init_op. +INFO:tensorflow:Inference Time : 0.55600s +INFO:tensorflow:Finished evaluation at 2020-03-09-21:21:22 +INFO:tensorflow:Saving dict for global step 153: accuracy = 0.8068182, accuracy_baseline = 0.625, auc = 0.8674931, auc_precision_recall = 0.85280114, average_loss = 0.4206087, global_step = 153, label/mean = 0.375, loss = 0.4206087, precision = 0.75, prediction/mean = 0.38792592, recall = 0.72727275 +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 153: /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Starting evaluation at 2020-03-09T21:21:22Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. +INFO:tensorflow:Inference Time : 0.54454s +INFO:tensorflow:Finished evaluation at 2020-03-09-21:21:23 +INFO:tensorflow:Saving dict for global step 153: accuracy = 0.72727275, accuracy_baseline = 0.625, auc = 0.76737064, auc_precision_recall = 0.62659556, average_loss = 0.6019534, global_step = 153, label/mean = 0.375, loss = 0.6019534, precision = 0.6626506, prediction/mean = 0.3688063, recall = 0.5555556 +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 153: /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Starting evaluation at 2020-03-09T21:21:24Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. +INFO:tensorflow:Inference Time : 0.53149s +INFO:tensorflow:Finished evaluation at 2020-03-09-21:21:24 +INFO:tensorflow:Saving dict for global step 153: accuracy = 0.7878788, accuracy_baseline = 0.625, auc = 0.8389348, auc_precision_recall = 0.8278463, average_loss = 0.45054114, global_step = 153, label/mean = 0.375, loss = 0.45054114, precision = 0.7263158, prediction/mean = 0.3912348, recall = 0.6969697 +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 153: /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Starting evaluation at 2020-03-09T21:21:25Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. 
+INFO:tensorflow:Inference Time : 0.54399s +INFO:tensorflow:Finished evaluation at 2020-03-09-21:21:25 +INFO:tensorflow:Saving dict for global step 153: accuracy = 0.8030303, accuracy_baseline = 0.625, auc = 0.862565, auc_precision_recall = 0.84412414, average_loss = 0.42553493, global_step = 153, label/mean = 0.375, loss = 0.42553493, precision = 0.75268817, prediction/mean = 0.37500647, recall = 0.7070707 +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 153: /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Starting evaluation at 2020-03-09T21:21:26Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. +INFO:tensorflow:Inference Time : 0.56776s +INFO:tensorflow:Finished evaluation at 2020-03-09-21:21:26 +INFO:tensorflow:Saving dict for global step 153: accuracy = 0.8030303, accuracy_baseline = 0.625, auc = 0.8679216, auc_precision_recall = 0.8527449, average_loss = 0.4203342, global_step = 153, label/mean = 0.375, loss = 0.4203342, precision = 0.7473684, prediction/mean = 0.38673538, recall = 0.7171717 +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 153: /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Starting evaluation at 2020-03-09T21:21:27Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. +INFO:tensorflow:Inference Time : 0.56329s +INFO:tensorflow:Finished evaluation at 2020-03-09-21:21:28 +INFO:tensorflow:Saving dict for global step 153: accuracy = 0.79924244, accuracy_baseline = 0.625, auc = 0.8132232, auc_precision_recall = 0.7860318, average_loss = 0.4787808, global_step = 153, label/mean = 0.375, loss = 0.4787808, precision = 0.7613636, prediction/mean = 0.37704408, recall = 0.67676765 +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 153: /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +WARNING:tensorflow:Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Starting evaluation at 2020-03-09T21:21:28Z +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpec8e696f/model.ckpt-153 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. 
+INFO:tensorflow:Inference Time : 0.60489s
+INFO:tensorflow:Finished evaluation at 2020-03-09-21:21:29
+INFO:tensorflow:Saving dict for global step 153: accuracy = 0.8030303, accuracy_baseline = 0.625, auc = 0.8360882, auc_precision_recall = 0.7940172, average_loss = 0.45960733, global_step = 153, label/mean = 0.375, loss = 0.45960733, precision = 0.7473684, prediction/mean = 0.38010252, recall = 0.7171717
+INFO:tensorflow:Saving 'checkpoint_path' summary for global step 153: /tmp/tmpec8e696f/model.ckpt-153
+
+```
+
+![png](img/3b5e2e711798f7ff0d6ff949ea4f54f3.png)
+
+## 可视化模型拟合过程
+
+首先,使用以下公式构建训练数据:
+
+$$z = x e^{-x^2 - y^2}$$
+
+其中,$z$ 是您要试着预测的值(因变量),$x$ 和 $y$ 是特征。
+
+```py
+from numpy.random import uniform, seed
+from scipy.interpolate import griddata
+
+# 生成数据。
+seed(0)
+npts = 5000
+x = uniform(-2, 2, npts)
+y = uniform(-2, 2, npts)
+z = x*np.exp(-x**2 - y**2)
+xy = np.zeros((2,np.size(x)))
+xy[0] = x
+xy[1] = y
+xy = xy.T
+```
+
+```py
+# 准备用于预测的网格数据。
+df = pd.DataFrame({'x': x, 'y': y, 'z': z})
+
+xi = np.linspace(-2.0, 2.0, 200)
+yi = np.linspace(-2.1, 2.1, 210)
+xi, yi = np.meshgrid(xi, yi)
+
+df_predict = pd.DataFrame({
+    'x' : xi.flatten(),
+    'y' : yi.flatten(),
+})
+predict_shape = xi.shape
+```
+
+```py
+def plot_contour(x, y, z, **kwargs):
+  # 初始化画布。
+  plt.figure(figsize=(10, 8))
+  # 绘制等值线图,标出非均匀数据点。
+  # 注意:vmax/vmin 使用了全局变量 zi。
+  CS = plt.contour(x, y, z, 15, linewidths=0.5, colors='k')
+  CS = plt.contourf(x, y, z, 15,
+                    vmax=abs(zi).max(), vmin=-abs(zi).max(), cmap='RdBu_r')
+  plt.colorbar()  # 绘制颜色图例。
+  # 绘制数据点。
+  plt.xlim(-2, 2)
+  plt.ylim(-2, 2)
+```
+
+您可以将这个函数可视化,红色越深代表函数值越大。
+
+```py
+zi = griddata(xy, z, (xi, yi), method='linear', fill_value='0')
+plot_contour(xi, yi, zi)
+plt.scatter(df.x, df.y, marker='.')
+plt.title('Contour on training data')
+plt.show()
+```
+
+![png](img/02b2fc97a46c88c22ee2d11e8c28bf0d.png)
+
+```py
+fc = [tf.feature_column.numeric_column('x'),
+      tf.feature_column.numeric_column('y')]
+```
+
+```py
+def predict(est):
+  """返回给定估算器的预测结果。"""
+  predict_input_fn = lambda: tf.data.Dataset.from_tensors(dict(df_predict))
+  preds = np.array([p['predictions'][0] for p in est.predict(predict_input_fn)])
+  return preds.reshape(predict_shape)
+```
+
+首先,我们尝试用线性模型拟合数据。
+
+```py
+train_input_fn = make_input_fn(df, df.z)
+est = tf.estimator.LinearRegressor(fc)
+est.train(train_input_fn, max_steps=500);
+```
+
+```py
+INFO:tensorflow:Using default config.
+WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpd4fqobc9
+INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpd4fqobc9', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
+graph_options {
+  rewrite_options {
+    meta_optimizer_iterations: ONE
+  }
+}
+, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
+INFO:tensorflow:Calling model_fn.
+WARNING:tensorflow:From /tensorflow-2.1.0/python3.6/tensorflow_core/python/feature_column/feature_column_v2.py:518: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. +Instructions for updating: +Please use `layer.add_weight` method instead. +WARNING:tensorflow:From /tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/optimizer_v2/ftrl.py:143: calling Constant.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version. +Instructions for updating: +Call initializer instance with the dtype argument instead of passing it to the constructor +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Create CheckpointSaverHook. +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. +INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpd4fqobc9/model.ckpt. +INFO:tensorflow:loss = 0.023290718, step = 0 +INFO:tensorflow:global_step/sec: 267.329 +INFO:tensorflow:loss = 0.017512696, step = 100 (0.377 sec) +INFO:tensorflow:global_step/sec: 312.355 +INFO:tensorflow:loss = 0.018098738, step = 200 (0.321 sec) +INFO:tensorflow:global_step/sec: 341.77 +INFO:tensorflow:loss = 0.019927984, step = 300 (0.291 sec) +INFO:tensorflow:global_step/sec: 307.825 +INFO:tensorflow:loss = 0.01797011, step = 400 (0.327 sec) +INFO:tensorflow:Saving checkpoints for 500 into /tmp/tmpd4fqobc9/model.ckpt. +INFO:tensorflow:Loss for final step: 0.019703189. + +``` + +```py +plot_contour(xi, yi, predict(est)) +``` + +```py +INFO:tensorflow:Calling model_fn. +WARNING:tensorflow:Layer linear/linear_model is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because it's dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmpd4fqobc9/model.ckpt-500 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. + +``` + +![png](img/2bc3a9da8c0e479bf906dd0c765549f4.png) + +可见,拟合效果并不好。接下来,我们试着用 GBDT 模型拟合并了解模型是如何拟合方程的。 + +```py +n_trees = 37 + +est = tf.estimator.BoostedTreesRegressor(fc, n_batches_per_layer=1, n_trees=n_trees) +est.train(train_input_fn, max_steps=500) +clear_output() +plot_contour(xi, yi, predict(est)) +plt.text(-1.8, 2.1, '# trees: {}'.format(n_trees), color='w', backgroundcolor='black', size=20) +plt.show() +``` + +```py +INFO:tensorflow:Calling model_fn. +INFO:tensorflow:Done calling model_fn. +INFO:tensorflow:Graph was finalized. +INFO:tensorflow:Restoring parameters from /tmp/tmp3jae7fgc/model.ckpt-222 +INFO:tensorflow:Running local_init_op. +INFO:tensorflow:Done running local_init_op. 
+
+```
+
+![png](img/60960a15d5ca50a1486f3c3f8c200635.png)
+
+随着树的数量增加,模型的预测越来越接近真实方程。
+
+![](img/cb18ad8212a0648018238babc8fe2325.png)
+
+## 总结
+
+本文介绍了如何使用定向特征贡献(DFCs)及几种特征重要性技术来解释提升树模型。这些方法可以帮助您了解特征是如何影响模型预测的。最后,您还可以通过观察其他模型的决策面(decision surface)并结合本文内容,来学习提升树模型是如何拟合方程的。
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/027.md b/Tensorflow/TensorFlow2.0/027.md
new file mode 100644
index 00000000..5a0eeffe
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/027.md
@@ -0,0 +1,383 @@
+# 通过 Keras 模型创建 Estimator
+
+> 原文:[https://tensorflow.google.cn/tutorials/estimator/keras_model_to_estimator](https://tensorflow.google.cn/tutorials/estimator/keras_model_to_estimator)
+
+## 概述
+
+TensorFlow 完全支持 TensorFlow Estimator,并且可以从新的或现有的 [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) 模型创建 Estimator。本教程给出了该过程的一个完整且最简的示例。
+
+## 设置
+
+```py
+import tensorflow as tf
+
+import numpy as np
+import tensorflow_datasets as tfds
+```
+
+### 创建一个简单的 Keras 模型
+
+在 Keras 中,需要通过组装*层*来构建*模型*。模型(通常)是由层构成的计算图。最常见的模型类型是层的堆叠:[`tf.keras.Sequential`](https://tensorflow.google.cn/api_docs/python/tf/keras/Sequential) 模型。
+
+构建一个简单的全连接网络(即多层感知器):
+
+```py
+model = tf.keras.models.Sequential([
+    tf.keras.layers.Dense(16, activation='relu', input_shape=(4,)),
+    tf.keras.layers.Dropout(0.2),
+    tf.keras.layers.Dense(3)
+])
+```
+
+编译模型并获取摘要。
+
+```py
+model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+              optimizer='adam')
+model.summary()
+```
+
+```py
+Model: "sequential"
+_________________________________________________________________
+Layer (type)                 Output Shape              Param #   
+=================================================================
+dense (Dense)                (None, 16)                80        
+_________________________________________________________________
+dropout (Dropout)            (None, 16)                0         
+_________________________________________________________________
+dense_1 (Dense)              (None, 3)                 51        
+=================================================================
+Total params: 131
+Trainable params: 131
+Non-trainable params: 0
+_________________________________________________________________
+
+```
+
+### 创建输入函数
+
+使用 [Datasets API](https://tensorflow.google.cn/guide/data) 可以扩展到大型数据集或多设备训练。
+
+Estimator 需要控制构建输入流水线的时间和方式。为此,它们需要一个“输入函数”,即 `input_fn`。`Estimator` 将不带任何参数调用此函数。`input_fn` 必须返回 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset)。
+
+```py
+def input_fn():
+  split = tfds.Split.TRAIN
+  dataset = tfds.load('iris', split=split, as_supervised=True)
+  dataset = dataset.map(lambda features, labels: ({'dense_input':features}, labels))
+  dataset = dataset.batch(32).repeat()
+  return dataset
+```
+
+测试您的 `input_fn`:
+
+```py
+for features_batch, labels_batch in input_fn().take(1):
+  print(features_batch)
+  print(labels_batch)
+```
+
+```py
+Downloading and preparing dataset iris/2.0.0 (download: 4.44 KiB, generated: Unknown size, total: 4.44 KiB) to /home/kbuilder/tensorflow_datasets/iris/2.0.0...
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/iris/2.0.0.incompleteQ29ZWS/iris-train.tfrecord
+Dataset iris downloaded and prepared to /home/kbuilder/tensorflow_datasets/iris/2.0.0\. Subsequent calls will reuse this data.
+{'dense_input': } +tf.Tensor([0 2 1 2 0 1 1 1 0 2 1 0 2 0 0 0 0 0 2 2 2 2 2 0 2 0 2 1 1 1 1 1], shape=(32,), dtype=int64) + +``` + +### 通过 tf.keras 模型创建 Estimator。 + +可以使用 [`tf.estimator`](https://tensorflow.google.cn/api_docs/python/tf/estimator) API 来训练 [`tf.keras.Model`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model),方法是使用 [`tf.keras.estimator.model_to_estimator`](https://tensorflow.google.cn/api_docs/python/tf/keras/estimator/model_to_estimator) 将模型转换为 [`tf.estimator.Estimator`](https://tensorflow.google.cn/api_docs/python/tf/estimator/Estimator) 对象。 + +```py +import tempfile +model_dir = tempfile.mkdtemp() +keras_estimator = tf.keras.estimator.model_to_estimator( + keras_model=model, model_dir=model_dir) +``` + +```py +INFO:tensorflow:Using default config. + +INFO:tensorflow:Using default config. + +INFO:tensorflow:Using the Keras model provided. + +INFO:tensorflow:Using the Keras model provided. + +Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/keras.py:220: set_learning_phase (from tensorflow.python.keras.backend) is deprecated and will be removed after 2020-10-11. +Instructions for updating: +Simply pass a True/False value to the `training` argument of the `__call__` method of your layer or model. + +Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/keras.py:220: set_learning_phase (from tensorflow.python.keras.backend) is deprecated and will be removed after 2020-10-11. +Instructions for updating: +Simply pass a True/False value to the `training` argument of the `__call__` method of your layer or model. + +INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp13998n2j', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true +graph_options { + rewrite_options { + meta_optimizer_iterations: ONE + } +} +, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1} + +INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp13998n2j', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true +graph_options { + rewrite_options { + meta_optimizer_iterations: ONE + } +} +, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1} + +``` + +训练和评估 Estimator。 + +```py +keras_estimator.train(input_fn=input_fn, steps=500) +eval_result = keras_estimator.evaluate(input_fn=input_fn, steps=10) +print('Eval 
result: {}'.format(eval_result)) +``` + +```py +WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/training/training_util.py:236: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version. +Instructions for updating: +Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts. + +Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/training/training_util.py:236: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version. +Instructions for updating: +Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts. + +INFO:tensorflow:Calling model_fn. + +INFO:tensorflow:Calling model_fn. + +INFO:tensorflow:Done calling model_fn. + +INFO:tensorflow:Done calling model_fn. + +INFO:tensorflow:Warm-starting with WarmStartSettings: WarmStartSettings(ckpt_to_initialize_from='/tmp/tmp13998n2j/keras/keras_model.ckpt', vars_to_warm_start='.*', var_name_to_vocab_info={}, var_name_to_prev_var_name={}) + +INFO:tensorflow:Warm-starting with WarmStartSettings: WarmStartSettings(ckpt_to_initialize_from='/tmp/tmp13998n2j/keras/keras_model.ckpt', vars_to_warm_start='.*', var_name_to_vocab_info={}, var_name_to_prev_var_name={}) + +INFO:tensorflow:Warm-starting from: /tmp/tmp13998n2j/keras/keras_model.ckpt + +INFO:tensorflow:Warm-starting from: /tmp/tmp13998n2j/keras/keras_model.ckpt + +INFO:tensorflow:Warm-starting variables only in TRAINABLE_VARIABLES. + +INFO:tensorflow:Warm-starting variables only in TRAINABLE_VARIABLES. + +INFO:tensorflow:Warm-started 4 variables. + +INFO:tensorflow:Warm-started 4 variables. + +INFO:tensorflow:Create CheckpointSaverHook. + +INFO:tensorflow:Create CheckpointSaverHook. + +INFO:tensorflow:Graph was finalized. + +INFO:tensorflow:Graph was finalized. + +INFO:tensorflow:Running local_init_op. + +INFO:tensorflow:Running local_init_op. + +INFO:tensorflow:Done running local_init_op. + +INFO:tensorflow:Done running local_init_op. + +INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0... + +INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0... + +INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmp13998n2j/model.ckpt. + +INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmp13998n2j/model.ckpt. + +INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0... + +INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0... 
+ +INFO:tensorflow:loss = 1.5731332, step = 0 + +INFO:tensorflow:loss = 1.5731332, step = 0 + +INFO:tensorflow:global_step/sec: 444.326 + +INFO:tensorflow:global_step/sec: 444.326 + +INFO:tensorflow:loss = 0.79164267, step = 100 (0.227 sec) + +INFO:tensorflow:loss = 0.79164267, step = 100 (0.227 sec) + +INFO:tensorflow:global_step/sec: 515.459 + +INFO:tensorflow:global_step/sec: 515.459 + +INFO:tensorflow:loss = 0.5765847, step = 200 (0.193 sec) + +INFO:tensorflow:loss = 0.5765847, step = 200 (0.193 sec) + +INFO:tensorflow:global_step/sec: 518.855 + +INFO:tensorflow:global_step/sec: 518.855 + +INFO:tensorflow:loss = 0.48571444, step = 300 (0.193 sec) + +INFO:tensorflow:loss = 0.48571444, step = 300 (0.193 sec) + +INFO:tensorflow:global_step/sec: 527.318 + +INFO:tensorflow:global_step/sec: 527.318 + +INFO:tensorflow:loss = 0.3836534, step = 400 (0.190 sec) + +INFO:tensorflow:loss = 0.3836534, step = 400 (0.190 sec) + +INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 500... + +INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 500... + +INFO:tensorflow:Saving checkpoints for 500 into /tmp/tmp13998n2j/model.ckpt. + +INFO:tensorflow:Saving checkpoints for 500 into /tmp/tmp13998n2j/model.ckpt. + +INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 500... + +INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 500... + +INFO:tensorflow:Loss for final step: 0.46023262. + +INFO:tensorflow:Loss for final step: 0.46023262. + +INFO:tensorflow:Calling model_fn. + +INFO:tensorflow:Calling model_fn. + +Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_v1.py:2048: Model.state_updates (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version. +Instructions for updating: +This property should not be used in TensorFlow 2.0, as updates are applied automatically. + +Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_v1.py:2048: Model.state_updates (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version. +Instructions for updating: +This property should not be used in TensorFlow 2.0, as updates are applied automatically. + +INFO:tensorflow:Done calling model_fn. + +INFO:tensorflow:Done calling model_fn. + +INFO:tensorflow:Starting evaluation at 2020-09-22T19:57:20Z + +INFO:tensorflow:Starting evaluation at 2020-09-22T19:57:20Z + +INFO:tensorflow:Graph was finalized. + +INFO:tensorflow:Graph was finalized. + +INFO:tensorflow:Restoring parameters from /tmp/tmp13998n2j/model.ckpt-500 + +INFO:tensorflow:Restoring parameters from /tmp/tmp13998n2j/model.ckpt-500 + +INFO:tensorflow:Running local_init_op. + +INFO:tensorflow:Running local_init_op. + +INFO:tensorflow:Done running local_init_op. + +INFO:tensorflow:Done running local_init_op. 
+ +INFO:tensorflow:Evaluation [1/10] + +INFO:tensorflow:Evaluation [1/10] + +INFO:tensorflow:Evaluation [2/10] + +INFO:tensorflow:Evaluation [2/10] + +INFO:tensorflow:Evaluation [3/10] + +INFO:tensorflow:Evaluation [3/10] + +INFO:tensorflow:Evaluation [4/10] + +INFO:tensorflow:Evaluation [4/10] + +INFO:tensorflow:Evaluation [5/10] + +INFO:tensorflow:Evaluation [5/10] + +INFO:tensorflow:Evaluation [6/10] + +INFO:tensorflow:Evaluation [6/10] + +INFO:tensorflow:Evaluation [7/10] + +INFO:tensorflow:Evaluation [7/10] + +INFO:tensorflow:Evaluation [8/10] + +INFO:tensorflow:Evaluation [8/10] + +INFO:tensorflow:Evaluation [9/10] + +INFO:tensorflow:Evaluation [9/10] + +INFO:tensorflow:Evaluation [10/10] + +INFO:tensorflow:Evaluation [10/10] + +INFO:tensorflow:Inference Time : 0.16498s + +INFO:tensorflow:Inference Time : 0.16498s + +INFO:tensorflow:Finished evaluation at 2020-09-22-19:57:20 + +INFO:tensorflow:Finished evaluation at 2020-09-22-19:57:20 + +INFO:tensorflow:Saving dict for global step 500: global_step = 500, loss = 0.33660004 + +INFO:tensorflow:Saving dict for global step 500: global_step = 500, loss = 0.33660004 + +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 500: /tmp/tmp13998n2j/model.ckpt-500 + +INFO:tensorflow:Saving 'checkpoint_path' summary for global step 500: /tmp/tmp13998n2j/model.ckpt-500 + +Eval result: {'loss': 0.33660004, 'global_step': 500} + +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/028.md b/Tensorflow/TensorFlow2.0/028.md new file mode 100644 index 00000000..961991fe --- /dev/null +++ b/Tensorflow/TensorFlow2.0/028.md @@ -0,0 +1 @@ +# 高级 \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/029.md b/Tensorflow/TensorFlow2.0/029.md new file mode 100644 index 00000000..997d7697 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/029.md @@ -0,0 +1 @@ +# 自定义 \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/030.md b/Tensorflow/TensorFlow2.0/030.md new file mode 100644 index 00000000..8bab7da8 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/030.md @@ -0,0 +1,230 @@ +# Customization basics: tensors and operations + +> 原文:[https://tensorflow.google.cn/tutorials/customization/basics](https://tensorflow.google.cn/tutorials/customization/basics) + +This is an introductory TensorFlow tutorial that shows how to: + +* Import the required package +* Create and use tensors +* Use GPU acceleration +* Demonstrate [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) + +## Import TensorFlow + +To get started, import the `tensorflow` module. As of TensorFlow 2, eager execution is turned on by default. This enables a more interactive frontend to TensorFlow, the details of which we will discuss much later. + +```py +import tensorflow as tf +``` + +## Tensors + +A Tensor is a multi-dimensional array. Similar to NumPy `ndarray` objects, [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor) objects have a data type and a shape. Additionally, [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor)s can reside in accelerator memory (like a GPU). TensorFlow offers a rich library of operations ([tf.add](https://tensorflow.google.cn/api_docs/python/tf/add), [tf.matmul](https://tensorflow.google.cn/api_docs/python/tf/matmul), [tf.linalg.inv](https://tensorflow.google.cn/api_docs/python/tf/linalg/inv) etc.) that consume and produce [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor)s. 
These operations automatically convert native Python types, for example: + +```py +print(tf.add(1, 2)) +print(tf.add([1, 2], [3, 4])) +print(tf.square(5)) +print(tf.reduce_sum([1, 2, 3])) + +# Operator overloading is also supported +print(tf.square(2) + tf.square(3)) +``` + +```py +tf.Tensor(3, shape=(), dtype=int32) +tf.Tensor([4 6], shape=(2,), dtype=int32) +tf.Tensor(25, shape=(), dtype=int32) +tf.Tensor(6, shape=(), dtype=int32) +tf.Tensor(13, shape=(), dtype=int32) + +``` + +Each [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor) has a shape and a datatype: + +```py +x = tf.matmul([[1]], [[2, 3]]) +print(x) +print(x.shape) +print(x.dtype) +``` + +```py +tf.Tensor([[2 3]], shape=(1, 2), dtype=int32) +(1, 2) + + +``` + +The most obvious differences between NumPy arrays and [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor)s are: + +1. Tensors can be backed by accelerator memory (like GPU, TPU). +2. Tensors are immutable. + +### NumPy Compatibility + +Converting between a TensorFlow [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor)s and a NumPy `ndarray` is easy: + +* TensorFlow operations automatically convert NumPy ndarrays to Tensors. +* NumPy operations automatically convert Tensors to NumPy ndarrays. + +Tensors are explicitly converted to NumPy ndarrays using their `.numpy()` method. These conversions are typically cheap since the array and [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor) share the underlying memory representation, if possible. However, sharing the underlying representation isn't always possible since the [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor) may be hosted in GPU memory while NumPy arrays are always backed by host memory, and the conversion involves a copy from GPU to host memory. + +```py +import numpy as np + +ndarray = np.ones([3, 3]) + +print("TensorFlow operations convert numpy arrays to Tensors automatically") +tensor = tf.multiply(ndarray, 42) +print(tensor) + +print("And NumPy operations convert Tensors to numpy arrays automatically") +print(np.add(tensor, 1)) + +print("The .numpy() method explicitly converts a Tensor to a numpy array") +print(tensor.numpy()) +``` + +```py +TensorFlow operations convert numpy arrays to Tensors automatically +tf.Tensor( +[[42\. 42\. 42.] + [42\. 42\. 42.] + [42\. 42\. 42.]], shape=(3, 3), dtype=float64) +And NumPy operations convert Tensors to numpy arrays automatically +[[43\. 43\. 43.] + [43\. 43\. 43.] + [43\. 43\. 43.]] +The .numpy() method explicitly converts a Tensor to a numpy array +[[42\. 42\. 42.] + [42\. 42\. 42.] + [42\. 42\. 42.]] + +``` + +## GPU acceleration + +Many TensorFlow operations are accelerated using the GPU for computation. Without any annotations, TensorFlow automatically decides whether to use the GPU or CPU for an operation—copying the tensor between CPU and GPU memory, if necessary. 
Tensors produced by an operation are typically backed by the memory of the device on which the operation executed, for example:
+
+```py
+x = tf.random.uniform([3, 3])
+
+print("Is there a GPU available: "),
+print(tf.config.experimental.list_physical_devices("GPU"))
+
+print("Is the Tensor on GPU #0:  "),
+print(x.device.endswith('GPU:0'))
+```
+
+```py
+Is there a GPU available: 
+[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
+Is the Tensor on GPU #0:  
+True
+
+```
+
+### Device Names
+
+The [`Tensor.device`](https://tensorflow.google.cn/api_docs/python/tf/Tensor#device) property provides a fully qualified string name of the device hosting the contents of the tensor. This name encodes many details, such as an identifier of the network address of the host on which this program is executing and the device within that host. This is required for distributed execution of a TensorFlow program. The string ends with `GPU:<N>` if the tensor is placed on the `N`-th GPU on the host.
+
+### Explicit Device Placement
+
+In TensorFlow, *placement* refers to how individual operations are assigned (placed on) a device for execution. As mentioned, when there is no explicit guidance provided, TensorFlow automatically decides which device to execute an operation and copies tensors to that device, if needed. However, TensorFlow operations can be explicitly placed on specific devices using the [`tf.device`](https://tensorflow.google.cn/api_docs/python/tf/device) context manager, for example:
+
+```py
+import time
+
+def time_matmul(x):
+  start = time.time()
+  for loop in range(10):
+    tf.matmul(x, x)
+
+  result = time.time()-start
+
+  print("10 loops: {:0.2f}ms".format(1000*result))
+
+# Force execution on CPU
+print("On CPU:")
+with tf.device("CPU:0"):
+  x = tf.random.uniform([1000, 1000])
+  assert x.device.endswith("CPU:0")
+  time_matmul(x)
+
+# Force execution on GPU #0 if available
+if tf.config.experimental.list_physical_devices("GPU"):
+  print("On GPU:")
+  with tf.device("GPU:0"): # Or GPU:1 for the 2nd GPU, GPU:2 for the 3rd etc.
+    x = tf.random.uniform([1000, 1000])
+    assert x.device.endswith("GPU:0")
+    time_matmul(x)
+```
+
+```py
+On CPU:
+10 loops: 102.06ms
+On GPU:
+10 loops: 231.87ms
+
+```
+
+## Datasets
+
+This section uses the [`tf.data.Dataset` API](https://tensorflow.google.cn/guide/datasets) to build a pipeline for feeding data to your model. The [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) API is used to build performant, complex input pipelines from simple, re-usable pieces that will feed your model's training or evaluation loops.
+
+### Create a source `Dataset`
+
+Create a *source* dataset using one of the factory functions like [`Dataset.from_tensors`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#from_tensor_slices), or using objects that read from files like [`TextLineDataset`](https://tensorflow.google.cn/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://tensorflow.google.cn/api_docs/python/tf/data/TFRecordDataset). See the [TensorFlow Dataset guide](https://tensorflow.google.cn/guide/datasets#reading_input_data) for more information.
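+
+For a quick sense of how two of these factory functions differ, here is a minimal sketch (illustrative only; the values are arbitrary): `Dataset.from_tensors` treats its argument as a single element, while `Dataset.from_tensor_slices` slices it along the first dimension.
+
+```py
+# from_tensors wraps the whole input as one element; from_tensor_slices
+# slices along the first axis, yielding one element per entry.
+ds_whole = tf.data.Dataset.from_tensors([1, 2, 3])
+ds_slices = tf.data.Dataset.from_tensor_slices([1, 2, 3])
+
+print(len(list(ds_whole)))   # 1 -> the whole tensor is a single element
+print(len(list(ds_slices)))  # 3 -> one element per entry
+```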
+ +```py +ds_tensors = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6]) + +# Create a CSV file +import tempfile +_, filename = tempfile.mkstemp() + +with open(filename, 'w') as f: + f.write("""Line 1 +Line 2 +Line 3 + """) + +ds_file = tf.data.TextLineDataset(filename) +``` + +### Apply transformations + +Use the transformations functions like [`map`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#map), [`batch`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#batch), and [`shuffle`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#shuffle) to apply transformations to dataset records. + +```py +ds_tensors = ds_tensors.map(tf.square).shuffle(2).batch(2) + +ds_file = ds_file.batch(2) +``` + +### Iterate + +[`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) objects support iteration to loop over records: + +```py +print('Elements of ds_tensors:') +for x in ds_tensors: + print(x) + +print('\nElements in ds_file:') +for x in ds_file: + print(x) +``` + +```py +Elements of ds_tensors: +tf.Tensor([1 4], shape=(2,), dtype=int32) +tf.Tensor([16 9], shape=(2,), dtype=int32) +tf.Tensor([25 36], shape=(2,), dtype=int32) + +Elements in ds_file: +tf.Tensor([b'Line 1' b'Line 2'], shape=(2,), dtype=string) +tf.Tensor([b'Line 3' b' '], shape=(2,), dtype=string) + +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/031.md b/Tensorflow/TensorFlow2.0/031.md new file mode 100644 index 00000000..ad82eb1b --- /dev/null +++ b/Tensorflow/TensorFlow2.0/031.md @@ -0,0 +1,308 @@ +# Custom layers + +> 原文:[https://tensorflow.google.cn/tutorials/customization/custom_layers](https://tensorflow.google.cn/tutorials/customization/custom_layers) + +We recommend using [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) as a high-level API for building neural networks. That said, most TensorFlow APIs are usable with eager execution. + +```py +import tensorflow as tf +``` + +```py +print(tf.test.is_gpu_available()) +``` + +```py +WARNING:tensorflow:From :1: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version. +Instructions for updating: +Use `tf.config.list_physical_devices('GPU')` instead. +True + +``` + +## Layers: common sets of useful operations + +Most of the time when writing code for machine learning models you want to operate at a higher level of abstraction than individual operations and manipulation of individual variables. + +Many machine learning models are expressible as the composition and stacking of relatively simple layers, and TensorFlow provides both a set of many common layers as a well as easy ways for you to write your own application-specific layers either from scratch or as the composition of existing layers. + +TensorFlow includes the full [Keras](https://keras.io) API in the tf.keras package, and the Keras layers are very useful when building your own models. + +```py +# In the tf.keras.layers package, layers are objects. To construct a layer, +# simply construct the object. Most layers take as a first argument the number +# of output dimensions / channels. +layer = tf.keras.layers.Dense(100) +# The number of input dimensions is often unnecessary, as it can be inferred +# the first time the layer is used, but it can be provided if you want to +# specify it manually, which is useful in some complex models. 
+layer = tf.keras.layers.Dense(10, input_shape=(None, 5)) +``` + +The full list of pre-existing layers can be seen in [the documentation](https://tensorflow.google.cn/api_docs/python/tf/keras/layers). It includes Dense (a fully-connected layer), Conv2D, LSTM, BatchNormalization, Dropout, and many others. + +```py +# To use a layer, simply call it. +layer(tf.zeros([10, 5])) +``` + +```py + + +``` + +```py +# Layers have many useful methods. For example, you can inspect all variables +# in a layer using `layer.variables` and trainable variables using +# `layer.trainable_variables`. In this case a fully-connected layer +# will have variables for weights and biases. +layer.variables +``` + +```py +[, + ] + +``` + +```py +# The variables are also accessible through nice accessors +layer.kernel, layer.bias +``` + +```py +(, + ) + +``` + +## Implementing custom layers + +The best way to implement your own layer is extending the tf.keras.Layer class and implementing: + +1. `__init__` , where you can do all input-independent initialization +2. `build`, where you know the shapes of the input tensors and can do the rest of the initialization +3. `call`, where you do the forward computation + +Note that you don't have to wait until `build` is called to create your variables, you can also create them in `__init__`. However, the advantage of creating them in `build` is that it enables late variable creation based on the shape of the inputs the layer will operate on. On the other hand, creating variables in `__init__` would mean that shapes required to create the variables will need to be explicitly specified. + +```py +class MyDenseLayer(tf.keras.layers.Layer): + def __init__(self, num_outputs): + super(MyDenseLayer, self).__init__() + self.num_outputs = num_outputs + + def build(self, input_shape): + self.kernel = self.add_weight("kernel", + shape=[int(input_shape[-1]), + self.num_outputs]) + + def call(self, input): + return tf.matmul(input, self.kernel) + +layer = MyDenseLayer(10) +``` + +```py +_ = layer(tf.zeros([10, 5])) # Calling the layer `.builds` it. +``` + +```py +print([var.name for var in layer.trainable_variables]) +``` + +```py +['my_dense_layer/kernel:0'] + +``` + +Overall code is easier to read and maintain if it uses standard layers whenever possible, as other readers will be familiar with the behavior of standard layers. If you want to use a layer which is not present in [`tf.keras.layers`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers), consider filing a [github issue](http://github.com/tensorflow/tensorflow/issues/new) or, even better, sending us a pull request! + +## Models: Composing layers + +Many interesting layer-like things in machine learning models are implemented by composing existing layers. For example, each residual block in a resnet is a composition of convolutions, batch normalizations, and a shortcut. Layers can be nested inside other layers. + +Typically you inherit from [`keras.Model`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model) when you need the model methods like: [`Model.fit`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#fit),[`Model.evaluate`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#evaluate), and [`Model.save`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#save) (see [Custom Keras layers and models](https://tensorflow.google.cn/guide/keras/custom_layers_and_models) for details). 
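+
+As a minimal sketch (not from the original notebook; the layer sizes and the random training data below are arbitrary), subclassing `tf.keras.Model` makes those methods available with no extra code:
+
+```py
+class TwoDenseNet(tf.keras.Model):
+  def __init__(self):
+    super(TwoDenseNet, self).__init__()
+    self.dense1 = tf.keras.layers.Dense(16, activation='relu')
+    self.dense2 = tf.keras.layers.Dense(1)
+
+  def call(self, inputs):
+    return self.dense2(self.dense1(inputs))
+
+net = TwoDenseNet()
+net.compile(optimizer='adam', loss='mse')
+# fit, evaluate, and save are inherited from keras.Model.
+net.fit(tf.random.normal([32, 4]), tf.random.normal([32, 1]),
+        epochs=1, verbose=0)
+```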
+ +One other feature provided by [`keras.Model`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model) (instead of [`keras.layers.Layer`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Layer)) is that in addition to tracking variables, a [`keras.Model`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model) also tracks its internal layers, making them easier to inspect. + +For example here is a ResNet block: + +```py +class ResnetIdentityBlock(tf.keras.Model): + def __init__(self, kernel_size, filters): + super(ResnetIdentityBlock, self).__init__(name='') + filters1, filters2, filters3 = filters + + self.conv2a = tf.keras.layers.Conv2D(filters1, (1, 1)) + self.bn2a = tf.keras.layers.BatchNormalization() + + self.conv2b = tf.keras.layers.Conv2D(filters2, kernel_size, padding='same') + self.bn2b = tf.keras.layers.BatchNormalization() + + self.conv2c = tf.keras.layers.Conv2D(filters3, (1, 1)) + self.bn2c = tf.keras.layers.BatchNormalization() + + def call(self, input_tensor, training=False): + x = self.conv2a(input_tensor) + x = self.bn2a(x, training=training) + x = tf.nn.relu(x) + + x = self.conv2b(x) + x = self.bn2b(x, training=training) + x = tf.nn.relu(x) + + x = self.conv2c(x) + x = self.bn2c(x, training=training) + + x += input_tensor + return tf.nn.relu(x) + +block = ResnetIdentityBlock(1, [1, 2, 3]) +``` + +```py +_ = block(tf.zeros([1, 2, 3, 3])) +``` + +```py +block.layers +``` + +```py +[, + , + , + , + , + ] + +``` + +```py +len(block.variables) +``` + +```py +18 + +``` + +```py +block.summary() +``` + +```py +Model: "resnet_identity_block" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d (Conv2D) multiple 4 +_________________________________________________________________ +batch_normalization (BatchNo multiple 4 +_________________________________________________________________ +conv2d_1 (Conv2D) multiple 4 +_________________________________________________________________ +batch_normalization_1 (Batch multiple 8 +_________________________________________________________________ +conv2d_2 (Conv2D) multiple 9 +_________________________________________________________________ +batch_normalization_2 (Batch multiple 12 +================================================================= +Total params: 41 +Trainable params: 29 +Non-trainable params: 12 +_________________________________________________________________ + +``` + +Much of the time, however, models which compose many layers simply call one layer after the other. 
This can be done in very little code using [`tf.keras.Sequential`](https://tensorflow.google.cn/api_docs/python/tf/keras/Sequential): + +```py +my_seq = tf.keras.Sequential([tf.keras.layers.Conv2D(1, (1, 1), + input_shape=( + None, None, 3)), + tf.keras.layers.BatchNormalization(), + tf.keras.layers.Conv2D(2, 1, + padding='same'), + tf.keras.layers.BatchNormalization(), + tf.keras.layers.Conv2D(3, (1, 1)), + tf.keras.layers.BatchNormalization()]) +my_seq(tf.zeros([1, 2, 3, 3])) +``` + +```py + + +``` + +```py +my_seq.summary() +``` + +```py +Model: "sequential" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_3 (Conv2D) (None, None, None, 1) 4 +_________________________________________________________________ +batch_normalization_3 (Batch (None, None, None, 1) 4 +_________________________________________________________________ +conv2d_4 (Conv2D) (None, None, None, 2) 4 +_________________________________________________________________ +batch_normalization_4 (Batch (None, None, None, 2) 8 +_________________________________________________________________ +conv2d_5 (Conv2D) (None, None, None, 3) 9 +_________________________________________________________________ +batch_normalization_5 (Batch (None, None, None, 3) 12 +================================================================= +Total params: 41 +Trainable params: 29 +Non-trainable params: 12 +_________________________________________________________________ + +``` + +# Next steps + +Now you can go back to the previous notebook and adapt the linear regression example to use layers and models to be better structured. \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/032.md b/Tensorflow/TensorFlow2.0/032.md new file mode 100644 index 00000000..6c37305c --- /dev/null +++ b/Tensorflow/TensorFlow2.0/032.md @@ -0,0 +1,626 @@ +# 自定义训练: 演示 + +> 原文:[https://tensorflow.google.cn/tutorials/customization/custom_training_walkthrough](https://tensorflow.google.cn/tutorials/customization/custom_training_walkthrough) + +这个教程将利用机器学习的手段来对鸢尾花按照物种进行分类。本教程将利用 TensorFlow 来进行以下操作: + +1. 构建一个模型, +2. 用样例数据集对模型进行训练,以及 +3. 利用该模型对未知数据进行预测。 + +## TensorFlow 编程 + +本指南采用了以下高级 TensorFlow 概念: + +* 使用 TensorFlow 默认的 [eager execution](https://tensorflow.google.cn/guide/eager) 开发环境, +* 使用 [Datasets API](https://tensorflow.google.cn/guide/datasets) 导入数据, +* 使用 TensorFlow 的 [Keras API](https://keras.io/getting-started/sequential-model-guide/) 来构建各层以及整个模型。 + +本教程的结构同很多 TensorFlow 程序相似: + +1. 数据集的导入与解析 +2. 选择模型类型 +3. 对模型进行训练 +4. 评估模型效果 +5. 
使用训练过的模型进行预测 + +## 环境的搭建 + +### 配置导入 + +导入 TensorFlow 以及其他需要的 Python 库。 默认情况下,TensorFlow 用 [eager execution](https://tensorflow.google.cn/guide/eager) 来实时评估操作, 返回具体值而不是建立一个稍后执行的[计算图](https://tensorflow.google.cn/guide/graphs)。 如果您习惯使用 REPL 或 python 交互控制台, 对此您会感觉得心应手。 + +```py +import os +import matplotlib.pyplot as plt +``` + +```py +import tensorflow as tf +``` + +```py +print("TensorFlow version: {}".format(tf.__version__)) +print("Eager execution: {}".format(tf.executing_eagerly())) +``` + +```py +TensorFlow version: 2.3.0 +Eager execution: True + +``` + +## 鸢尾花分类问题 + +想象一下,您是一名植物学家,正在寻找一种能够对所发现的每株鸢尾花进行自动归类的方法。机器学习可提供多种从统计学上分类花卉的算法。例如,一个复杂的机器学习程序可以根据照片对花卉进行分类。我们的要求并不高 - 我们将根据鸢尾花花萼和花瓣的长度和宽度对其进行分类。 + +鸢尾属约有 300 个品种,但我们的程序将仅对下列三个品种进行分类: + +* 山鸢尾 +* 维吉尼亚鸢尾 +* 变色鸢尾 + +| ![Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor](img/bb63d10882d3aa9a631d3cf50ff7f21e.png) | +| **Figure 1.** [山鸢尾](https://commons.wikimedia.org/w/index.php?curid=170298) (by [Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0), [变色鸢尾](https://commons.wikimedia.org/w/index.php?curid=248095), (by [Dlanglois](https://commons.wikimedia.org/wiki/User:Dlanglois), CC BY-SA 3.0), and [维吉尼亚鸢尾](https://www.flickr.com/photos/33397993@N05/3352169862) (by [Frank Mayfield](https://www.flickr.com/photos/33397993@N05), CC BY-SA 2.0). +  | + +幸运的是,有人已经创建了一个包含有花萼和花瓣的测量值的[120 株鸢尾花的数据集](https://en.wikipedia.org/wiki/Iris_flower_data_set)。这是一个在入门级机器学习分类问题中经常使用的经典数据集。 + +## 导入和解析训练数据集 + +下载数据集文件并将其转换为可供此 Python 程序使用的结构。 + +### 下载数据集 + +使用 [tf.keras.utils.get_file](https://tensorflow.google.cn/api_docs/python/tf/keras/utils/get_file) 函数下载训练数据集文件。该函数会返回下载文件的文件路径: + +```py +train_dataset_url = "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv" + +train_dataset_fp = tf.keras.utils.get_file(fname=os.path.basename(train_dataset_url), + origin=train_dataset_url) + +print("Local copy of the dataset file: {}".format(train_dataset_fp)) +``` + +```py +Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv +8192/2194 [================================================================================================================] - 0s 0us/step +Local copy of the dataset file: /home/kbuilder/.keras/datasets/iris_training.csv + +``` + +### 检查数据 + +数据集 `iris_training.csv` 是一个纯文本文件,其中存储了逗号分隔值 (CSV) 格式的表格式数据.请使用 `head -n5` 命令查看前 5 个条目: + +```py +head -n5 {train_dataset_fp} + +``` + +```py +120,4,setosa,versicolor,virginica +6.4,2.8,5.6,2.2,2 +5.0,2.3,3.3,1.0,1 +4.9,2.5,4.5,1.7,2 +4.9,3.1,1.5,0.1,0 + +``` + +我们可以从该数据集视图中注意到以下信息: + +1. 
第一行是表头,其中包含数据集信息: + +* 共有 120 个样本。每个样本都有四个特征和一个标签名称,标签名称有三种可能。 +* 后面的行是数据记录,每个[样本](https://developers.google.cn/machine-learning/glossary/#example)各占一行,其中: + * 前四个字段是[特征](https://developers.google.cn/machine-learning/glossary/#feature): 这四个字段代表的是样本的特点。在此数据集中,这些字段存储的是代表花卉测量值的浮点数。 + * 最后一列是[标签](https://developers.google.cn/machine-learning/glossary/#label):即我们想要预测的值。对于此数据集,该值为 0、1 或 2 中的某个整数值(每个值分别对应一个花卉名称)。 + +我们用代码表示出来: + +```py +# CSV 文件中列的顺序 +column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'] + +feature_names = column_names[:-1] +label_name = column_names[-1] + +print("Features: {}".format(feature_names)) +print("Label: {}".format(label_name)) +``` + +```py +Features: ['sepal_length', 'sepal_width', 'petal_length', 'petal_width'] +Label: species + +``` + +每个标签都分别与一个字符串名称(例如 “setosa” )相关联,但机器学习通常依赖于数字值。标签编号会映射到一个指定的表示法,例如: + +* `0` : 山鸢尾 +* `1` : 变色鸢尾 +* `2` : 维吉尼亚鸢尾 + +如需详细了解特征和标签,请参阅 [《机器学习速成课程》的“机器学习术语”部分](https://developers.google.cn/machine-learning/crash-course/framing/ml-terminology). + +```py +class_names = ['Iris setosa', 'Iris versicolor', 'Iris virginica'] +``` + +### 创建一个 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) + +TensorFlow 的 [Dataset API](https://tensorflow.google.cn/guide/datasets) 可处理在向模型加载数据时遇到的许多常见情况。这是一种高阶 API ,用于读取数据并将其转换为可供训练使用的格式。如需了解详情,请参阅[数据集快速入门指南](https://tensorflow.google.cn/get_started/datasets_quickstart) + +由于数据集是 CSV 格式的文本文件,请使用 [make_csv_dataset](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/make_csv_dataset) 函数将数据解析为合适的格式。由于此函数为训练模型生成数据,默认行为是对数据进行随机处理 (`shuffle=True, shuffle_buffer_size=10000`),并且无限期重复数据集(`num_epochs=None`)。 我们还设置了 [batch_size](https://developers.google.cn/machine-learning/glossary/#batch_size) 参数: + +```py +batch_size = 32 + +train_dataset = tf.data.experimental.make_csv_dataset( + train_dataset_fp, + batch_size, + column_names=column_names, + label_name=label_name, + num_epochs=1) +``` + +`make_csv_dataset` 返回一个`(features, label)` 对构建的 [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) ,其中 `features` 是一个字典: `{'feature_name': value}` + +这些 `Dataset` 对象是可迭代的。 我们来看看下面的一些特征: + +```py +features, labels = next(iter(train_dataset)) + +print(features) +``` + +```py +OrderedDict([('sepal_length', ), ('sepal_width', ), ('petal_length', ), ('petal_width', )]) + +``` + +注意到具有相似特征的样本会归为一组,即分为一批。更改 `batch_size` 可以设置存储在这些特征数组中的样本数。 + +绘制该批次中的几个特征后,就会开始看到一些集群现象: + +```py +plt.scatter(features['petal_length'], + features['sepal_length'], + c=labels, + cmap='viridis') + +plt.xlabel("Petal length") +plt.ylabel("Sepal length") +plt.show() +``` + +![png](img/6396c35912fab965e30d9adf6c7c8981.png) + +要简化模型构建步骤,请创建一个函数以将特征字典重新打包为形状为 `(batch_size, num_features)` 的单个数组。 + +此函数使用 [tf.stack](https://tensorflow.google.cn/api_docs/python/tf/stack) 方法,该方法从张量列表中获取值,并创建指定维度的组合张量: + +```py +def pack_features_vector(features, labels): + """将特征打包到一个数组中""" + features = tf.stack(list(features.values()), axis=1) + return features, labels +``` + +然后使用 [tf.data.Dataset.map](https://tensorflow.google.cn/api_docs/python/tf/data/dataset/map) 方法将每个 `(features,label)` 对中的 `features` 打包到训练数据集中: + +```py +train_dataset = train_dataset.map(pack_features_vector) +``` + +`Dataset` 的特征元素被构成了形如 `(batch_size, num_features)` 的数组。我们来看看前几个样本: + +```py +features, labels = next(iter(train_dataset)) + +print(features[:5]) +``` + +```py +tf.Tensor( +[[5\. 3.5 1.3 0.3] + [4.8 3.1 1.6 0.2] + [6.3 2.7 4.9 1.8] + [7.4 2.8 6.1 1.9] + [5\. 
+## 选择模型类型
+
+### 为何要使用模型?
+
+[模型](https://developers.google.cn/machine-learning/crash-course/glossary#model)是指特征与标签之间的关系。对于鸢尾花分类问题,模型定义了花萼和花瓣测量值与预测的鸢尾花品种之间的关系。一些简单的模型可以用几行代数进行描述,但复杂的机器学习模型拥有大量难以汇总的参数。
+
+您能否在不使用机器学习的情况下确定四个特征与鸢尾花品种之间的关系?也就是说,您能否使用传统编程技巧(例如大量条件语句)创建模型?也许能,前提是反复分析该数据集,并最终确定花瓣和花萼测量值与特定品种的关系。对于更复杂的数据集来说,这会变得非常困难,或许根本就做不到。一个好的机器学习方法可为您确定模型。如果您将足够多的代表性样本馈送到正确类型的机器学习模型中,该程序便会为您找出相应的关系。
+
+### 选择模型
+
+我们需要选择要进行训练的模型类型。模型具有许多类型,挑选合适的类型需要一定的经验。本教程使用神经网络来解决鸢尾花分类问题。[神经网络](https://developers.google.cn/machine-learning/glossary/#neural_network)可以发现特征与标签之间的复杂关系。神经网络是一个高度结构化的图,其中包含一个或多个[隐藏层](https://developers.google.cn/machine-learning/glossary/#hidden_layer)。每个隐藏层都包含一个或多个[神经元](https://developers.google.cn/machine-learning/glossary/#neuron)。神经网络有多种类别,该程序使用的是密集型神经网络,也称为[全连接神经网络](https://developers.google.cn/machine-learning/glossary/#fully_connected_layer):一个层中的神经元将从上一层中的每个神经元获取输入连接。例如,图 2 显示了一个密集型神经网络,其中包含 1 个输入层、2 个隐藏层以及 1 个输出层:
+
+| ![网络结构示意图: 输入层, 2 隐藏层, 输出层](img/d6c8610603858ddd864cc7f024f16e40.png) |
+| **图 2.** 包含特征、隐藏层和预测的神经网络
+  |
+
+当图 2 中的模型经过训练并获得无标签样本后,它会产生 3 个预测结果:相应鸢尾花属于指定品种的可能性。这种预测称为[推理](https://developers.google.cn/machine-learning/crash-course/glossary#inference)。对于该示例,输出预测结果的总和是 1.0。在图 2 中,该预测结果分解如下:山鸢尾为 0.02,变色鸢尾为 0.95,维吉尼亚鸢尾为 0.03。这意味着该模型预测某个无标签鸢尾花样本是变色鸢尾的概率为 95%。
+
+### 使用 Keras 创建模型
+
+TensorFlow [tf.keras](https://tensorflow.google.cn/api_docs/python/tf/keras) API 是创建模型和层的首选方式。通过该 API,您可以轻松地构建模型并进行实验,而将所有部分连接在一起的复杂工作则由 Keras 处理。
+
+[tf.keras.Sequential](https://tensorflow.google.cn/api_docs/python/tf/keras/Sequential) 模型是层的线性堆叠。该模型的构造函数会采用一系列层实例;在本示例中,采用的是 2 个[密集层](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense)(各自包含 10 个节点),以及 1 个输出层(包含 3 个代表标签预测的节点)。第一个层的 `input_shape` 参数对应该数据集中的特征数量,它是一项必需参数:
+
+```py
+model = tf.keras.Sequential([
+  tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(4,)),  # 需要给出输入的形状
+  tf.keras.layers.Dense(10, activation=tf.nn.relu),
+  tf.keras.layers.Dense(3)
+])
+```
+
+[激活函数](https://developers.google.cn/machine-learning/crash-course/glossary#activation_function)可决定层中每个节点的输出形式。这些非线性关系很重要,如果没有它们,模型将等同于单个层。[激活函数](https://tensorflow.google.cn/api_docs/python/tf/keras/activations)有很多种,但隐藏层通常使用 [ReLU](https://developers.google.cn/machine-learning/crash-course/glossary#ReLU)。
+
+隐藏层和神经元的理想数量取决于问题和数据集。与机器学习的多个方面一样,选择最佳的神经网络形状需要一定的知识水平和实验基础。一般来说,增加隐藏层和神经元的数量通常会产生更强大的模型,而这需要更多数据才能有效地进行训练。
+
+### 使用模型
+
+我们快速了解一下此模型如何处理一批特征:
+
+```py
+predictions = model(features)
+predictions[:5]
+```
+
+```py
+<tf.Tensor: shape=(5, 3), dtype=float32, numpy=…>
+
+```
+
+在此示例中,每个样本针对每个类别返回一个 [logit](https://developers.google.cn/machine-learning/crash-course/glossary#logits)。
+
+要将这些 logits 转换为每个类别的概率,请使用 [softmax](https://developers.google.cn/machine-learning/crash-course/glossary#softmax) 函数:
+
+```py
+tf.nn.softmax(predictions[:5])
+```
+
+```py
+<tf.Tensor: shape=(5, 3), dtype=float32, numpy=…>
+
+```
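+
+可以顺手验证一下前文所说的“输出预测结果的总和是 1.0”。下面是一个小示意(沿用上面的 `predictions`),把 softmax 之后每个样本的 3 个类别概率相加:
+
+```py
+# 示意:softmax 之后,每个样本的各类别概率之和应接近 1.0
+probs = tf.nn.softmax(predictions[:5])
+print(tf.reduce_sum(probs, axis=1))  # 5 个元素,每个都约等于 1.0
+```
+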
+沿类别维度执行 [`tf.argmax`](https://tensorflow.google.cn/api_docs/python/tf/math/argmax) 运算可得出预测的类别索引。不过,该模型尚未接受训练,因此这些预测并不理想。
+
+```py
+print("Prediction: {}".format(tf.argmax(predictions, axis=1)))
+print("    Labels: {}".format(labels))
+```
+
+```py
+Prediction: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+    Labels: [0 0 2 2 0 2 0 1 0 0 1 1 0 1 0 0 2 2 1 1 0 2 2 0 0 2 1 0 0 0 2 2]
+
+```
+
+## 训练模型
+
+[训练](https://developers.google.cn/machine-learning/crash-course/glossary#training)是一个机器学习阶段,在此阶段中,模型会逐渐得到优化,也就是说,模型会了解数据集。目标是充分了解训练数据集的结构,以便对未见过的数据进行预测。如果您从训练数据集中获得了过多的信息,预测便会仅适用于模型见过的数据,而无法泛化。此问题被称之为[过拟合](https://developers.google.cn/machine-learning/crash-course/glossary#overfitting),就好比将答案死记硬背下来,而不去理解问题的解决方式。
+
+鸢尾花分类问题是[监督式机器学习](https://developers.google.cn/machine-learning/glossary/#supervised_machine_learning)的一个示例:模型通过包含标签的样本加以训练。而在[非监督式机器学习](https://developers.google.cn/machine-learning/glossary/#unsupervised_machine_learning)中,样本不包含标签。相反,模型通常会在特征中发现一些规律。
+
+### 定义损失和梯度函数
+
+在训练和评估阶段,我们都需要计算模型的[损失](https://developers.google.cn/machine-learning/crash-course/glossary#loss)。这样可以衡量模型的预测结果与预期标签有多大偏差,也就是说,模型的效果有多差。我们希望尽可能减小或优化这个值。
+
+我们的模型会使用 [`tf.keras.losses.SparseCategoricalCrossentropy`](https://tensorflow.google.cn/api_docs/python/tf/keras/losses/SparseCategoricalCrossentropy) 函数计算其损失,此函数会接受模型的类别概率预测结果和预期标签,然后返回样本的平均损失。
+
+```py
+loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+```
+
+```py
+def loss(model, x, y):
+  y_ = model(x)
+
+  return loss_object(y_true=y, y_pred=y_)
+
+l = loss(model, features, labels)
+print("Loss test: {}".format(l))
+```
+
+```py
+Loss test: 1.6707830429077148
+
+```
+
+使用 [tf.GradientTape](https://tensorflow.google.cn/api_docs/python/tf/GradientTape) 上下文来计算[梯度](https://developers.google.cn/machine-learning/crash-course/glossary#gradient),以优化你的模型:
+
+```py
+def grad(model, inputs, targets):
+  with tf.GradientTape() as tape:
+    loss_value = loss(model, inputs, targets)
+  return loss_value, tape.gradient(loss_value, model.trainable_variables)
+```
+
+### 创建优化器
+
+[优化器](https://developers.google.cn/machine-learning/crash-course/glossary#optimizer)会将计算出的梯度应用于模型的变量,以使 `loss` 函数最小化。您可以将损失函数想象为一个曲面(见图 3),我们希望通过到处走动找到该曲面的最低点。梯度指向上升最快的方向,因此我们将沿相反的方向向下移动。我们以迭代方式计算每个批次的损失和梯度,以在训练过程中调整模型。模型会逐渐找到权重和偏差的最佳组合,从而将损失降至最低。损失越低,模型的预测效果就越好。
+
+| ![Optimization algorithms visualized over time in 3D space.](img/fb0bdd5ec0ad3a81aa686b46a6fa16d7.png) |
+| **图 3.** 优化算法在三维空间中随时间推移而变化的可视化效果。
+(来源: [斯坦福大学 CS231n 课程](http://cs231n.github.io/neural-networks-3/),MIT 许可证,Image credit: [Alec Radford](https://twitter.com/alecrad)) |
+
+TensorFlow 有许多可用于训练的[优化算法](https://tensorflow.google.cn/api_guides/python/train)。本教程的代码使用的是 [`tf.keras.optimizers.Adam`](https://tensorflow.google.cn/api_docs/python/tf/keras/optimizers/Adam),它是[随机梯度下降法](https://developers.google.cn/machine-learning/crash-course/glossary#gradient_descent)(SGD)的一种改进形式。`learning_rate` 用于设置每次迭代(向下行走)的步长,这是一个*超参数*,您通常需要调整此参数以获得更好的结果。
+
+我们来设置优化器:
+
+```py
+optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
+```
+
+我们将使用它来计算单个优化步骤:
+
+```py
+loss_value, grads = grad(model, features, labels)
+
+print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(),
+                                          loss_value.numpy()))
+
+optimizer.apply_gradients(zip(grads, model.trainable_variables))
+
+print("Step: {}, Loss: {}".format(optimizer.iterations.numpy(),
+                                  loss(model, features, labels).numpy()))
+```
+
+```py
+Step: 0, Initial Loss: 1.6707830429077148
+Step: 1, Loss: 1.447718620300293
+
+```
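+
+为了更直观地理解“沿梯度反方向、以 `learning_rate` 为步长移动”这一点,下面手动执行一步梯度下降。这是一个与本教程模型无关的玩具示意(函数 f(w) = w*w 是假设的例子):
+
+```py
+# 玩具示意:对 f(w) = w*w 在 w = 3.0 处执行一步梯度下降,学习率取 0.1
+w = tf.Variable(3.0)
+with tf.GradientTape() as tape:
+  f = w * w
+dw = tape.gradient(f, w)  # df/dw = 2w = 6.0
+w.assign_sub(0.1 * dw)    # w 更新为 w - learning_rate * 梯度
+print(w.numpy())          # 2.4:向损失更小的方向移动了一步
+```
+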
+### 训练循环
+
+一切准备就绪后,就可以开始训练模型了!训练循环会将数据集样本馈送到模型中,以帮助模型做出更好的预测。以下代码块可设置这些训练步骤:
+
+1. 迭代每个周期。通过一次数据集即为一个周期。
+2. 在一个周期中,遍历训练 `Dataset` 中的每个样本,并获取样本的*特征*(`x`)和*标签*(`y`)。
+3. 根据样本的特征进行预测,并比较预测结果和标签。衡量预测结果的不准确性,并使用所得的值计算模型的损失和梯度。
+4. 使用 `optimizer` 更新模型的变量。
+5. 跟踪一些统计信息以进行可视化。
+6. 对每个周期重复执行以上步骤。
+
+`num_epochs` 变量是遍历数据集集合的次数。与直觉相反的是,训练模型的时间越长,并不能保证模型就越好。`num_epochs` 是一个可以调整的[超参数](https://developers.google.cn/machine-learning/glossary/#hyperparameter)。选择正确的次数通常需要一定的经验和实验基础。
+
+```py
+## Note: 重新运行此单元时会使用相同的模型变量
+
+# 保留结果用于绘制
+train_loss_results = []
+train_accuracy_results = []
+
+num_epochs = 201
+
+for epoch in range(num_epochs):
+  epoch_loss_avg = tf.keras.metrics.Mean()
+  epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
+
+  # Training loop - using batches of 32
+  for x, y in train_dataset:
+    # 优化模型
+    loss_value, grads = grad(model, x, y)
+    optimizer.apply_gradients(zip(grads, model.trainable_variables))
+
+    # 追踪进度
+    epoch_loss_avg(loss_value)  # 添加当前的 batch loss
+    # 比较预测标签与真实标签
+    epoch_accuracy(y, model(x))
+
+  # 循环结束
+  train_loss_results.append(epoch_loss_avg.result())
+  train_accuracy_results.append(epoch_accuracy.result())
+
+  if epoch % 50 == 0:
+    print("Epoch {:03d}: Loss: {:.3f}, Accuracy: {:.3%}".format(epoch,
+                                                                epoch_loss_avg.result(),
+                                                                epoch_accuracy.result()))
+```
+
+```py
+Epoch 000: Loss: 1.470, Accuracy: 35.833%
+Epoch 050: Loss: 0.112, Accuracy: 96.667%
+Epoch 100: Loss: 0.055, Accuracy: 98.333%
+Epoch 150: Loss: 0.065, Accuracy: 98.333%
+Epoch 200: Loss: 0.053, Accuracy: 98.333%
+
+```
+
+### 可视化损失函数随时间推移而变化的情况
+
+虽然打印出模型的训练进度有帮助,但将这一过程可视化往往*更有帮助*。[TensorBoard](https://tensorflow.google.cn/guide/summaries_and_tensorboard) 是与 TensorFlow 封装在一起的出色可视化工具,不过我们也可以使用 `matplotlib` 模块创建基本图表。
+
+解读这些图表需要一定的经验,不过总的来说,您希望看到*损失*下降且*准确率*上升。
+
+```py
+fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))
+fig.suptitle('Training Metrics')
+
+axes[0].set_ylabel("Loss", fontsize=14)
+axes[0].plot(train_loss_results)
+
+axes[1].set_ylabel("Accuracy", fontsize=14)
+axes[1].set_xlabel("Epoch", fontsize=14)
+axes[1].plot(train_accuracy_results)
+plt.show()
+```
+
+![png](img/4123df32a452f5e3727c6372cf1fa755.png)
+
+## 评估模型的效果
+
+模型已经过训练,现在我们可以获取一些关于其效果的统计信息了。
+
+*评估*指的是确定模型做出预测的效果。要确定模型在鸢尾花分类方面的效果,请将一些花萼和花瓣测量值传递给模型,并要求模型预测它们所代表的鸢尾花品种。然后,将模型的预测结果与实际标签进行比较。例如,如果模型对一半输入样本的品种预测正确,则[准确率](https://developers.google.cn/machine-learning/glossary/#accuracy)为 `0.5`。图 4 显示的是一个效果更好一些的模型,该模型做出 5 次预测,其中有 4 次正确,准确率为 80%:
+
+| 样本特征(花萼长度、花萼宽度、花瓣长度、花瓣宽度) | | | | 标签 | 模型预测 |
+| 5.9 | 3.0 | 4.3 | 1.5 | 1 | 1 |
+| 6.9 | 3.1 | 5.4 | 2.1 | 2 | 2 |
+| 5.1 | 3.3 | 1.7 | 0.5 | 0 | 0 |
+| 6.0 | 3.4 | 4.5 | 1.6 | 1 | 2 |
+| 5.5 | 2.5 | 4.0 | 1.3 | 1 | 1 |
+| **图 4.** 准确率为 80% 的鸢尾花分类器
+  |
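+
+作为示意,可以用图 4 中的 5 组标签/预测手动验证准确率的计算方式(数值直接取自上表):
+
+```py
+# 示意:5 次预测中有 4 次正确,准确率应为 80%
+acc = tf.keras.metrics.Accuracy()
+acc.update_state([1, 2, 0, 1, 1],   # 实际标签
+                 [1, 2, 0, 2, 1])   # 模型预测
+print("{:.0%}".format(acc.result().numpy()))  # 80%
+```
+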
+### 建立测试数据集
+
+评估模型与训练模型相似。最大的区别在于,样本来自一个单独的[测试集](https://developers.google.cn/machine-learning/crash-course/glossary#test_set),而不是训练集。为了公正地评估模型的效果,用于评估模型的样本务必与用于训练模型的样本不同。
+
+测试 `Dataset` 的建立与训练 `Dataset` 相似:下载 CSV 文本文件并解析相应的值(评估时无需打乱数据,因此下面将 `shuffle` 设为 `False`):
+
+```py
+test_url = "https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv"
+
+test_fp = tf.keras.utils.get_file(fname=os.path.basename(test_url),
+                                  origin=test_url)
+```
+
+```py
+Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv
+8192/573 [==============================] - 0s 0us/step
+
+```
+
+```py
+test_dataset = tf.data.experimental.make_csv_dataset(
+    test_fp,
+    batch_size,
+    column_names=column_names,
+    label_name='species',
+    num_epochs=1,
+    shuffle=False)
+
+test_dataset = test_dataset.map(pack_features_vector)
+```
+
+### 根据测试数据集评估模型
+
+与训练阶段不同,模型仅评估测试数据的一个[周期](https://developers.google.cn/machine-learning/glossary/#epoch)。在以下代码单元格中,我们会遍历测试集中的每个样本,然后将模型的预测结果与实际标签进行比较,以此衡量模型在整个测试集中的准确率。
+
+```py
+test_accuracy = tf.keras.metrics.Accuracy()
+
+for (x, y) in test_dataset:
+  logits = model(x)
+  prediction = tf.argmax(logits, axis=1, output_type=tf.int32)
+  test_accuracy(prediction, y)
+
+print("Test set accuracy: {:.3%}".format(test_accuracy.result()))
+```
+
+```py
+Test set accuracy: 96.667%
+
+```
+
+例如,我们可以看到对于最后一批数据,该模型通常预测正确:
+
+```py
+tf.stack([y,prediction],axis=1)
+```
+
+```py
+<tf.Tensor: shape=(30, 2), dtype=int32, numpy=…>
+
+```
+
+## 使用经过训练的模型进行预测
+
+我们已经训练了一个模型,并“证明”了它在鸢尾花品种分类方面表现不错(虽然并不完美)。现在,我们使用经过训练的模型对[无标签样本](https://developers.google.cn/machine-learning/glossary/#unlabeled_example)(即包含特征但不包含标签的样本)进行一些预测。
+
+在现实生活中,无标签样本可能来自很多不同的来源,包括应用、CSV 文件和数据 Feed。这里我们先手动提供三个无标签样本以预测其标签。回想一下,标签编号会映射到一个指定的表示法:
+
+* `0`: 山鸢尾
+* `1`: 变色鸢尾
+* `2`: 维吉尼亚鸢尾
+
+```py
+predict_dataset = tf.convert_to_tensor([
+    [5.1, 3.3, 1.7, 0.5,],
+    [5.9, 3.0, 4.2, 1.5,],
+    [6.9, 3.1, 5.4, 2.1]
+])
+
+predictions = model(predict_dataset)
+
+for i, logits in enumerate(predictions):
+  class_idx = tf.argmax(logits).numpy()
+  p = tf.nn.softmax(logits)[class_idx]
+  name = class_names[class_idx]
+  print("Example {} prediction: {} ({:4.1f}%)".format(i, name, 100*p))
+```
+
+```py
+Example 0 prediction: Iris setosa (99.9%)
+Example 1 prediction: Iris versicolor (99.8%)
+Example 2 prediction: Iris virginica (99.6%)
+
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/033.md b/Tensorflow/TensorFlow2.0/033.md
new file mode 100644
index 00000000..dc3921ef
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/033.md
@@ -0,0 +1 @@
+# 分布式训练
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/034.md b/Tensorflow/TensorFlow2.0/034.md
new file mode 100644
index 00000000..acc35f3b
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/034.md
@@ -0,0 +1,450 @@
+# Keras 的分布式训练
+
+> 原文:[https://tensorflow.google.cn/tutorials/distribute/keras](https://tensorflow.google.cn/tutorials/distribute/keras)
+
+**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。
+
+## 概述
+
+[`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) API 提供了一个抽象层,用于跨多个处理单元(processing units)进行分布式训练。它的目的是允许用户使用现有模型和训练代码,只需要做很少的修改,就可以启用分布式训练。
+
+本教程使用 [`tf.distribute.MirroredStrategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/MirroredStrategy),它通过图内复制(in-graph replication)在一台计算机的多个 GPU(单机多卡)上进行同步训练。本质上,它会将模型的所有变量复制到每个处理器上,然后使用 [all-reduce](http://mpitutorial.com/tutorials/mpi-reduce-and-allreduce/) 整合所有处理器上的梯度(gradients),并将整合后的结果应用于所有副本。
+
+`MirroredStrategy` 是 TensorFlow 中可用的几种分发策略之一。您可以在[分发策略指南](https://tensorflow.google.cn/guide/distribute_strategy)中阅读更多分发策略。
+
+### Keras API
+
+这个例子使用 [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) API 去构建和训练模型。关于自定义训练循环,请参阅 [tf.distribute.Strategy with training loops](/tutorials/distribute/training_loops) 教程。
+
+## 导入依赖
+
+```py
+# 导入 TensorFlow 和 TensorFlow 数据集
+
+import tensorflow_datasets as tfds
+import tensorflow as tf
+tfds.disable_progress_bar()
+
+import os
+```
+
+```py
+print(tf.__version__) +``` + +```py +2.3.0 + +``` + +## 下载数据集 + +下载 MNIST 数据集并从 [TensorFlow Datasets](https://tensorflow.google.cn/datasets) 加载。 这会返回 [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) 格式的数据集。 + +将 `with_info` 设置为 `True` 会包含整个数据集的元数据,其中这些数据集将保存在 `info` 中。 除此之外,该元数据对象包括训练和测试示例的数量。 + +```py +datasets, info = tfds.load(name='mnist', with_info=True, as_supervised=True) + +mnist_train, mnist_test = datasets['train'], datasets['test'] +``` + +## 定义分配策略 + +创建一个 `MirroredStrategy` 对象。这将处理分配策略,并提供一个上下文管理器([`tf.distribute.MirroredStrategy.scope`](https://tensorflow.google.cn/api_docs/python/tf/distribute/MirroredStrategy#scope))来构建你的模型。 + +```py +strategy = tf.distribute.MirroredStrategy() +``` + +```py +INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',) + +INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',) + +``` + +```py +print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) +``` + +```py +Number of devices: 1 + +``` + +## 设置输入管道(pipeline) + +在训练具有多个 GPU 的模型时,您可以通过增加批量大小(batch size)来有效地使用额外的计算能力。通常来说,使用适合 GPU 内存的最大批量大小(batch size),并相应地调整学习速率。 + +```py +# 您还可以执行 info.splits.total_num_examples 来获取总数 +# 数据集中的样例数量。 + +num_train_examples = info.splits['train'].num_examples +num_test_examples = info.splits['test'].num_examples + +BUFFER_SIZE = 10000 + +BATCH_SIZE_PER_REPLICA = 64 +BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync +``` + +0-255 的像素值, [必须标准化到 0-1 范围](https://en.wikipedia.org/wiki/Feature_scaling)。在函数中定义标准化。 + +```py +def scale(image, label): + image = tf.cast(image, tf.float32) + image /= 255 + + return image, label +``` + +将此功能应用于训练和测试数据,随机打乱训练数据,并[批量训练](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#batch)。 请注意,我们还保留了训练数据的内存缓存以提高性能。 + +```py +train_dataset = mnist_train.map(scale).cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE) +eval_dataset = mnist_test.map(scale).batch(BATCH_SIZE) +``` + +## 生成模型 + +在 `strategy.scope` 的上下文中创建和编译 Keras 模型。 + +```py +with strategy.scope(): + model = tf.keras.Sequential([ + tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)), + tf.keras.layers.MaxPooling2D(), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(64, activation='relu'), + tf.keras.layers.Dense(10) + ]) + + model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer=tf.keras.optimizers.Adam(), + metrics=['accuracy']) +``` + +## 定义回调(callback) + +这里使用的回调(callbacks)是: + +* *TensorBoard*: 此回调(callbacks)为 TensorBoard 写入日志,允许您可视化图形。 +* *Model Checkpoint*: 此回调(callbacks)在每个 epoch 后保存模型。 +* *Learning Rate Scheduler*: 使用此回调(callbacks),您可以安排学习率在每个 epoch/batch 之后更改。 + +为了便于说明,添加打印回调(callbacks)以在笔记本中显示*学习率*。 + +```py +# 定义检查点(checkpoint)目录以存储检查点(checkpoints) + +checkpoint_dir = './training_checkpoints' +# 检查点(checkpoint)文件的名称 +checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}") +``` + +```py +# 衰减学习率的函数。 +# 您可以定义所需的任何衰减函数。 +def decay(epoch): + if epoch < 3: + return 1e-3 + elif epoch >= 3 and epoch < 7: + return 1e-4 + else: + return 1e-5 +``` + +```py +# 在每个 epoch 结束时打印 LR 的回调(callbacks)。 +class PrintLR(tf.keras.callbacks.Callback): + def on_epoch_end(self, epoch, logs=None): + print('\nLearning rate for epoch {} is {}'.format(epoch + 1, + model.optimizer.lr.numpy())) +``` + +```py +callbacks = [ + tf.keras.callbacks.TensorBoard(log_dir='./logs'), + tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix, + save_weights_only=True), + 
tf.keras.callbacks.LearningRateScheduler(decay), + PrintLR() +] +``` + +## 训练和评估 + +在该部分,以普通的方式训练模型,在模型上调用 `fit` 并传入在教程开始时创建的数据集。 无论您是否分布式训练,此步骤都是相同的。 + +```py +model.fit(train_dataset, epochs=12, callbacks=callbacks) +``` + +```py +Epoch 1/12 +WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/data/ops/multi_device_iterator_ops.py:601: get_next_as_optional (from tensorflow.python.data.ops.iterator_ops) is deprecated and will be removed in a future version. +Instructions for updating: +Use `tf.data.Iterator.get_next_as_optional()` instead. + +Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/data/ops/multi_device_iterator_ops.py:601: get_next_as_optional (from tensorflow.python.data.ops.iterator_ops) is deprecated and will be removed in a future version. +Instructions for updating: +Use `tf.data.Iterator.get_next_as_optional()` instead. + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + + 1/938 [..............................] - ETA: 0s - loss: 2.3194 - accuracy: 0.0938WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/ops/summary_ops_v2.py:1277: stop (from tensorflow.python.eager.profiler) is deprecated and will be removed after 2020-07-01. 
+Instructions for updating: +use `tf.profiler.experimental.stop` instead. + +Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/ops/summary_ops_v2.py:1277: stop (from tensorflow.python.eager.profiler) is deprecated and will be removed after 2020-07-01. +Instructions for updating: +use `tf.profiler.experimental.stop` instead. + +Warning:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0046s vs `on_train_batch_end` time: 0.0296s). Check your callbacks. + +Warning:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0046s vs `on_train_batch_end` time: 0.0296s). Check your callbacks. + +932/938 [============================>.] - ETA: 0s - loss: 0.2055 - accuracy: 0.9422 +Learning rate for epoch 1 is 0.0010000000474974513 +938/938 [==============================] - 4s 5ms/step - loss: 0.2049 - accuracy: 0.9424 +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). + +Epoch 2/12 +922/938 [============================>.] - ETA: 0s - loss: 0.0681 - accuracy: 0.9797 +Learning rate for epoch 2 is 0.0010000000474974513 +938/938 [==============================] - 3s 3ms/step - loss: 0.0680 - accuracy: 0.9798 +Epoch 3/12 +930/938 [============================>.] - ETA: 0s - loss: 0.0484 - accuracy: 0.9855 +Learning rate for epoch 3 is 0.0010000000474974513 +938/938 [==============================] - 3s 3ms/step - loss: 0.0484 - accuracy: 0.9855 +Epoch 4/12 +920/938 [============================>.] - ETA: 0s - loss: 0.0277 - accuracy: 0.9925 +Learning rate for epoch 4 is 9.999999747378752e-05 +938/938 [==============================] - 3s 3ms/step - loss: 0.0276 - accuracy: 0.9926 +Epoch 5/12 +931/938 [============================>.] - ETA: 0s - loss: 0.0248 - accuracy: 0.9935 +Learning rate for epoch 5 is 9.999999747378752e-05 +938/938 [==============================] - 3s 3ms/step - loss: 0.0247 - accuracy: 0.9936 +Epoch 6/12 +931/938 [============================>.] - ETA: 0s - loss: 0.0231 - accuracy: 0.9938 +Learning rate for epoch 6 is 9.999999747378752e-05 +938/938 [==============================] - 3s 3ms/step - loss: 0.0230 - accuracy: 0.9938 +Epoch 7/12 +936/938 [============================>.] - ETA: 0s - loss: 0.0217 - accuracy: 0.9941 +Learning rate for epoch 7 is 9.999999747378752e-05 +938/938 [==============================] - 3s 3ms/step - loss: 0.0216 - accuracy: 0.9941 +Epoch 8/12 +932/938 [============================>.] - ETA: 0s - loss: 0.0189 - accuracy: 0.9952 +Learning rate for epoch 8 is 9.999999747378752e-06 +938/938 [==============================] - 3s 3ms/step - loss: 0.0189 - accuracy: 0.9952 +Epoch 9/12 +932/938 [============================>.] - ETA: 0s - loss: 0.0188 - accuracy: 0.9953 +Learning rate for epoch 9 is 9.999999747378752e-06 +938/938 [==============================] - 3s 3ms/step - loss: 0.0187 - accuracy: 0.9953 +Epoch 10/12 +932/938 [============================>.] 
- ETA: 0s - loss: 0.0185 - accuracy: 0.9953 +Learning rate for epoch 10 is 9.999999747378752e-06 +938/938 [==============================] - 3s 3ms/step - loss: 0.0185 - accuracy: 0.9953 +Epoch 11/12 +934/938 [============================>.] - ETA: 0s - loss: 0.0183 - accuracy: 0.9953 +Learning rate for epoch 11 is 9.999999747378752e-06 +938/938 [==============================] - 3s 3ms/step - loss: 0.0184 - accuracy: 0.9953 +Epoch 12/12 +931/938 [============================>.] - ETA: 0s - loss: 0.0183 - accuracy: 0.9954 +Learning rate for epoch 12 is 9.999999747378752e-06 +938/938 [==============================] - 3s 3ms/step - loss: 0.0182 - accuracy: 0.9955 + + + +``` + +如下所示,检查点(checkpoint)将被保存。 + +```py +# 检查检查点(checkpoint)目录 +ls {checkpoint_dir} + +``` + +```py +checkpoint ckpt_4.data-00000-of-00001 +ckpt_1.data-00000-of-00001 ckpt_4.index +ckpt_1.index ckpt_5.data-00000-of-00001 +ckpt_10.data-00000-of-00001 ckpt_5.index +ckpt_10.index ckpt_6.data-00000-of-00001 +ckpt_11.data-00000-of-00001 ckpt_6.index +ckpt_11.index ckpt_7.data-00000-of-00001 +ckpt_12.data-00000-of-00001 ckpt_7.index +ckpt_12.index ckpt_8.data-00000-of-00001 +ckpt_2.data-00000-of-00001 ckpt_8.index +ckpt_2.index ckpt_9.data-00000-of-00001 +ckpt_3.data-00000-of-00001 ckpt_9.index +ckpt_3.index + +``` + +要查看模型的执行方式,请加载最新的检查点(checkpoint)并在测试数据上调用 `evaluate` 。 + +使用适当的数据集调用 `evaluate` 。 + +```py +model.load_weights(tf.train.latest_checkpoint(checkpoint_dir)) + +eval_loss, eval_acc = model.evaluate(eval_dataset) + +print('Eval loss: {}, Eval Accuracy: {}'.format(eval_loss, eval_acc)) +``` + +```py +157/157 [==============================] - 1s 6ms/step - loss: 0.0399 - accuracy: 0.9861 +Eval loss: 0.03988004848361015, Eval Accuracy: 0.9861000180244446 + +``` + +要查看输出,您可以在终端下载并查看 TensorBoard 日志。 + +```py +$ tensorboard --logdir=path/to/log-directory +``` + +```py +ls -sh ./logs + +``` + +```py +total 4.0K +4.0K train + +``` + +## 导出到 SavedModel + +将图形和变量导出为与平台无关的 SavedModel 格式。 保存模型后,可以在有或没有 scope 的情况下加载模型。 + +```py +path = 'saved_model/' +``` + +```py +model.save(path, save_format='tf') +``` + +```py +WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/training/tracking/tracking.py:111: Model.state_updates (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version. +Instructions for updating: +This property should not be used in TensorFlow 2.0, as updates are applied automatically. + +Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/training/tracking/tracking.py:111: Model.state_updates (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version. +Instructions for updating: +This property should not be used in TensorFlow 2.0, as updates are applied automatically. + +Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/training/tracking/tracking.py:111: Layer.updates (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. +Instructions for updating: +This property should not be used in TensorFlow 2.0, as updates are applied automatically. + +Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/training/tracking/tracking.py:111: Layer.updates (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version. 
+Instructions for updating:
+This property should not be used in TensorFlow 2.0, as updates are applied automatically.
+
+INFO:tensorflow:Assets written to: saved_model/assets
+
+INFO:tensorflow:Assets written to: saved_model/assets
+
+```
+
+在不使用 `strategy.scope` 的情况下加载模型。
+
+```py
+unreplicated_model = tf.keras.models.load_model(path)
+
+unreplicated_model.compile(
+    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    optimizer=tf.keras.optimizers.Adam(),
+    metrics=['accuracy'])
+
+eval_loss, eval_acc = unreplicated_model.evaluate(eval_dataset)
+
+print('Eval loss: {}, Eval Accuracy: {}'.format(eval_loss, eval_acc))
+```
+
+```py
+157/157 [==============================] - 1s 3ms/step - loss: 0.0399 - accuracy: 0.9861
+Eval loss: 0.03988004848361015, Eval Accuracy: 0.9861000180244446
+
+```
+
+在 `strategy.scope` 范围内加载模型。
+
+```py
+with strategy.scope():
+  replicated_model = tf.keras.models.load_model(path)
+  replicated_model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+                           optimizer=tf.keras.optimizers.Adam(),
+                           metrics=['accuracy'])
+
+  eval_loss, eval_acc = replicated_model.evaluate(eval_dataset)
+  print ('Eval loss: {}, Eval Accuracy: {}'.format(eval_loss, eval_acc))
+```
+
+```py
+157/157 [==============================] - 1s 5ms/step - loss: 0.0399 - accuracy: 0.9861
+Eval loss: 0.03988004848361015, Eval Accuracy: 0.9861000180244446
+
+```
+
+### 示例和教程
+
+以下是使用 Keras `fit`/`compile` 分布式策略的一些示例:
+
+1. 使用 [`tf.distribute.MirroredStrategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/MirroredStrategy) 训练 [Transformer](https://github.com/tensorflow/models/blob/master/official/nlp/transformer/transformer_main.py) 的示例。
+2. 使用 [`tf.distribute.MirroredStrategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/MirroredStrategy) 训练 [NCF](https://github.com/tensorflow/models/blob/master/official/recommendation/ncf_keras_main.py) 的示例。
+
+更多示例请参阅[分布式策略指南](https://tensorflow.google.cn/guide/distribute_strategy#examples_and_tutorials)。
+
+## 下一步
+
+* 阅读[分布式策略指南](https://tensorflow.google.cn/guide/distribute_strategy)。
+* 阅读[自定义训练的分布式训练](/tutorials/distribute/training_loops)教程。
+
+注意:[`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) 正在积极开发中,我们将在不久的将来添加更多示例和教程。欢迎您进行尝试。我们欢迎您通过 [GitHub 上的 issue](https://github.com/tensorflow/tensorflow/issues/new) 提供反馈。
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/035.md b/Tensorflow/TensorFlow2.0/035.md
new file mode 100644
index 00000000..8534fb4a
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/035.md
@@ -0,0 +1,447 @@
+# 使用 tf.distribute.Strategy 进行自定义训练
+
+> 原文:[https://tensorflow.google.cn/tutorials/distribute/custom_training](https://tensorflow.google.cn/tutorials/distribute/custom_training)
+
+本教程演示了如何使用 [`tf.distribute.Strategy`](https://tensorflow.google.cn/guide/distribute_strategy) 来进行自定义训练循环。我们将在 Fashion MNIST 数据集上训练一个简单的 CNN 模型。Fashion MNIST 数据集包含 60000 张尺寸为 28 x 28 的训练图像和 10000 张尺寸为 28 x 28 的测试图像。
+
+我们之所以使用自定义训练循环来训练模型,是因为它们提供了更大的灵活性和对训练过程更精细的控制,同时也让调试模型和训练循环变得更容易。
+
+```py
+# 导入 TensorFlow
+import tensorflow as tf
+
+# 辅助库
+import numpy as np
+import os
+
+print(tf.__version__)
+```
+
+```py
+2.3.0
+
+```
+
+## 下载 Fashion MNIST 数据集
+
+```py
+fashion_mnist = tf.keras.datasets.fashion_mnist
+
+(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
+
+# 向数组添加维度 -> 新的维度 == (28, 28, 1)
+# 我们这样做是因为我们模型中的第一层是卷积层
+# 而且它需要一个四维的输入 (批大小, 高, 宽, 通道)。
+# 批大小维度稍后将添加。
+train_images = train_images[..., None]
+test_images = test_images[..., None]
+
+# 将图像缩放到 [0, 1] 范围内。
+train_images = train_images / np.float32(255)
+test_images = test_images / np.float32(255)
+```
+
+```py
+Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
+32768/29515 [=================================] - 0s 0us/step
+Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
+26427392/26421880 [==============================] - 0s 0us/step
+Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
+8192/5148 [===============================================] - 0s 0us/step
+Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
+4423680/4422102 [==============================] - 0s 0us/step
+
+```
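+
+作为示意,可以快速确认一下预处理的结果是否符合预期(这段验证代码不属于原教程,注释中的打印值基于上述代码推断):
+
+```py
+# 示意:确认形状与数值范围,应为 60000 张 28x28x1 的图像,像素已缩放到 [0, 1]
+print(train_images.shape, train_images.dtype)  # (60000, 28, 28, 1) float32
+print(train_images.min(), train_images.max())  # 0.0 1.0
+```
+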
+## 创建一个分发变量和计算图的策略
+
+[`tf.distribute.MirroredStrategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/MirroredStrategy) 策略是如何运作的?
+
+* 所有变量和模型计算图都会复制到各个副本上。
+* 输入会均匀分布到各个副本上。
+* 每个副本在收到输入后,计算该输入的损失和梯度。
+* 每个副本上计算出的梯度通过求和在所有副本间同步。
+* 同步后,每个副本上的变量副本都会进行相同的更新。
+
+注意:您可以将下面的所有代码放在一个单独单元内。我们将它分成几个代码单元只是用于说明目的。
+
+```py
+# 如果设备未在 `tf.distribute.MirroredStrategy` 的指定列表中,它会被自动检测到。
+strategy = tf.distribute.MirroredStrategy()
+```
+
+```py
+INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
+
+```
+
+```py
+print ('Number of devices: {}'.format(strategy.num_replicas_in_sync))
+```
+
+```py
+Number of devices: 1
+
+```
+
+## 设置输入流水线
+
+本教程稍后还会将计算图和变量导出成与平台无关的 SavedModel 格式;模型保存后,可以在使用或不使用 scope 的情况下载入它。
+
+```py
+BUFFER_SIZE = len(train_images)
+
+BATCH_SIZE_PER_REPLICA = 64
+GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
+
+EPOCHS = 10
+```
+
+创建数据集并分发它们:
+
+```py
+train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(BUFFER_SIZE).batch(GLOBAL_BATCH_SIZE)
+test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(GLOBAL_BATCH_SIZE)
+
+train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
+test_dist_dataset = strategy.experimental_distribute_dataset(test_dataset)
+```
+
+## 创建模型
+
+使用 [`tf.keras.Sequential`](https://tensorflow.google.cn/api_docs/python/tf/keras/Sequential) 创建一个模型。你也可以使用模型子类化 API 来完成这个。
+
+```py
+def create_model():
+  model = tf.keras.Sequential([
+      tf.keras.layers.Conv2D(32, 3, activation='relu'),
+      tf.keras.layers.MaxPooling2D(),
+      tf.keras.layers.Conv2D(64, 3, activation='relu'),
+      tf.keras.layers.MaxPooling2D(),
+      tf.keras.layers.Flatten(),
+      tf.keras.layers.Dense(64, activation='relu'),
+      tf.keras.layers.Dense(10, activation='softmax')
+  ])
+
+  return model
+```
+
+```py
+# 创建检查点目录以存储检查点。
+checkpoint_dir = './training_checkpoints'
+checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
+```
+
+## 定义损失函数
+
+通常,在一台只有单个 GPU/CPU 的机器上,损失值需要除以输入批次中的样本数。
+
+*那么,使用 [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) 时应该如何计算损失?*
+
+* 举一个例子,假设您有 4 个 GPU,批量大小为 64。输入的一个批次会分布在各个副本(4 个 GPU)上,每个副本获得的输入大小为 16。
+
+* 每个副本上的模型使用其各自的输入执行正向传递并计算损失。此时,不应将损失除以各自输入中的样本数(BATCH_SIZE_PER_REPLICA = 16),而应除以 GLOBAL_BATCH_SIZE(64)。
+
+*为什么这样做?*
+
+* 这是因为梯度在每个副本上计算出来之后,会通过**求和(summing)**在各副本之间同步。
+
+*如何在 TensorFlow 中执行此操作?*
+
+* 如果您正在编写自定义训练循环(如本教程所示),您应该将每个样本的损失相加,再将总和除以 GLOBAL_BATCH_SIZE:`scale_loss = tf.reduce_sum(loss) * (1. / GLOBAL_BATCH_SIZE)`。或者,您可以使用 `tf.nn.compute_average_loss`,它接受每个样本的损失、可选的样本权重以及 GLOBAL_BATCH_SIZE 作为参数,并返回缩放后的损失。
+
+* 如果您在模型中使用正则化损失,则需要按副本数量对该损失值进行缩放。您可以使用 [`tf.nn.scale_regularization_loss`](https://tensorflow.google.cn/api_docs/python/tf/nn/scale_regularization_loss) 函数来完成。
+
+* 不建议使用 [`tf.reduce_mean`](https://tensorflow.google.cn/api_docs/python/tf/math/reduce_mean),因为它会将损失除以实际的每副本批次大小,而该大小可能随训练步骤而变化。
+
+* 这种缩减(reduction)和缩放在 Keras 的 `model.compile` 和 `model.fit` 中是自动完成的。
+
+* 如果使用 [`tf.keras.losses`](https://tensorflow.google.cn/api_docs/python/tf/keras/losses) 类(如下面的例子所示),则需要将损失缩减方式明确指定为 `NONE` 或者 `SUM`。使用 [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) 时,`AUTO` 和 `SUM_OVER_BATCH_SIZE` 是不允许的。不允许使用 `AUTO`,是因为用户应明确考虑在分布式情况下哪种缩减方式才是正确的;不允许使用 `SUM_OVER_BATCH_SIZE`,是因为目前它只会除以每副本批次大小,而把按副本数相除的部分留给用户,这很容易被遗漏。因此,我们要求用户自己显式地完成这些缩减。
+
+```py
+with strategy.scope():
+  # 将缩减方式设置为 `NONE`,以便我们之后自行缩减并除以全局批量大小。
+  loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
+      reduction=tf.keras.losses.Reduction.NONE)
+  # 或者使用 loss_fn = tf.keras.losses.sparse_categorical_crossentropy
+  def compute_loss(labels, predictions):
+    per_example_loss = loss_object(labels, predictions)
+    return tf.nn.compute_average_loss(per_example_loss, global_batch_size=GLOBAL_BATCH_SIZE)
+```
+
+## 定义衡量指标以跟踪损失和准确性
+
+这些指标可以跟踪测试的损失,以及训练和测试的准确性。您可以使用 `.result()` 随时获取累积的统计信息。
+
+```py
+with strategy.scope():
+  test_loss = tf.keras.metrics.Mean(name='test_loss')
+
+  train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
+      name='train_accuracy')
+  test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
+      name='test_accuracy')
+```
+
+## 训练循环
+
+```py
+# 必须在`strategy.scope`下创建模型和优化器。
+with strategy.scope():
+  model = create_model()
+
+  optimizer = tf.keras.optimizers.Adam()
+
+  checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
+```
+
+```py
+with strategy.scope():
+  def train_step(inputs):
+    images, labels = inputs
+
+    with tf.GradientTape() as tape:
+      predictions = model(images, training=True)
+      loss = compute_loss(labels, predictions)
+
+    gradients = tape.gradient(loss, model.trainable_variables)
+    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
+
+    train_accuracy.update_state(labels, predictions)
+    return loss
+
+  def test_step(inputs):
+    images, labels = inputs
+
+    predictions = model(images, training=False)
+    t_loss = loss_object(labels, predictions)
+
+    test_loss.update_state(t_loss)
+    test_accuracy.update_state(labels, predictions)
+```
+
+```py
+with strategy.scope():
+  # `experimental_run_v2`将复制提供的计算并使用分布式输入运行它。
+  @tf.function
+  def distributed_train_step(dataset_inputs):
+    per_replica_losses = strategy.experimental_run_v2(train_step,
+                                                      args=(dataset_inputs,))
+    return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses,
+                           axis=None)
+
+  @tf.function
+  def distributed_test_step(dataset_inputs):
+    return strategy.experimental_run_v2(test_step, args=(dataset_inputs,))
+
+  for epoch in range(EPOCHS):
+    # 训练循环
+    total_loss = 0.0
+    num_batches = 0
+    for x in train_dist_dataset:
+      total_loss += distributed_train_step(x)
+      num_batches += 1
+    train_loss = total_loss / num_batches
+
+    # 测试循环
+    for x in test_dist_dataset:
+      distributed_test_step(x)
+
+    if epoch % 2 == 0:
+      checkpoint.save(checkpoint_prefix)
+
+    template = ("Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, "
+                "Test Accuracy: {}")
+    print (template.format(epoch+1, train_loss,
+                           train_accuracy.result()*100, test_loss.result(),
+                           test_accuracy.result()*100))
+
+    test_loss.reset_states()
train_accuracy.reset_states() + test_accuracy.reset_states() +``` + +```py +WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/data/ops/multi_device_iterator_ops.py:601: get_next_as_optional (from tensorflow.python.data.ops.iterator_ops) is deprecated and will be removed in a future version. +Instructions for updating: +Use `tf.data.Iterator.get_next_as_optional()` instead. +WARNING:tensorflow:From :5: StrategyBase.experimental_run_v2 (from tensorflow.python.distribute.distribute_lib) is deprecated and will be removed in a future version. +Instructions for updating: +renamed to `run` +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). +Epoch 1, Loss: 0.5272247791290283, Accuracy: 80.95500183105469, Test Loss: 0.39799919724464417, Test Accuracy: 86.08000183105469 +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). +INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',). 
+Epoch 2, Loss: 0.3536641597747803, Accuracy: 87.19000244140625, Test Loss: 0.3652512729167938, Test Accuracy: 86.79999542236328
+Epoch 3, Loss: 0.30651605129241943, Accuracy: 88.96333312988281, Test Loss: 0.35199666023254395, Test Accuracy: 86.76000213623047
+Epoch 4, Loss: 0.2756423354148865, Accuracy: 89.99333190917969, Test Loss: 0.2974560558795929, Test Accuracy: 89.1500015258789
+Epoch 5, Loss: 0.24928639829158783, Accuracy: 90.86833953857422, Test Loss: 0.28945034742355347, Test Accuracy: 89.31999969482422
+Epoch 6, Loss: 0.22822219133377075, Accuracy: 91.66999816894531, Test Loss: 0.2690503001213074, Test Accuracy: 90.13999938964844
+Epoch 7, Loss: 0.21215270459651947, Accuracy: 92.19833374023438, Test Loss: 0.2673594057559967, Test Accuracy: 90.37000274658203
+Epoch 8, Loss: 0.19466665387153625, Accuracy: 92.86500549316406, Test Loss: 0.280720591545105, Test Accuracy: 90.36000061035156
+Epoch 9, Loss: 0.1819683462381363, Accuracy: 93.4000015258789, Test Loss: 0.2655133008956909, Test Accuracy: 90.54000091552734
+Epoch 10, Loss: 0.16936612129211426, Accuracy: 93.711669921875, Test Loss: 0.26561689376831055, Test Accuracy: 90.55999755859375
+
+```
+
+以上示例中需要注意的事项:
+
+* 我们使用 `for x in ...` 结构来迭代 `train_dist_dataset` 和 `test_dist_dataset`。
+* 缩放后的损失是 `distributed_train_step` 的返回值。这个值会先通过 [`tf.distribute.Strategy.reduce`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy#reduce) 调用在各副本间聚合,再通过对各次 [`tf.distribute.Strategy.reduce`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy#reduce) 的返回值求和,在各批次间聚合。
+* `tf.keras.Metrics` 应在由 `tf.distribute.Strategy.experimental_run_v2` 执行的 `train_step` 和 `test_step` 内部更新。
+* `tf.distribute.Strategy.experimental_run_v2` 返回策略中每个本地副本的结果,并且有多种方法可以处理此结果。您可以执行 [`tf.distribute.Strategy.reduce`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy#reduce) 来获取汇总值。您还可以执行 [`tf.distribute.Strategy.experimental_local_results`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy#experimental_local_results) 来获取结果中包含的、每个本地副本上的值的列表。
+
+## 恢复最新的检查点并进行测试
+
+使用 [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) 训练的模型所保存的检查点,既可以在使用策略的情况下恢复,也可以在不使用策略的情况下恢复。
+
+```py
+eval_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
+    name='eval_accuracy')
+
+new_model = create_model()
+new_optimizer = tf.keras.optimizers.Adam()
+
+test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(GLOBAL_BATCH_SIZE)
+```
+
+```py
+@tf.function
+def eval_step(images, labels):
+  predictions = new_model(images, training=False)
+  eval_accuracy(labels, predictions)
+```
+
+```py
+checkpoint = tf.train.Checkpoint(optimizer=new_optimizer, model=new_model)
+checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
+
+for images, labels in test_dataset:
+  eval_step(images, labels)
+
+print ('Accuracy after restoring the saved model without strategy: {}'.format(
+    eval_accuracy.result()*100))
+```
+
+```py
+Accuracy after restoring the saved model without strategy: 90.54000091552734
+
+```
+
+## 迭代一个数据集的替代方法
+
+### 使用迭代器
+
+如果你想以给定的步数迭代数据集而不需要完整遍历,可以用 `iter` 创建一个迭代器,并在其上显式调用 `next`。您可以选择在 tf.function 内部和外部迭代数据集。这是一个小片段,演示了使用迭代器在 tf.function 外部迭代数据集。
+
+```py
+with strategy.scope():
+  for _ in range(EPOCHS):
+    total_loss = 0.0
+    num_batches = 0
+    train_iter = iter(train_dist_dataset)
+
+    for _ in range(10):
+      total_loss += distributed_train_step(next(train_iter))
+      num_batches += 1
+    average_train_loss = total_loss / num_batches
+
+    template = ("Epoch {}, Loss: {}, Accuracy: {}")
+    print (template.format(epoch+1, average_train_loss, train_accuracy.result()*100))
+    train_accuracy.reset_states()
+```
+
+```py
+Epoch 10, Loss: 0.17099234461784363, Accuracy: 93.75
+Epoch 10, Loss: 0.12641692161560059, Accuracy: 95.9375
+Epoch 10, Loss: 0.11636483669281006, Accuracy: 96.09375
+Epoch 10, Loss: 0.1404765546321869, Accuracy: 95.0
+Epoch 10, Loss: 0.16838286817073822, Accuracy: 92.5
+Epoch 10, Loss: 0.1905607134103775, Accuracy: 93.125
+Epoch 10, Loss: 0.12706035375595093, Accuracy: 95.78125
+Epoch 10, Loss: 0.14852401614189148, Accuracy: 93.59375
+Epoch 10, Loss: 0.11990274488925934, Accuracy: 95.9375
+Epoch 10, Loss: 0.1237613782286644, Accuracy: 95.9375
+
+```
+
+### 在 tf.function 中迭代
+
+您还可以使用 `for x in ...` 结构在 tf.function 内部迭代整个输入 `train_dist_dataset`,或者像上面那样创建迭代器。下面的例子演示了将一个 epoch 的训练包装进 tf.function,并在该函数内迭代 `train_dist_dataset`。
+
+```py
+with strategy.scope():
+  @tf.function
+  def distributed_train_epoch(dataset):
+    total_loss = 0.0
+    num_batches = 0
+    for x in dataset:
+      per_replica_losses = strategy.experimental_run_v2(train_step,
+                                                        args=(x,))
+      total_loss += strategy.reduce(
+          tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)
+      num_batches += 1
+    return total_loss / tf.cast(num_batches, dtype=tf.float32)
+
+  for epoch in range(EPOCHS):
+    train_loss = distributed_train_epoch(train_dist_dataset)
+
+    template = ("Epoch {}, Loss: {}, Accuracy: {}")
+    print (template.format(epoch+1, train_loss, train_accuracy.result()*100))
+
+    train_accuracy.reset_states()
+```
+
+```py
+Epoch 1, Loss: 0.1545342057943344, Accuracy: 94.34666442871094
+Epoch 2, Loss: 0.14368833601474762, Accuracy: 94.76666259765625
+Epoch 3, Loss: 0.13302761316299438, Accuracy: 95.22833251953125
+Epoch 4, Loss: 0.12302733212709427, Accuracy: 95.51499938964844
+Epoch 5, Loss: 0.11504675447940826, Accuracy: 95.7300033569336
+Epoch 6, Loss: 0.10611504316329956, Accuracy: 96.02000427246094
+Epoch 7, Loss: 0.09776321798563004, Accuracy: 96.3566665649414
+Epoch 8, Loss: 0.0923474133014679, Accuracy: 96.54166412353516
+Epoch 9, Loss: 0.08583918958902359, Accuracy: 96.85833740234375
+Epoch 10, Loss: 0.0784970372915268, Accuracy: 97.12332916259766
+
+```
+
+### 跟踪各副本上的训练损失
+
+注意:一般来说,您应该使用 `tf.keras.Metrics` 来跟踪每个样本的值,并避免使用已在副本内聚合过的值。
+
+我们*不*建议使用 [`tf.metrics.Mean`](https://tensorflow.google.cn/api_docs/python/tf/keras/metrics/Mean) 来跟踪不同副本的训练损失,因为执行过程中会进行损失缩放计算。
+
+例如,如果您运行具有以下特点的训练作业:
+
+* 两个副本
+* 在每个副本上处理两个样本
+* 各副本上产生的损失值分别为 [2, 3] 和 [4, 5]
+* 全局批次大小 = 4
+
+使用损失缩放时,您在每个副本上将各样本的损失值相加,再除以全局批量大小,得到:`(2 + 3) / 4 = 1.25` 和 `(4 + 5) / 4 = 2.25`。
+
+如果您使用 [`tf.metrics.Mean`](https://tensorflow.google.cn/api_docs/python/tf/keras/metrics/Mean) 来跟踪两个副本的损失,结果会有所不同。在这个例子中,你最终得到的 `total` 为 3.50、`count` 为 2,调用 `result()` 时将得到 `total`/`count` = 1.75。而使用 `tf.keras.Metrics` 计算损失时,会按一个等于同步副本数量的额外因子进行缩放。
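+
+下面用几行代码复现上面的算术。这是一个示意性的小片段,数值沿用上文假设的场景(两个副本、全局批大小为 4):
+
+```py
+import tensorflow as tf
+
+# 两个副本上的每样本损失
+replica_losses = [tf.constant([2., 3.]), tf.constant([4., 5.])]
+GLOBAL_BATCH_SIZE = 4
+
+# 按教程的做法缩放:先在副本内求和,再除以全局批大小
+scaled = [tf.reduce_sum(l) / GLOBAL_BATCH_SIZE for l in replica_losses]
+print([float(s) for s in scaled])   # [1.25, 2.25]
+print(float(tf.add_n(scaled)))      # 3.5,恰为全部 4 个样本损失的平均值 (2+3+4+5)/4
+
+# tf.nn.compute_average_loss 可得到同样的每副本缩放值
+print(float(tf.nn.compute_average_loss(
+    replica_losses[0], global_batch_size=GLOBAL_BATCH_SIZE)))  # 1.25
+```
+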
+### 例子和教程
+
+以下是一些在自定义训练循环中使用分发策略的例子:
+
+1. [教程](/tutorials/distribute/training_loops):使用 `MirroredStrategy` 来训练 MNIST。
+2. [DenseNet](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/densenet/distributed_train.py):使用 `MirroredStrategy` 的例子。
+3. [BERT](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_classifier.py):使用 `MirroredStrategy` 和 `TPUStrategy` 来训练的例子。此示例对于了解如何在分布式训练过程中加载检查点以及定期生成检查点特别有帮助。
+4. [NCF](https://github.com/tensorflow/models/blob/master/official/recommendation/ncf_keras_main.py):使用 `MirroredStrategy` 并启用 `keras_use_ctl` 标记的例子。
+5. [NMT](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/nmt_with_attention/distributed_train.py):使用 `MirroredStrategy` 来训练的例子。
+
+更多的例子列在[分发策略指南](https://tensorflow.google.cn/guide/distribute_strategy#examples_and_tutorials)中。
+
+## 下一步
+
+在你的模型上尝试新的 [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) API。
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/036.md b/Tensorflow/TensorFlow2.0/036.md
new file mode 100644
index 00000000..a76b5bda
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/036.md
@@ -0,0 +1,265 @@
+# 利用 Keras 进行多工作器(worker)训练
+
+> 原文:[https://tensorflow.google.cn/tutorials/distribute/multi_worker_with_keras](https://tensorflow.google.cn/tutorials/distribute/multi_worker_with_keras)
+
+**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。
+
+## 概述
+
+本教程使用 [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) API 演示了使用 Keras 模型进行多工作器(worker)分布式训练。借助专为多工作器训练而设计的策略,原本设计在单个工作器上运行的 Keras 模型,只需极少的代码更改即可无缝地在多个工作器上训练。
+
+想要概览 TensorFlow 支持的各种分布式策略,或更深入理解 [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) API 的读者,可以参阅 [TensorFlow 中的分布式训练](https://tensorflow.google.cn/guide/distribute_strategy)指南。
+
+## 配置
+
+首先,设置 TensorFlow 并导入必要的模块。
+
+```py
+!pip install -q tf-nightly
+import tensorflow_datasets as tfds
+import tensorflow as tf
+tfds.disable_progress_bar()
+```
+
+```py
+WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+
+```
+
+## 准备数据集
+
+现在,让我们从 [TensorFlow 数据集](https://tensorflow.google.cn/datasets)中准备 MNIST 数据集。[MNIST 数据集](http://yann.lecun.com/exdb/mnist/)包括 60,000 个训练样本和 10,000 个测试样本,内容是手写数字 0-9 的 28x28 像素单色图像。
+
+```py
+BUFFER_SIZE = 10000
+BATCH_SIZE = 64
+
+def make_datasets_unbatched():
+  # 将 MNIST 数据从 (0, 255] 缩放到 (0., 1.]
+  def scale(image, label):
+    image = tf.cast(image, tf.float32)
+    image /= 255
+    return image, label
+
+  datasets, info = tfds.load(name='mnist',
+                             with_info=True,
+                             as_supervised=True)
+
+  return datasets['train'].map(scale).cache().shuffle(BUFFER_SIZE)
+
+train_datasets = make_datasets_unbatched().batch(BATCH_SIZE)
+```
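+
+可以取出一个批次做个快速检查。下面是一段示意性的验证代码(不属于原教程):
+
+```py
+# 示意:检查一个批次的形状,图像应为 (64, 28, 28, 1),标签应为 (64,)
+images, labels = next(iter(train_datasets))
+print(images.shape, labels.shape)
+```
+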
+## 构建 Keras 模型
+
+在这里,我们使用 [`tf.keras.Sequential`](https://tensorflow.google.cn/api_docs/python/tf/keras/Sequential) API 来构建和编译一个简单的卷积神经网络 Keras 模型,用我们的 MNIST 数据集进行训练。
+
+注意:有关构建 Keras 模型的详细说明,请参阅 [TensorFlow Keras 指南](https://tensorflow.google.cn/guide/keras#sequential_model)。
+
+```py
+def build_and_compile_cnn_model():
+  model = tf.keras.Sequential([
+      tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
+      tf.keras.layers.MaxPooling2D(),
+      tf.keras.layers.Flatten(),
+      tf.keras.layers.Dense(64, activation='relu'),
+      tf.keras.layers.Dense(10)
+  ])
+  model.compile(
+      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+      optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
+      metrics=['accuracy'])
+  return model
+```
+
+让我们首先尝试用少量的 epoch 训练模型,并在单个工作器中观察结果,以确保一切正常。随着训练的迭代,您应该会看到损失(loss)下降、准确率(accuracy)接近 1.0。
+
+```py
+single_worker_model = build_and_compile_cnn_model()
+single_worker_model.fit(x=train_datasets, epochs=3, steps_per_epoch=5)
+```
+
+```py
+Epoch 1/3
+5/5 [==============================] - 1s 15ms/step - loss: 2.3390 - accuracy: 0.0211
+Epoch 2/3
+5/5 [==============================] - 0s 14ms/step - loss: 2.3315 - accuracy: 0.0368
+Epoch 3/3
+5/5 [==============================] - 0s 13ms/step - loss: 2.3271 - accuracy: 0.0484
+
+
+
+```
+
+## 多工作器(worker)配置
+
+现在让我们进入多工作器训练的世界。在 TensorFlow 中,要在多台机器上训练,需要设置 `TF_CONFIG` 环境变量,每台机器可能具有不同的角色。`TF_CONFIG` 用于为集群中的每个工作器指定集群配置。
+
+`TF_CONFIG` 有两个组件:`cluster` 和 `task`。`cluster` 提供有关训练集群的信息,这是一个由不同类型的作业(例如 `worker`)组成的字典。在多工作器训练中,除了常规的“工作器”之外,通常还有一个工作器会承担更多职责,比如保存检查点和为 TensorBoard 编写摘要文件。这样的工作器被称为“主(chief)”工作器,习惯上会将 `index` 为 0 的 `worker` 指定为主工作器(事实上这就是 [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) 的实现方式)。另一方面,`task` 提供当前任务的信息。
+
+在这个例子中,我们将任务 `type` 设置为 `"worker"` 并将任务 `index` 设置为 `0`。这意味着具有这种设置的机器是第一个工作器,它将被指定为主工作器,要比其他工作器做更多的工作。请注意,其他机器也需要设置 `TF_CONFIG` 环境变量,它应该具有相同的 `cluster` 字典,但任务的 `type` 或 `index` 取决于各台机器的角色。
+
+为了便于说明,本教程展示了如何在 `localhost` 上设置一个带有 2 个工作器的 `TF_CONFIG`。实际上,用户会在外部的 IP 地址/端口上创建多个工作器,并在每个工作器上适当地设置 `TF_CONFIG`。
+
+警告:不要在 Colab 中执行以下代码。TensorFlow 的运行时将尝试在指定的 IP 地址和端口创建 gRPC 服务器,这可能会失败。
+
+```py
+os.environ['TF_CONFIG'] = json.dumps({
+    'cluster': {
+        'worker': ["localhost:12345", "localhost:23456"]
+    },
+    'task': {'type': 'worker', 'index': 0}
+})
+```
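+
+作为对照,下面给出同一集群中第二个工作器上对应的 `TF_CONFIG`(假设性示例,仅 `task` 的 `index` 不同;与上面一样,它假定 `os` 和 `json` 已导入):
+
+```py
+# 假设性示例:第二个工作器(index 1)上的 TF_CONFIG,cluster 部分与第一个工作器完全相同
+os.environ['TF_CONFIG'] = json.dumps({
+    'cluster': {
+        'worker': ["localhost:12345", "localhost:23456"]
+    },
+    'task': {'type': 'worker', 'index': 1}
+})
+```
+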
+注意,虽然在该示例中学习速率是固定的,但通常可能需要基于全局批量大小来调整学习速率。
+
+## 选择正确的策略
+
+在 TensorFlow 中,分布式训练包括同步训练(其中训练步骤跨工作器和副本同步)和异步训练(训练步骤未严格同步)。
+
+`MultiWorkerMirroredStrategy` 是同步多工作器训练的推荐策略,将在本指南中进行演示。
+
+要训练模型,请使用 [`tf.distribute.experimental.MultiWorkerMirroredStrategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/experimental/MultiWorkerMirroredStrategy) 的实例。`MultiWorkerMirroredStrategy` 在所有工作器的每台设备上创建模型层中所有变量的副本。它使用 `CollectiveOps`(一种用于集体通信的 TensorFlow 操作)来聚合梯度并使变量保持同步。[`tf.distribute.Strategy` 指南](https://tensorflow.google.cn/guide/distribute_strategy)有关于此策略的更多详细信息。
+
+```py
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+```
+
+```py
+WARNING:tensorflow:Collective ops is not configured at program startup. Some performance features may not be enabled.
+
+Warning:tensorflow:Collective ops is not configured at program startup. Some performance features may not be enabled.
+
+INFO:tensorflow:Using MirroredStrategy with devices ('/device:CPU:0',)
+
+INFO:tensorflow:Using MirroredStrategy with devices ('/device:CPU:0',)
+
+INFO:tensorflow:Single-worker MultiWorkerMirroredStrategy with local_devices = ('/device:CPU:0',), communication = CollectiveCommunication.AUTO
+
+INFO:tensorflow:Single-worker MultiWorkerMirroredStrategy with local_devices = ('/device:CPU:0',), communication = CollectiveCommunication.AUTO
+
+```
+
+注意:`TF_CONFIG` 会被解析,并且 TensorFlow 的 gRPC 服务器会在调用 [`MultiWorkerMirroredStrategy.__init__()`](https://tensorflow.google.cn/api_docs/python/tf/distribute/experimental/MultiWorkerMirroredStrategy#__init__) 时启动,因此必须在创建 [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) 实例之前设置好 `TF_CONFIG` 环境变量。
+
+`MultiWorkerMirroredStrategy` 通过 [`CollectiveCommunication`](https://github.com/tensorflow/tensorflow/blob/a385a286a930601211d78530734368ccb415bee4/tensorflow/python/distribute/cross_device_ops.py#L928) 参数提供多个实现。`RING` 使用 gRPC 作为跨主机通信层,实现基于环的集合通信。`NCCL` 使用 [Nvidia 的 NCCL](https://developer.nvidia.com/nccl) 来实现集合通信。`AUTO` 将选择推迟到运行时。集合通信实现的最佳选择取决于 GPU 的数量和种类,以及集群中的网络互连。
+
+## 使用 MultiWorkerMirroredStrategy 训练模型
+
+通过将 [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) API 集成到 [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) 中,将训练分发到多个工作器所需的唯一更改,就是把模型的构建和 `model.compile()` 调用封装在 `strategy.scope()` 内部。分发策略的范围决定了如何创建变量以及在何处创建变量。对于 `MultiWorkerMirroredStrategy` 而言,创建的变量为 `MirroredVariable`,并且它们会被复制到每个工作器上。
+
+注意:在此 Colab 中,以下代码可以按预期结果运行,但是由于未设置 `TF_CONFIG`,这实际上是单机训练。在您自己的环境中设置了 `TF_CONFIG` 后,在多台机器上训练应该会带来加速。
+
+```py
+NUM_WORKERS = 2
+# 由于 `tf.data.Dataset.batch` 需要全局的批处理大小,
+# 因此此处的批处理大小按工作器数量增加。
+# 以前我们使用 64,现在变成 128。
+GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS
+
+# 创建数据集需要在 MultiWorkerMirroredStrategy 对象
+# 实例化后。
+train_datasets = make_datasets_unbatched().batch(GLOBAL_BATCH_SIZE)
+with strategy.scope():
+  # 模型的建立/编译需要在 `strategy.scope()` 内部。
+  multi_worker_model = build_and_compile_cnn_model()
+
+# Keras 的 `model.fit()` 以特定的时期数和每时期的步数训练模型。
+# 注意此处的数量仅用于演示目的,并不足以产生高质量的模型。
+multi_worker_model.fit(x=train_datasets, epochs=3, steps_per_epoch=5)
+```
+
+```py
+Epoch 1/3
+5/5 [==============================] - 3s 23ms/step - loss: 2.3042 - accuracy: 0.1243
+Epoch 2/3
+5/5 [==============================] - 0s 18ms/step - loss: 2.3129 - accuracy: 0.0801
+Epoch 3/3
+5/5 [==============================] - 0s 19ms/step - loss: 2.2974 - accuracy: 0.1253
+
+
+
+```
+
+### 数据集分片和批(batch)大小
+
+在多工作器训练中,需要将数据分片为多个部分,以确保收敛和性能。但是,请注意,在上面的代码片段中,数据集是直接发送到 `model.fit()` 的,无需手动分片;这是因为 [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) API 在多工作器训练中会自动处理数据集分片。
+
+如果您希望在训练中手动分片,则可以通过 [`tf.data.experimental.DistributeOptions`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/DistributeOptions) API 关闭自动分片。
+
+```py
+options = tf.data.Options()
+options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
+train_datasets_no_auto_shard = train_datasets.with_options(options)
+```
+
+要注意的另一件事是 `datasets` 的批处理大小。在上面的代码片段中,我们使用 `GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS`,这是单个工作器批大小的 `NUM_WORKERS` 倍,因为每个工作器的有效批量大小是全局批量大小(参数从 [`tf.data.Dataset.batch()`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#batch) 传入)除以工作器的数量。通过此更改,我们使每个工作器的批处理大小与以前相同。
+
+## 性能
+
+现在,您已经有了一个完全通过 `MultiWorkerMirroredStrategy` 在多个工作器上运行的 Keras 模型。您可以尝试以下技术来调整多工作器训练的效果。
+
+* `MultiWorkerMirroredStrategy` 提供了多个[集体通信实现](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/distribute/cross_device_ops.py)。`RING` 使用 gRPC 作为跨主机通信层,实现基于环的集合通信。`NCCL` 使用 [Nvidia's NCCL](https://developer.nvidia.com/nccl) 来实现集合通信。`AUTO` 将选择推迟到运行时。集合通信实现的最佳选择取决于 GPU 的数量和种类以及集群中的网络互连。要覆盖自动选择,请为 `MultiWorkerMirroredStrategy` 构造函数的 `communication` 参数指定一个有效值,例如:`communication=tf.distribute.experimental.CollectiveCommunication.NCCL`。
+* 如果可能的话,将变量强制转换为 `tf.float`。ResNet 的官方模型包括如何完成此操作的[示例](https://github.com/tensorflow/models/blob/8367cf6dabe11adf7628541706b660821f397dce/official/resnet/resnet_model.py#L466)。
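+
+下面是显式指定 NCCL 的一个示意(假设运行环境中有支持 NCCL 的 GPU;该关键字参数的取值直接来自上一条要点):
+
+```py
+# 示意:显式指定集体通信实现,而不是让 AUTO 在运行时选择
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+    communication=tf.distribute.experimental.CollectiveCommunication.NCCL)
+```
+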
+## 容错能力
+
+在同步训练中,如果其中一个工作器出现故障并且不存在故障恢复机制,则集群将失败。在工作器可能退出或不稳定的场景下,将 Keras 与 [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) 一起使用具有容错的优势。我们通过在您选择的分布式文件系统中保存训练状态来做到这一点,以便在先前失败或被抢占的实例重新启动后,能够恢复训练状态。
+
+由于所有工作器在训练的 epoch 和 step 上保持同步,因此其他工作器需要等待失败或被抢占的工作器重新启动,才能继续训练。
+
+### ModelCheckpoint 回调
+
+要在多工作器训练中利用容错功能,请在调用 [`tf.keras.Model.fit()`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#fit) 时提供一个 [`tf.keras.callbacks.ModelCheckpoint`](https://tensorflow.google.cn/api_docs/python/tf/keras/callbacks/ModelCheckpoint) 实例。回调会将检查点和训练状态存储在与 `ModelCheckpoint` 的 `filepath` 参数相对应的目录中。
+
+```py
+# 将 `filepath` 参数替换为在文件系统中所有工作器都能访问的路径。
+callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='/tmp/keras-ckpt')]
+with strategy.scope():
+  multi_worker_model = build_and_compile_cnn_model()
+multi_worker_model.fit(x=train_datasets,
+                       epochs=3,
+                       steps_per_epoch=5,
+                       callbacks=callbacks)
+```
+
+```py
+Epoch 1/3
+4/5 [=======================>......] - ETA: 0s - loss: 2.2830 - accuracy: 0.1810
+/tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py:2289: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.
+  warnings.warn('`Model.state_updates` will be removed in a future version. '
+/tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py:1377: UserWarning: `layer.updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.
+  warnings.warn('`layer.updates` will be removed in a future version. '
+
+INFO:tensorflow:Assets written to: /tmp/keras-ckpt/assets
+
+INFO:tensorflow:Assets written to: /tmp/keras-ckpt/assets
+
+5/5 [==============================] - 4s 170ms/step - loss: 2.2852 - accuracy: 0.1790
+Epoch 2/3
+4/5 [=======================>......] - ETA: 0s - loss: 2.2871 - accuracy: 0.1758INFO:tensorflow:Assets written to: /tmp/keras-ckpt/assets
+
+INFO:tensorflow:Assets written to: /tmp/keras-ckpt/assets
+
+5/5 [==============================] - 1s 155ms/step - loss: 2.2869 - accuracy: 0.1797
+Epoch 3/3
+4/5 [=======================>......] - ETA: 0s - loss: 2.2876 - accuracy: 0.2041INFO:tensorflow:Assets written to: /tmp/keras-ckpt/assets
+
+INFO:tensorflow:Assets written to: /tmp/keras-ckpt/assets
+
+5/5 [==============================] - 1s 155ms/step - loss: 2.2872 - accuracy: 0.2064
+
+
+
+```
+4/5 [=======================>......] - ETA: 0s - loss: 2.2876 - accuracy: 0.2041INFO:tensorflow:Assets written to: /tmp/keras-ckpt/assets
+
+5/5 [==============================] - 1s 155ms/step - loss: 2.2872 - accuracy: 0.2064
+
+```
+
+If a worker gets preempted, the whole cluster pauses until the preempted worker is restarted. Once the worker rejoins the cluster, other workers will also restart. Now, every worker reads the checkpoint file that was previously saved and picks up its former state, thereby allowing the cluster to get back in sync, and then training continues.
+
+If you inspect the directory containing the `filepath` you specified in `ModelCheckpoint`, you may notice some temporarily generated checkpoint files. Those files are needed for recovering the previously lost instances, and they will be removed by the library at the end of [`tf.keras.Model.fit()`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#fit) upon successful exiting of your multi-worker training.
+
+## See also
+
+1. The [Distributed Training in TensorFlow](https://tensorflow.google.cn/guide/distribute_strategy) guide provides an overview of the available distribution strategies.
+2. The official [ResNet50](https://github.com/tensorflow/models/blob/master/official/resnet/imagenet_main.py) model, which can be trained using either `MirroredStrategy` or `MultiWorkerMirroredStrategy`.
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/037.md b/Tensorflow/TensorFlow2.0/037.md
new file mode 100644
index 00000000..cf80ebef
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/037.md
@@ -0,0 +1,427 @@
+# Multi-worker training with Estimator
+
+> Original: [https://tensorflow.google.cn/tutorials/distribute/multi_worker_with_estimator](https://tensorflow.google.cn/tutorials/distribute/multi_worker_with_estimator)
+
+**Note:** The TensorFlow community has translated these documents. As community translations are best-effort, there is no guarantee that they are an accurate and up-to-date reflection of the [official English documentation](https://tensorflow.google.cn/?hl=en). If you have suggestions to improve this translation, please submit a pull request to the [tensorflow/docs](https://github.com/tensorflow/docs) GitHub repository. To volunteer to write or review translations, join the [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn).
+
+## Overview
+
+This tutorial demonstrates how [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy) can be used for distributed multi-worker training. If you write your code using [`tf.estimator`](https://tensorflow.google.cn/api_docs/python/tf/estimator), and you are interested in scaling beyond a single machine with high performance, this tutorial is for you.
+
+Before getting started, please read the [`tf.distribute.Strategy` guide](https://tensorflow.google.cn/guide/distribute_strategy). The [multi-GPU training tutorial](https://tensorflow.google.cn/tutorials/distribute/keras) is also relevant, because this tutorial uses the same model.
+
+## Setup
+
+First, set up TensorFlow and the necessary imports.
+
+```py
+import tensorflow_datasets as tfds
+import tensorflow as tf
+tfds.disable_progress_bar()
+
+import os, json
+```
+
+## Input function
+
+This tutorial uses the MNIST dataset from [TensorFlow Datasets](https://tensorflow.google.cn/datasets). The code here is similar to the [multi-GPU training tutorial](https://tensorflow.google.cn/tutorials/distribute/keras) with one key difference: when using Estimator for multi-worker training, it is necessary to shard the dataset by the number of workers to ensure model convergence. The input data is sharded by worker index, so that each worker processes `1/num_workers` distinct portions of the dataset.
+
+```py
+BUFFER_SIZE = 10000
+BATCH_SIZE = 64
+
+def input_fn(mode, input_context=None):
+  datasets, info = tfds.load(name='mnist',
+                             with_info=True,
+                             as_supervised=True)
+  mnist_dataset = (datasets['train'] if mode == tf.estimator.ModeKeys.TRAIN else
+                   datasets['test'])
+
+  def scale(image, label):
+    image = tf.cast(image, tf.float32)
+    image /= 255
+    return image, label
+
+  if input_context:
+    mnist_dataset = mnist_dataset.shard(input_context.num_input_pipelines,
+                                        input_context.input_pipeline_id)
+  return mnist_dataset.map(scale).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
+```
+
+Another reasonable approach to achieve convergence would be to shuffle the dataset with distinct seeds on each worker, as sketched below.
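+
+A minimal sketch of that seeding alternative (our own variant of `input_fn`, not from the original tutorial), using the pipeline id from [`tf.distribute.InputContext`](https://tensorflow.google.cn/api_docs/python/tf/distribute/InputContext) as the per-worker seed:
+
+```py
+def seeded_input_fn(mode, input_context=None):
+  datasets, _ = tfds.load(name='mnist', with_info=True, as_supervised=True)
+  mnist_dataset = (datasets['train'] if mode == tf.estimator.ModeKeys.TRAIN else
+                   datasets['test'])
+
+  def scale(image, label):
+    return tf.cast(image, tf.float32) / 255, label
+
+  # No sharding: each worker sees the full dataset, but shuffles it with a
+  # worker-specific seed so the example order differs across workers.
+  seed = input_context.input_pipeline_id if input_context else 0
+  return mnist_dataset.map(scale).shuffle(BUFFER_SIZE, seed=seed).batch(BATCH_SIZE)
+```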
+
+## Multi-worker configuration
+
+One of the key differences in this tutorial (compared to the [multi-GPU training tutorial](https://tensorflow.google.cn/tutorials/distribute/keras)) is the multi-worker setup. The `TF_CONFIG` environment variable is the standard way to specify the cluster configuration to each worker that is part of the cluster.
+
+There are two components of `TF_CONFIG`: `cluster` and `task`. `cluster` provides information about the entire cluster, namely the workers and parameter servers in the cluster. `task` provides information about the current task. In this example, the task `type` is `worker` and the task `index` is 0.
+
+For illustration purposes, this tutorial shows how to set a `TF_CONFIG` with two workers on `localhost`. In practice, you would create multiple workers on external IP addresses and ports, and set `TF_CONFIG` on each worker appropriately, i.e. modify the task `index`.
+
+Warning: Do not execute the following code in Colab. TensorFlow's runtime will attempt to create a gRPC server at the specified IP address and port, which will likely fail.
+
+```py
+os.environ['TF_CONFIG'] = json.dumps({
+    'cluster': {
+        'worker': ["localhost:12345", "localhost:23456"]
+    },
+    'task': {'type': 'worker', 'index': 0}
+})
+```
+
+## Define the model
+
+Write the layers, the optimizer, and the loss function for training. This tutorial defines the model with Keras layers, similar to the [multi-GPU training tutorial](https://tensorflow.google.cn/tutorials/distribute/keras).
+
+```py
+LEARNING_RATE = 1e-4
+def model_fn(features, labels, mode):
+  model = tf.keras.Sequential([
+      tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
+      tf.keras.layers.MaxPooling2D(),
+      tf.keras.layers.Flatten(),
+      tf.keras.layers.Dense(64, activation='relu'),
+      tf.keras.layers.Dense(10, activation='softmax')
+  ])
+  logits = model(features, training=False)
+
+  if mode == tf.estimator.ModeKeys.PREDICT:
+    predictions = {'logits': logits}
+    return tf.estimator.EstimatorSpec(labels=labels, predictions=predictions)
+
+  optimizer = tf.compat.v1.train.GradientDescentOptimizer(
+      learning_rate=LEARNING_RATE)
+  loss = tf.keras.losses.SparseCategoricalCrossentropy(
+      from_logits=True, reduction=tf.keras.losses.Reduction.NONE)(labels, logits)
+  loss = tf.reduce_sum(loss) * (1. / BATCH_SIZE)
+  if mode == tf.estimator.ModeKeys.EVAL:
+    return tf.estimator.EstimatorSpec(mode, loss=loss)
+
+  return tf.estimator.EstimatorSpec(
+      mode=mode,
+      loss=loss,
+      train_op=optimizer.minimize(
+          loss, tf.compat.v1.train.get_or_create_global_step()))
+```
+
+Note: Although the learning rate is fixed in this example, in general it may be necessary to adjust the learning rate based on the global batch size.
+
+## MultiWorkerMirroredStrategy
+
+To train the model, use an instance of [`tf.distribute.experimental.MultiWorkerMirroredStrategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/experimental/MultiWorkerMirroredStrategy). `MultiWorkerMirroredStrategy` creates copies of all variables in the model's layers on each device across all workers. It uses `CollectiveOps`, a TensorFlow op for collective communication, to aggregate gradients and keep the variables in sync. The [`tf.distribute.Strategy` guide](https://tensorflow.google.cn/guide/distribute_strategy) has more details about this strategy.
+
+```py
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+```
+
+```py
+INFO:tensorflow:Using MirroredStrategy with devices ('/device:GPU:0',)
+INFO:tensorflow:Single-worker MultiWorkerMirroredStrategy with local_devices = ('/device:GPU:0',), communication = CollectiveCommunication.AUTO
+
+```
+
+## Train and evaluate the model
+
+Next, specify the distribution strategy in the `RunConfig` for the estimator, and train and evaluate by invoking [`tf.estimator.train_and_evaluate`](https://tensorflow.google.cn/api_docs/python/tf/estimator/train_and_evaluate). This tutorial distributes only the training by specifying the strategy via `train_distribute`. It is also possible to distribute the evaluation via `eval_distribute`.
+
+```py
+config = tf.estimator.RunConfig(train_distribute=strategy)
+
+classifier = tf.estimator.Estimator(
+    model_fn=model_fn, model_dir='/tmp/multiworker', config=config)
+tf.estimator.train_and_evaluate(
+    classifier,
+    train_spec=tf.estimator.TrainSpec(input_fn=input_fn),
+    eval_spec=tf.estimator.EvalSpec(input_fn=input_fn)
+)
+```
+
+```py
+INFO:tensorflow:Initializing RunConfig with distribution strategies.
+INFO:tensorflow:Not using Distribute Coordinator.
+INFO:tensorflow:Using config: {'_model_dir': '/tmp/multiworker', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
+graph_options {
+  rewrite_options {
+    meta_optimizer_iterations: ONE
+  }
+}
+, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': , '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_distribute_coordinator_mode': None}
+INFO:tensorflow:Running training and evaluation locally (non-distributed).
+INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.
+INFO:tensorflow:The `input_fn` accepts an `input_context` which will be given by DistributionStrategy
+WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/data/ops/multi_device_iterator_ops.py:339: get_next_as_optional (from tensorflow.python.data.ops.iterator_ops) is deprecated and will be removed in a future version.
+Instructions for updating:
+Use `tf.data.Iterator.get_next_as_optional()` instead.
+
+INFO:tensorflow:Calling model_fn.
+
+INFO:tensorflow:Done calling model_fn.
+
+Warning:tensorflow:AutoGraph could not transform . at 0x7f975c181c80> and will run it as-is.
+Cause: could not parse the source code:
+
+    lambda scaffold: scaffold.ready_op, args=(grouped_scaffold,))
+
+This error may be avoided by creating the lambda in a standalone statement.
+
+To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert
+INFO:tensorflow:Create CheckpointSaverHook.
+
+Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/util.py:96: DistributedIteratorV1.initialize (from tensorflow.python.distribute.input_lib) is deprecated and will be removed in a future version.
+Instructions for updating:
+Use the iterator's `initializer` property instead.
+
+INFO:tensorflow:Graph was finalized.
+
+INFO:tensorflow:Running local_init_op.
+
+INFO:tensorflow:Done running local_init_op.
+
+INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
+
+INFO:tensorflow:Saving checkpoints for 0 into /tmp/multiworker/model.ckpt.
+
+INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
+
+INFO:tensorflow:loss = 2.3033497, step = 0
+
+INFO:tensorflow:global_step/sec: 195.373
+
+INFO:tensorflow:loss = 2.3039753, step = 100 (0.514 sec)
+
+INFO:tensorflow:global_step/sec: 214.711
+
+INFO:tensorflow:loss = 2.3031363, step = 200 (0.465 sec)
+
+INFO:tensorflow:global_step/sec: 217.488
+
+INFO:tensorflow:loss = 2.3034592, step = 300 (0.460 sec)
+
+INFO:tensorflow:global_step/sec: 218.917
+
+INFO:tensorflow:loss = 2.3013198, step = 400 (0.457 sec)
+
+INFO:tensorflow:global_step/sec: 219.726
+
+INFO:tensorflow:loss = 2.3037362, step = 500 (0.455 sec)
+
+INFO:tensorflow:global_step/sec: 219.401
+
+INFO:tensorflow:loss = 2.3062348, step = 600 (0.455 sec)
+
+INFO:tensorflow:global_step/sec: 220.068
+
+INFO:tensorflow:loss = 2.300187, step = 700 (0.455 sec)
+
+INFO:tensorflow:global_step/sec: 246.384
+
+INFO:tensorflow:loss = 2.30475, step = 800 (0.405 sec)
+
+INFO:tensorflow:global_step/sec: 587.13
+
+INFO:tensorflow:loss = 2.3031988, step = 900 (0.170 sec)
+
+INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 938...
+
+INFO:tensorflow:Saving checkpoints for 938 into /tmp/multiworker/model.ckpt.
+
+INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 938...
+
+INFO:tensorflow:Calling model_fn.
+
+INFO:tensorflow:Done calling model_fn.
+
+INFO:tensorflow:Starting evaluation at 2020-09-22T19:53:28Z
+
+INFO:tensorflow:Graph was finalized.
+
+INFO:tensorflow:Restoring parameters from /tmp/multiworker/model.ckpt-938
+
+INFO:tensorflow:Running local_init_op.
+
+INFO:tensorflow:Done running local_init_op.
+
+INFO:tensorflow:Evaluation [10/100]
+
+INFO:tensorflow:Evaluation [20/100]
+
+INFO:tensorflow:Evaluation [30/100]
+
+INFO:tensorflow:Evaluation [40/100]
+
+INFO:tensorflow:Evaluation [50/100]
+
+INFO:tensorflow:Evaluation [60/100]
+
+INFO:tensorflow:Evaluation [70/100]
+
+INFO:tensorflow:Evaluation [80/100]
+
+INFO:tensorflow:Evaluation [90/100]
+
+INFO:tensorflow:Evaluation [100/100]
+
+INFO:tensorflow:Inference Time : 0.98988s
+
+INFO:tensorflow:Finished evaluation at 2020-09-22-19:53:29
+
+INFO:tensorflow:Saving dict for global step 938: global_step = 938, loss = 2.3031592
+
+INFO:tensorflow:Saving 'checkpoint_path' summary for global step 938: /tmp/multiworker/model.ckpt-938
+
+INFO:tensorflow:Loss for final step: 1.1519132.
+
+({'loss': 2.3031592, 'global_step': 938}, [])
+
+```
+
+## Optimize training performance
+
+You now have a model and a multi-worker capable Estimator powered by [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy). You can try the following techniques to optimize the performance of multi-worker training.
+
+* *Increase the batch size:* The batch size specified here is per-GPU. In general, the largest batch size that fits the GPU memory is advisable.
+* *Cast variables:* Cast the variables to `tf.float` if possible. The official ResNet model includes an [example](https://github.com/tensorflow/models/blob/8367cf6dabe11adf7628541706b660821f397dce/official/resnet/resnet_model.py#L466) of how this can be done.
+* *Use collective communication:* `MultiWorkerMirroredStrategy` provides multiple [collective communication implementations](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/distribute/cross_device_ops.py).
+    * `RING` implements ring-based collectives using gRPC as the cross-host communication layer.
+    * `NCCL` uses [Nvidia's NCCL](https://developer.nvidia.com/nccl) to implement collectives.
+    * `AUTO` defers the choice to the runtime.
+
+The best choice of collective implementation depends upon the number and kind of GPUs, and the network interconnect in the cluster. To override the automatic choice, specify a valid value for the `communication` parameter of `MultiWorkerMirroredStrategy`'s constructor, e.g. `communication=tf.distribute.experimental.CollectiveCommunication.NCCL`.
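+
+For example, to force NCCL collectives instead of deferring the choice to the runtime, the strategy from earlier in this tutorial could be constructed as follows (a sketch; the value is one of the options listed above):
+
+```py
+# Override the automatic choice of collective implementation with NCCL.
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+    communication=tf.distribute.experimental.CollectiveCommunication.NCCL)
+```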
+
+## More code examples
+
+1. An [end-to-end example](https://github.com/tensorflow/ecosystem/tree/master/distribution_strategy) for multi-worker training using Kubernetes templates. This example starts with a Keras model and converts it to an Estimator using the [`tf.keras.estimator.model_to_estimator`](https://tensorflow.google.cn/api_docs/python/tf/keras/estimator/model_to_estimator) API.
+2. The official [ResNet50](https://github.com/tensorflow/models/blob/master/official/resnet/imagenet_main.py) model, which can be trained using either `MirroredStrategy` or `MultiWorkerMirroredStrategy`.
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/038.md b/Tensorflow/TensorFlow2.0/038.md
new file mode 100644
index 00000000..42f9c185
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/038.md
@@ -0,0 +1,524 @@
+# Save and load a model using a distribution strategy
+
+> Original: [https://tensorflow.google.cn/tutorials/distribute/save_and_load](https://tensorflow.google.cn/tutorials/distribute/save_and_load)
+
+## Overview
+
+It is common to save and load a model during training. There are two sets of APIs for saving and loading a Keras model: a high-level API and a low-level API. This tutorial demonstrates how you can use the SavedModel APIs when using [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy). To learn about SavedModel and serialization in general, please read the [saved model guide](https://tensorflow.google.cn/guide/saved_model) and the [Keras model serialization guide](https://tensorflow.google.cn/guide/keras/save_and_serialize). Let's start with a simple example:
+
+Import dependencies:
+
+```py
+import tensorflow_datasets as tfds
+
+import tensorflow as tf
+tfds.disable_progress_bar()
+```
+
+Prepare the data and model using [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy):
+
+```py
+mirrored_strategy = tf.distribute.MirroredStrategy()
+
+def get_data():
+  datasets, ds_info = tfds.load(name='mnist', with_info=True, as_supervised=True)
+  mnist_train, mnist_test = datasets['train'], datasets['test']
+
+  BUFFER_SIZE = 10000
+
+  BATCH_SIZE_PER_REPLICA = 64
+  BATCH_SIZE = BATCH_SIZE_PER_REPLICA * mirrored_strategy.num_replicas_in_sync
+
+  def scale(image, label):
+    image = tf.cast(image, tf.float32)
+    image /= 255
+
+    return image, label
+
+  train_dataset = mnist_train.map(scale).cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
+  eval_dataset = mnist_test.map(scale).batch(BATCH_SIZE)
+
+  return train_dataset, eval_dataset
+
+def get_model():
+  with mirrored_strategy.scope():
+    model = tf.keras.Sequential([
+        tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
+        tf.keras.layers.MaxPooling2D(),
+        tf.keras.layers.Flatten(),
+        tf.keras.layers.Dense(64, activation='relu'),
+        tf.keras.layers.Dense(10)
+    ])
+
+    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+                  optimizer=tf.keras.optimizers.Adam(),
+                  metrics=['accuracy'])
+    return model
+```
+
+```py
+INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
+
+```
+
+Train the model:
+
+```py
+model = get_model()
+train_dataset, eval_dataset = get_data()
+model.fit(train_dataset, epochs=2)
+```
+
+```py
+Epoch 1/2
+WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/data/ops/multi_device_iterator_ops.py:601: get_next_as_optional (from tensorflow.python.data.ops.iterator_ops) is deprecated and will be removed in a future version.
+Instructions for updating:
+Use `tf.data.Iterator.get_next_as_optional()` instead.
+
+INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
+
+938/938 [==============================] - 4s 5ms/step - loss: 0.1971 - accuracy: 0.9421
+INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
+
+Epoch 2/2
+938/938 [==============================] - 3s 3ms/step - loss: 0.0662 - accuracy: 0.9801
+
+```
+
+## Save and load the model
+
+Now that you have a simple model to work with, let's take a look at the saving/loading APIs. There are two sets of APIs available:
+
+* High-level Keras `model.save` and [`tf.keras.models.load_model`](https://tensorflow.google.cn/api_docs/python/tf/keras/models/load_model)
+* Low-level [`tf.saved_model.save`](https://tensorflow.google.cn/api_docs/python/tf/saved_model/save) and [`tf.saved_model.load`](https://tensorflow.google.cn/api_docs/python/tf/saved_model/load)
+
+### Keras API
+
+Here is an example of saving and loading a model with the Keras API:
+
+```py
+keras_model_path = "/tmp/keras_save"
+model.save(keras_model_path)  # save() should be called out of strategy scope
+```
+
+```py
+WARNING:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/training/tracking/tracking.py:111: Model.state_updates (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.
+Instructions for updating:
+This property should not be used in TensorFlow 2.0, as updates are applied automatically.
+
+Warning:tensorflow:From /tmpfs/src/tf_docs_env/lib/python3.6/site-packages/tensorflow/python/training/tracking/tracking.py:111: Layer.updates (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
+Instructions for updating:
+This property should not be used in TensorFlow 2.0, as updates are applied automatically.
+
+INFO:tensorflow:Assets written to: /tmp/keras_save/assets
+
+```
+
+Restore the model without [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy):
+
+```py
+restored_keras_model = tf.keras.models.load_model(keras_model_path)
+restored_keras_model.fit(train_dataset, epochs=2)
+```
+
+```py
+Epoch 1/2
+938/938 [==============================] - 3s 3ms/step - loss: 0.0480 - accuracy: 0.0990
+Epoch 2/2
+938/938 [==============================] - 2s 2ms/step - loss: 0.0334 - accuracy: 0.0989
+
+```
+
+After restoring the model, you can continue training on it, even without needing to call `compile()` again, since it was already compiled before saving. The model is saved in TensorFlow's standard `SavedModel` proto format. For more information, please refer to the [guide to the `saved_model` format](https://tensorflow.google.cn/guide/saved_model).
+
+Now, load the model and train it using a [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy):
+
+```py
+another_strategy = tf.distribute.OneDeviceStrategy("/cpu:0")
+with another_strategy.scope():
+  restored_keras_model_ds = tf.keras.models.load_model(keras_model_path)
+  restored_keras_model_ds.fit(train_dataset, epochs=2)
+```
+
+```py
+Epoch 1/2
+938/938 [==============================] - 9s 9ms/step - loss: 0.0481 - accuracy: 0.0989
+Epoch 2/2
+938/938 [==============================] - 9s 9ms/step - loss: 0.0329 - accuracy: 0.0990
+
+```
+
+As you can see, loading works as expected with [`tf.distribute.Strategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy). The strategy used here does not have to be the same strategy used before saving.
+
+### The [`tf.saved_model`](https://tensorflow.google.cn/api_docs/python/tf/saved_model) API
+
+Now let's take a look at the lower-level API. Saving the model is similar to the Keras API:
+
+```py
+model = get_model()  # get a fresh model
+saved_model_path = "/tmp/tf_save"
+tf.saved_model.save(model, saved_model_path)
+```
+
+```py
+INFO:tensorflow:Assets written to: /tmp/tf_save/assets
+
+```
+
+Loading can be done with [`tf.saved_model.load()`](https://tensorflow.google.cn/api_docs/python/tf/saved_model/load). However, since it is a lower-level API (and hence has a wider range of use cases), it does not return a Keras model. Instead, it returns an object that contains functions that can be used to do inference. For example:
+
+```py
+DEFAULT_FUNCTION_KEY = "serving_default"
+loaded = tf.saved_model.load(saved_model_path)
+inference_func = loaded.signatures[DEFAULT_FUNCTION_KEY]
+```
+
+The loaded object may contain multiple functions, each associated with a key. `"serving_default"` is the default key for the inference function of a saved Keras model. To do inference with this function, run:
+
+```py
+predict_dataset = eval_dataset.map(lambda image, label: image)
+for batch in predict_dataset.take(1):
+  print(inference_func(batch))
+```
+
+```py
+{'dense_3': }
+
+```
+
+You can also load and do inference in a distributed manner:
+
+```py
+another_strategy = tf.distribute.MirroredStrategy()
+with another_strategy.scope():
+  loaded = tf.saved_model.load(saved_model_path)
+  inference_func = loaded.signatures[DEFAULT_FUNCTION_KEY]
+
+  dist_predict_dataset = another_strategy.experimental_distribute_dataset(
+      predict_dataset)
+
+  # Calling the function in a distributed manner
+  for batch in dist_predict_dataset:
+    another_strategy.run(inference_func, args=(batch,))
+```
+
+```py
+INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
+
+Warning:tensorflow:Using MirroredStrategy eagerly has significant overhead currently. We will be working on improving this in the future, but for now please wrap `call_for_each_replica` or `experimental_run` or `experimental_run_v2` inside a tf.function to get the best performance.
+
+```
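+
+The warning above suggests wrapping the per-replica call in a [`tf.function`](https://tensorflow.google.cn/api_docs/python/tf/function) for better performance. A minimal sketch of that, reusing the objects defined in the previous snippet (`distributed_inference` is our own name, not part of the original):
+
+```py
+@tf.function
+def distributed_inference(batch):
+  # Runs the restored serving function on every replica inside a tf.function,
+  # avoiding the eager-mode overhead mentioned in the warning.
+  return another_strategy.run(inference_func, args=(batch,))
+
+for batch in dist_predict_dataset:
+  distributed_inference(batch)
+```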
+
+Calling the restored function is just a forward pass on the saved model (i.e. prediction). What if you want to continue training the loaded function, or embed the loaded function into a bigger model?
A common practice is to wrap this loaded object into a Keras layer to achieve this. Luckily, [TF Hub](https://tensorflow.google.cn/hub) has [hub.KerasLayer](https://github.com/tensorflow/hub/blob/master/tensorflow_hub/keras_layer.py) for this purpose, shown here:
+
+```py
+import tensorflow_hub as hub
+
+def build_model(loaded):
+  x = tf.keras.layers.Input(shape=(28, 28, 1), name='input_x')
+  # Wrap what's loaded to a KerasLayer
+  keras_layer = hub.KerasLayer(loaded, trainable=True)(x)
+  model = tf.keras.Model(x, keras_layer)
+  return model
+
+another_strategy = tf.distribute.MirroredStrategy()
+with another_strategy.scope():
+  loaded = tf.saved_model.load(saved_model_path)
+  model = build_model(loaded)
+
+  model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+                optimizer=tf.keras.optimizers.Adam(),
+                metrics=['accuracy'])
+  model.fit(train_dataset, epochs=2)
+```
+
+```py
+INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
+
+Epoch 1/2
+938/938 [==============================] - 3s 3ms/step - loss: 0.2059 - accuracy: 0.9393
+Epoch 2/2
+938/938 [==============================] - 3s 3ms/step - loss: 0.0681 - accuracy: 0.9799
+
+```
+
+As you can see, [`hub.KerasLayer`](https://tensorflow.google.cn/hub/api_docs/python/hub/KerasLayer) wraps the result loaded back from [`tf.saved_model.load()`](https://tensorflow.google.cn/api_docs/python/tf/saved_model/load) into a Keras layer that can be used to build another model. This is very useful for transfer learning.
+
+### Which API should I use?
+
+For saving, if you are working with a Keras model, it is almost always recommended to use the Keras `model.save()` API. If what you are saving is not a Keras model, then the lower-level API is your only choice.
+
+For loading, which API you use depends on what you want to get from the loading API. If you cannot (or do not want to) get a Keras model, then use [`tf.saved_model.load()`](https://tensorflow.google.cn/api_docs/python/tf/saved_model/load). Otherwise, use [`tf.keras.models.load_model()`](https://tensorflow.google.cn/api_docs/python/tf/keras/models/load_model). Note that you can get a Keras model back only if you saved a Keras model.
+
+It is possible to mix and match the APIs. You can save a Keras model with `model.save`, and load a non-Keras model with the low-level API, [`tf.saved_model.load`](https://tensorflow.google.cn/api_docs/python/tf/saved_model/load).
+
+```py
+model = get_model()
+
+# Saving the model using Keras's save() API
+model.save(keras_model_path)
+
+another_strategy = tf.distribute.MirroredStrategy()
+# Loading the model using lower level API
+with another_strategy.scope():
+  loaded = tf.saved_model.load(keras_model_path)
+```
+
+```py
+INFO:tensorflow:Assets written to: /tmp/keras_save/assets
+
+INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
+
+```
+
+### Caveats
+
+A special case is when you have a Keras model that does not have well-defined inputs. For example, a Sequential model can be created without any input shapes (`Sequential([Dense(3), ...])`). Subclassed models also do not have well-defined inputs after initialization. In this case, you should stick with the lower-level APIs on both saving and loading, otherwise you will get an error.
+
+To check whether your model has well-defined inputs, just check if `model.inputs` is `None`. If it is not `None`, you are all good. Input shapes are automatically defined when the model is used in `.fit`, `.evaluate`, `.predict`, or when calling the model (`model(inputs)`).
+
+Here is an example:
+
+```py
+class SubclassedModel(tf.keras.Model):
+
+  output_name = 'output_layer'
+
+  def __init__(self):
+    super(SubclassedModel, self).__init__()
+    self._dense_layer = tf.keras.layers.Dense(
+        5, dtype=tf.dtypes.float32, name=self.output_name)
+
+  def call(self, inputs):
+    return self._dense_layer(inputs)
+
+my_model = SubclassedModel()
+# my_model.save(keras_model_path)  # ERROR!
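+# `my_model.inputs` is still `None` here (the check described above),
+# so the model must be saved with the lower-level API instead: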
+tf.saved_model.save(my_model, saved_model_path)
+```
+
+```py
+WARNING:tensorflow:Skipping full serialization of Keras layer <__main__.SubclassedModel object at 0x7f96b1c92320>, because it is not built.
+
+Warning:tensorflow:Skipping full serialization of Keras layer , because it is not built.
+
+INFO:tensorflow:Assets written to: /tmp/tf_save/assets
+
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/039.md b/Tensorflow/TensorFlow2.0/039.md
new file mode 100644
index 00000000..b2995dda
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/039.md
@@ -0,0 +1,2216 @@
+# Distributed Input
+
+> Original: [https://tensorflow.google.cn/tutorials/distribute/input](https://tensorflow.google.cn/tutorials/distribute/input)
+
+The [tf.distribute](https://tensorflow.google.cn/guide/distributed_training) APIs provide an easy way for users to scale their training from a single machine to multiple machines. When scaling their model, users also have to distribute their input across multiple devices. [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) provides APIs with which you can automatically distribute your input across devices.
+
+This guide will show you the different ways in which you can create distributed datasets and iterators using [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) APIs. Additionally, the following topics will be covered:
+
+* Usage, sharding and batching options when using [`tf.distribute.Strategy.experimental_distribute_dataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy#experimental_distribute_dataset) and `tf.distribute.Strategy.experimental_distribute_datasets_from_function`.
+* Different ways in which you can iterate over the distributed dataset.
+* Differences between the [`tf.distribute.Strategy.experimental_distribute_dataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy#experimental_distribute_dataset)/`tf.distribute.Strategy.experimental_distribute_datasets_from_function` APIs and the [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) APIs, as well as any limitations that users may come across in their usage.
+
+This guide does not cover usage of distributed input with Keras APIs.
+
+## Distributed Datasets
+
+To use [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) APIs to scale, it is recommended that users use [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) to represent their input. [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) has been made to work efficiently with [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) (for example, automatic prefetch of data onto each accelerator device), with performance optimizations being regularly incorporated into the implementation. If you have a use case for using something other than [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset), please refer to a later section in this guide.
In a non distributed training loop, users first create a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instance and then iterate over the elements. For example: + +```py +import tensorflow as tf + +# Helper libraries +import numpy as np +import os + +print(tf.__version__) +``` + +```py +2.4.0 + +``` + +```py +global_batch_size = 16 +# Create a tf.data.Dataset object. +dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100).batch(global_batch_size) + +@tf.function +def train_step(inputs): + features, labels = inputs + return labels - 0.3 * features + +# Iterate over the dataset using the for..in construct. +for inputs in dataset: + print(train_step(inputs)) +``` + +```py +tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7]], shape=(4, 1), dtype=float32) + +``` + +To allow users to use [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) strategy with minimal changes to a user’s existing code, two APIs were introduced which would distribute a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instance and return a distributed dataset object. A user could then iterate over this distributed dataset instance and train their model as before. Let us now look at the two APIs - [`tf.distribute.Strategy.experimental_distribute_dataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy#experimental_distribute_dataset) and `tf.distribute.Strategy.experimental_distribute_datasets_from_function` in more detail: + +### [`tf.distribute.Strategy.experimental_distribute_dataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy#experimental_distribute_dataset) + +#### Usage + +This API takes a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instance as input and returns a [`tf.distribute.DistributedDataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedDataset) instance. You should batch the input dataset with a value that is equal to the global batch size. This global batch size is the number of samples that you want to process across all devices in 1 step. You can iterate over this distributed dataset in a Pythonic fashion or create an iterator using `iter`. The returned object is not a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instance and does not support any other APIs that transform or inspect the dataset in any way. 
This is the recommended API if you don't have specific ways in which you want to shard your input over different replicas.
+
+```py
+global_batch_size = 16
+mirrored_strategy = tf.distribute.MirroredStrategy()
+
+dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100).batch(global_batch_size)
+# Distribute input using the `experimental_distribute_dataset`.
+dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)
+# 1 global batch of data fed to the model in 1 step.
+print(next(iter(dist_dataset)))
+```
+
+```py
+INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
+(, )
+
+```
+
+#### Properties
+
+##### Batching
+
+[`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) rebatches the input [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instance with a new batch size that is equal to the global batch size divided by the number of replicas in sync. The number of replicas in sync is equal to the number of devices that are taking part in the gradient allreduce during training. When a user calls `next` on the distributed iterator, a per-replica batch size of data is returned on each replica. The rebatched dataset cardinality will always be a multiple of the number of replicas. Here are a couple of examples:
+
+* `tf.data.Dataset.range(6).batch(4, drop_remainder=False)`
+
+  * Without distribution:
+    * Batch 1: [0, 1, 2, 3]
+    * Batch 2: [4, 5]
+  * With distribution over 2 replicas, the last batch ([4, 5]) is split between the 2 replicas:
+    * Batch 1:
+      * Replica 1: [0, 1]
+      * Replica 2: [2, 3]
+    * Batch 2:
+      * Replica 1: [4]
+      * Replica 2: [5]
+* `tf.data.Dataset.range(4).batch(4)`
+
+  * Without distribution:
+    * Batch 1: [[0], [1], [2], [3]]
+  * With distribution over 5 replicas:
+    * Batch 1:
+      * Replica 1: [0]
+      * Replica 2: [1]
+      * Replica 3: [2]
+      * Replica 4: [3]
+      * Replica 5: []
+* `tf.data.Dataset.range(8).batch(4)`
+
+  * Without distribution:
+    * Batch 1: [0, 1, 2, 3]
+    * Batch 2: [4, 5, 6, 7]
+  * With distribution over 3 replicas:
+    * Batch 1:
+      * Replica 1: [0, 1]
+      * Replica 2: [2, 3]
+      * Replica 3: []
+    * Batch 2:
+      * Replica 1: [4, 5]
+      * Replica 2: [6, 7]
+      * Replica 3: []
+
+**Note:** The above examples only illustrate how a global batch is split on different replicas. It is not advisable to depend on the actual values that might end up on each replica, as they can change depending on the implementation.
+
+Rebatching the dataset has a space complexity that increases linearly with the number of replicas. This means that for the multi worker training use case the input pipeline can run into OOM errors.
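+
+You can observe this rebatching directly. The following sketch is not from the original guide: it assumes a fresh process (logical devices can only be configured before TensorFlow initializes them) and splits the CPU into two logical devices so that `MirroredStrategy` sees two replicas.
+
+```py
+# Must run before TensorFlow initializes its devices, e.g. in a fresh process.
+cpu = tf.config.list_physical_devices('CPU')[0]
+tf.config.set_logical_device_configuration(
+    cpu, [tf.config.LogicalDeviceConfiguration(),
+          tf.config.LogicalDeviceConfiguration()])
+
+two_replica_strategy = tf.distribute.MirroredStrategy(['/cpu:0', '/cpu:1'])
+dataset = tf.data.Dataset.range(6).batch(4, drop_remainder=False)
+dist_dataset = two_replica_strategy.experimental_distribute_dataset(dataset)
+
+for batch in dist_dataset:
+  # Each element holds one tensor per replica; the global batch of 4 is
+  # rebatched into two per-replica batches of 2.
+  print(two_replica_strategy.experimental_local_results(batch))
+```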
+
+##### Sharding
+
+[`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) also autoshards the input dataset in multi worker training with `MultiWorkerMirroredStrategy` and `TPUStrategy`. Each dataset is created on the CPU device of the worker. Autosharding a dataset over a set of workers means that each worker is assigned a subset of the entire dataset (if the right [`tf.data.experimental.AutoShardPolicy`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/AutoShardPolicy) is set). This is to ensure that at each step, a global batch size of non-overlapping dataset elements will be processed by each worker. Autosharding has a couple of different options that can be specified using [`tf.data.experimental.DistributeOptions`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/DistributeOptions). Note that there is no autosharding in multi worker training with `ParameterServerStrategy`; more information on dataset creation with that strategy can be found in the [Parameter Server Strategy tutorial](/tutorials/distribute/parameter_server_training).
+
+```py
+dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(64).batch(16)
+options = tf.data.Options()
+options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
+dataset = dataset.with_options(options)
+```
+
+There are three different options that you can set for the [`tf.data.experimental.AutoShardPolicy`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/AutoShardPolicy):
+
+* AUTO: This is the default option, which means an attempt will be made to shard by FILE. The attempt to shard by FILE fails if a file-based dataset is not detected. [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) will then fall back to sharding by DATA. Note that if the input dataset is file-based but the number of files is less than the number of workers, an `InvalidArgumentError` will be raised. If this happens, explicitly set the policy to [`AutoShardPolicy.DATA`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/AutoShardPolicy#DATA), or split your input source into smaller files such that the number of files is greater than the number of workers.
+* FILE: This is the option if you want to shard the input files over all the workers. You should use this option if the number of input files is much larger than the number of workers and the data in the files is evenly distributed. The downside of this option is having idle workers if the data in the files is not evenly distributed. If the number of files is less than the number of workers, an `InvalidArgumentError` will be raised. If this happens, explicitly set the policy to [`AutoShardPolicy.DATA`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/AutoShardPolicy#DATA). For example, let us distribute 2 files over 2 workers with 1 replica each. File 1 contains [0, 1, 2, 3, 4, 5] and File 2 contains [6, 7, 8, 9, 10, 11]. Let the total number of replicas in sync be 2 and the global batch size be 4.
+
+  * Worker 0:
+    * Batch 1 = Replica 1: [0, 1]
+    * Batch 2 = Replica 1: [2, 3]
+    * Batch 3 = Replica 1: [4]
+    * Batch 4 = Replica 1: [5]
+  * Worker 1:
+    * Batch 1 = Replica 2: [6, 7]
+    * Batch 2 = Replica 2: [8, 9]
+    * Batch 3 = Replica 2: [10]
+    * Batch 4 = Replica 2: [11]
+* DATA: This will autoshard the elements across all the workers. Each of the workers will read the entire dataset and only process the shard assigned to it. All other shards will be discarded. This is generally used if the number of input files is less than the number of workers and you want better sharding of data across all workers. The downside is that the entire dataset will be read on each worker. For example, let us distribute 1 file over 2 workers. File 1 contains [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]. Let the total number of replicas in sync be 2.
+
+  * Worker 0:
+    * Batch 1 = Replica 1: [0, 1]
+    * Batch 2 = Replica 1: [4, 5]
+    * Batch 3 = Replica 1: [8, 9]
+  * Worker 1:
+    * Batch 1 = Replica 2: [2, 3]
+    * Batch 2 = Replica 2: [6, 7]
+    * Batch 3 = Replica 2: [10, 11]
+* OFF: If you turn off autosharding, each worker will process all the data. For example, let us distribute 1 file over 2 workers. File 1 contains [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]. Let the total number of replicas in sync be 2.
Then each worker will see the following distribution: + + * Worker 0: + * Batch 1 = Replica 1: [0, 1] + * Batch 2 = Replica 1: [2, 3] + * Batch 3 = Replica 1: [4, 5] + * Batch 4 = Replica 1: [6, 7] + * Batch 5 = Replica 1: [8, 9] + * Batch 6 = Replica 1: [10, 11] + + * Worker 1: + + * Batch 1 = Replica 2: [0, 1] + + * Batch 2 = Replica 2: [2, 3] + + * Batch 3 = Replica 2: [4, 5] + + * Batch 4 = Replica 2: [6, 7] + + * Batch 5 = Replica 2: [8, 9] + + * Batch 6 = Replica 2: [10, 11] + +##### Prefetching + +By default, [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) adds a prefetch transformation at the end of the user provided [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instance. The argument to the prefetch transformation which is `buffer_size` is equal to the number of replicas in sync. + +### `tf.distribute.Strategy.experimental_distribute_datasets_from_function` + +#### Usage + +This API takes an input function and returns a [`tf.distribute.DistributedDataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedDataset) instance. The input function that users pass in has a [`tf.distribute.InputContext`](https://tensorflow.google.cn/api_docs/python/tf/distribute/InputContext) argument and should return a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instance. With this API, [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) does not make any further changes to the user’s [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instance returned from the input function. It is the responsibility of the user to batch and shard the dataset. [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) calls the input function on the CPU device of each of the workers. Apart from allowing users to specify their own batching and sharding logic, this API also demonstrates better scalability and performance compared to [`tf.distribute.Strategy.experimental_distribute_dataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy#experimental_distribute_dataset) when used for multi worker training. + +```py +mirrored_strategy = tf.distribute.MirroredStrategy() + +def dataset_fn(input_context): + batch_size = input_context.get_per_replica_batch_size(global_batch_size) + dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(64).batch(16) + dataset = dataset.shard( + input_context.num_input_pipelines, input_context.input_pipeline_id) + dataset = dataset.batch(batch_size) + dataset = dataset.prefetch(2) # This prefetches 2 batches per device. + return dataset + +dist_dataset = mirrored_strategy.experimental_distribute_datasets_from_function(dataset_fn) +``` + +```py +INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',) +WARNING:tensorflow:From :12: StrategyBase.experimental_distribute_datasets_from_function (from tensorflow.python.distribute.distribute_lib) is deprecated and will be removed in a future version. +Instructions for updating: +rename to distribute_datasets_from_function + +``` + +#### Properties + +##### Batching + +The [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instance that is the return value of the input function should be batched using the per replica batch size. The per replica batch size is the global batch size divided by the number of replicas that are taking part in sync training. 
This is because [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) calls the input function on the CPU device of each of the workers. The dataset that is created on a given worker should be ready to use by all the replicas on that worker. + +##### Sharding + +The [`tf.distribute.InputContext`](https://tensorflow.google.cn/api_docs/python/tf/distribute/InputContext) object that is implicitly passed as an argument to the user’s input function is created by [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) under the hood. It has information about the number of workers, current worker id etc. This input function can handle sharding as per policies set by the user using these properties that are part of the [`tf.distribute.InputContext`](https://tensorflow.google.cn/api_docs/python/tf/distribute/InputContext) object. + +##### Prefetching + +[`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) does not add a prefetch transformation at the end of the [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) returned by the user provided input function. + +**Note:** Both [`tf.distribute.Strategy.experimental_distribute_dataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy#experimental_distribute_dataset) and `tf.distribute.Strategy.experimental_distribute_datasets_from_function` return **[`tf.distribute.DistributedDataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedDataset) instances that are not of type [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset)**. You can iterate over these instances (as shown in the Distributed Iterators section) and use the `element_spec` property. + +## Distributed Iterators + +Similar to non-distributed [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instances, you will need to create an iterator on the [`tf.distribute.DistributedDataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedDataset) instances to iterate over it and access the elements in the [`tf.distribute.DistributedDataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedDataset). The following are the ways in which you can create an [`tf.distribute.DistributedIterator`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedIterator) and use it to train your model: + +### Usages + +#### Use a Pythonic for loop construct + +You can use a user friendly Pythonic loop to iterate over the [`tf.distribute.DistributedDataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedDataset). The elements returned from the [`tf.distribute.DistributedIterator`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedIterator) can be a single [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor) or a [`tf.distribute.DistributedValues`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedValues) which contains a value per replica. Placing the loop inside a [`tf.function`](https://tensorflow.google.cn/api_docs/python/tf/function) will give a performance boost. However, `break` and `return` are currently not supported for a loop over a [`tf.distribute.DistributedDataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedDataset) that is placed inside of a [`tf.function`](https://tensorflow.google.cn/api_docs/python/tf/function). 
+ +```py +global_batch_size = 16 +mirrored_strategy = tf.distribute.MirroredStrategy() + +dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(100).batch(global_batch_size) +dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset) + +@tf.function +def train_step(inputs): + features, labels = inputs + return labels - 0.3 * features + +for x in dist_dataset: + # train_step trains the model using the dataset elements + loss = mirrored_strategy.run(train_step, args=(x,)) + print("Loss is ", loss) +``` + +```py +INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',) +Loss is tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +Loss is tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +Loss is tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +Loss is tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +Loss is tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +Loss is tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7] + [0.7]], shape=(16, 1), dtype=float32) +Loss is tf.Tensor( +[[0.7] + [0.7] + [0.7] + [0.7]], shape=(4, 1), dtype=float32) + +``` + +#### Use `iter` to create an explicit iterator + +To iterate over the elements in a [`tf.distribute.DistributedDataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedDataset) instance, you can create a [`tf.distribute.DistributedIterator`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedIterator) using the `iter` API on it. With an explicit iterator, you can iterate for a fixed number of steps. In order to get the next element from an [`tf.distribute.DistributedIterator`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedIterator) instance `dist_iterator`, you can call `next(dist_iterator)`, `dist_iterator.get_next()`, or `dist_iterator.get_next_as_optional()`. 
#### Use `iter` to create an explicit iterator

To iterate over the elements in a [`tf.distribute.DistributedDataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedDataset) instance, you can create a [`tf.distribute.DistributedIterator`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedIterator) using the `iter` API on it. With an explicit iterator, you can iterate for a fixed number of steps. To get the next element from a [`tf.distribute.DistributedIterator`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedIterator) instance `dist_iterator`, you can call `next(dist_iterator)`, `dist_iterator.get_next()`, or `dist_iterator.get_next_as_optional()`. The former two are essentially the same:

```py
num_epochs = 10
steps_per_epoch = 5
for epoch in range(num_epochs):
  dist_iterator = iter(dist_dataset)
  for step in range(steps_per_epoch):
    # train_step trains the model using the dataset elements
    loss = mirrored_strategy.run(train_step, args=(next(dist_iterator),))
    # which is the same as
    # loss = mirrored_strategy.run(train_step, args=(dist_iterator.get_next(),))
    print("Loss is ", loss)
```

```py
Loss is tf.Tensor(
[[0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]
 [0.7]], shape=(16, 1), dtype=float32)
... (the same tensor is printed for each of the remaining 49 steps) ...

```

With `next()` or [`tf.distribute.DistributedIterator.get_next()`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedIterator#get_next), if the [`tf.distribute.DistributedIterator`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedIterator) has reached its end, an OutOfRange error will be thrown. The client can catch the error on the Python side and continue doing other work, such as checkpointing and evaluation. However, this will not work if you are using a host training loop (that is, running multiple steps per [`tf.function`](https://tensorflow.google.cn/api_docs/python/tf/function)), which looks like:

```py
@tf.function
def train_fn(iterator):
  for _ in tf.range(steps_per_loop):
    strategy.run(step_fn, args=(next(iterator),))
```

`train_fn` contains multiple steps by wrapping the step body inside a [`tf.range`](https://tensorflow.google.cn/api_docs/python/tf/range). In this case, different iterations in the loop with no dependency between them could start in parallel, so an OutOfRange error can be triggered in later iterations before the computation of previous iterations finishes. Once an OutOfRange error is thrown, all the ops in the function are terminated right away.
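Outside of a host training loop, catching the error on the Python side might look like the following minimal sketch (not from the original guide), reusing `dist_dataset`, `mirrored_strategy`, and `train_step` from the earlier examples:

```py
dist_iterator = iter(dist_dataset)
while True:
  try:
    loss = mirrored_strategy.run(train_step, args=(next(dist_iterator),))
  except (StopIteration, tf.errors.OutOfRangeError):
    # The iterator is exhausted: checkpoint, evaluate, or start a new epoch here.
    break
```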
If you would like to avoid the error inside the [`tf.function`](https://tensorflow.google.cn/api_docs/python/tf/function) itself, an alternative that does not throw an OutOfRange error is [`tf.distribute.DistributedIterator.get_next_as_optional()`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedIterator#get_next_as_optional). `get_next_as_optional` returns a [`tf.experimental.Optional`](https://tensorflow.google.cn/api_docs/python/tf/experimental/Optional) that contains the next element, or no value if the [`tf.distribute.DistributedIterator`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedIterator) has reached the end.

```py
# You can break the loop with get_next_as_optional by checking if the Optional contains a value
global_batch_size = 4
steps_per_loop = 5
strategy = tf.distribute.MirroredStrategy(devices=["GPU:0", "CPU:0"])

dataset = tf.data.Dataset.range(9).batch(global_batch_size)
distributed_iterator = iter(strategy.experimental_distribute_dataset(dataset))

@tf.function
def train_fn(distributed_iterator):
  for _ in tf.range(steps_per_loop):
    optional_data = distributed_iterator.get_next_as_optional()
    if not optional_data.has_value():
      break
    per_replica_results = strategy.run(lambda x: x, args=(optional_data.get_value(),))
    tf.print(strategy.experimental_local_results(per_replica_results))
train_fn(distributed_iterator)
```

```py
WARNING:tensorflow:There are non-GPU devices in `tf.distribute.Strategy`, not using nccl allreduce.
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:CPU:0')
([0 1], [2 3])
([4 5], [6 7])
([8], [])

```

## Using the `element_spec` property

If you pass the elements of a distributed dataset to a [`tf.function`](https://tensorflow.google.cn/api_docs/python/tf/function) and want a [`tf.TypeSpec`](https://tensorflow.google.cn/api_docs/python/tf/TypeSpec) guarantee, you can specify the `input_signature` argument of the [`tf.function`](https://tensorflow.google.cn/api_docs/python/tf/function). The output of a distributed dataset is [`tf.distribute.DistributedValues`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedValues), which can represent the input to a single device or to multiple devices. To get the [`tf.TypeSpec`](https://tensorflow.google.cn/api_docs/python/tf/TypeSpec) corresponding to this distributed value, you can use the `element_spec` property of the distributed dataset or distributed iterator object.

```py
global_batch_size = 16
epochs = 5
steps_per_epoch = 5
mirrored_strategy = tf.distribute.MirroredStrategy()

dataset = tf.data.Dataset.from_tensors(([1.],[1.])).repeat(100).batch(global_batch_size)
dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)

@tf.function(input_signature=[dist_dataset.element_spec])
def train_step(per_replica_inputs):
  def step_fn(inputs):
    return 2 * inputs

  return mirrored_strategy.run(step_fn, args=(per_replica_inputs,))

for _ in range(epochs):
  iterator = iter(dist_dataset)
  for _ in range(steps_per_epoch):
    output = train_step(next(iterator))
    tf.print(output)
```

```py
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
([[1]
  [1]
  [1]
  ...
  [1]
  [1]
  [1]], [[1]
  [1]
  [1]
  ...
  [1]
  [1]
  [1]], [[1]
  [1]
  [1]
  ...
  [1]
  [1]
  [1]], [[1]
  [1]
  [1]
  ...
  [1]
  [1]
  [1]])
... (the same output is printed for each of the remaining 24 steps) ...

```

## Partial Batches

Partial batches are encountered when the batch size of a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instance is not evenly divisible by the number of replicas, or when the cardinality of the dataset instance is not divisible by the batch size. This means that when the dataset is distributed over multiple replicas, the `next` call on some iterators will result in an OutOfRangeError. To handle this use case, [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) returns dummy batches of batch size 0 on replicas that do not have any more data to process.

For the single-worker case, if data is not returned by the `next` call on the iterator, dummy batches of batch size 0 are created and used along with the real data in the dataset; the last global batch thus contains real data alongside dummy batches. (You can see this in the `get_next_as_optional` example above, where the last global batch was `([8], [])`.) The stopping condition for processing data now checks whether any of the replicas have data; if none of them do, an OutOfRange error is thrown.

For the multi-worker case, a boolean value representing the presence of data on each of the workers is aggregated using cross-replica communication, which is used to identify whether all the workers have finished processing the distributed dataset. Since this involves cross-worker communication, there is some performance penalty involved.

## Caveats

* When using [`tf.distribute.Strategy.experimental_distribute_dataset`](https://tensorflow.google.cn/api_docs/python/tf/distribute/Strategy#experimental_distribute_dataset) APIs with a multi-worker setup, users pass a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) that reads from files. If the [`tf.data.experimental.AutoShardPolicy`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/AutoShardPolicy) is set to `AUTO` or `FILE`, the actual per-step batch size may be smaller than the user-defined global batch size. This can happen when the remaining elements in a file are fewer than the global batch size. Users can either exhaust the dataset without depending on the number of steps to run, or set [`tf.data.experimental.AutoShardPolicy`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/AutoShardPolicy) to `DATA` to work around it.

* Stateful dataset transformations are currently not supported with [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute), and any stateful ops that the dataset may have are currently ignored. For example, if your dataset has a `map_fn` that uses [`tf.random.uniform`](https://tensorflow.google.cn/api_docs/python/tf/random/uniform) to rotate an image, then you have a dataset graph that depends on state (i.e., the random seed) on the local machine where the Python process is being executed.
* Experimental [`tf.data.experimental.OptimizationOptions`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/OptimizationOptions) that are disabled by default can in certain contexts -- such as when used together with [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) -- cause a performance degradation. You should only enable them after you have validated that they benefit the performance of your workload in a distributed setting.

* Please refer to [this guide](https://tensorflow.google.cn/guide/data_performance) for how to optimize your input pipeline with [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) in general. A few additional tips:

  * If you have multiple workers and are using [`tf.data.Dataset.list_files`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#list_files) to create a dataset from all files matching one or more glob patterns, remember to set the `seed` argument or set `shuffle=False` so that each worker shards the files consistently.

  * If your input pipeline includes both shuffling the data at the record level and parsing it, shuffle first and then parse (as shown in the following example), unless the unparsed data is significantly larger than the parsed data (which is usually not the case). This may benefit memory usage and performance.

```py
d = tf.data.Dataset.list_files(pattern, shuffle=False)
d = d.shard(num_workers, worker_index)
d = d.repeat(num_epochs)
d = d.shuffle(shuffle_buffer_size)
d = d.interleave(tf.data.TFRecordDataset,
                 cycle_length=num_readers, block_length=1)
d = d.map(parser_fn, num_parallel_calls=num_map_threads)
```

* [`tf.data.Dataset.shuffle(buffer_size, seed=None, reshuffle_each_iteration=None)`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#shuffle) maintains an internal buffer of `buffer_size` elements, so reducing `buffer_size` can alleviate OOM issues.

* The order in which data is processed by the workers when using `tf.distribute.experimental_distribute_dataset` or `tf.distribute.experimental_distribute_datasets_from_function` is not guaranteed. Ordering is typically required if you are using [`tf.distribute`](https://tensorflow.google.cn/api_docs/python/tf/distribute) to scale prediction; you can, however, insert an index for each element in the batch and order the outputs accordingly. The following snippet is an example of how to order outputs.

**Note:** [`tf.distribute.MirroredStrategy()`](https://tensorflow.google.cn/api_docs/python/tf/distribute/MirroredStrategy) is used here for the sake of convenience. Reordering inputs is only needed when you are using multiple workers; [`tf.distribute.MirroredStrategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/MirroredStrategy) distributes training on a single worker.
```py
mirrored_strategy = tf.distribute.MirroredStrategy()
dataset_size = 24
batch_size = 6
dataset = tf.data.Dataset.range(dataset_size).enumerate().batch(batch_size)
dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)

def predict(index, inputs):
  outputs = 2 * inputs
  return index, outputs

result = {}
for index, inputs in dist_dataset:
  output_index, outputs = mirrored_strategy.run(predict, args=(index, inputs))
  indices = list(mirrored_strategy.experimental_local_results(output_index))
  rindices = []
  for a in indices:
    rindices.extend(a.numpy())
  outputs = list(mirrored_strategy.experimental_local_results(outputs))
  routputs = []
  for a in outputs:
    routputs.extend(a.numpy())
  for i, value in zip(rindices, routputs):
    result[i] = value

print(result)
```

```py
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
WARNING:tensorflow:Using MirroredStrategy eagerly has significant overhead currently. We will be working on improving this in the future, but for now please wrap `call_for_each_replica` or `experimental_run` or `run` inside a tf.function to get the best performance.
... (the same warning is printed for each of the remaining three batches) ...
{0: 0, 1: 2, 2: 4, 3: 6, 4: 8, 5: 10, 6: 12, 7: 14, 8: 16, 9: 18, 10: 20, 11: 22, 12: 24, 13: 26, 14: 28, 15: 30, 16: 32, 17: 34, 18: 36, 19: 38, 20: 40, 21: 42, 22: 44, 23: 46}

```

## How do I distribute my data if I am not using a canonical tf.data.Dataset instance?

Sometimes users cannot use a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) to represent their input, and consequently cannot use the above-mentioned APIs to distribute the dataset to multiple devices. In such cases, you can use raw tensors or inputs from a generator.

### Use experimental_distribute_values_from_function for arbitrary tensor inputs

`strategy.run` accepts [`tf.distribute.DistributedValues`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedValues), which is the output of `next(iterator)`. To pass raw tensor values, use `experimental_distribute_values_from_function` to construct [`tf.distribute.DistributedValues`](https://tensorflow.google.cn/api_docs/python/tf/distribute/DistributedValues) from them.

```py
mirrored_strategy = tf.distribute.MirroredStrategy()
worker_devices = mirrored_strategy.extended.worker_devices

def value_fn(ctx):
  return tf.constant(1.0)

distributed_values = mirrored_strategy.experimental_distribute_values_from_function(value_fn)
for _ in range(4):
  result = mirrored_strategy.run(lambda x: x, args=(distributed_values,))
  print(result)
```

```py
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
WARNING:tensorflow:Using MirroredStrategy eagerly has significant overhead currently. We will be working on improving this in the future, but for now please wrap `call_for_each_replica` or `experimental_run` or `run` inside a tf.function to get the best performance.
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)

```
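The value function also receives a context object. As an assumption not shown in the original guide, that context exposes properties such as the replica ID, which lets each replica receive a different value. A hedged sketch:

```py
# A minimal sketch (assumption: the context passed to the value function
# exposes `replica_id_in_sync_group`), reusing `mirrored_strategy` from above.
def per_replica_value_fn(ctx):
  return tf.constant(float(ctx.replica_id_in_sync_group))

distributed_values = mirrored_strategy.experimental_distribute_values_from_function(
    per_replica_value_fn)
```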
### Use tf.data.Dataset.from_generator if your input is from a generator

If you have a generator function that you want to use, you can create a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) instance using the `from_generator` API.

**Note:** This is currently not supported for [`tf.distribute.TPUStrategy`](https://tensorflow.google.cn/api_docs/python/tf/distribute/TPUStrategy).

```py
mirrored_strategy = tf.distribute.MirroredStrategy()
def input_gen():
  while True:
    yield np.random.rand(4)

# use Dataset.from_generator
dataset = tf.data.Dataset.from_generator(
    input_gen, output_types=(tf.float32), output_shapes=tf.TensorShape([4]))
dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)
iterator = iter(dist_dataset)
for _ in range(4):
  mirrored_strategy.run(lambda x: x, args=(next(iterator),))
```

```py
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)

```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/040.md b/Tensorflow/TensorFlow2.0/040.md
new file mode 100644
index 00000000..b580f28c
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/040.md
@@ -0,0 +1 @@
# Images
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/041.md b/Tensorflow/TensorFlow2.0/041.md
new file mode 100644
index 00000000..2c2e17a8
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/041.md
@@ -0,0 +1,207 @@
# Convolutional Neural Network (CNN)

> Original: [https://tensorflow.google.cn/tutorials/images/cnn](https://tensorflow.google.cn/tutorials/images/cnn)

**Note:** The TensorFlow community translated these documents. Because community translations are best-effort, there is no guarantee that they are accurate or reflect the latest [official English documentation](https://tensorflow.google.cn/?hl=en). If you have suggestions for improving this translation, please submit a pull request to the [tensorflow/docs-l10n](https://github.com/tensorflow/docs-l10n) GitHub repository. To volunteer to write or review translations, join the [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn).

### Import TensorFlow

```py
import tensorflow as tf

from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
```

### Download and prepare the CIFAR10 dataset

The CIFAR10 dataset contains 60,000 color images in 10 classes, with 6,000 images in each class. The dataset is split into 50,000 training images and 10,000 test images. The classes are mutually exclusive, with no overlap between them.

```py
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

# Normalize pixel values to be between 0 and 1.
train_images, test_images = train_images / 255.0, test_images / 255.0
```

### Verify the data

To make sure the dataset was loaded correctly, let's plot the first 25 images from the training set along with their class names.

```py
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
plt.figure(figsize=(10,10))
for i in range(25):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(train_images[i], cmap=plt.cm.binary)
    # The CIFAR labels are arrays,
    # which is why you need the extra index.
    plt.xlabel(class_names[train_labels[i][0]])
plt.show()
```

![png](img/25a15211c7a5c4ce6da843197b4b85eb.png)

### Build the convolutional neural network model

The 6 lines of code below define a common convolutional neural network, composed of several [Conv2D](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Conv2D) and [MaxPooling2D](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/MaxPool2D) layers.

A CNN takes input tensors of shape (image_height, image_width, color_channels), describing the image height, width, and color information; the batch size is not included. If you are unfamiliar with image processing, the RGB color model is recommended; under that model, `color_channels` is `(R,G,B)`, one channel for each of the three RGB color channels. In this example, the CNN input -- the images in the CIFAR dataset -- has shape `(32, 32, 3)`. You can pass this shape to the `input_shape` argument when declaring the first layer.

```py
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
```

The architecture of the CNN declared so far:

```py
model.summary()
```

```py
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d (Conv2D)              (None, 30, 30, 32)        896
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 64)        18496
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64)          0
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 4, 4, 64)          36928
=================================================================
Total params: 56,320
Trainable params: 56,320
Non-trainable params: 0
_________________________________________________________________

```

In the structure above, you can see that the output of every Conv2D and MaxPooling2D layer is a 3D tensor whose shape describes (height, width, channels). The width and height shrink in the deeper layers. The number of output channels of each Conv2D layer is determined by its first argument (e.g., the 32 or 64 in the code above). Because the width and height shrink, you can (computationally) afford to increase the number of output channels of each Conv2D layer.
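As a quick sanity check on the `Param #` column above (a worked example added here, not part of the original tutorial), each Conv2D layer has `(kernel_height * kernel_width * input_channels + 1) * filters` parameters, where the `+ 1` accounts for the bias:

```py
# Worked check of the Param # column in the summary above.
print((3 * 3 * 3  + 1) * 32)  # conv2d   -> 896
print((3 * 3 * 32 + 1) * 64)  # conv2d_1 -> 18496
print((3 * 3 * 64 + 1) * 64)  # conv2d_2 -> 36928
```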
### Add Dense layers

*A Dense layer is equivalent to a fully connected layer.*
At the end of the model, you will feed the convolutional output tensor (of shape (4, 4, 64) in this example) into one or more Dense layers to perform classification. Dense layers take vectors (1D) as input, but the output of the preceding layers is a 3D tensor, so you need to flatten the 3D tensor to 1D before passing it to one or more Dense layers. The CIFAR dataset has 10 classes, so your final Dense layer needs 10 outputs and a softmax activation function.

```py
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10))
```

The complete structure of the CNN:

```py
model.summary()
```

```py
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d (Conv2D)              (None, 30, 30, 32)        896
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 64)        18496
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64)          0
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 4, 4, 64)          36928
_________________________________________________________________
flatten (Flatten)            (None, 1024)              0
_________________________________________________________________
dense (Dense)                (None, 64)                65600
_________________________________________________________________
dense_1 (Dense)              (None, 10)                650
=================================================================
Total params: 122,570
Trainable params: 122,570
Non-trainable params: 0
_________________________________________________________________

```

As you can see, the (4, 4, 64) output is flattened into a vector of shape (1024) before being passed through the two Dense layers.

### Compile and train the model

```py
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

history = model.fit(train_images, train_labels, epochs=10,
                    validation_data=(test_images, test_labels))
```

```py
Epoch 1/10
1563/1563 [==============================] - 5s 3ms/step - loss: 1.5143 - accuracy: 0.4469 - val_loss: 1.2281 - val_accuracy: 0.5585
Epoch 2/10
1563/1563 [==============================] - 5s 3ms/step - loss: 1.1625 - accuracy: 0.5855 - val_loss: 1.2102 - val_accuracy: 0.5660
Epoch 3/10
1563/1563 [==============================] - 5s 3ms/step - loss: 1.0049 - accuracy: 0.6458 - val_loss: 0.9935 - val_accuracy: 0.6511
Epoch 4/10
1563/1563 [==============================] - 5s 3ms/step - loss: 0.9089 - accuracy: 0.6801 - val_loss: 0.9658 - val_accuracy: 0.6536
Epoch 5/10
1563/1563 [==============================] - 5s 3ms/step - loss: 0.8341 - accuracy: 0.7066 - val_loss: 0.9890 - val_accuracy: 0.6581
Epoch 6/10
1563/1563 [==============================] - 5s 3ms/step - loss: 0.7797 - accuracy: 0.7272 - val_loss: 0.8948 - val_accuracy: 0.6891
Epoch 7/10
1563/1563 [==============================] - 5s 3ms/step - loss: 0.7287 - accuracy: 0.7437 - val_loss: 0.9004 - val_accuracy: 0.6947
Epoch 8/10
1563/1563 [==============================] - 5s 3ms/step - loss: 0.6858 - accuracy: 0.7609 - val_loss: 0.8284 - val_accuracy: 0.7191
Epoch 9/10
1563/1563 [==============================] - 5s 3ms/step - loss: 0.6448 - accuracy: 0.7736 - val_loss: 0.8752 - val_accuracy: 0.7096
Epoch 10/10
1563/1563 [==============================] - 5s 3ms/step - loss: 0.6117 - accuracy: 0.7855 - val_loss: 0.8524 - val_accuracy: 0.7204

```

### Evaluate the model

```py
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim([0.5, 1])
plt.legend(loc='lower right')
plt.show()

test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
```

![png](img/9564eb108080dfcb0a0231e7db795b06.png)

```py
313/313 - 1s - loss: 0.8524 - accuracy: 0.7204

```

```py
print(test_acc)
```

```py
0.7203999757766724

```

Our simple CNN model reaches about 70% accuracy on the test set. Not bad for just a few lines of code! For another CNN architecture, see the example [here](https://tensorflow.google.cn/tutorials/quickstart/advanced), which uses the Keras subclassing API and [`tf.GradientTape`](https://tensorflow.google.cn/api_docs/python/tf/GradientTape).
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/042.md b/Tensorflow/TensorFlow2.0/042.md
new file mode 100644
index 00000000..570f292a
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/042.md
@@ -0,0 +1,603 @@
# Image classification

> Original: [https://tensorflow.google.cn/tutorials/images/classification](https://tensorflow.google.cn/tutorials/images/classification)

This tutorial shows how to classify images of flowers.
It creates an image classifier using a [`keras.Sequential`](https://tensorflow.google.cn/api_docs/python/tf/keras/Sequential) model, and loads data using [`preprocessing.image_dataset_from_directory`](https://tensorflow.google.cn/api_docs/python/tf/keras/preprocessing/image_dataset_from_directory). You will gain practical experience with the following concepts: + +* Efficiently loading a dataset off disk. +* Identifying overfitting and applying techniques to mitigate it, including data augmentation and Dropout. + +This tutorial follows a basic machine learning workflow: + +1. Examine and understand data +2. Build an input pipeline +3. Build the model +4. Train the model +5. Test the model +6. Improve the model and repeat the process + +## Import TensorFlow and other libraries + +```py +import matplotlib.pyplot as plt +import numpy as np +import os +import PIL +import tensorflow as tf + +from tensorflow import keras +from tensorflow.keras import layers +from tensorflow.keras.models import Sequential +``` + +## Download and explore the dataset + +This tutorial uses a dataset of about 3,700 photos of flowers. The dataset contains 5 sub-directories, one per class: + +```py +flower_photo/ + daisy/ + dandelion/ + roses/ + sunflowers/ + tulips/ +``` + +```py +import pathlib +dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz" +data_dir = tf.keras.utils.get_file('flower_photos', origin=dataset_url, untar=True) +data_dir = pathlib.Path(data_dir) +``` + +```py +Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz +228818944/228813984 [==============================] - 5s 0us/step + +``` + +After downloading, you should now have a copy of the dataset available. There are 3,670 total images: + +```py +image_count = len(list(data_dir.glob('*/*.jpg'))) +print(image_count) +``` + +```py +3670 + +``` + +Here are some roses: + +```py +roses = list(data_dir.glob('roses/*')) +PIL.Image.open(str(roses[0])) +``` + +![png](img/87abb24bd5c5230158bc1ff3b3bb5624.png) + +```py +PIL.Image.open(str(roses[1])) +``` + +![png](img/c5f05439bb7e2eb354fda7f89beadeb3.png) + +And some tulips: + +```py +tulips = list(data_dir.glob('tulips/*')) +PIL.Image.open(str(tulips[0])) +``` + +![png](img/dcd2e24d351259809e8bd2dfe61f3f59.png) + +```py +PIL.Image.open(str(tulips[1])) +``` + +![png](img/25794664318bbd0dc1284a9ea6754d14.png) + +# Load using keras.preprocessing + +Let's load these images off disk using the helpful [image_dataset_from_directory](https://tensorflow.google.cn/api_docs/python/tf/keras/preprocessing/image_dataset_from_directory) utility. This will take you from a directory of images on disk to a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) in just a couple lines of code. If you like, you can also write your own data loading code from scratch by visiting the [load images](https://tensorflow.google.cn/tutorials/load_data/images) tutorial. + +## Create a dataset + +Define some parameters for the loader: + +```py +batch_size = 32 +img_height = 180 +img_width = 180 +``` + +It's good practice to use a validation split when developing your model. Let's use 80% of the images for training, and 20% for validation. + +```py +train_ds = tf.keras.preprocessing.image_dataset_from_directory( + data_dir, + validation_split=0.2, + subset="training", + seed=123, + image_size=(img_height, img_width), + batch_size=batch_size) +``` + +```py +Found 3670 files belonging to 5 classes. 
Using 2936 files for training.

```

```py
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)
```

```py
Found 3670 files belonging to 5 classes.
Using 734 files for validation.

```

You can find the class names in the `class_names` attribute on these datasets. These correspond to the directory names in alphabetical order.

```py
class_names = train_ds.class_names
print(class_names)
```

```py
['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']

```

## Visualize the data

Here are the first 9 images from the training dataset.

```py
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
  for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(images[i].numpy().astype("uint8"))
    plt.title(class_names[labels[i]])
    plt.axis("off")
```

![png](img/01e618f7715193d849381e8d78c78c09.png)

You will train a model using these datasets by passing them to `model.fit` in a moment. If you like, you can also manually iterate over the dataset and retrieve batches of images:

```py
for image_batch, labels_batch in train_ds:
  print(image_batch.shape)
  print(labels_batch.shape)
  break
```

```py
(32, 180, 180, 3)
(32,)

```

The `image_batch` is a tensor of shape `(32, 180, 180, 3)`: a batch of 32 images of shape `180x180x3` (the last dimension refers to the RGB color channels). The `labels_batch` is a tensor of shape `(32,)`; these are the labels corresponding to the 32 images.

You can call `.numpy()` on the `image_batch` and `labels_batch` tensors to convert them to a `numpy.ndarray`.

## Configure the dataset for performance

Let's make sure to use buffered prefetching so you can yield data from disk without I/O becoming blocking. These are two important methods you should use when loading data.

[`Dataset.cache()`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#cache) keeps the images in memory after they're loaded off disk during the first epoch. This will ensure the dataset does not become a bottleneck while training your model. If your dataset is too large to fit into memory, you can also use this method to create a performant on-disk cache.

[`Dataset.prefetch()`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#prefetch) overlaps data preprocessing and model execution while training.

Interested readers can learn more about both methods, as well as how to cache data to disk, in the [data performance guide](https://tensorflow.google.cn/guide/data_performance#prefetching).

```py
AUTOTUNE = tf.data.experimental.AUTOTUNE

train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
```

## Standardize the data

The RGB channel values are in the `[0, 255]` range. This is not ideal for a neural network; in general you should seek to make your input values small. Here, you will standardize values to be in the `[0, 1]` range by using a Rescaling layer.

```py
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)
```

**Note:** The Keras Preprocessing utilities and layers introduced in this section are currently experimental and may change.

There are two ways to use this layer.
You can apply it to the dataset by calling map:

```py
normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Notice the pixel values are now in `[0,1]`.
print(np.min(first_image), np.max(first_image))
```

```py
0.006427039 0.99052274

```

Or, you can include the layer inside your model definition, which can simplify deployment. Let's use the second approach here.

**Note:** You previously resized images using the `image_size` argument of `image_dataset_from_directory`. If you want to include the resizing logic in your model as well, you can use the [Resizing](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/experimental/preprocessing/Resizing) layer.

# Create the model

The model consists of three convolution blocks with a max pool layer in each of them, topped by a fully connected layer with 128 units that is activated by a `relu` activation function. This model has not been tuned for high accuracy; the goal of this tutorial is to show a standard approach.

```py
num_classes = 5

model = Sequential([
  layers.experimental.preprocessing.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(num_classes)
])
```

## Compile the model

For this tutorial, choose the [`optimizers.Adam`](https://tensorflow.google.cn/api_docs/python/tf/keras/optimizers/Adam) optimizer and [`losses.SparseCategoricalCrossentropy`](https://tensorflow.google.cn/api_docs/python/tf/keras/losses/SparseCategoricalCrossentropy) loss function. To view training and validation accuracy for each training epoch, pass the `metrics` argument.
+ +```py +model.compile(optimizer='adam', + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=['accuracy']) +``` + +## Model summary + +View all the layers of the network using the model's `summary` method: + +```py +model.summary() +``` + +```py +Model: "sequential" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +rescaling_1 (Rescaling) (None, 180, 180, 3) 0 +_________________________________________________________________ +conv2d (Conv2D) (None, 180, 180, 16) 448 +_________________________________________________________________ +max_pooling2d (MaxPooling2D) (None, 90, 90, 16) 0 +_________________________________________________________________ +conv2d_1 (Conv2D) (None, 90, 90, 32) 4640 +_________________________________________________________________ +max_pooling2d_1 (MaxPooling2 (None, 45, 45, 32) 0 +_________________________________________________________________ +conv2d_2 (Conv2D) (None, 45, 45, 64) 18496 +_________________________________________________________________ +max_pooling2d_2 (MaxPooling2 (None, 22, 22, 64) 0 +_________________________________________________________________ +flatten (Flatten) (None, 30976) 0 +_________________________________________________________________ +dense (Dense) (None, 128) 3965056 +_________________________________________________________________ +dense_1 (Dense) (None, 5) 645 +================================================================= +Total params: 3,989,285 +Trainable params: 3,989,285 +Non-trainable params: 0 +_________________________________________________________________ + +``` + +## Train the model + +```py +epochs=10 +history = model.fit( + train_ds, + validation_data=val_ds, + epochs=epochs +) +``` + +```py +Epoch 1/10 +92/92 [==============================] - 3s 27ms/step - loss: 1.3816 - accuracy: 0.4077 - val_loss: 1.0884 - val_accuracy: 0.5518 +Epoch 2/10 +92/92 [==============================] - 1s 10ms/step - loss: 1.0222 - accuracy: 0.6039 - val_loss: 0.9661 - val_accuracy: 0.5872 +Epoch 3/10 +92/92 [==============================] - 1s 10ms/step - loss: 0.8417 - accuracy: 0.6778 - val_loss: 0.8763 - val_accuracy: 0.6417 +Epoch 4/10 +92/92 [==============================] - 1s 10ms/step - loss: 0.6234 - accuracy: 0.7691 - val_loss: 0.8961 - val_accuracy: 0.6444 +Epoch 5/10 +92/92 [==============================] - 1s 10ms/step - loss: 0.4066 - accuracy: 0.8580 - val_loss: 0.9164 - val_accuracy: 0.6717 +Epoch 6/10 +92/92 [==============================] - 1s 10ms/step - loss: 0.2379 - accuracy: 0.9234 - val_loss: 1.1665 - val_accuracy: 0.6417 +Epoch 7/10 +92/92 [==============================] - 1s 10ms/step - loss: 0.1372 - accuracy: 0.9571 - val_loss: 1.3581 - val_accuracy: 0.6621 +Epoch 8/10 +92/92 [==============================] - 1s 10ms/step - loss: 0.0802 - accuracy: 0.9789 - val_loss: 1.5392 - val_accuracy: 0.6526 +Epoch 9/10 +92/92 [==============================] - 1s 10ms/step - loss: 0.0405 - accuracy: 0.9918 - val_loss: 1.7072 - val_accuracy: 0.6730 +Epoch 10/10 +92/92 [==============================] - 1s 10ms/step - loss: 0.0311 - accuracy: 0.9925 - val_loss: 1.7984 - val_accuracy: 0.6458 + +``` + +## Visualize training results + +Create plots of loss and accuracy on the training and validation sets. 
```py
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
```

![png](img/14fce8d9f2fd98077c5bf9a8db1f25ec.png)

As you can see from the plots, training accuracy and validation accuracy are off by a large margin, and the model has achieved only around 60% accuracy on the validation set.

Let's look at what went wrong and try to increase the overall performance of the model.

## Overfitting

In the plots above, the training accuracy is increasing linearly over time, whereas validation accuracy stalls around 60% in the training process. Also, the difference between training and validation accuracy is noticeable—a sign of [overfitting](https://tensorflow.google.cn/tutorials/keras/overfit_and_underfit).

When there are a small number of training examples, the model sometimes learns from noise or unwanted details in the training examples—to an extent that it negatively impacts the performance of the model on new examples. This phenomenon is known as overfitting. It means that the model will have a difficult time generalizing on a new dataset.

There are multiple ways to fight overfitting in the training process. In this tutorial, you'll use *data augmentation* and add *Dropout* to your model.

## Data augmentation

Overfitting generally occurs when there are a small number of training examples. [Data augmentation](https://tensorflow.google.cn/tutorials/images/data_augmentation) takes the approach of generating additional training data from your existing examples by augmenting them using random transformations that yield believable-looking images. This helps expose the model to more aspects of the data and generalize better.

You will implement data augmentation using experimental [Keras Preprocessing Layers](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/experimental/preprocessing/?version=nightly). These can be included inside your model like other layers, and run on the GPU.

```py
data_augmentation = keras.Sequential(
  [
    layers.experimental.preprocessing.RandomFlip("horizontal",
                                                 input_shape=(img_height,
                                                              img_width,
                                                              3)),
    layers.experimental.preprocessing.RandomRotation(0.1),
    layers.experimental.preprocessing.RandomZoom(0.1),
  ]
)
```

Let's visualize what a few augmented examples look like by applying data augmentation to the same image several times:

```py
plt.figure(figsize=(10, 10))
for images, _ in train_ds.take(1):
  for i in range(9):
    augmented_images = data_augmentation(images)
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(augmented_images[0].numpy().astype("uint8"))
    plt.axis("off")
```

![png](img/696df8a523ce550bf177c7051cef2c75.png)

You will use data augmentation to train a model in a moment.

## Dropout

Another technique to reduce overfitting is to introduce [Dropout](https://developers.google.cn/machine-learning/glossary#dropout_regularization) to the network, a form of *regularization*.
When you apply Dropout to a layer, it randomly drops out (by setting the activation to zero) a number of output units from the layer during the training process. Dropout takes a fractional number as its input value, such as 0.1, 0.2, or 0.4, meaning that 10%, 20%, or 40% of the output units are randomly dropped from the applied layer.

Let's create a new neural network using [`layers.Dropout`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dropout), then train it using augmented images.

```py
model = Sequential([
  data_augmentation,
  layers.experimental.preprocessing.Rescaling(1./255),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Dropout(0.2),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(num_classes)
])
```

## Compile and train the model

```py
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
```

```py
model.summary()
```

```py
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
sequential_1 (Sequential)    (None, 180, 180, 3)       0
_________________________________________________________________
rescaling_2 (Rescaling)      (None, 180, 180, 3)       0
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 180, 180, 16)      448
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 90, 90, 16)        0
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 90, 90, 32)        4640
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 45, 45, 32)        0
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 45, 45, 64)        18496
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 22, 22, 64)        0
_________________________________________________________________
dropout (Dropout)            (None, 22, 22, 64)        0
_________________________________________________________________
flatten_1 (Flatten)          (None, 30976)             0
_________________________________________________________________
dense_2 (Dense)              (None, 128)               3965056
_________________________________________________________________
dense_3 (Dense)              (None, 5)                 645
=================================================================
Total params: 3,989,285
Trainable params: 3,989,285
Non-trainable params: 0
_________________________________________________________________

```

```py
epochs = 15
history = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=epochs
)
```

```py
Epoch 1/15
92/92 [==============================] - 1s 13ms/step - loss: 1.4326 - accuracy: 0.3760 - val_loss: 1.1774 - val_accuracy: 0.5123
Epoch 2/15
92/92 [==============================] - 1s 12ms/step - loss: 1.1058 - accuracy: 0.5525 - val_loss: 0.9981 - val_accuracy: 0.5967
Epoch 3/15
92/92 [==============================] - 1s 12ms/step - loss: 1.0014 - accuracy: 0.5937 - val_loss: 0.9525 - val_accuracy: 0.6185
Epoch 4/15
92/92 [==============================] - 1s 12ms/step - loss: 0.9205 - accuracy: 0.6383 - val_loss: 0.9474 - val_accuracy: 0.6376
0.9474 - val_accuracy: 0.6376 +Epoch 5/15 +92/92 [==============================] - 1s 12ms/step - loss: 0.8813 - accuracy: 0.6594 - val_loss: 0.9383 - val_accuracy: 0.6417 +Epoch 6/15 +92/92 [==============================] - 1s 12ms/step - loss: 0.8366 - accuracy: 0.6734 - val_loss: 0.8468 - val_accuracy: 0.6512 +Epoch 7/15 +92/92 [==============================] - 1s 12ms/step - loss: 0.7955 - accuracy: 0.6979 - val_loss: 0.8837 - val_accuracy: 0.6717 +Epoch 8/15 +92/92 [==============================] - 1s 12ms/step - loss: 0.7485 - accuracy: 0.7163 - val_loss: 0.8417 - val_accuracy: 0.6730 +Epoch 9/15 +92/92 [==============================] - 1s 12ms/step - loss: 0.7276 - accuracy: 0.7282 - val_loss: 0.8505 - val_accuracy: 0.6826 +Epoch 10/15 +92/92 [==============================] - 1s 12ms/step - loss: 0.6981 - accuracy: 0.7374 - val_loss: 0.7679 - val_accuracy: 0.6948 +Epoch 11/15 +92/92 [==============================] - 1s 12ms/step - loss: 0.6755 - accuracy: 0.7446 - val_loss: 0.7863 - val_accuracy: 0.6948 +Epoch 12/15 +92/92 [==============================] - 1s 12ms/step - loss: 0.6375 - accuracy: 0.7585 - val_loss: 0.7911 - val_accuracy: 0.7044 +Epoch 13/15 +92/92 [==============================] - 1s 12ms/step - loss: 0.6095 - accuracy: 0.7790 - val_loss: 0.7403 - val_accuracy: 0.7139 +Epoch 14/15 +92/92 [==============================] - 1s 12ms/step - loss: 0.6116 - accuracy: 0.7681 - val_loss: 0.7794 - val_accuracy: 0.7153 +Epoch 15/15 +92/92 [==============================] - 1s 12ms/step - loss: 0.5818 - accuracy: 0.7762 - val_loss: 0.7729 - val_accuracy: 0.7044 + +``` + +## Visualize training results + +After applying data augmentation and Dropout, there is less overfitting than before, and training and validation accuracy are closer aligned. + +```py +acc = history.history['accuracy'] +val_acc = history.history['val_accuracy'] + +loss = history.history['loss'] +val_loss = history.history['val_loss'] + +epochs_range = range(epochs) + +plt.figure(figsize=(8, 8)) +plt.subplot(1, 2, 1) +plt.plot(epochs_range, acc, label='Training Accuracy') +plt.plot(epochs_range, val_acc, label='Validation Accuracy') +plt.legend(loc='lower right') +plt.title('Training and Validation Accuracy') + +plt.subplot(1, 2, 2) +plt.plot(epochs_range, loss, label='Training Loss') +plt.plot(epochs_range, val_loss, label='Validation Loss') +plt.legend(loc='upper right') +plt.title('Training and Validation Loss') +plt.show() +``` + +![png](img/2127fb93f97c5aaf91e991540bbe84ed.png) + +## Predict on new data + +Finally, let's use our model to classify an image that wasn't included in the training or validation sets. + +**Note:** Data augmentation and Dropout layers are inactive at inference time. + +```py +sunflower_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/592px-Red_sunflower.jpg" +sunflower_path = tf.keras.utils.get_file('Red_sunflower', origin=sunflower_url) + +img = keras.preprocessing.image.load_img( + sunflower_path, target_size=(img_height, img_width) +) +img_array = keras.preprocessing.image.img_to_array(img) +img_array = tf.expand_dims(img_array, 0) # Create a batch + +predictions = model.predict(img_array) +score = tf.nn.softmax(predictions[0]) + +print( + "This image most likely belongs to {} with a {:.2f} percent confidence." 
+    .format(class_names[np.argmax(score)], 100 * np.max(score))
+)
+```
+
+```py
+Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/592px-Red_sunflower.jpg
+122880/117948 [===============================] - 0s 0us/step
+This image most likely belongs to sunflowers with a 99.45 percent confidence.
+
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/043.md b/Tensorflow/TensorFlow2.0/043.md
new file mode 100644
index 00000000..651432d1
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/043.md
@@ -0,0 +1,1004 @@
+# Transfer learning and fine-tuning
+
+> Original: [https://tensorflow.google.cn/tutorials/images/transfer_learning](https://tensorflow.google.cn/tutorials/images/transfer_learning)
+
+In this tutorial, you will learn how to classify images of cats and dogs by using transfer learning from a pre-trained network.
+
+A pre-trained model is a saved network that was previously trained on a large dataset, typically on a large-scale image-classification task. You can either use the pretrained model as is or use transfer learning to customize it to a given task.
+
+The intuition behind transfer learning for image classification is that if a model is trained on a large and general enough dataset, it will effectively serve as a generic model of the visual world. You can then take advantage of these learned feature maps without having to start from scratch by training a large model on a large dataset.
+
+In this notebook, you will try two ways to customize a pretrained model:
+
+1. Feature Extraction: Use the representations learned by a previous network to extract meaningful features from new samples. You simply add a new classifier, which will be trained from scratch, on top of the pretrained model so that you can repurpose the feature maps learned previously for the dataset.
+
+   You do not need to (re)train the entire model. The base convolutional network already contains features that are generically useful for classifying pictures. However, the final, classification part of the pretrained model is specific to the original classification task, and subsequently specific to the set of classes on which the model was trained.
+
+2. Fine-Tuning: Unfreeze a few of the top layers of a frozen model base and jointly train both the newly-added classifier layers and the last layers of the base model. This allows us to "fine-tune" the higher-order feature representations in the base model in order to make them more relevant for the specific task.
+
+You will follow the general machine learning workflow:
+
+1. Examine and understand the data
+2. Build an input pipeline, in this case using Keras ImageDataGenerator
+3. Compose the model
+   * Load in the pretrained base model (and pretrained weights)
+   * Stack the classification layers on top
+4. Train the model
+5. Evaluate the model
+
+```py
+pip install -q tf-nightly
+
+```
+
+```py
+WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+
+```
+
+```py
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import tensorflow as tf
+
+from tensorflow.keras.preprocessing import image_dataset_from_directory
+```
+
+## Data preprocessing
+
+### Data download
+
+In this tutorial, you will use a dataset containing several thousand images of cats and dogs.
Download and extract a zip file containing the images, then create a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) for training and validation using the [`tf.keras.preprocessing.image_dataset_from_directory`](https://tensorflow.google.cn/api_docs/python/tf/keras/preprocessing/image_dataset_from_directory) utility. You can learn more about loading images in this [tutorial](https://tensorflow.google.cn/tutorials/load_data/images).
+
+```py
+_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
+path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip', origin=_URL, extract=True)
+PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')
+
+train_dir = os.path.join(PATH, 'train')
+validation_dir = os.path.join(PATH, 'validation')
+
+BATCH_SIZE = 32
+IMG_SIZE = (160, 160)
+
+train_dataset = image_dataset_from_directory(train_dir,
+                                             shuffle=True,
+                                             batch_size=BATCH_SIZE,
+                                             image_size=IMG_SIZE)
+```
+
+```py
+Downloading data from https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
+68608000/68606236 [==============================] - 1s 0us/step
+Found 2000 files belonging to 2 classes.
+
+```
+
+```py
+validation_dataset = image_dataset_from_directory(validation_dir,
+                                                  shuffle=True,
+                                                  batch_size=BATCH_SIZE,
+                                                  image_size=IMG_SIZE)
+```
+
+```py
+Found 1000 files belonging to 2 classes.
+
+```
+
+Show the first nine images and labels from the training set:
+
+```py
+class_names = train_dataset.class_names
+
+plt.figure(figsize=(10, 10))
+for images, labels in train_dataset.take(1):
+  for i in range(9):
+    ax = plt.subplot(3, 3, i + 1)
+    plt.imshow(images[i].numpy().astype("uint8"))
+    plt.title(class_names[labels[i]])
+    plt.axis("off")
+```
+
+![png](img/2a2da1f076940ee9e540d308733418d2.png)
+
+As the original dataset doesn't contain a test set, you will create one. To do so, determine how many batches of data are available in the validation set using `tf.data.experimental.cardinality`, then move 20% of them to a test set.
+
+```py
+val_batches = tf.data.experimental.cardinality(validation_dataset)
+test_dataset = validation_dataset.take(val_batches // 5)
+validation_dataset = validation_dataset.skip(val_batches // 5)
+```
+
+```py
+print('Number of validation batches: %d' % tf.data.experimental.cardinality(validation_dataset))
+print('Number of test batches: %d' % tf.data.experimental.cardinality(test_dataset))
+```
+
+```py
+Number of validation batches: 26
+Number of test batches: 6
+
+```
+
+### Configure the dataset for performance
+
+Use buffered prefetching to load images from disk without having I/O become blocking. To learn more about this method, see the [data performance](https://tensorflow.google.cn/guide/data_performance) guide.
+
+```py
+AUTOTUNE = tf.data.experimental.AUTOTUNE
+
+train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
+validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)
+test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)
+```
+
+### Use data augmentation
+
+When you don't have a large image dataset, it's a good practice to artificially introduce sample diversity by applying random, yet realistic, transformations to the training images, such as rotation and horizontal flipping. This helps expose the model to different aspects of the training data and reduce [overfitting](https://tensorflow.google.cn/tutorials/keras/overfit_and_underfit).
You can learn more about data augmentation in this [tutorial](https://tensorflow.google.cn/tutorials/images/data_augmentation).
+
+```py
+data_augmentation = tf.keras.Sequential([
+  tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'),
+  tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
+])
+```
+
+**Note:** These layers are active only during training, when you call `model.fit`. They are inactive when the model is used in inference mode, as in `model.evaluate` or `model.predict`.
+
+Let's repeatedly apply these layers to the same image and see the result.
+
+```py
+for image, _ in train_dataset.take(1):
+  plt.figure(figsize=(10, 10))
+  first_image = image[0]
+  for i in range(9):
+    ax = plt.subplot(3, 3, i + 1)
+    augmented_image = data_augmentation(tf.expand_dims(first_image, 0))
+    plt.imshow(augmented_image[0] / 255)
+    plt.axis('off')
+```
+
+![png](img/a208fe52aacc14e668b9b681805d8302.png)
+
+### Rescale pixel values
+
+In a moment, you will download `tf.keras.applications.MobileNetV2` for use as your base model. This model expects pixel values in `[-1, 1]`, but at this point, the pixel values in your images are in `[0, 255]`. To rescale them, use the preprocessing method included with the model.
+
+```py
+preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
+```
+
+**Note:** Alternatively, you could rescale pixel values from `[0, 255]` to `[-1, 1]` using a [Rescaling](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/experimental/preprocessing/Rescaling) layer.
+
+```py
+rescale = tf.keras.layers.experimental.preprocessing.Rescaling(1./127.5, offset=-1)
+```
+
+**Note:** If using other `tf.keras.applications`, be sure to check the API doc to determine if they expect pixels in `[-1, 1]` or `[0, 1]`, or use the included `preprocess_input` function.
+
+## Create the base model from the pre-trained convnets
+
+You will create the base model from the **MobileNet V2** model developed at Google. This is pre-trained on the ImageNet dataset, a large dataset consisting of 1.4M images and 1000 classes. ImageNet is a research training dataset with a wide variety of categories like `jackfruit` and `syringe`. This base of knowledge will help us classify cats and dogs from our specific dataset.
+
+First, you need to pick which layer of MobileNet V2 you will use for feature extraction. The very last classification layer (on "top", as most diagrams of machine learning models go from bottom to top) is not very useful. Instead, you will follow the common practice of depending on the very last layer before the flatten operation. This layer is called the "bottleneck layer". The bottleneck layer features retain more generality than the final/top layer.
+
+Next, instantiate a MobileNet V2 model pre-loaded with weights trained on ImageNet. By specifying the **include_top=False** argument, you load a network that doesn't include the classification layers at the top, which is ideal for feature extraction.
+
+```py
+# Create the base model from the pre-trained model MobileNet V2
+IMG_SHAPE = IMG_SIZE + (3,)
+base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
+                                               include_top=False,
+                                               weights='imagenet')
+```
+
+```py
+Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_160_no_top.h5
+9412608/9406464 [==============================] - 1s 0us/step
+
+```
+
+This feature extractor converts each `160x160x3` image into a `5x5x1280` block of features.
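+
+As an aside, the `5x5` spatial size is not arbitrary: as the architecture summary printed below shows, MobileNet V2 halves the spatial resolution five times (an overall stride of 32), and `1280` is the channel width of its final convolution. A quick sanity check of that arithmetic:
+
+```py
+# Five stride-2 stages reduce a 160x160 input by a factor of 2**5 = 32.
+print(160 // 2**5)  # 5
+```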
Let's see what it does to an example batch of images:
+
+```py
+image_batch, label_batch = next(iter(train_dataset))
+feature_batch = base_model(image_batch)
+print(feature_batch.shape)
+```
+
+```py
+(32, 5, 5, 1280)
+
+```
+
+## Feature extraction
+
+In this step, you will freeze the convolutional base created in the previous step and use it as a feature extractor. Additionally, you add a classifier on top of it and train the top-level classifier.
+
+### Freeze the convolutional base
+
+It is important to freeze the convolutional base before you compile and train the model. Freezing (by setting `layer.trainable = False`) prevents the weights in a given layer from being updated during training. MobileNet V2 has many layers, so setting the entire model's `trainable` flag to False will freeze all of them.
+
+```py
+base_model.trainable = False
+```
+
+### Important note about BatchNormalization layers
+
+Many models contain `tf.keras.layers.BatchNormalization` layers. This layer is a special case and precautions should be taken in the context of fine-tuning, as shown later in this tutorial.
+
+When you set `layer.trainable = False`, the `BatchNormalization` layer will run in inference mode, and will not update its mean and variance statistics.
+
+When you unfreeze a model that contains BatchNormalization layers in order to do fine-tuning, you should keep the BatchNormalization layers in inference mode by passing `training = False` when calling the base model. Otherwise, the updates applied to the non-trainable weights will destroy what the model has learned.
+
+For details, see the [Transfer learning guide](https://tensorflow.google.cn/guide/keras/transfer_learning).
+
+```py
+# Let's take a look at the base model architecture
+base_model.summary()
+```
+
+```py
+Model: "mobilenetv2_1.00_160"
+__________________________________________________________________________________________________
+Layer (type) Output Shape Param # Connected to
+==================================================================================================
+input_1 (InputLayer) [(None, 160, 160, 3) 0
+__________________________________________________________________________________________________
+Conv1 (Conv2D) (None, 80, 80, 32) 864 input_1[0][0]
+__________________________________________________________________________________________________
+bn_Conv1 (BatchNormalization) (None, 80, 80, 32) 128 Conv1[0][0]
+__________________________________________________________________________________________________
+Conv1_relu (ReLU) (None, 80, 80, 32) 0 bn_Conv1[0][0]
+__________________________________________________________________________________________________
+expanded_conv_depthwise (Depthw (None, 80, 80, 32) 288 Conv1_relu[0][0]
+__________________________________________________________________________________________________
+expanded_conv_depthwise_BN (Bat (None, 80, 80, 32) 128 expanded_conv_depthwise[0][0]
+__________________________________________________________________________________________________
+expanded_conv_depthwise_relu (R (None, 80, 80, 32) 0 expanded_conv_depthwise_BN[0][0]
+__________________________________________________________________________________________________
+expanded_conv_project (Conv2D) (None, 80, 80, 16) 512 expanded_conv_depthwise_relu[0][0
+__________________________________________________________________________________________________
+expanded_conv_project_BN (Batch (None, 80, 80, 16) 64 expanded_conv_project[0][0]
+__________________________________________________________________________________________________ +block_1_expand (Conv2D) (None, 80, 80, 96) 1536 expanded_conv_project_BN[0][0] +__________________________________________________________________________________________________ +block_1_expand_BN (BatchNormali (None, 80, 80, 96) 384 block_1_expand[0][0] +__________________________________________________________________________________________________ +block_1_expand_relu (ReLU) (None, 80, 80, 96) 0 block_1_expand_BN[0][0] +__________________________________________________________________________________________________ +block_1_pad (ZeroPadding2D) (None, 81, 81, 96) 0 block_1_expand_relu[0][0] +__________________________________________________________________________________________________ +block_1_depthwise (DepthwiseCon (None, 40, 40, 96) 864 block_1_pad[0][0] +__________________________________________________________________________________________________ +block_1_depthwise_BN (BatchNorm (None, 40, 40, 96) 384 block_1_depthwise[0][0] +__________________________________________________________________________________________________ +block_1_depthwise_relu (ReLU) (None, 40, 40, 96) 0 block_1_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_1_project (Conv2D) (None, 40, 40, 24) 2304 block_1_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_1_project_BN (BatchNormal (None, 40, 40, 24) 96 block_1_project[0][0] +__________________________________________________________________________________________________ +block_2_expand (Conv2D) (None, 40, 40, 144) 3456 block_1_project_BN[0][0] +__________________________________________________________________________________________________ +block_2_expand_BN (BatchNormali (None, 40, 40, 144) 576 block_2_expand[0][0] +__________________________________________________________________________________________________ +block_2_expand_relu (ReLU) (None, 40, 40, 144) 0 block_2_expand_BN[0][0] +__________________________________________________________________________________________________ +block_2_depthwise (DepthwiseCon (None, 40, 40, 144) 1296 block_2_expand_relu[0][0] +__________________________________________________________________________________________________ +block_2_depthwise_BN (BatchNorm (None, 40, 40, 144) 576 block_2_depthwise[0][0] +__________________________________________________________________________________________________ +block_2_depthwise_relu (ReLU) (None, 40, 40, 144) 0 block_2_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_2_project (Conv2D) (None, 40, 40, 24) 3456 block_2_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_2_project_BN (BatchNormal (None, 40, 40, 24) 96 block_2_project[0][0] +__________________________________________________________________________________________________ +block_2_add (Add) (None, 40, 40, 24) 0 block_1_project_BN[0][0] + block_2_project_BN[0][0] +__________________________________________________________________________________________________ +block_3_expand (Conv2D) (None, 40, 40, 144) 3456 block_2_add[0][0] +__________________________________________________________________________________________________ +block_3_expand_BN (BatchNormali (None, 40, 40, 144) 576 
block_3_expand[0][0] +__________________________________________________________________________________________________ +block_3_expand_relu (ReLU) (None, 40, 40, 144) 0 block_3_expand_BN[0][0] +__________________________________________________________________________________________________ +block_3_pad (ZeroPadding2D) (None, 41, 41, 144) 0 block_3_expand_relu[0][0] +__________________________________________________________________________________________________ +block_3_depthwise (DepthwiseCon (None, 20, 20, 144) 1296 block_3_pad[0][0] +__________________________________________________________________________________________________ +block_3_depthwise_BN (BatchNorm (None, 20, 20, 144) 576 block_3_depthwise[0][0] +__________________________________________________________________________________________________ +block_3_depthwise_relu (ReLU) (None, 20, 20, 144) 0 block_3_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_3_project (Conv2D) (None, 20, 20, 32) 4608 block_3_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_3_project_BN (BatchNormal (None, 20, 20, 32) 128 block_3_project[0][0] +__________________________________________________________________________________________________ +block_4_expand (Conv2D) (None, 20, 20, 192) 6144 block_3_project_BN[0][0] +__________________________________________________________________________________________________ +block_4_expand_BN (BatchNormali (None, 20, 20, 192) 768 block_4_expand[0][0] +__________________________________________________________________________________________________ +block_4_expand_relu (ReLU) (None, 20, 20, 192) 0 block_4_expand_BN[0][0] +__________________________________________________________________________________________________ +block_4_depthwise (DepthwiseCon (None, 20, 20, 192) 1728 block_4_expand_relu[0][0] +__________________________________________________________________________________________________ +block_4_depthwise_BN (BatchNorm (None, 20, 20, 192) 768 block_4_depthwise[0][0] +__________________________________________________________________________________________________ +block_4_depthwise_relu (ReLU) (None, 20, 20, 192) 0 block_4_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_4_project (Conv2D) (None, 20, 20, 32) 6144 block_4_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_4_project_BN (BatchNormal (None, 20, 20, 32) 128 block_4_project[0][0] +__________________________________________________________________________________________________ +block_4_add (Add) (None, 20, 20, 32) 0 block_3_project_BN[0][0] + block_4_project_BN[0][0] +__________________________________________________________________________________________________ +block_5_expand (Conv2D) (None, 20, 20, 192) 6144 block_4_add[0][0] +__________________________________________________________________________________________________ +block_5_expand_BN (BatchNormali (None, 20, 20, 192) 768 block_5_expand[0][0] +__________________________________________________________________________________________________ +block_5_expand_relu (ReLU) (None, 20, 20, 192) 0 block_5_expand_BN[0][0] +__________________________________________________________________________________________________ +block_5_depthwise (DepthwiseCon (None, 20, 20, 
192) 1728 block_5_expand_relu[0][0] +__________________________________________________________________________________________________ +block_5_depthwise_BN (BatchNorm (None, 20, 20, 192) 768 block_5_depthwise[0][0] +__________________________________________________________________________________________________ +block_5_depthwise_relu (ReLU) (None, 20, 20, 192) 0 block_5_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_5_project (Conv2D) (None, 20, 20, 32) 6144 block_5_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_5_project_BN (BatchNormal (None, 20, 20, 32) 128 block_5_project[0][0] +__________________________________________________________________________________________________ +block_5_add (Add) (None, 20, 20, 32) 0 block_4_add[0][0] + block_5_project_BN[0][0] +__________________________________________________________________________________________________ +block_6_expand (Conv2D) (None, 20, 20, 192) 6144 block_5_add[0][0] +__________________________________________________________________________________________________ +block_6_expand_BN (BatchNormali (None, 20, 20, 192) 768 block_6_expand[0][0] +__________________________________________________________________________________________________ +block_6_expand_relu (ReLU) (None, 20, 20, 192) 0 block_6_expand_BN[0][0] +__________________________________________________________________________________________________ +block_6_pad (ZeroPadding2D) (None, 21, 21, 192) 0 block_6_expand_relu[0][0] +__________________________________________________________________________________________________ +block_6_depthwise (DepthwiseCon (None, 10, 10, 192) 1728 block_6_pad[0][0] +__________________________________________________________________________________________________ +block_6_depthwise_BN (BatchNorm (None, 10, 10, 192) 768 block_6_depthwise[0][0] +__________________________________________________________________________________________________ +block_6_depthwise_relu (ReLU) (None, 10, 10, 192) 0 block_6_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_6_project (Conv2D) (None, 10, 10, 64) 12288 block_6_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_6_project_BN (BatchNormal (None, 10, 10, 64) 256 block_6_project[0][0] +__________________________________________________________________________________________________ +block_7_expand (Conv2D) (None, 10, 10, 384) 24576 block_6_project_BN[0][0] +__________________________________________________________________________________________________ +block_7_expand_BN (BatchNormali (None, 10, 10, 384) 1536 block_7_expand[0][0] +__________________________________________________________________________________________________ +block_7_expand_relu (ReLU) (None, 10, 10, 384) 0 block_7_expand_BN[0][0] +__________________________________________________________________________________________________ +block_7_depthwise (DepthwiseCon (None, 10, 10, 384) 3456 block_7_expand_relu[0][0] +__________________________________________________________________________________________________ +block_7_depthwise_BN (BatchNorm (None, 10, 10, 384) 1536 block_7_depthwise[0][0] +__________________________________________________________________________________________________ +block_7_depthwise_relu 
(ReLU) (None, 10, 10, 384) 0 block_7_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_7_project (Conv2D) (None, 10, 10, 64) 24576 block_7_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_7_project_BN (BatchNormal (None, 10, 10, 64) 256 block_7_project[0][0] +__________________________________________________________________________________________________ +block_7_add (Add) (None, 10, 10, 64) 0 block_6_project_BN[0][0] + block_7_project_BN[0][0] +__________________________________________________________________________________________________ +block_8_expand (Conv2D) (None, 10, 10, 384) 24576 block_7_add[0][0] +__________________________________________________________________________________________________ +block_8_expand_BN (BatchNormali (None, 10, 10, 384) 1536 block_8_expand[0][0] +__________________________________________________________________________________________________ +block_8_expand_relu (ReLU) (None, 10, 10, 384) 0 block_8_expand_BN[0][0] +__________________________________________________________________________________________________ +block_8_depthwise (DepthwiseCon (None, 10, 10, 384) 3456 block_8_expand_relu[0][0] +__________________________________________________________________________________________________ +block_8_depthwise_BN (BatchNorm (None, 10, 10, 384) 1536 block_8_depthwise[0][0] +__________________________________________________________________________________________________ +block_8_depthwise_relu (ReLU) (None, 10, 10, 384) 0 block_8_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_8_project (Conv2D) (None, 10, 10, 64) 24576 block_8_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_8_project_BN (BatchNormal (None, 10, 10, 64) 256 block_8_project[0][0] +__________________________________________________________________________________________________ +block_8_add (Add) (None, 10, 10, 64) 0 block_7_add[0][0] + block_8_project_BN[0][0] +__________________________________________________________________________________________________ +block_9_expand (Conv2D) (None, 10, 10, 384) 24576 block_8_add[0][0] +__________________________________________________________________________________________________ +block_9_expand_BN (BatchNormali (None, 10, 10, 384) 1536 block_9_expand[0][0] +__________________________________________________________________________________________________ +block_9_expand_relu (ReLU) (None, 10, 10, 384) 0 block_9_expand_BN[0][0] +__________________________________________________________________________________________________ +block_9_depthwise (DepthwiseCon (None, 10, 10, 384) 3456 block_9_expand_relu[0][0] +__________________________________________________________________________________________________ +block_9_depthwise_BN (BatchNorm (None, 10, 10, 384) 1536 block_9_depthwise[0][0] +__________________________________________________________________________________________________ +block_9_depthwise_relu (ReLU) (None, 10, 10, 384) 0 block_9_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_9_project (Conv2D) (None, 10, 10, 64) 24576 block_9_depthwise_relu[0][0] 
+__________________________________________________________________________________________________ +block_9_project_BN (BatchNormal (None, 10, 10, 64) 256 block_9_project[0][0] +__________________________________________________________________________________________________ +block_9_add (Add) (None, 10, 10, 64) 0 block_8_add[0][0] + block_9_project_BN[0][0] +__________________________________________________________________________________________________ +block_10_expand (Conv2D) (None, 10, 10, 384) 24576 block_9_add[0][0] +__________________________________________________________________________________________________ +block_10_expand_BN (BatchNormal (None, 10, 10, 384) 1536 block_10_expand[0][0] +__________________________________________________________________________________________________ +block_10_expand_relu (ReLU) (None, 10, 10, 384) 0 block_10_expand_BN[0][0] +__________________________________________________________________________________________________ +block_10_depthwise (DepthwiseCo (None, 10, 10, 384) 3456 block_10_expand_relu[0][0] +__________________________________________________________________________________________________ +block_10_depthwise_BN (BatchNor (None, 10, 10, 384) 1536 block_10_depthwise[0][0] +__________________________________________________________________________________________________ +block_10_depthwise_relu (ReLU) (None, 10, 10, 384) 0 block_10_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_10_project (Conv2D) (None, 10, 10, 96) 36864 block_10_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_10_project_BN (BatchNorma (None, 10, 10, 96) 384 block_10_project[0][0] +__________________________________________________________________________________________________ +block_11_expand (Conv2D) (None, 10, 10, 576) 55296 block_10_project_BN[0][0] +__________________________________________________________________________________________________ +block_11_expand_BN (BatchNormal (None, 10, 10, 576) 2304 block_11_expand[0][0] +__________________________________________________________________________________________________ +block_11_expand_relu (ReLU) (None, 10, 10, 576) 0 block_11_expand_BN[0][0] +__________________________________________________________________________________________________ +block_11_depthwise (DepthwiseCo (None, 10, 10, 576) 5184 block_11_expand_relu[0][0] +__________________________________________________________________________________________________ +block_11_depthwise_BN (BatchNor (None, 10, 10, 576) 2304 block_11_depthwise[0][0] +__________________________________________________________________________________________________ +block_11_depthwise_relu (ReLU) (None, 10, 10, 576) 0 block_11_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_11_project (Conv2D) (None, 10, 10, 96) 55296 block_11_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_11_project_BN (BatchNorma (None, 10, 10, 96) 384 block_11_project[0][0] +__________________________________________________________________________________________________ +block_11_add (Add) (None, 10, 10, 96) 0 block_10_project_BN[0][0] + block_11_project_BN[0][0] +__________________________________________________________________________________________________ 
+block_12_expand (Conv2D) (None, 10, 10, 576) 55296 block_11_add[0][0] +__________________________________________________________________________________________________ +block_12_expand_BN (BatchNormal (None, 10, 10, 576) 2304 block_12_expand[0][0] +__________________________________________________________________________________________________ +block_12_expand_relu (ReLU) (None, 10, 10, 576) 0 block_12_expand_BN[0][0] +__________________________________________________________________________________________________ +block_12_depthwise (DepthwiseCo (None, 10, 10, 576) 5184 block_12_expand_relu[0][0] +__________________________________________________________________________________________________ +block_12_depthwise_BN (BatchNor (None, 10, 10, 576) 2304 block_12_depthwise[0][0] +__________________________________________________________________________________________________ +block_12_depthwise_relu (ReLU) (None, 10, 10, 576) 0 block_12_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_12_project (Conv2D) (None, 10, 10, 96) 55296 block_12_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_12_project_BN (BatchNorma (None, 10, 10, 96) 384 block_12_project[0][0] +__________________________________________________________________________________________________ +block_12_add (Add) (None, 10, 10, 96) 0 block_11_add[0][0] + block_12_project_BN[0][0] +__________________________________________________________________________________________________ +block_13_expand (Conv2D) (None, 10, 10, 576) 55296 block_12_add[0][0] +__________________________________________________________________________________________________ +block_13_expand_BN (BatchNormal (None, 10, 10, 576) 2304 block_13_expand[0][0] +__________________________________________________________________________________________________ +block_13_expand_relu (ReLU) (None, 10, 10, 576) 0 block_13_expand_BN[0][0] +__________________________________________________________________________________________________ +block_13_pad (ZeroPadding2D) (None, 11, 11, 576) 0 block_13_expand_relu[0][0] +__________________________________________________________________________________________________ +block_13_depthwise (DepthwiseCo (None, 5, 5, 576) 5184 block_13_pad[0][0] +__________________________________________________________________________________________________ +block_13_depthwise_BN (BatchNor (None, 5, 5, 576) 2304 block_13_depthwise[0][0] +__________________________________________________________________________________________________ +block_13_depthwise_relu (ReLU) (None, 5, 5, 576) 0 block_13_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_13_project (Conv2D) (None, 5, 5, 160) 92160 block_13_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_13_project_BN (BatchNorma (None, 5, 5, 160) 640 block_13_project[0][0] +__________________________________________________________________________________________________ +block_14_expand (Conv2D) (None, 5, 5, 960) 153600 block_13_project_BN[0][0] +__________________________________________________________________________________________________ +block_14_expand_BN (BatchNormal (None, 5, 5, 960) 3840 block_14_expand[0][0] 
+__________________________________________________________________________________________________ +block_14_expand_relu (ReLU) (None, 5, 5, 960) 0 block_14_expand_BN[0][0] +__________________________________________________________________________________________________ +block_14_depthwise (DepthwiseCo (None, 5, 5, 960) 8640 block_14_expand_relu[0][0] +__________________________________________________________________________________________________ +block_14_depthwise_BN (BatchNor (None, 5, 5, 960) 3840 block_14_depthwise[0][0] +__________________________________________________________________________________________________ +block_14_depthwise_relu (ReLU) (None, 5, 5, 960) 0 block_14_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_14_project (Conv2D) (None, 5, 5, 160) 153600 block_14_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_14_project_BN (BatchNorma (None, 5, 5, 160) 640 block_14_project[0][0] +__________________________________________________________________________________________________ +block_14_add (Add) (None, 5, 5, 160) 0 block_13_project_BN[0][0] + block_14_project_BN[0][0] +__________________________________________________________________________________________________ +block_15_expand (Conv2D) (None, 5, 5, 960) 153600 block_14_add[0][0] +__________________________________________________________________________________________________ +block_15_expand_BN (BatchNormal (None, 5, 5, 960) 3840 block_15_expand[0][0] +__________________________________________________________________________________________________ +block_15_expand_relu (ReLU) (None, 5, 5, 960) 0 block_15_expand_BN[0][0] +__________________________________________________________________________________________________ +block_15_depthwise (DepthwiseCo (None, 5, 5, 960) 8640 block_15_expand_relu[0][0] +__________________________________________________________________________________________________ +block_15_depthwise_BN (BatchNor (None, 5, 5, 960) 3840 block_15_depthwise[0][0] +__________________________________________________________________________________________________ +block_15_depthwise_relu (ReLU) (None, 5, 5, 960) 0 block_15_depthwise_BN[0][0] +__________________________________________________________________________________________________ +block_15_project (Conv2D) (None, 5, 5, 160) 153600 block_15_depthwise_relu[0][0] +__________________________________________________________________________________________________ +block_15_project_BN (BatchNorma (None, 5, 5, 160) 640 block_15_project[0][0] +__________________________________________________________________________________________________ +block_15_add (Add) (None, 5, 5, 160) 0 block_14_add[0][0] + block_15_project_BN[0][0] +__________________________________________________________________________________________________ +block_16_expand (Conv2D) (None, 5, 5, 960) 153600 block_15_add[0][0] +__________________________________________________________________________________________________ +block_16_expand_BN (BatchNormal (None, 5, 5, 960) 3840 block_16_expand[0][0] +__________________________________________________________________________________________________ +block_16_expand_relu (ReLU) (None, 5, 5, 960) 0 block_16_expand_BN[0][0] +__________________________________________________________________________________________________ +block_16_depthwise (DepthwiseCo (None, 5, 5, 
960) 8640 block_16_expand_relu[0][0]
+__________________________________________________________________________________________________
+block_16_depthwise_BN (BatchNor (None, 5, 5, 960) 3840 block_16_depthwise[0][0]
+__________________________________________________________________________________________________
+block_16_depthwise_relu (ReLU) (None, 5, 5, 960) 0 block_16_depthwise_BN[0][0]
+__________________________________________________________________________________________________
+block_16_project (Conv2D) (None, 5, 5, 320) 307200 block_16_depthwise_relu[0][0]
+__________________________________________________________________________________________________
+block_16_project_BN (BatchNorma (None, 5, 5, 320) 1280 block_16_project[0][0]
+__________________________________________________________________________________________________
+Conv_1 (Conv2D) (None, 5, 5, 1280) 409600 block_16_project_BN[0][0]
+__________________________________________________________________________________________________
+Conv_1_bn (BatchNormalization) (None, 5, 5, 1280) 5120 Conv_1[0][0]
+__________________________________________________________________________________________________
+out_relu (ReLU) (None, 5, 5, 1280) 0 Conv_1_bn[0][0]
+==================================================================================================
+Total params: 2,257,984
+Trainable params: 0
+Non-trainable params: 2,257,984
+__________________________________________________________________________________________________
+
+```
+
+### Add a classification head
+
+To generate predictions from the block of features, average over the `5x5` spatial locations, using a `tf.keras.layers.GlobalAveragePooling2D` layer to convert the features to a single 1280-element vector per image.
+
+```py
+global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
+feature_batch_average = global_average_layer(feature_batch)
+print(feature_batch_average.shape)
+```
+
+```py
+(32, 1280)
+
+```
+
+Apply a `tf.keras.layers.Dense` layer to convert these features into a single prediction per image. You don't need an activation function here because this prediction will be treated as a `logit`, or a raw prediction value. Positive numbers predict class 1, negative numbers predict class 0.
+
+```py
+prediction_layer = tf.keras.layers.Dense(1)
+prediction_batch = prediction_layer(feature_batch_average)
+print(prediction_batch.shape)
+```
+
+```py
+(32, 1)
+
+```
+
+Build a model by chaining together the data augmentation, rescaling, `base_model`, and feature extractor layers using the [Keras Functional API](https://tensorflow.google.cn/guide/keras/functional). As previously mentioned, use `training=False` as our model contains a `BatchNormalization` layer.
+
+```py
+inputs = tf.keras.Input(shape=(160, 160, 3))
+x = data_augmentation(inputs)
+x = preprocess_input(x)
+x = base_model(x, training=False)
+x = global_average_layer(x)
+x = tf.keras.layers.Dropout(0.2)(x)
+outputs = prediction_layer(x)
+model = tf.keras.Model(inputs, outputs)
+```
+
+### Compile the model
+
+Compile the model before training it. Since there are two classes, use a binary cross-entropy loss with `from_logits=True`, because the model provides a linear output.
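+
+As a quick standalone check of what `from_logits=True` means (with made-up values, separate from the tutorial's pipeline): the loss applies the sigmoid itself, so feeding it a raw logit is equivalent to feeding the sigmoid of that logit to a probability-based loss.
+
+```py
+import tensorflow as tf  # already imported earlier in this tutorial
+
+y_true = [[1.0]]
+logit = [[0.0]]  # sigmoid(0.0) = 0.5
+
+bce_from_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True)
+bce_from_probs = tf.keras.losses.BinaryCrossentropy()
+
+# Both lines print ~0.6931 (= -log(0.5)).
+print(bce_from_logits(y_true, logit).numpy())
+print(bce_from_probs(y_true, tf.sigmoid(logit)).numpy())
+```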
+ +```py +base_learning_rate = 0.0001 +model.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate), + loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + metrics=['accuracy']) +``` + +```py +model.summary() +``` + +```py +Model: "model" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +input_2 (InputLayer) [(None, 160, 160, 3)] 0 +_________________________________________________________________ +sequential (Sequential) (None, 160, 160, 3) 0 +_________________________________________________________________ +tf.math.truediv (TFOpLambda) (None, 160, 160, 3) 0 +_________________________________________________________________ +tf.math.subtract (TFOpLambda (None, 160, 160, 3) 0 +_________________________________________________________________ +mobilenetv2_1.00_160 (Functi (None, 5, 5, 1280) 2257984 +_________________________________________________________________ +global_average_pooling2d (Gl (None, 1280) 0 +_________________________________________________________________ +dropout (Dropout) (None, 1280) 0 +_________________________________________________________________ +dense (Dense) (None, 1) 1281 +================================================================= +Total params: 2,259,265 +Trainable params: 1,281 +Non-trainable params: 2,257,984 +_________________________________________________________________ + +``` + +The 2.5M parameters in MobileNet are frozen, but there are 1.2K *trainable* parameters in the Dense layer. These are divided between two `tf.Variable` objects, the weights and biases. + +```py +len(model.trainable_variables) +``` + +```py +2 + +``` + +### Train the model + +After training for 10 epochs, you should see ~94% accuracy on the validation set. 
+
+```py
+initial_epochs = 10
+
+loss0, accuracy0 = model.evaluate(validation_dataset)
+```
+
+```py
+26/26 [==============================] - 7s 199ms/step - loss: 0.9126 - accuracy: 0.3857
+
+```
+
+```py
+print("initial loss: {:.2f}".format(loss0))
+print("initial accuracy: {:.2f}".format(accuracy0))
+```
+
+```py
+initial loss: 0.91
+initial accuracy: 0.38
+
+```
+
+```py
+history = model.fit(train_dataset,
+                    epochs=initial_epochs,
+                    validation_data=validation_dataset)
+```
+
+```py
+Epoch 1/10
+63/63 [==============================] - 22s 306ms/step - loss: 0.7643 - accuracy: 0.5285 - val_loss: 0.6197 - val_accuracy: 0.6324
+Epoch 2/10
+63/63 [==============================] - 19s 301ms/step - loss: 0.5608 - accuracy: 0.6780 - val_loss: 0.4556 - val_accuracy: 0.7710
+Epoch 3/10
+63/63 [==============================] - 19s 305ms/step - loss: 0.4410 - accuracy: 0.7810 - val_loss: 0.3487 - val_accuracy: 0.8428
+Epoch 4/10
+63/63 [==============================] - 19s 300ms/step - loss: 0.3661 - accuracy: 0.8270 - val_loss: 0.2757 - val_accuracy: 0.8948
+Epoch 5/10
+63/63 [==============================] - 19s 297ms/step - loss: 0.3097 - accuracy: 0.8680 - val_loss: 0.2295 - val_accuracy: 0.9134
+Epoch 6/10
+63/63 [==============================] - 19s 298ms/step - loss: 0.2851 - accuracy: 0.8795 - val_loss: 0.2078 - val_accuracy: 0.9257
+Epoch 7/10
+63/63 [==============================] - 21s 329ms/step - loss: 0.2658 - accuracy: 0.8865 - val_loss: 0.1758 - val_accuracy: 0.9418
+Epoch 8/10
+63/63 [==============================] - 19s 304ms/step - loss: 0.2432 - accuracy: 0.8990 - val_loss: 0.1697 - val_accuracy: 0.9369
+Epoch 9/10
+63/63 [==============================] - 19s 302ms/step - loss: 0.2305 - accuracy: 0.9025 - val_loss: 0.1516 - val_accuracy: 0.9468
+Epoch 10/10
+63/63 [==============================] - 20s 307ms/step - loss: 0.2104 - accuracy: 0.9170 - val_loss: 0.1451 - val_accuracy: 0.9493
+
+```
+
+### Learning curves
+
+Let's take a look at the learning curves of the training and validation accuracy/loss when using the MobileNet V2 base model as a fixed feature extractor.
+
+```py
+acc = history.history['accuracy']
+val_acc = history.history['val_accuracy']
+
+loss = history.history['loss']
+val_loss = history.history['val_loss']
+
+plt.figure(figsize=(8, 8))
+plt.subplot(2, 1, 1)
+plt.plot(acc, label='Training Accuracy')
+plt.plot(val_acc, label='Validation Accuracy')
+plt.legend(loc='lower right')
+plt.ylabel('Accuracy')
+plt.ylim([min(plt.ylim()),1])
+plt.title('Training and Validation Accuracy')
+
+plt.subplot(2, 1, 2)
+plt.plot(loss, label='Training Loss')
+plt.plot(val_loss, label='Validation Loss')
+plt.legend(loc='upper right')
+plt.ylabel('Cross Entropy')
+plt.ylim([0,1.0])
+plt.title('Training and Validation Loss')
+plt.xlabel('epoch')
+plt.show()
+```
+
+![png](img/cd8127c26455c518a827f0ce6a07b1e0.png)
+
+**Note:** If you are wondering why the validation metrics are clearly better than the training metrics, the main factor is that layers like `tf.keras.layers.BatchNormalization` and `tf.keras.layers.Dropout` affect accuracy during training. They are turned off when calculating validation loss.
+
+To a lesser extent, it is also because training metrics report the average for an epoch, while validation metrics are evaluated after the epoch, so validation metrics see a model that has trained slightly longer.
+
+## Fine tuning
+
+In the feature extraction experiment, you were only training a few layers on top of a MobileNet V2 base model.
The weights of the pre-trained network were **not** updated during training. + +One way to increase performance even further is to train (or "fine-tune") the weights of the top layers of the pre-trained model alongside the training of the classifier you added. The training process will force the weights to be tuned from generic feature maps to features associated specifically with the dataset. + +**Note:** This should only be attempted after you have trained the top-level classifier with the pre-trained model set to non-trainable. If you add a randomly initialized classifier on top of a pre-trained model and attempt to train all layers jointly, the magnitude of the gradient updates will be too large (due to the random weights from the classifier) and your pre-trained model will forget what it has learned. + +Also, you should try to fine-tune a small number of top layers rather than the whole MobileNet model. In most convolutional networks, the higher up a layer is, the more specialized it is. The first few layers learn very simple and generic features that generalize to almost all types of images. As you go higher up, the features are increasingly more specific to the dataset on which the model was trained. The goal of fine-tuning is to adapt these specialized features to work with the new dataset, rather than overwrite the generic learning. + +### Un-freeze the top layers of the model + +All you need to do is unfreeze the `base_model` and set the bottom layers to be un-trainable. Then, you should recompile the model (necessary for these changes to take effect), and resume training. + +```py +base_model.trainable = True +``` + +```py +# Let's take a look to see how many layers are in the base model +print("Number of layers in the base model: ", len(base_model.layers)) + +# Fine-tune from this layer onwards +fine_tune_at = 100 + +# Freeze all the layers before the `fine_tune_at` layer +for layer in base_model.layers[:fine_tune_at]: + layer.trainable = False +``` + +```py +Number of layers in the base model: 154 + +``` + +### Compile the model + +As you are training a much larger model and want to readapt the pretrained weights, it is important to use a lower learning rate at this stage. Otherwise, your model could overfit very quickly. 
+ +```py +model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), + optimizer = tf.keras.optimizers.RMSprop(lr=base_learning_rate/10), + metrics=['accuracy']) +``` + +```py +model.summary() +``` + +```py +Model: "model" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +input_2 (InputLayer) [(None, 160, 160, 3)] 0 +_________________________________________________________________ +sequential (Sequential) (None, 160, 160, 3) 0 +_________________________________________________________________ +tf.math.truediv (TFOpLambda) (None, 160, 160, 3) 0 +_________________________________________________________________ +tf.math.subtract (TFOpLambda (None, 160, 160, 3) 0 +_________________________________________________________________ +mobilenetv2_1.00_160 (Functi (None, 5, 5, 1280) 2257984 +_________________________________________________________________ +global_average_pooling2d (Gl (None, 1280) 0 +_________________________________________________________________ +dropout (Dropout) (None, 1280) 0 +_________________________________________________________________ +dense (Dense) (None, 1) 1281 +================================================================= +Total params: 2,259,265 +Trainable params: 1,862,721 +Non-trainable params: 396,544 +_________________________________________________________________ + +``` + +```py +len(model.trainable_variables) +``` + +```py +56 + +``` + +### Continue training the model + +If you trained to convergence earlier, this step will improve your accuracy by a few percentage points. + +```py +fine_tune_epochs = 10 +total_epochs = initial_epochs + fine_tune_epochs + +history_fine = model.fit(train_dataset, + epochs=total_epochs, + initial_epoch=history.epoch[-1], + validation_data=validation_dataset) +``` + +```py +Epoch 10/20 +63/63 [==============================] - 31s 426ms/step - loss: 0.1810 - accuracy: 0.9288 - val_loss: 0.0674 - val_accuracy: 0.9752 +Epoch 11/20 +63/63 [==============================] - 26s 406ms/step - loss: 0.1221 - accuracy: 0.9494 - val_loss: 0.0592 - val_accuracy: 0.9827 +Epoch 12/20 +63/63 [==============================] - 26s 402ms/step - loss: 0.1116 - accuracy: 0.9529 - val_loss: 0.0732 - val_accuracy: 0.9666 +Epoch 13/20 +63/63 [==============================] - 26s 402ms/step - loss: 0.0950 - accuracy: 0.9586 - val_loss: 0.0467 - val_accuracy: 0.9790 +Epoch 14/20 +63/63 [==============================] - 25s 396ms/step - loss: 0.1075 - accuracy: 0.9556 - val_loss: 0.0487 - val_accuracy: 0.9814 +Epoch 15/20 +63/63 [==============================] - 25s 396ms/step - loss: 0.0664 - accuracy: 0.9741 - val_loss: 0.0435 - val_accuracy: 0.9827 +Epoch 16/20 +63/63 [==============================] - 25s 398ms/step - loss: 0.0860 - accuracy: 0.9681 - val_loss: 0.0428 - val_accuracy: 0.9790 +Epoch 17/20 +63/63 [==============================] - 25s 394ms/step - loss: 0.0709 - accuracy: 0.9740 - val_loss: 0.0662 - val_accuracy: 0.9691 +Epoch 18/20 +63/63 [==============================] - 25s 394ms/step - loss: 0.0787 - accuracy: 0.9685 - val_loss: 0.0390 - val_accuracy: 0.9827 +Epoch 19/20 +63/63 [==============================] - 25s 394ms/step - loss: 0.0733 - accuracy: 0.9734 - val_loss: 0.0577 - val_accuracy: 0.9728 +Epoch 20/20 +63/63 [==============================] - 25s 395ms/step - loss: 0.0642 - accuracy: 0.9739 - val_loss: 0.0403 - val_accuracy: 0.9802 + +``` + +Let's take a look at the 
learning curves of the training and validation accuracy/loss when fine-tuning the last few layers of the MobileNet V2 base model and training the classifier on top of it. Keep an eye on the validation loss: if it ends up much higher than the training loss, the model is overfitting.
+
+You may also get some overfitting because the new training set is relatively small and similar to the original MobileNet V2 datasets.
+
+After fine-tuning, the model nearly reaches 98% accuracy on the validation set.
+
+```py
+acc += history_fine.history['accuracy']
+val_acc += history_fine.history['val_accuracy']
+
+loss += history_fine.history['loss']
+val_loss += history_fine.history['val_loss']
+```
+
+```py
+plt.figure(figsize=(8, 8))
+plt.subplot(2, 1, 1)
+plt.plot(acc, label='Training Accuracy')
+plt.plot(val_acc, label='Validation Accuracy')
+plt.ylim([0.8, 1])
+plt.plot([initial_epochs-1,initial_epochs-1],
+          plt.ylim(), label='Start Fine Tuning')
+plt.legend(loc='lower right')
+plt.title('Training and Validation Accuracy')
+
+plt.subplot(2, 1, 2)
+plt.plot(loss, label='Training Loss')
+plt.plot(val_loss, label='Validation Loss')
+plt.ylim([0, 1.0])
+plt.plot([initial_epochs-1,initial_epochs-1],
+         plt.ylim(), label='Start Fine Tuning')
+plt.legend(loc='upper right')
+plt.title('Training and Validation Loss')
+plt.xlabel('epoch')
+plt.show()
+```
+
+![png](img/ac978eb483a5f2c471a3cbeb70bb6784.png)
+
+### Evaluation and prediction
+
+Finally, you can verify the performance of the model on new data using the test set.
+
+```py
+loss, accuracy = model.evaluate(test_dataset)
+print('Test accuracy :', accuracy)
+```
+
+```py
+6/6 [==============================] - 1s 188ms/step - loss: 0.0559 - accuracy: 0.9792
+Test accuracy : 0.9791666865348816
+
+```
+
+And now you are all set to use this model to predict if your pet is a cat or a dog.
+
+```py
+# Retrieve a batch of images from the test set
+image_batch, label_batch = test_dataset.as_numpy_iterator().next()
+predictions = model.predict_on_batch(image_batch).flatten()
+
+# Apply a sigmoid since our model returns logits
+predictions = tf.nn.sigmoid(predictions)
+predictions = tf.where(predictions < 0.5, 0, 1)
+
+print('Predictions:\n', predictions.numpy())
+print('Labels:\n', label_batch)
+
+plt.figure(figsize=(10, 10))
+for i in range(9):
+  ax = plt.subplot(3, 3, i + 1)
+  plt.imshow(image_batch[i].astype("uint8"))
+  plt.title(class_names[predictions[i]])
+  plt.axis("off")
+```
+
+```py
+Predictions:
+ [0 1 0 0 1 1 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 0 1 1 0 1 1 0 1 0 0 0]
+Labels:
+ [0 1 0 0 1 1 1 1 1 0 0 1 0 1 1 1 0 0 1 0 1 0 1 0 0 1 1 0 1 0 0 0]
+
+```
+
+![png](img/968128f6d4f50734ad92597399dacd79.png)
+
+## Summary
+
+* **Using a pre-trained model for feature extraction**: When working with a small dataset, it is a common practice to take advantage of features learned by a model trained on a larger dataset in the same domain. This is done by instantiating the pre-trained model and adding a fully-connected classifier on top. The pre-trained model is "frozen" and only the weights of the classifier get updated during training. In this case, the convolutional base extracted all the features associated with each image and you just trained a classifier that determines the image class given that set of extracted features.
+
+* **Fine-tuning a pre-trained model**: To further improve performance, one might want to repurpose the top-level layers of the pre-trained models to the new dataset via fine-tuning.
In this case, you tuned your weights such that your model learned high-level features specific to the dataset. This technique is usually recommended when the training dataset is large and very similar to the original dataset that the pre-trained model was trained on. + +To learn more, visit the [Transfer learning guide](https://tensorflow.google.cn/guide/keras/transfer_learning). + +```py +# MIT License +# +# Copyright (c) 2017 François Chollet # IGNORE_COPYRIGHT: cleared by OSS licensing +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/044.md b/Tensorflow/TensorFlow2.0/044.md new file mode 100644 index 00000000..8ff46660 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/044.md @@ -0,0 +1,38 @@ +# Transfer learning with TensorFlow Hub + +> 原文:[https://tensorflow.google.cn/tutorials/images/transfer_learning_with_hub](https://tensorflow.google.cn/tutorials/images/transfer_learning_with_hub) + +[TensorFlow Hub](https://hub.tensorflow.google.cn/) is a repository of pre-trained TensorFlow models. + +This tutorial demonstrates how to: + +1. Use models from TensorFlow Hub with [`tf.keras`](https://tensorflow.google.cn/api_docs/python/tf/keras) +2. Use an image classification model from TensorFlow Hub +3. Do simple transfer learning to fine-tune a model for your own image classes + +## Setup + +```py +import numpy as np +import time + +import PIL.Image as Image +import matplotlib.pylab as plt + +import tensorflow as tf +import tensorflow_hub as hub +``` + +## An ImageNet classifier + +You'll start by using a pretrained classifer model to take an image and predict what it's an image of - no training required! + +### Download the classifier + +Use [`hub.KerasLayer`](https://tensorflow.google.cn/hub/api_docs/python/hub/KerasLayer) to load a [MobileNetV2 model](https://hub.tensorflow.google.cn/google/tf2-preview/mobilenet_v2/classification/2) from TensorFlow Hub. Any [compatible image classifier model](https://hub.tensorflow.google.cn/s?q=tf2&module-type=image-classification) from hub.tensorflow.google.cn will work here. 
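+
+The snippet below is a minimal sketch (not part of the original tutorial) of how such a handle is typically wrapped once chosen: `hub.KerasLayer` turns the URL into an ordinary Keras layer. The 224x224 input size is an assumption for this MobileNetV2 variant; check the model's page to confirm.
+
+```py
+import tensorflow as tf
+import tensorflow_hub as hub
+
+# Illustrative sketch only. Assumption: this MobileNetV2 classifier expects
+# 224x224 RGB inputs (verify on the model's TF-Hub page).
+IMAGE_SHAPE = (224, 224)
+
+classifier = tf.keras.Sequential([
+    hub.KerasLayer(
+        "https://hub.tensorflow.google.cn/google/tf2-preview/mobilenet_v2/classification/4",
+        input_shape=IMAGE_SHAPE + (3,))
+])
+```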
+
+
+```py
+classifier_model = "https://hub.tensorflow.google.cn/google/tf2-preview/mobilenet_v2/classification/4"
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/045.md b/Tensorflow/TensorFlow2.0/045.md
new file mode 100644
index 00000000..801f8cb7
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/045.md
@@ -0,0 +1,500 @@
+# Data augmentation
+
+> Original: [https://tensorflow.google.cn/tutorials/images/data_augmentation](https://tensorflow.google.cn/tutorials/images/data_augmentation)
+
+## Overview
+
+This tutorial demonstrates data augmentation: a technique to increase the diversity of your training set by applying random (but realistic) transformations such as image rotation. You will learn how to apply data augmentation in two ways. First, you will use [Keras Preprocessing Layers](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/experimental/preprocessing/). Next, you will use [`tf.image`](https://tensorflow.google.cn/api_docs/python/tf/image).
+
+## Setup
+
+```py
+pip install -q tf-nightly
+
+```
+
+```py
+WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+
+```
+
+```py
+import matplotlib.pyplot as plt
+import numpy as np
+import tensorflow as tf
+import tensorflow_datasets as tfds
+
+from tensorflow.keras import layers
+from tensorflow.keras.datasets import mnist
+```
+
+## Download a dataset
+
+This tutorial uses the [tf_flowers](https://tensorflow.google.cn/datasets/catalog/tf_flowers) dataset. For convenience, download the dataset using [TensorFlow Datasets](https://tensorflow.google.cn/datasets). If you would like to learn about other ways of importing data, see the [load images](https://tensorflow.google.cn/tutorials/load_data/images) tutorial.
+
+```py
+(train_ds, val_ds, test_ds), metadata = tfds.load(
+    'tf_flowers',
+    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
+    with_info=True,
+    as_supervised=True,
+)
+```
+
+```py
+Downloading and preparing dataset tf_flowers/3.0.1 (download: 218.21 MiB, generated: 221.83 MiB, total: 440.05 MiB) to /home/kbuilder/tensorflow_datasets/tf_flowers/3.0.1...
+
+Warning:absl:Dataset tf_flowers is hosted on GCS. It will automatically be downloaded to your
+local data directory. If you'd instead prefer to read directly from our public
+GCS bucket (recommended if you're running on GCP), you can instead pass
+`try_gcs=True` to `tfds.load` or set `data_dir=gs://tfds-data/datasets`.
+
+Dataset tf_flowers downloaded and prepared to /home/kbuilder/tensorflow_datasets/tf_flowers/3.0.1\. Subsequent calls will reuse this data.
+
+```
+
+The flowers dataset has five classes.
+
+```py
+num_classes = metadata.features['label'].num_classes
+print(num_classes)
+```
+
+```py
+5
+
+```
+
+Let's retrieve an image from the dataset and use it to demonstrate data augmentation.
+
+```py
+get_label_name = metadata.features['label'].int2str
+
+image, label = next(iter(train_ds))
+_ = plt.imshow(image)
+_ = plt.title(get_label_name(label))
+```
+
+![png](img/aa45f39cd51486760afc706f90cf0afa.png)
+
+## Use Keras preprocessing layers
+
+**Note:** The [Keras Preprocessing Layers](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/experimental/preprocessing) introduced in this section are currently experimental.
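+
+If you are on a newer TensorFlow release, these layers may have been promoted out of the experimental namespace. The import shim below is a hedged sketch (the exact release in which the move happened is an assumption; adjust to your installation):
+
+```py
+# Compatibility sketch, not part of the original tutorial. Assumption: later
+# TF releases expose these layers directly under tf.keras.layers.
+try:
+    from tensorflow.keras.layers import Resizing, Rescaling, RandomFlip, RandomRotation
+except ImportError:
+    # Fall back to the experimental namespace used throughout this tutorial.
+    from tensorflow.keras.layers.experimental.preprocessing import (
+        Resizing, Rescaling, RandomFlip, RandomRotation)
+```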
+
+### Resizing and rescaling
+
+You can use preprocessing layers to [resize](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/experimental/preprocessing/Resizing) your images to a consistent shape, and to [rescale](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/experimental/preprocessing/Rescaling) pixel values.
+
+```py
+IMG_SIZE = 180
+
+resize_and_rescale = tf.keras.Sequential([
+  layers.experimental.preprocessing.Resizing(IMG_SIZE, IMG_SIZE),
+  layers.experimental.preprocessing.Rescaling(1./255)
+])
+```
+
+**Note:** The rescaling layer above standardizes pixel values to `[0, 1]`. If instead you wanted `[-1, 1]`, you would write `Rescaling(1./127.5, offset=-1)`.
+
+You can see the result of applying these layers to an image.
+
+```py
+result = resize_and_rescale(image)
+_ = plt.imshow(result)
+```
+
+![png](img/35228c04a07ff13d63e7c28043db3950.png)
+
+You can verify that the pixels are in `[0, 1]`.
+
+```py
+print("Min and max pixel values:", result.numpy().min(), result.numpy().max())
+```
+
+```py
+Min and max pixel values: 0.0 1.0
+
+```
+
+### Data augmentation
+
+You can use preprocessing layers for data augmentation as well.
+
+Let's create a few preprocessing layers and apply them repeatedly to the same image.
+
+```py
+data_augmentation = tf.keras.Sequential([
+  layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
+  layers.experimental.preprocessing.RandomRotation(0.2),
+])
+```
+
+```py
+# Add the image to a batch
+image = tf.expand_dims(image, 0)
+```
+
+```py
+plt.figure(figsize=(10, 10))
+for i in range(9):
+  augmented_image = data_augmentation(image)
+  ax = plt.subplot(3, 3, i + 1)
+  plt.imshow(augmented_image[0])
+  plt.axis("off")
+```
+
+![png](img/30586460013d859e496dd27ce6b18cbc.png)
+
+There are a variety of preprocessing [layers](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/experimental/preprocessing) you can use for data augmentation, including `layers.RandomContrast`, `layers.RandomCrop`, `layers.RandomZoom`, and others.
+
+### Two options to use the preprocessing layers
+
+There are two ways you can use these preprocessing layers, with important tradeoffs.
+
+#### Option 1: Make the preprocessing layers part of your model
+
+```py
+model = tf.keras.Sequential([
+  resize_and_rescale,
+  data_augmentation,
+  layers.Conv2D(16, 3, padding='same', activation='relu'),
+  layers.MaxPooling2D(),
+  # Rest of your model
+])
+```
+
+There are two important points to be aware of in this case:
+
+* Data augmentation will run on-device, synchronously with the rest of your layers, and benefit from GPU acceleration.
+
+* When you export your model using `model.save`, the preprocessing layers will be saved along with the rest of your model. If you later deploy this model, it will automatically standardize images (according to the configuration of your layers). This can save you from the effort of having to reimplement that logic server-side.
+
+**Note:** Data augmentation is inactive at test time, so input images will only be augmented during calls to `model.fit` (not `model.evaluate` or `model.predict`).
+
+#### Option 2: Apply the preprocessing layers to your dataset
+
+```py
+aug_ds = train_ds.map(
+  lambda x, y: (resize_and_rescale(x, training=True), y))
+```
+
+With this approach, you use [`Dataset.map`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#map) to create a dataset that yields batches of augmented images. In this case:
+
+* Data augmentation will happen asynchronously on the CPU, and is non-blocking. You can overlap the training of your model on the GPU with data preprocessing, using [`Dataset.prefetch`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#prefetch), shown below.
+* In this case the preprocessing layers will not be exported with the model when you call `model.save`. You will need to attach them to your model before saving it (after training, but before export) or reimplement them server-side.
+
+You can find an example of the first option in the [image classification](https://tensorflow.google.cn/tutorials/images/classification) tutorial. Let's demonstrate the second option here.
+
+### Apply the preprocessing layers to the datasets
+
+Configure the train, validation, and test datasets with the preprocessing layers you created above. You will also configure the datasets for performance, using parallel reads and buffered prefetching to yield batches from disk without having I/O become blocking. You can learn more about dataset performance in the [Better performance with the tf.data API](https://tensorflow.google.cn/guide/data_performance) guide.
+
+**Note:** Data augmentation should only be applied to the training set.
+
+```py
+batch_size = 32
+AUTOTUNE = tf.data.experimental.AUTOTUNE
+
+def prepare(ds, shuffle=False, augment=False):
+  # Resize and rescale all datasets
+  ds = ds.map(lambda x, y: (resize_and_rescale(x), y),
+              num_parallel_calls=AUTOTUNE)
+
+  if shuffle:
+    ds = ds.shuffle(1000)
+
+  # Batch all datasets
+  ds = ds.batch(batch_size)
+
+  # Use data augmentation only on the training set
+  if augment:
+    ds = ds.map(lambda x, y: (data_augmentation(x, training=True), y),
+                num_parallel_calls=AUTOTUNE)
+
+  # Use buffered prefetching on all datasets
+  return ds.prefetch(buffer_size=AUTOTUNE)
+```
+
+```py
+train_ds = prepare(train_ds, shuffle=True, augment=True)
+val_ds = prepare(val_ds)
+test_ds = prepare(test_ds)
+```
+
+### Train a model
+
+For completeness, you will now train a model using these datasets. This model has not been tuned for accuracy (the goal is to show you the mechanics).
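+
+Before building the model, it can help to sanity-check one batch from the prepared pipeline. This is a quick sketch rather than part of the original tutorial; the expected shapes follow from `batch_size = 32` and `IMG_SIZE = 180` defined above.
+
+```py
+# Illustrative sanity check (not part of the original tutorial): pull one batch
+# and confirm shapes and the [0, 1] value range produced by resize_and_rescale.
+image_batch, label_batch = next(iter(train_ds))
+print(image_batch.shape)   # expected: (32, 180, 180, 3)
+print(label_batch.shape)   # expected: (32,)
+print(image_batch.numpy().min(), image_batch.numpy().max())  # roughly within [0, 1]
+```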
+
+```py
+model = tf.keras.Sequential([
+  layers.Conv2D(16, 3, padding='same', activation='relu'),
+  layers.MaxPooling2D(),
+  layers.Conv2D(32, 3, padding='same', activation='relu'),
+  layers.MaxPooling2D(),
+  layers.Conv2D(64, 3, padding='same', activation='relu'),
+  layers.MaxPooling2D(),
+  layers.Flatten(),
+  layers.Dense(128, activation='relu'),
+  layers.Dense(num_classes)
+])
+```
+
+```py
+model.compile(optimizer='adam',
+              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+              metrics=['accuracy'])
+```
+
+```py
+epochs=5
+history = model.fit(
+  train_ds,
+  validation_data=val_ds,
+  epochs=epochs
+)
+```
+
+```py
+Epoch 1/5
+92/92 [==============================] - 30s 315ms/step - loss: 1.5078 - accuracy: 0.3428 - val_loss: 1.0809 - val_accuracy: 0.6240
+Epoch 2/5
+92/92 [==============================] - 28s 303ms/step - loss: 1.0781 - accuracy: 0.5724 - val_loss: 0.9762 - val_accuracy: 0.6322
+Epoch 3/5
+92/92 [==============================] - 28s 295ms/step - loss: 1.0083 - accuracy: 0.5900 - val_loss: 0.9570 - val_accuracy: 0.6376
+Epoch 4/5
+92/92 [==============================] - 28s 300ms/step - loss: 0.9537 - accuracy: 0.6116 - val_loss: 0.9081 - val_accuracy: 0.6485
+Epoch 5/5
+92/92 [==============================] - 28s 301ms/step - loss: 0.8816 - accuracy: 0.6525 - val_loss: 0.8353 - val_accuracy: 0.6594
+
+```
+
+```py
+loss, acc = model.evaluate(test_ds)
+print("Accuracy", acc)
+```
+
+```py
+12/12 [==============================] - 1s 83ms/step - loss: 0.8226 - accuracy: 0.6567
+Accuracy 0.6566757559776306
+
+```
+
+### Custom data augmentation
+
+You can also create custom data augmentation layers. This tutorial shows two ways of doing so. First, you will create a [`layers.Lambda`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Lambda) layer. This is a good way to write concise code. Next, you will write a new layer via [subclassing](https://tensorflow.google.cn/guide/keras/custom_layers_and_models), which gives you more control. Both layers will randomly invert the colors in an image, according to some probability.
+
+```py
+def random_invert_img(x, p=0.5):
+  # With probability p, invert the pixel values; otherwise return x unchanged.
+  if tf.random.uniform([]) < p:
+    x = (255 - x)
+  return x
+```
+
+```py
+def random_invert(factor=0.5):
+  return layers.Lambda(lambda x: random_invert_img(x, factor))
+
+random_invert = random_invert()
+```
+
+```py
+plt.figure(figsize=(10, 10))
+for i in range(9):
+  augmented_image = random_invert(image)
+  ax = plt.subplot(3, 3, i + 1)
+  plt.imshow(augmented_image[0].numpy().astype("uint8"))
+  plt.axis("off")
+```
+
+![png](img/5c6f6f5e851c052e9e53969cd0419cbb.png)
+
+Next, implement a custom layer by [subclassing](https://tensorflow.google.cn/guide/keras/custom_layers_and_models).
+
+```py
+class RandomInvert(layers.Layer):
+  def __init__(self, factor=0.5, **kwargs):
+    super().__init__(**kwargs)
+    self.factor = factor
+
+  def call(self, x):
+    # Pass the stored probability through, so the constructor argument is used.
+    return random_invert_img(x, self.factor)
+```
+
+```py
+_ = plt.imshow(RandomInvert()(image)[0])
+```
+
+![png](img/8142c6b01c1a35d86e4ace60827bcce8.png)
+
+Both of these layers can be used as described in options 1 and 2 above.
+
+## Using tf.image
+
+The above `layers.preprocessing` utilities are convenient. For finer control, you can write your own data augmentation pipelines or layers using [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) and [`tf.image`](https://tensorflow.google.cn/api_docs/python/tf/image). You may also want to check out [TensorFlow Addons Image: Operations](https://tensorflow.google.cn/addons/tutorials/image_ops) and [TensorFlow I/O: Color Space Conversions](https://tensorflow.google.cn/io/tutorials/colorspace).
+
+Since the flowers dataset was previously configured with data augmentation, let's reimport it to start fresh.
+
+```py
+(train_ds, val_ds, test_ds), metadata = tfds.load(
+    'tf_flowers',
+    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
+    with_info=True,
+    as_supervised=True,
+)
+```
+
+Retrieve an image to work with.
+
+```py
+image, label = next(iter(train_ds))
+_ = plt.imshow(image)
+_ = plt.title(get_label_name(label))
+```
+
+![png](img/cfa82b128c103151f142dae7b5ddecda.png)
+
+Let's use the following function to visualize and compare the original and augmented images side-by-side.
+
+```py
+def visualize(original, augmented):
+  fig = plt.figure()
+  plt.subplot(1,2,1)
+  plt.title('Original image')
+  plt.imshow(original)
+
+  plt.subplot(1,2,2)
+  plt.title('Augmented image')
+  plt.imshow(augmented)
+```
+
+### Data augmentation
+
+#### Flipping the image
+
+Flip the image either vertically or horizontally.
+
+```py
+flipped = tf.image.flip_left_right(image)
+visualize(image, flipped)
+```
+
+![png](img/dda6acab76c9a017bbe16c3bebb8e54c.png)
+
+#### Grayscale the image
+
+Grayscale an image.
+
+```py
+grayscaled = tf.image.rgb_to_grayscale(image)
+visualize(image, tf.squeeze(grayscaled))
+_ = plt.colorbar()
+```
+
+![png](img/1d2f7cb104afa8ee05f37076045f9195.png)
+
+#### Saturate the image
+
+Saturate an image by providing a saturation factor.
+
+```py
+saturated = tf.image.adjust_saturation(image, 3)
+visualize(image, saturated)
+```
+
+![png](img/7ef992617c160736f94c086cc0a754d5.png)
+
+#### Change image brightness
+
+Change the brightness of an image by providing a brightness factor.
+
+```py
+bright = tf.image.adjust_brightness(image, 0.4)
+visualize(image, bright)
+```
+
+![png](img/e46db7cde2b53be53d302c4b00d582a5.png)
+
+#### Center crop the image
+
+Crop the image from the center up to the image portion you desire.
+
+```py
+cropped = tf.image.central_crop(image, central_fraction=0.5)
+visualize(image, cropped)
+```
+
+![png](img/fe72873df8e5156872c578827579ba34.png)
+
+#### Rotate the image
+
+Rotate an image by 90 degrees.
+
+```py
+rotated = tf.image.rot90(image)
+visualize(image, rotated)
+```
+
+![png](img/f769d692ddcca3810cad6e32307d9b3a.png)
+
+### Apply augmentation to a dataset
+
+As before, apply data augmentation to a dataset using [`Dataset.map`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#map).
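+
+Note that the operations demonstrated above are deterministic. `tf.image` also ships random counterparts that draw new parameters on every call, which is usually what you want inside a training pipeline; the tutorial's own `augment` function below uses the same idea. A brief, illustrative sketch (the specific factor ranges are arbitrary choices, not from the original tutorial):
+
+```py
+# Random counterparts of the deterministic ops shown above (illustrative only).
+flipped = tf.image.random_flip_left_right(image)        # flips with probability 0.5
+brightened = tf.image.random_brightness(image, max_delta=0.3)
+saturated = tf.image.random_saturation(image, lower=0.5, upper=1.5)
+```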
+
+```py
+def resize_and_rescale(image, label):
+  image = tf.cast(image, tf.float32)
+  image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])
+  image = (image / 255.0)
+  return image, label
+```
+
+```py
+def augment(image, label):
+  image, label = resize_and_rescale(image, label)
+  # Add 6 pixels of padding
+  image = tf.image.resize_with_crop_or_pad(image, IMG_SIZE + 6, IMG_SIZE + 6)
+  # Random crop back to the original size
+  image = tf.image.random_crop(image, size=[IMG_SIZE, IMG_SIZE, 3])
+  image = tf.image.random_brightness(image, max_delta=0.5)  # Random brightness
+  image = tf.clip_by_value(image, 0, 1)
+  return image, label
+```
+
+### Configure the datasets
+
+```py
+train_ds = (
+    train_ds
+    .shuffle(1000)
+    .map(augment, num_parallel_calls=AUTOTUNE)
+    .batch(batch_size)
+    .prefetch(AUTOTUNE)
+)
+```
+
+```py
+val_ds = (
+    val_ds
+    .map(resize_and_rescale, num_parallel_calls=AUTOTUNE)
+    .batch(batch_size)
+    .prefetch(AUTOTUNE)
+)
+```
+
+```py
+test_ds = (
+    test_ds
+    .map(resize_and_rescale, num_parallel_calls=AUTOTUNE)
+    .batch(batch_size)
+    .prefetch(AUTOTUNE)
+)
+```
+
+These datasets can now be used to train a model as shown previously.
+
+## Next steps
+
+This tutorial demonstrated data augmentation using [Keras Preprocessing Layers](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/experimental/preprocessing/) and [`tf.image`](https://tensorflow.google.cn/api_docs/python/tf/image). To learn how to include preprocessing layers inside your model, see the [Image classification](https://tensorflow.google.cn/tutorials/images/classification) tutorial. You may also be interested in learning how preprocessing layers can help you classify text, as shown in the [Basic text classification](https://tensorflow.google.cn/tutorials/keras/text_classification) tutorial. You can learn more about [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) in this [guide](https://tensorflow.google.cn/guide/data), and you can learn how to configure your input pipelines for performance [here](https://tensorflow.google.cn/guide/data_performance).
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/046.md b/Tensorflow/TensorFlow2.0/046.md
new file mode 100644
index 00000000..15780f1f
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/046.md
@@ -0,0 +1,326 @@
+# Image segmentation
+
+> Original: [https://tensorflow.google.cn/tutorials/images/segmentation](https://tensorflow.google.cn/tutorials/images/segmentation)
+
+**Note:** This document was translated by the TensorFlow community. Because community translations are best-effort, there is no guarantee that they are accurate or reflect the latest [official English documentation](https://tensorflow.google.cn/?hl=en). If you have suggestions for improving this translation, please submit a pull request to the [tensorflow/docs](https://github.com/tensorflow/docs) GitHub repository. To volunteer to write or review translations, join the [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn).
+
+This tutorial focuses on the task of image segmentation, using a modified [U-Net](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/).
+
+## What is image segmentation?
+
+So far you have seen image classification, where the task of the network is to assign a label or class to an input image. However, sometimes you want to know where an object is located in an image, the shape of that object, which pixel belongs to which object, and so on. In this case you will want to segment the image, that is, assign a label to each pixel of the image. Thus, the task of image segmentation is to train a neural network to output a pixel-wise mask of the image. This helps in understanding the image at a much lower level, namely the pixel level. Image segmentation has many applications in areas such as medical imaging, self-driving cars, and satellite imaging.
+
+The dataset used in this tutorial is the [Oxford-IIIT Pet dataset](https://www.robots.ox.ac.uk/%7Evgg/data/pets/), created by Parkhi *et al.* The dataset consists of images, their corresponding labels, and pixel-wise masks. The masks are, in essence, labels for each pixel. Each pixel belongs to one of three classes:
+
+* Class 1: the pixel is part of the pet.
+* Class 2: the pixel is the outline of the pet.
+* Class 3: none of the above / a surrounding pixel.
+
+```py
+pip install -q git+https://github.com/tensorflow/examples.git
+
+```
+
+```py
+WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+
+```
+
+```py
+import tensorflow as tf
+```
+
+```py
+from tensorflow_examples.models.pix2pix import pix2pix
+
+import tensorflow_datasets as tfds
+tfds.disable_progress_bar()
+
+from IPython.display import clear_output
+import matplotlib.pyplot as plt
+```
+
+## Download the Oxford-IIIT Pets dataset
+
+The dataset is already included in TensorFlow Datasets; all you need to do is download it. The segmentation masks were only added in version 3.0.0, which is why this particular version is used.
+
+```py
+dataset, info = tfds.load('oxford_iiit_pet:3.*.*', with_info=True)
+```
+
+```py
+Downloading and preparing dataset oxford_iiit_pet/3.2.0 (download: 773.52 MiB, generated: 774.69 MiB, total: 1.51 GiB) to /home/kbuilder/tensorflow_datasets/oxford_iiit_pet/3.2.0...
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/oxford_iiit_pet/3.2.0.incompleteXSR11A/oxford_iiit_pet-train.tfrecord
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/oxford_iiit_pet/3.2.0.incompleteXSR11A/oxford_iiit_pet-test.tfrecord
+Dataset oxford_iiit_pet downloaded and prepared to /home/kbuilder/tensorflow_datasets/oxford_iiit_pet/3.2.0\. Subsequent calls will reuse this data.
+
+```
+
+The following code performs a simple augmentation by flipping the images. The images are then normalized to [0, 1]. Finally, as mentioned above, each pixel in the segmentation mask is labeled with one of {1, 2, 3}. For convenience, we subtract 1 from each mask, giving the labels {0, 1, 2}.
+
+```py
+def normalize(input_image, input_mask):
+  input_image = tf.cast(input_image, tf.float32) / 255.0
+  input_mask -= 1
+  return input_image, input_mask
+```
+
+```py
+@tf.function
+def load_image_train(datapoint):
+  input_image = tf.image.resize(datapoint['image'], (128, 128))
+  input_mask = tf.image.resize(datapoint['segmentation_mask'], (128, 128))
+
+  if tf.random.uniform(()) > 0.5:
+    input_image = tf.image.flip_left_right(input_image)
+    input_mask = tf.image.flip_left_right(input_mask)
+
+  input_image, input_mask = normalize(input_image, input_mask)
+
+  return input_image, input_mask
+```
+
+```py
+def load_image_test(datapoint):
+  input_image = tf.image.resize(datapoint['image'], (128, 128))
+  input_mask = tf.image.resize(datapoint['segmentation_mask'], (128, 128))
+
+  input_image, input_mask = normalize(input_image, input_mask)
+
+  return input_image, input_mask
+```
+
+The dataset already contains the required train and test splits, so we keep using the same splits.
+
+```py
+TRAIN_LENGTH = info.splits['train'].num_examples
+BATCH_SIZE = 64
+BUFFER_SIZE = 1000
+STEPS_PER_EPOCH = TRAIN_LENGTH // BATCH_SIZE
+```
+
+```py
+train = dataset['train'].map(load_image_train, num_parallel_calls=tf.data.experimental.AUTOTUNE)
+test = dataset['test'].map(load_image_test)
+```
+
+```py
+train_dataset = train.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
+train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
+test_dataset = test.batch(BATCH_SIZE)
+```
+
+Let's look at an example image from the dataset and its corresponding mask.
+
+```py
+def display(display_list):
+  plt.figure(figsize=(15, 15))
+
+  title = ['Input Image', 'True Mask', 'Predicted Mask']
+
+  for i in range(len(display_list)):
+    plt.subplot(1, len(display_list), i+1)
+    plt.title(title[i])
+    plt.imshow(tf.keras.preprocessing.image.array_to_img(display_list[i]))
+    plt.axis('off')
+  plt.show()
+```
+
+```py
+for image, mask in train.take(1):
+  sample_image, sample_mask = image, mask
+display([sample_image, sample_mask])
+```
+
+![png](img/a8a6734d5e53ebf66610af0af887bc96.png)
+
+## Define the model
+
+The model used here is a modified U-Net. A U-Net consists of an encoder (downsampler) and a decoder (upsampler). To learn robust features while reducing the number of trainable parameters, you can use a pretrained model as the encoder. For this task, the encoder will therefore be a pretrained MobileNetV2 model whose intermediate outputs will be used. The decoder will be the upsample block already implemented in the [Pix2pix tutorial](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/pix2pix/pix2pix.py) in the TensorFlow Examples repository.
+
+The number of output channels is 3 because there are three possible labels for each pixel. Think of this as multi-class classification in which each pixel is assigned to one of three classes.
+
+```py
+OUTPUT_CHANNELS = 3
+```
+
+As mentioned, the encoder is a pretrained MobileNetV2 model, prepared and ready to use in [tf.keras.applications](https://tensorflow.google.cn/versions/r2.0/api_docs/python/tf/keras/applications). The encoder consists of specific outputs from intermediate layers of the model. Note that the encoder will not be trained during the training process.
+
+```py
+base_model = tf.keras.applications.MobileNetV2(input_shape=[128, 128, 3], include_top=False)
+
+# Use the activations of these layers
+layer_names = [
+    'block_1_expand_relu',   # 64x64
+    'block_3_expand_relu',   # 32x32
+    'block_6_expand_relu',   # 16x16
+    'block_13_expand_relu',  # 8x8
+    'block_16_project',      # 4x4
+]
+layers = [base_model.get_layer(name).output for name in layer_names]
+
+# Create the feature extraction model
+down_stack = tf.keras.Model(inputs=base_model.input, outputs=layers)
+
+down_stack.trainable = False
+```
+
+```py
+Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_128_no_top.h5
+9412608/9406464 [==============================] - 0s 0us/step
+
+```
+
+The decoder/upsampler is simply a series of upsample blocks implemented in TensorFlow examples.
+
+```py
+up_stack = [
+    pix2pix.upsample(512, 3),  # 4x4 -> 8x8
+    pix2pix.upsample(256, 3),  # 8x8 -> 16x16
+    pix2pix.upsample(128, 3),  # 16x16 -> 32x32
+    pix2pix.upsample(64, 3),   # 32x32 -> 64x64
+]
+```
+
+```py
+def unet_model(output_channels):
+  inputs = tf.keras.layers.Input(shape=[128, 128, 3])
+  x = inputs
+
+  # Downsampling through the model
+  skips = down_stack(x)
+  x = skips[-1]
+  skips = reversed(skips[:-1])
+
+  # Upsampling and establishing the skip connections
+  for up, skip in zip(up_stack, skips):
+    x = up(x)
+    concat = tf.keras.layers.Concatenate()
+    x = concat([x, skip])
+
+  # This is the last layer of the model
+  last = tf.keras.layers.Conv2DTranspose(
+      output_channels, 3, strides=2,
+      padding='same')  # 64x64 -> 128x128
+
+  x = last(x)
+
+  return tf.keras.Model(inputs=inputs, outputs=x)
+```
+
+## Train the model
+
+Now all that is left to do is compile and train the model. The loss used here is `losses.SparseCategoricalCrossentropy(from_logits=True)`. This loss is used because the network is trying to assign a label to each pixel, just as in multi-class prediction. In the true segmentation mask each pixel has a value in {0, 1, 2}, and the network likewise outputs three channels. Essentially, each channel is trying to learn to predict one class, and this loss is the recommended choice for such a scenario. The label assigned to a pixel is the class of the channel with the highest output value, which is exactly what the `create_mask` function does.
+
+```py
+model = unet_model(OUTPUT_CHANNELS)
+model.compile(optimizer='adam',
+              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+              metrics=['accuracy'])
+```
+
+Take a quick look at the resulting model architecture:
+
+```py
+tf.keras.utils.plot_model(model, show_shapes=True)
+```
+
+![png](img/fc1492a9c4124dcf0d9fb207c0a323d0.png)
+
+Let's try out the model and see what it predicts before training.
+
+```py
+def create_mask(pred_mask):
+  pred_mask = tf.argmax(pred_mask, axis=-1)
+  pred_mask = pred_mask[..., tf.newaxis]
+  return pred_mask[0]
+```
+
+```py
+def show_predictions(dataset=None, num=1):
+  if dataset:
+    for image, mask in dataset.take(num):
+      pred_mask = model.predict(image)
+      display([image[0], mask[0], create_mask(pred_mask)])
+  else:
+    display([sample_image, sample_mask,
+             create_mask(model.predict(sample_image[tf.newaxis, ...]))])
+```
+
+```py
+show_predictions()
+```
+
+![png](img/79de81de8fa8f26b206d9f7e2e29232f.png)
+
+Let's observe how the model improves while training. To accomplish this, a callback is defined below.
+
+```py
+class DisplayCallback(tf.keras.callbacks.Callback):
+  def on_epoch_end(self, epoch, logs=None):
+    clear_output(wait=True)
+    show_predictions()
+    print ('\nSample Prediction after epoch {}\n'.format(epoch+1))
+```
+
+```py
+EPOCHS = 20
+VAL_SUBSPLITS = 5
+VALIDATION_STEPS = info.splits['test'].num_examples//BATCH_SIZE//VAL_SUBSPLITS
+
+model_history = model.fit(train_dataset, epochs=EPOCHS,
+                          steps_per_epoch=STEPS_PER_EPOCH,
+                          validation_steps=VALIDATION_STEPS,
+                          validation_data=test_dataset,
+                          callbacks=[DisplayCallback()])
+```
+
+![png](img/dd1b792428257ee1ffcb4e02d4e81c11.png)
+
+```py
+
+Sample Prediction after epoch 20
+
+57/57 [==============================] - 3s 54ms/step - loss: 0.1308 - accuracy: 0.9401 - val_loss: 0.3246 - val_accuracy: 0.8903
+
+```
+
+```py
+loss = model_history.history['loss']
+val_loss = model_history.history['val_loss']
+
+epochs = range(EPOCHS)
+
+plt.figure()
+plt.plot(epochs, loss, 'r', label='Training loss')
+plt.plot(epochs, val_loss, 'bo', label='Validation loss')
+plt.title('Training and Validation Loss')
+plt.xlabel('Epoch')
+plt.ylabel('Loss Value')
+plt.ylim([0, 1])
+plt.legend()
+plt.show()
+```
+
+![png](img/12bbad2792cbf9031cf0f5c0e54b36a3.png)
+
+## Make predictions
+
+Let's make a few predictions. In the interest of saving time, only a small number of epochs was used, but you may set it higher to achieve more accurate results.
+
+```py
+show_predictions(test_dataset, 3)
+```
+
+![png](img/a3923a442896cffee97920f98141a84c.png)
+
+![png](img/8fcdc694ecba49a443b3d3fa3db737c8.png)
+
+![png](img/58c58ebd47eeea7849c83cacae4000e9.png)
+
+## Next steps
+
+Now that you have an understanding of what image segmentation is and how it works, you can try this tutorial out with different intermediate layer outputs, or even different pretrained models. You may also challenge yourself with the [Carvana](https://www.kaggle.com/c/carvana-image-masking-challenge/overview) image masking challenge hosted on Kaggle.
+
+You may also want to look at the [TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection) for other models you can retrain on your own data.
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/047.md b/Tensorflow/TensorFlow2.0/047.md
new file mode 100644
index 00000000..584a81a7
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/047.md
@@ -0,0 +1 @@
+# Text
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/048.md b/Tensorflow/TensorFlow2.0/048.md
new file mode 100644
index 00000000..ab522b00
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/048.md
@@ -0,0 +1,367 @@
+# Word embeddings
+
+> Original: [https://tensorflow.google.cn/tutorials/text/word_embeddings](https://tensorflow.google.cn/tutorials/text/word_embeddings)
+
+This tutorial introduces word embeddings. It contains complete code to train word embeddings from scratch on a small dataset, and to visualize those embeddings using the [Embedding Projector](http://projector.tensorflow.org) (shown in the image below).
+
+![Screenshot of the embedding projector](img/16ea92d12fa8170f3e79e4c56f9affd1.png)
+
+## Representing text as numbers
+
+Machine learning models take vectors (arrays of numbers) as input. When working with text, the first thing we must do is come up with a strategy to convert strings to numbers (or to "vectorize" the text) before feeding it to the model. In this section, we will look at three strategies for doing so.
+
+### One-hot encodings
+
+As a first idea, we might "one-hot" encode each word in our vocabulary. Consider the sentence "The cat sat on the mat". The vocabulary (or unique words) in this sentence is (cat, mat, on, sat, the). To represent each word, we create a zero vector with length equal to the vocabulary size, then place a 1 in the index that corresponds to the word. This approach is shown in the following diagram.
+
+![Diagram of one-hot encodings](img/717d3c9c631162f5b991acff83eda7bc.png)
+
+To create a vector that contains the encoding of the sentence, we could then concatenate the one-hot vectors for each word.
+
+Key point: this approach is inefficient. A one-hot encoded vector is sparse (meaning most indices are zero). Imagine we have 10,000 words in the vocabulary. To one-hot encode each word, we would create a vector where 99.99% of the elements are zero.
+
+### Encode each word with a unique number
+
+A second approach we might try is to encode each word using a unique number. Continuing the example above, we could assign 1 to "cat", 2 to "mat", and so on. We could then encode the sentence "The cat sat on the mat" as a dense vector like [5, 1, 4, 3, 5, 2]. This approach is efficient: instead of a sparse vector, we now have a dense one (where all elements are full).
+
+There are two downsides to this approach, however:
+
+* The integer encoding is arbitrary (it does not capture any relationship between words).
+
+* An integer encoding can be challenging for a model to interpret. A linear classifier, for example, learns a single weight for each feature. Because there is no relationship between the similarity of any two words and the similarity of their encodings, this feature-weight combination is not meaningful.
+
+### Word embeddings
+
+Word embeddings give us a way to use an efficient, dense representation in which similar words have a similar encoding. Importantly, we do not have to specify this encoding by hand. An embedding is a dense vector of floating-point values (the length of the vector is a parameter you specify). Instead of specifying the values for the embedding manually, they are trainable parameters (weights learned by the model during training, in the same way a model learns weights for a dense layer). Word embeddings of 8 dimensions (for small datasets) are common, going up to 1024 dimensions when working with large datasets. A higher-dimensional embedding can capture fine-grained relationships between words, but takes more data to learn.
+
+![Diagram of an embedding](img/4341c4ebffdd0a35a50322abd93518de.png)
+
+Above is a diagram of a word embedding. Each word is represented as a 4-dimensional vector of floating-point values. Another way to think of an embedding is as a "lookup table". After the weights have been learned, we can encode each word by looking up the dense vector it corresponds to in the table.
+
+## Setup
+
+```py
+import tensorflow as tf
+```
+
+```py
+from tensorflow import keras
+from tensorflow.keras import layers
+
+import tensorflow_datasets as tfds
+tfds.disable_progress_bar()
+```
+
+## Using the Embedding layer
+
+Keras makes it easy to use word embeddings. Let's take a look at the [Embedding](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Embedding) layer.
+
+The Embedding layer can be understood as a lookup table that maps from integer indices (which stand for specific words) to dense vectors (their embeddings). The dimensionality (or width) of the embedding is a parameter you can experiment with to see what works well for your problem, much in the same way you would experiment with the number of neurons in a Dense layer.
+
+```py
+embedding_layer = layers.Embedding(1000, 5)
+```
+
+When you create an Embedding layer, the weights for the embedding are randomly initialized (just like for any other layer). During training, they are gradually adjusted via backpropagation. Once trained, the learned word embeddings will roughly encode similarities between words (as they were learned for the specific problem your model is trained on).
+
+If you pass an integer to an Embedding layer, the result replaces each integer with the vector from the embedding table:
+
+```py
+result = embedding_layer(tf.constant([1,2,3]))
+result.numpy()
+```
+
+```py
+array([[ 0.02629578,  0.0097797 , -0.04365711,  0.03760537,  0.0260709 ],
+       [ 0.03876719,  0.01541508, -0.0483237 ,  0.03976549,  0.04153169],
+       [ 0.03035608,  0.0410546 , -0.03654389, -0.01073235,  0.02143143]],
+      dtype=float32)
+
+```
+
+For text or sequence problems, the Embedding layer takes a 2D tensor of integers with shape `(samples, sequence_length)`, where each entry is a sequence of integers. It can embed sequences of variable lengths. You could feed the Embedding layer batches of shape `(32, 10)` (a batch of 32 sequences of length 10) or `(64, 15)` (a batch of 64 sequences of length 15).
+
+The returned tensor has one more axis than the input; the embedding vectors are aligned along the new last axis. Pass it a `(2, 3)` input batch and the output is `(2, 3, N)`.
+
+```py
+result = embedding_layer(tf.constant([[0,1,2],[3,4,5]]))
+result.shape
+```
+
+```py
+TensorShape([2, 3, 5])
+
+```
+
+When given a batch of sequences as input, an Embedding layer returns a 3D floating-point tensor of shape `(samples, sequence_length, embedding_dimensionality)`. To convert from such a variable-length sequence to a fixed representation, there are a variety of standard approaches: you could use an RNN, attention, or a pooling layer before passing the result to a Dense layer. This tutorial uses pooling because it is the simplest. The [Text classification with an RNN](/tutorials/text/text_classification_rnn) tutorial is a good next step.
+
+## Learning embeddings from scratch
+
+In this tutorial, you will train a sentiment classifier on IMDB movie reviews. In the process, the model will learn embeddings from scratch. We will use a preprocessed dataset.
+
+To load a text dataset from scratch, see the [loading text tutorial](https://tensorflow.google.cn/tutorials/load_data/text).
+
+```py
+(train_data, test_data), info = tfds.load(
+    'imdb_reviews/subwords8k',
+    split = (tfds.Split.TRAIN, tfds.Split.TEST),
+    with_info=True, as_supervised=True)
+```
+
+```py
+WARNING:absl:TFDS datasets with text encoding are deprecated and will be removed in a future version. Instead, you should use the plain text version and tokenize the text using `tensorflow_text` (See: https://www.tensorflow.org/tutorials/tensorflow_text/intro#tfdata_example)
+
+```
+
+Get the encoder (`tfds.features.text.SubwordTextEncoder`), and take a quick look at the vocabulary.
+
+The `_` in the vocabulary stands for a space. Note how the vocabulary includes whole words (ending with `_`) as well as partial words that it can use to build larger words:
+
+```py
+encoder = info.features['text'].encoder
+encoder.subwords[:20]
+```
+
+```py
+['the_',
+ ', ',
+ '. ',
+ 'a_',
+ 'and_',
+ 'of_',
+ 'to_',
+ 's_',
+ 'is_',
+ 'br',
+ 'in_',
+ 'I_',
+ 'that_',
+ 'this_',
+ 'it_',
+ ' /><',
+ ' />',
+ 'was_',
+ 'The_',
+ 'as_']
+
+```
+
+Movie reviews can have different lengths. We will use the `padded_batch` method to standardize the lengths of the reviews.
+
+```py
+train_batches = train_data.shuffle(1000).padded_batch(10)
+test_batches = test_data.shuffle(1000).padded_batch(10)
+```
+
+As imported, the text of the reviews is integer-encoded (each integer represents a specific word or word-part in the vocabulary).
+
+Note the trailing zeros, because each batch is padded to its longest example.
+
+```py
+train_batch, train_labels = next(iter(train_batches))
+train_batch.numpy()
+```
+
+```py
+array([[5739,   46,  674, ...,    0,    0,    0],
+       [ 274, 2732, 1289, ...,    0,    0,    0],
+       [  19,  118,  874, ...,    0,    0,    0],
+       ...,
+       [ 324,   12,  118, ...,    0,    0,    0],
+       [  12,   31,  165, ...,    0,    0,    0],
+       [ 131,  196, 7968, ...,    0,    0,    0]])
+
+```
+
+### Create a simple model
+
+We will use the [Keras Sequential API](https://tensorflow.google.cn/guide/keras) to define the model. In this case it is a "Continuous bag of words" style model.
+
+* The Embedding layer takes the integer-encoded vocabulary and looks up the embedding vector for each word index. These vectors are learned as the model trains. The vectors add a dimension to the output array. The resulting dimensions are: `(batch, sequence, embedding)`.
+
+* Next, a GlobalAveragePooling1D layer returns a fixed-length output vector for each example by averaging over the sequence dimension. This allows the model to handle input of variable length in the simplest way possible.
+
+* This fixed-length output vector is piped through a fully-connected (Dense) layer with 16 hidden units.
+
+* The last layer is densely connected with a single output node. Passed through a sigmoid activation function, this value becomes a float between 0 and 1, representing the probability (or confidence level) that the review is positive.
+
+Caution: This model doesn't use masking, so the zero-padding is used as part of the input and the padding length may affect the output. To fix this, see the [masking and padding guide](https://tensorflow.google.cn/guide/keras/masking_and_padding).
+
+```py
+embedding_dim=16
+
+model = keras.Sequential([
+  layers.Embedding(encoder.vocab_size, embedding_dim),
+  layers.GlobalAveragePooling1D(),
+  layers.Dense(16, activation='relu'),
+  layers.Dense(1)
+])
+
+model.summary()
+```
+
+```py
+Model: "sequential"
+_________________________________________________________________
+Layer (type)                 Output Shape              Param #
+=================================================================
+embedding_1 (Embedding)      (None, None, 16)          130960
+_________________________________________________________________
+global_average_pooling1d (Gl (None, 16)                0
+_________________________________________________________________
+dense (Dense)                (None, 16)                272
+_________________________________________________________________
+dense_1 (Dense)              (None, 1)                 17
+=================================================================
+Total params: 131,249
+Trainable params: 131,249
+Non-trainable params: 0
+_________________________________________________________________
+
+```
+
+### Compile and train the model
+
+```py
+model.compile(optimizer='adam',
+              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+              metrics=['accuracy'])
+
+history = model.fit(
+    train_batches,
+    epochs=10,
+    validation_data=test_batches, validation_steps=20)
+```
+
+```py
+Epoch 1/10
+2500/2500 [==============================] - 10s 4ms/step - loss: 0.4984 - accuracy: 0.7022 - val_loss: 0.3781 - val_accuracy: 0.8550
+Epoch 2/10
+2500/2500 [==============================] - 10s 4ms/step - loss: 0.2807 - accuracy: 0.8854 - val_loss: 0.3049 - val_accuracy: 0.8600
+Epoch 3/10
+2500/2500 [==============================] - 10s 4ms/step - loss: 0.2288 - accuracy: 0.9100 - val_loss: 0.3979 - val_accuracy: 0.8550
+Epoch 4/10
+2500/2500 [==============================] - 10s 4ms/step - loss: 0.1971 - accuracy: 0.9245 - val_loss: 0.4573 - val_accuracy: 0.8500
+Epoch 5/10
+2500/2500 [==============================] - 10s 4ms/step - loss: 0.1747 - accuracy: 0.9340 - val_loss: 0.3457 - val_accuracy: 0.8550
+Epoch 6/10
+2500/2500 [==============================] - 10s 4ms/step - loss: 0.1571 - accuracy: 0.9423 - val_loss: 0.4098 - val_accuracy: 0.8550
+Epoch 7/10
+2500/2500 [==============================] - 10s 4ms/step - loss: 0.1414 - accuracy: 0.9489 - val_loss: 0.4089 - val_accuracy: 0.8550
+Epoch 8/10
+2500/2500 [==============================] - 10s 4ms/step - loss: 0.1319 - accuracy: 0.9517 - val_loss: 0.5068 - val_accuracy: 0.7900
+Epoch 9/10
+2500/2500 [==============================] - 10s 4ms/step - loss: 0.1189 - accuracy: 0.9578 - val_loss: 0.4304 - val_accuracy: 0.8500
+Epoch 10/10
+2500/2500 [==============================] - 10s 4ms/step - loss: 0.1110 - accuracy: 0.9619 - val_loss: 0.6972 - val_accuracy: 0.8250
+
+```
+
+With this approach our model reaches a validation accuracy of around 88% (note that the model is overfitting, so the training accuracy is significantly higher).
+
+```py
+import matplotlib.pyplot as plt
+
+history_dict = history.history
+
+acc = history_dict['accuracy']
+val_acc = history_dict['val_accuracy']
+loss = history_dict['loss']
+val_loss = history_dict['val_loss']
+
+epochs = range(1, len(acc) + 1)
+
+plt.figure(figsize=(12,9))
+plt.plot(epochs, loss, 'bo', label='Training loss')
+plt.plot(epochs, val_loss, 'b', label='Validation loss')
+plt.title('Training and validation loss')
+plt.xlabel('Epochs')
+plt.ylabel('Loss')
+plt.legend()
+plt.show()
+
+plt.figure(figsize=(12,9))
+plt.plot(epochs, acc, 'bo', label='Training acc')
+plt.plot(epochs, val_acc, 'b', label='Validation acc')
+plt.title('Training and validation accuracy')
+plt.xlabel('Epochs')
+plt.ylabel('Accuracy')
+plt.legend(loc='lower right')
+plt.ylim((0.5,1))
+plt.show()
+```
+
+![png](img/815371be4cdb93da43df2c0cb17bb929.png)
+
+![png](img/f9f505f9e0bb94757eb576cd0aa1c1f3.png)
+
+## Retrieve the learned embeddings
+
+Next, let's retrieve the word embeddings learned during training. This will be a matrix of shape `(vocab_size, embedding-dimension)`.
+
+```py
+e = model.layers[0]
+weights = e.get_weights()[0]
+print(weights.shape) # shape: (vocab_size, embedding_dim)
+```
+
+```py
+(8185, 16)
+
+```
+
+We will now write the weights to disk. To use the [Embedding Projector](http://projector.tensorflow.org), we will upload two files in tab-separated format: a file of vectors (containing the embeddings) and a file of metadata (containing the words).
+
+```py
+import io
+
+encoder = info.features['text'].encoder
+
+out_v = io.open('vecs.tsv', 'w', encoding='utf-8')
+out_m = io.open('meta.tsv', 'w', encoding='utf-8')
+
+for num, word in enumerate(encoder.subwords):
+  vec = weights[num+1] # skip 0, it's padding.
+  out_m.write(word + "\n")
+  out_v.write('\t'.join([str(x) for x in vec]) + "\n")
+out_v.close()
+out_m.close()
+```
+
+If you are running this tutorial in [Colaboratory](https://colab.research.google.com), you can use the following snippet to download these files to your local machine (or use the file browser: *View -> Table of contents -> File browser*).
+
+```py
+try:
+  from google.colab import files
+except ImportError:
+  pass
+else:
+  files.download('vecs.tsv')
+  files.download('meta.tsv')
+```
+
+## Visualize the embeddings
+
+To visualize the embeddings, we upload them to the Embedding Projector.
+
+Open the [Embedding Projector](http://projector.tensorflow.org/) (this can also run in a local TensorBoard instance).
+
+* Click "Load data".
+
+* Upload the two files created above: `vecs.tsv` and `meta.tsv`.
+
+The embeddings you have trained will now be displayed. You can search for words to find their closest neighbors. For example, try searching for "beautiful"; you may see neighbors like "wonderful".
+
+Note: your results may be a bit different, depending on how the weights were randomly initialized before training the embedding layer.
+
+Note: experimentally, you may be able to produce more interpretable embeddings by using a simpler model. Try deleting the `Dense(16)` layer, retraining the model, and visualizing the embeddings again.
+
+![Screenshot of the embedding projector](img/16ea92d12fa8170f3e79e4c56f9affd1.png)
+
+## Next steps
+
+This tutorial has shown you how to train and visualize word embeddings from scratch on a small dataset.
+
+* To learn about recurrent networks, see the [Keras RNN Guide](https://tensorflow.google.cn/guide/keras/rnn).
+
+* To learn more about text classification (including the overall workflow, and if you are curious about when to use embeddings vs one-hot encodings), we recommend this practical text classification [guide](https://developers.google.cn/machine-learning/guides/text-classification/step-2-5).
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/049.md b/Tensorflow/TensorFlow2.0/049.md
new file mode 100644
index 00000000..3b901e95
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/049.md
@@ -0,0 +1,368 @@
+# Text classification with an RNN
+
+> Original: [https://tensorflow.google.cn/tutorials/text/text_classification_rnn](https://tensorflow.google.cn/tutorials/text/text_classification_rnn)
+
+This text classification tutorial trains a [recurrent neural network](https://developers.google.cn/machine-learning/glossary/#recurrent_neural_network) on the [IMDB large movie review dataset](http://ai.stanford.edu/%7Eamaas/data/sentiment/) for sentiment analysis.
+
+## Setup
+
+```py
+import tensorflow_datasets as tfds
+import tensorflow as tf
+```
+
+Import `matplotlib` and create a helper function to plot graphs:
+
+```py
+import matplotlib.pyplot as plt
+
+def plot_graphs(history, metric):
+  plt.plot(history.history[metric])
+  plt.plot(history.history['val_'+metric], '')
+  plt.xlabel("Epochs")
+  plt.ylabel(metric)
+  plt.legend([metric, 'val_'+metric])
+  plt.show()
+```
+
+## Set up the input pipeline
+
+The IMDB large movie review dataset is a *binary classification* dataset: all the reviews have either a *positive* or a *negative* sentiment.
+
+Download the dataset using [TFDS](https://tensorflow.google.cn/datasets).
+
+```py
+dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True,
+                          as_supervised=True)
+train_dataset, test_dataset = dataset['train'], dataset['test']
+```
+
+```py
+WARNING:absl:TFDS datasets with text encoding are deprecated and will be removed in a future version. Instead, you should use the plain text version and tokenize the text using `tensorflow_text` (See: https://www.tensorflow.org/tutorials/tensorflow_text/intro#tfdata_example)
+
+Downloading and preparing dataset imdb_reviews/subwords8k/1.0.0 (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /home/kbuilder/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0...
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incomplete7GBYY4/imdb_reviews-train.tfrecord
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incomplete7GBYY4/imdb_reviews-test.tfrecord
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incomplete7GBYY4/imdb_reviews-unsupervised.tfrecord
+Dataset imdb_reviews downloaded and prepared to /home/kbuilder/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0\. Subsequent calls will reuse this data.
+
+```
+
+The dataset `info` includes the encoder (`tfds.features.text.SubwordTextEncoder`).
+
+```py
+encoder = info.features['text'].encoder
+```
+
+```py
+print('Vocabulary size: {}'.format(encoder.vocab_size))
+```
+
+```py
+Vocabulary size: 8185
+
+```
+
+This text encoder will reversibly encode any string, falling back to byte encoding if necessary.
+
+```py
+sample_string = 'Hello TensorFlow.'
+
+encoded_string = encoder.encode(sample_string)
+print('Encoded string is {}'.format(encoded_string))
+
+original_string = encoder.decode(encoded_string)
+print('The original string: "{}"'.format(original_string))
+```
+
+```py
+Encoded string is [4025, 222, 6307, 2327, 4043, 2120, 7975]
+The original string: "Hello TensorFlow."
+
+```
+
+```py
+assert original_string == sample_string
+```
+
+```py
+for index in encoded_string:
+  print('{} ----> {}'.format(index, encoder.decode([index])))
+```
+
+```py
+4025 ----> Hell
+222 ----> o
+6307 ----> Ten
+2327 ----> sor
+4043 ----> Fl
+2120 ----> ow
+7975 ----> .
+
+```
+
+## Prepare the data for training
+
+Next, create batches of these encoded strings. Use the `padded_batch` method to zero-pad the sequences to the length of the longest string in the batch:
+
+```py
+BUFFER_SIZE = 10000
+BATCH_SIZE = 64
+```
+
+```py
+train_dataset = train_dataset.shuffle(BUFFER_SIZE)
+train_dataset = train_dataset.padded_batch(BATCH_SIZE)
+
+test_dataset = test_dataset.padded_batch(BATCH_SIZE)
+```
+
+## Create the model
+
+Build a [`tf.keras.Sequential`](https://tensorflow.google.cn/api_docs/python/tf/keras/Sequential) model and start with an embedding layer. An embedding layer stores one vector per word. When called, it converts sequences of word indices into sequences of vectors. These vectors are trainable. After training (on enough data), words with similar meanings often have similar vectors.
+
+This index lookup is much more efficient than the equivalent operation of passing a one-hot encoded vector through a [`tf.keras.layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense) layer.
+
+A recurrent neural network (RNN) processes sequence input by iterating through the elements. RNNs pass the output from one timestep to their input on the next timestep.
+
+The [`tf.keras.layers.Bidirectional`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Bidirectional) wrapper can also be used with an RNN layer. It propagates the input forward and backwards through the RNN layer and then concatenates the outputs. This helps the RNN learn long-range dependencies.
+
+```py
+model = tf.keras.Sequential([
+    tf.keras.layers.Embedding(encoder.vocab_size, 64),
+    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
+    tf.keras.layers.Dense(64, activation='relu'),
+    tf.keras.layers.Dense(1)
+])
+```
+
+Please note that a Keras Sequential model is chosen here because all the layers in the model only have a single input and produce a single output. In case you want to use a stateful RNN layer, you might want to build your model with the Keras functional API or via model subclassing so that you can retrieve and reuse the RNN-layer states. See the [Keras RNN guide](https://tensorflow.google.cn/guide/keras/rnn#rnn_state_reuse) for more details.
+
+Compile the Keras model to configure the training process:
+
+```py
+model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+              optimizer=tf.keras.optimizers.Adam(1e-4),
+              metrics=['accuracy'])
+```
+
+## Train the model
+
+```py
+history = model.fit(train_dataset, epochs=10,
+                    validation_data=test_dataset,
+                    validation_steps=30)
+```
+
+```py
+Epoch 1/10
+391/391 [==============================] - 41s 105ms/step - loss: 0.6363 - accuracy: 0.5736 - val_loss: 0.4592 - val_accuracy: 0.8010
+Epoch 2/10
+391/391 [==============================] - 41s 105ms/step - loss: 0.3426 - accuracy: 0.8556 - val_loss: 0.3710 - val_accuracy: 0.8417
+Epoch 3/10
+391/391 [==============================] - 42s 107ms/step - loss: 0.2520 - accuracy: 0.9047 - val_loss: 0.3444 - val_accuracy: 0.8719
+Epoch 4/10
+391/391 [==============================] - 41s 105ms/step - loss: 0.2103 - accuracy: 0.9228 - val_loss: 0.3348 - val_accuracy: 0.8625
+Epoch 5/10
+391/391 [==============================] - 42s 106ms/step - loss: 0.1803 - accuracy: 0.9360 - val_loss: 0.3591 - val_accuracy: 0.8552
+Epoch 6/10
+391/391 [==============================] - 42s 106ms/step - loss: 0.1589 - accuracy: 0.9450 - val_loss: 0.4146 - val_accuracy: 0.8635
+Epoch 7/10
+391/391 [==============================] - 41s 105ms/step - loss: 0.1466 - accuracy: 0.9505 - val_loss: 0.3780 - val_accuracy: 0.8484
+Epoch 8/10
+391/391 [==============================] - 41s 106ms/step - loss: 0.1463 - accuracy: 0.9485 - val_loss: 0.4074 - val_accuracy: 0.8156
+Epoch 9/10
+391/391 [==============================] - 41s 106ms/step - loss: 0.1327 - accuracy: 0.9555 - val_loss: 0.4608 - val_accuracy: 0.8589
+Epoch 10/10
+391/391 [==============================] - 41s 105ms/step - loss: 0.1666 - accuracy: 0.9404 - val_loss: 0.4364 - val_accuracy: 0.8422
+
+```
+
+```py
+test_loss, test_acc = model.evaluate(test_dataset)
+
+print('Test Loss: {}'.format(test_loss))
+print('Test Accuracy: {}'.format(test_acc))
+```
+
+```py
+391/391 [==============================] - 17s 43ms/step - loss: 0.4305 - accuracy: 0.8477
+Test Loss: 0.43051090836524963
+Test Accuracy: 0.8476799726486206
+
+```
+
+The above model does not mask the padding applied to the sequences, which can lead to skew if the model is trained on padded sequences and tested on un-padded sequences. Ideally you would [use masking](https://tensorflow.google.cn/guide/keras/masking_and_padding) to avoid this, but as you can see below, it has only a small effect on the output.
+
+If the prediction is >= 0.5 it is positive; otherwise it is negative.
+
+```py
+def pad_to_size(vec, size):
+  zeros = [0] * (size - len(vec))
+  vec.extend(zeros)
+  return vec
+```
+
+```py
+def sample_predict(sample_pred_text, pad):
+  encoded_sample_pred_text = encoder.encode(sample_pred_text)
+
+  if pad:
+    encoded_sample_pred_text = pad_to_size(encoded_sample_pred_text, 64)
+  encoded_sample_pred_text = tf.cast(encoded_sample_pred_text, tf.float32)
+  predictions = model.predict(tf.expand_dims(encoded_sample_pred_text, 0))
+
+  return (predictions)
+```
+
+```py
+# predict on a sample text without padding.
+
+sample_pred_text = ('The movie was cool. The animation and the graphics '
+                    'were out of this world. I would recommend this movie.')
+predictions = sample_predict(sample_pred_text, pad=False)
+print(predictions)
+```
+
+```py
+[[-0.11829309]]
+
+```
+
+```py
+# predict on a sample text with padding
+
+sample_pred_text = ('The movie was cool. The animation and the graphics '
+                    'were out of this world. I would recommend this movie.')
+predictions = sample_predict(sample_pred_text, pad=True)
+print(predictions)
+```
+
+```py
+[[-1.162545]]
+
+```
+
+```py
+plot_graphs(history, 'accuracy')
+```
+
+![png](img/267bdfdd72740285a56d6dbc3f34c679.png)
+
+```py
+plot_graphs(history, 'loss')
+```
+
+![png](img/ae60ced5a9a18ef2a947912ada799ca0.png)
+
+## Stack two or more LSTM layers
+
+Keras recurrent layers have two available modes, controlled by the `return_sequences` constructor argument:
+
+* Return the full sequence of successive outputs for each timestep (a 3D tensor of shape `(batch_size, timesteps, output_features)`).
+* Return only the last output for each input sequence (a 2D tensor of shape `(batch_size, output_features)`).
+
+```py
+model = tf.keras.Sequential([
+    tf.keras.layers.Embedding(encoder.vocab_size, 64),
+    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
+    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
+    tf.keras.layers.Dense(64, activation='relu'),
+    tf.keras.layers.Dropout(0.5),
+    tf.keras.layers.Dense(1)
+])
+```
+
+```py
+model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+              optimizer=tf.keras.optimizers.Adam(1e-4),
+              metrics=['accuracy'])
+```
+
+```py
+history = model.fit(train_dataset, epochs=10,
+                    validation_data=test_dataset,
+                    validation_steps=30)
+```
+
+```py
+Epoch 1/10
+391/391 [==============================] - 75s 192ms/step - loss: 0.6484 - accuracy: 0.5630 - val_loss: 0.4876 - val_accuracy: 0.7464
+Epoch 2/10
+391/391 [==============================] - 74s 190ms/step - loss: 0.3603 - accuracy: 0.8528 - val_loss: 0.3533 - val_accuracy: 0.8490
+Epoch 3/10
+391/391 [==============================] - 75s 191ms/step - loss: 0.2666 - accuracy: 0.9018 - val_loss: 0.3393 - val_accuracy: 0.8703
+Epoch 4/10
+391/391 [==============================] - 75s 193ms/step - loss: 0.2151 - accuracy: 0.9267 - val_loss: 0.3451 - val_accuracy: 0.8604
+Epoch 5/10
+391/391 [==============================] - 76s 194ms/step - loss: 0.1806 - accuracy: 0.9422 - val_loss: 0.3687 - val_accuracy: 0.8708
+Epoch 6/10
+391/391 [==============================] - 75s 193ms/step - loss: 0.1623 - accuracy: 0.9495 - val_loss: 0.3836 - val_accuracy: 0.8594
+Epoch 7/10
+391/391 [==============================] - 76s 193ms/step - loss: 0.1382 - accuracy: 0.9598 - val_loss: 0.4173 - val_accuracy: 0.8573
+Epoch 8/10
+391/391 [==============================] - 76s 194ms/step - loss: 0.1227 - accuracy: 0.9664 - val_loss: 0.4586 - val_accuracy: 0.8542
+Epoch 9/10
+391/391 [==============================] - 76s 194ms/step - loss: 0.0997 - accuracy: 0.9749 - val_loss: 0.4939 - val_accuracy: 0.8547
+Epoch 10/10
+391/391 [==============================] - 76s 194ms/step - loss: 0.0973 - accuracy: 0.9748 - val_loss: 0.5222 - val_accuracy: 0.8526
+
+```
+
+```py
+test_loss, test_acc = model.evaluate(test_dataset)
+
+print('Test Loss: {}'.format(test_loss))
+print('Test Accuracy: {}'.format(test_acc))
+```
+
+```py
+391/391 [==============================] - 30s 78ms/step - loss: 0.5205 - accuracy: 0.8572
+Test Loss: 0.5204932689666748
+Test Accuracy: 0.857200026512146
+
+```
+
+```py
+# predict on a sample text without padding.
+
+sample_pred_text = ('The movie was not good. The animation and the graphics '
+                    'were terrible. I would not recommend this movie.')
+predictions = sample_predict(sample_pred_text, pad=False)
+print(predictions)
+```
+
+```py
+[[-2.6377363]]
+
+```
+
+```py
+# predict on a sample text with padding
+
+sample_pred_text = ('The movie was not good. The animation and the graphics '
+                    'were terrible. I would not recommend this movie.')
+predictions = sample_predict(sample_pred_text, pad=True)
+print(predictions)
+```
+
+```py
+[[-3.0502243]]
+
+```
+
+```py
+plot_graphs(history, 'accuracy')
+```
+
+![png](img/ee3ae6c62d5acf6adfea6458312bcb02.png)
+
+```py
+plot_graphs(history, 'loss')
+```
+
+![png](img/f2f53e7a4522a77ce6e821a299a77c76.png)
+
+Check out other existing recurrent layers, such as [GRU layers](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/GRU).
+
+If you are interested in building custom RNNs, see the [Keras RNN Guide](https://tensorflow.google.cn/guide/keras/rnn).
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/050.md b/Tensorflow/TensorFlow2.0/050.md
new file mode 100644
index 00000000..1c102aae
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/050.md
@@ -0,0 +1,841 @@
+# Text generation with an RNN
+
+> Original: [https://tensorflow.google.cn/tutorials/text/text_generation](https://tensorflow.google.cn/tutorials/text/text_generation)
+
+This tutorial demonstrates how to generate text using a character-based RNN. We will work with a dataset of Shakespeare's writing from Andrej Karpathy's [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/). Given a sequence of characters from this data ("Shakespear"), train a model to predict the next character in the sequence ("e"). Longer sequences of text can be generated by calling the model repeatedly.
+
+Note: enabling GPU acceleration will run this notebook faster. In Colab: *Runtime > Change runtime type > Hardware accelerator > GPU*. If running locally, make sure TensorFlow is version 1.11 or higher.
+
+This tutorial includes runnable code implemented using [tf.keras](https://tensorflow.google.cn/programmers_guide/keras) and [eager execution](https://tensorflow.google.cn/programmers_guide/eager). The following is sample output when the model in this tutorial has trained for 30 epochs and is started with the string "Q":
+
+```py
+QUEENE:
+I had thought thou hadst a Roman; for the oracle,
+Thus by All bids the man against the word,
+Which are so weak of care, by old care done;
+Your children were in your holy love,
+And the precipitation through the bleeding throne.
+
+BISHOP OF ELY:
+Marry, and will, my lord, to weep in such a one were prettiest;
+Yet now I was adopted heir
+Of the world's lamentable day,
+To watch the next way with his father with his face?
+
+ESCALUS:
+The cause why then we are all resolved more sons.
+
+VOLUMNIA:
+O, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, it is no sin it should be dead,
+And love and pale as any will to that word.
+
+QUEEN ELIZABETH:
+But how long have I heard the soul for this world,
+And show his hands of life be proved to stand.
+
+PETRUCHIO:
+I say he look'd on, if I must be content
+To stay him from the fatal of our country's bliss.
+His lordship pluck'd from this sentence then for prey,
+And then let us twain, being the moon,
+were she such a case as fills m
+
+```
+
+While some of the sentences are grammatical, most do not make sense. The model has not learned the meaning of words, but consider:
+
+* The model is character-based. When training started, the model did not know how to spell an English word, or even that words are a unit of text.
+
+* The structure of the output resembles a play: blocks of text generally begin with a speaker's name and, as in the dataset, the speaker names are in all capital letters.
+
+* As demonstrated below, the model is trained on small batches of text (100 characters each), and is still able to generate longer sequences of text with coherent structure.
+
+## Setup
+
+### Import TensorFlow and other libraries
+
+```py
+import tensorflow as tf
+
+import numpy as np
+import os
+import time
+```
+
+### Download the Shakespeare dataset
+
+Change the following line to run this code on your own data.
+
+```py
+path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
+```
+
+```py
+Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
+1122304/1115394 [==============================] - 0s 0us/step
+
+```
+
+### Read the data
+
+First, look at the text:
+
+```py
+# Read, then decode for py2 compat
+text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
+
+# The length of the text is the number of characters in it
+print ('Length of text: {} characters'.format(len(text)))
+```
+
+```py
+Length of text: 1115394 characters
+
+```
+
+```py
+# Take a look at the first 250 characters in the text
+print(text[:250])
+```
+
+```py
+First Citizen:
+Before we proceed any further, hear me speak.
+
+All:
+Speak, speak.
+
+First Citizen:
+You are all resolved rather to die than to famish?
+
+All:
+Resolved. resolved.
+
+First Citizen:
+First, you know Caius Marcius is chief enemy to the people.
+
+```
+
+```py
+# The unique characters in the text
+vocab = sorted(set(text))
+print ('{} unique characters'.format(len(vocab)))
+```
+
+```py
+65 unique characters
+
+```
+
+## Process the text
+
+### Vectorize the text
+
+Before training, we need to map the strings to a numerical representation. Create two lookup tables: one mapping characters to numbers, and another mapping numbers to characters.
+
+```py
+# Create a mapping from unique characters to indices
+char2idx = {u:i for i, u in enumerate(vocab)}
+idx2char = np.array(vocab)
+
+text_as_int = np.array([char2idx[c] for c in text])
+```
+
+Now each character has an integer representation. Note that we mapped the characters to indices from 0 to `len(unique)`.
+
+```py
+print('{')
+for char,_ in zip(char2idx, range(20)):
+  print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
+print('  ...\n}')
+```
+
+```py
+{
+  '\n':   0,
+  ' ' :   1,
+  '!' :   2,
+  '$' :   3,
+  '&' :   4,
+  "'" :   5,
+  ',' :   6,
+  '-' :   7,
+  '.' :   8,
+  '3' :   9,
+  ':' :  10,
+  ';' :  11,
+  '?' :  12,
+  'A' :  13,
+  'B' :  14,
+  'C' :  15,
+  'D' :  16,
+  'E' :  17,
+  'F' :  18,
+  'G' :  19,
+  ...
+}
+
+```
+
+```py
+# Show how the first 13 characters from the text are mapped to integers
+print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))
+```
+
+```py
+'First Citizen' ---- characters mapped to int ---- > [18 47 56 57 58 1 15 47 58 47 64 43 52]
+
+```
+
+### The prediction task
+
+Given a character, or a sequence of characters, what is the most probable next character? This is the task we are training the model to perform. The input to the model will be a sequence of characters, and we train the model to predict the output: the following character at each time step.
+
+Since RNNs maintain an internal state that depends on the previously seen elements, the question becomes: given all the characters computed so far, what is the next character?
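+
+Before the next section formalizes this split, a toy illustration of the shift-by-one framing may help. This is a sketch using plain Python strings rather than the tutorial's tensors; it reuses the "Hello" example introduced below.
+
+```py
+# Toy sketch (not part of the original tutorial): each position of the input
+# is trained to predict the character that follows it.
+toy_text = "Hello"
+toy_input, toy_target = toy_text[:-1], toy_text[1:]
+print(toy_input, '->', toy_target)  # Hell -> ello
+```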
+ +### 创建训练样本和目标 + +接下来,将文本划分为样本序列。每个输入序列包含文本中的 `seq_length` 个字符。 + +对于每个输入序列,其对应的目标包含相同长度的文本,但是向右顺移一个字符。 + +将文本拆分为长度为 `seq_length+1` 的文本块。例如,假设 `seq_length` 为 4 而且文本为 “Hello”, 那么输入序列将为 “Hell”,目标序列将为 “ello”。 + +为此,首先使用 [`tf.data.Dataset.from_tensor_slices`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#from_tensor_slices) 函数把文本向量转换为字符索引流。 + +```py +# 设定每个输入句子长度的最大值 +seq_length = 100 +examples_per_epoch = len(text)//seq_length + +# 创建训练样本 / 目标 +char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int) + +for i in char_dataset.take(5): + print(idx2char[i.numpy()]) +``` + +```py +F +i +r +s +t + +``` + +`batch` 方法使我们能轻松把单个字符转换为所需长度的序列。 + +```py +sequences = char_dataset.batch(seq_length+1, drop_remainder=True) + +for item in sequences.take(5): + print(repr(''.join(idx2char[item.numpy()]))) +``` + +```py +'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou ' +'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k' +"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki" +"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d" +'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi' + +``` + +对于每个序列,使用 `map` 方法先复制再顺移,以创建输入文本和目标文本。`map` 方法可以将一个简单的函数应用到每一个批次 (batch)。 + +```py +def split_input_target(chunk): + input_text = chunk[:-1] + target_text = chunk[1:] + return input_text, target_text + +dataset = sequences.map(split_input_target) +``` + +打印第一批样本的输入与目标值: + +```py +for input_example, target_example in dataset.take(1): + print ('Input data: ', repr(''.join(idx2char[input_example.numpy()]))) + print ('Target data:', repr(''.join(idx2char[target_example.numpy()]))) +``` + +```py +Input data: 'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou' +Target data: 'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou ' + +``` + +这些向量的每个索引均作为一个时间步来处理。作为时间步 0 的输入,模型接收到 “F” 的索引,并尝试预测 “i” 的索引为下一个字符。在下一个时间步,模型执行相同的操作,但是 `RNN` 不仅考虑当前的输入字符,还会考虑上一步的信息。 + +```py +for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])): + print("Step {:4d}".format(i)) + print(" input: {} ({:s})".format(input_idx, repr(idx2char[input_idx]))) + print(" expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx]))) +``` + +```py +Step 0 + input: 18 ('F') + expected output: 47 ('i') +Step 1 + input: 47 ('i') + expected output: 56 ('r') +Step 2 + input: 56 ('r') + expected output: 57 ('s') +Step 3 + input: 57 ('s') + expected output: 58 ('t') +Step 4 + input: 58 ('t') + expected output: 1 (' ') + +``` + +### 创建训练批次 + +前面我们使用 [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) 将文本拆分为可管理的序列。但是在把这些数据输送至模型之前,我们需要将数据重新排列 (shuffle) 并打包为批次。 + +```py +# 批大小 +BATCH_SIZE = 64 + +# 设定缓冲区大小,以重新排列数据集 +# (TF 数据被设计为可以处理可能是无限的序列, +# 所以它不会试图在内存中重新排列整个序列。相反, +# 它维持一个缓冲区,在缓冲区重新排列元素。) +BUFFER_SIZE = 10000 + +dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True) + +dataset +``` + +```py + + +``` + +## 创建模型 + +使用 [`tf.keras.Sequential`](https://tensorflow.google.cn/api_docs/python/tf/keras/Sequential) 定义模型。在这个简单的例子中,我们使用了三个层来定义模型: + +* [`tf.keras.layers.Embedding`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Embedding):输入层。一个可训练的对照表,它会将每个字符的数字映射到一个 
`embedding_dim` 维度的向量。 +* [`tf.keras.layers.GRU`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/GRU):一种 RNN 类型,其大小由 `units=rnn_units` 指定(这里你也可以使用一个 LSTM 层)。 +* [`tf.keras.layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense):输出层,带有 `vocab_size` 个输出。 + +```py +# 词集的长度 +vocab_size = len(vocab) + +# 嵌入的维度 +embedding_dim = 256 + +# RNN 的单元数量 +rnn_units = 1024 +``` + +```py +def build_model(vocab_size, embedding_dim, rnn_units, batch_size): + model = tf.keras.Sequential([ + tf.keras.layers.Embedding(vocab_size, embedding_dim, + batch_input_shape=[batch_size, None]), + tf.keras.layers.GRU(rnn_units, + return_sequences=True, + stateful=True, + recurrent_initializer='glorot_uniform'), + tf.keras.layers.Dense(vocab_size) + ]) + return model +``` + +```py +model = build_model( + vocab_size = len(vocab), + embedding_dim=embedding_dim, + rnn_units=rnn_units, + batch_size=BATCH_SIZE) +``` + +对于每个字符,模型会查找嵌入,把嵌入当作输入运行 GRU 一个时间步,并用密集层生成逻辑回归 (logits),预测下一个字符的对数可能性。 ![数据在模型中传输的示意图](img/643d654e7e1e3d928041b42363e0f099.png) + +## 试试这个模型 + +现在运行这个模型,看看它是否按预期运行。 + +首先检查输出的形状: + +```py +for input_example_batch, target_example_batch in dataset.take(1): + example_batch_predictions = model(input_example_batch) + print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)") +``` + +```py +(64, 100, 65) # (batch_size, sequence_length, vocab_size) + +``` + +在上面的例子中,输入的序列长度为 `100`, 但是这个模型可以在任何长度的输入上运行: + +```py +model.summary() +``` + +```py +Model: "sequential" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +embedding (Embedding) (64, None, 256) 16640 +_________________________________________________________________ +gru (GRU) (64, None, 1024) 3938304 +_________________________________________________________________ +dense (Dense) (64, None, 65) 66625 +================================================================= +Total params: 4,021,569 +Trainable params: 4,021,569 +Non-trainable params: 0 +_________________________________________________________________ + +``` + +为了获得模型的实际预测,我们需要从输出分布中抽样,以获得实际的字符索引。这个分布是根据对字符集的逻辑回归定义的。 + +请注意:从这个分布中 *抽样* 很重要,因为取分布的 *最大值自变量点集(argmax)* 很容易使模型卡在循环中。 + +试试这个批次中的第一个样本: + +```py +sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1) +sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy() +``` + +这使我们得到每个时间步预测的下一个字符的索引。 + +```py +sampled_indices +``` + +```py +array([ 3, 19, 11, 8, 17, 50, 14, 5, 16, 57, 51, 53, 17, 54, 9, 11, 22, + 13, 36, 57, 57, 50, 47, 22, 5, 7, 1, 59, 3, 26, 52, 2, 62, 30, + 54, 18, 62, 9, 63, 2, 22, 11, 18, 12, 63, 0, 13, 16, 38, 49, 21, + 25, 22, 53, 39, 63, 3, 26, 39, 15, 21, 56, 49, 39, 20, 55, 5, 39, + 61, 29, 21, 39, 39, 63, 48, 11, 27, 42, 59, 0, 19, 58, 57, 27, 40, + 13, 53, 13, 7, 4, 21, 32, 10, 57, 18, 30, 54, 36, 12, 3]) + +``` + +解码它们,以查看此未经训练的模型预测的文本: + +```py +print("Input: \n", repr("".join(idx2char[input_example_batch[0]]))) +print() +print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices ]))) +``` + +```py +Input: + 'e, I say! madam! sweet-heart! why, bride!\nWhat, not a word? 
you take your pennyworths now;\nSleep for' + +Next Char Predictions: + "$G;.ElB'DsmoEp3;JAXssliJ'- u$Nn!xRpFx3y!J;F?y\nADZkIMJoay$NaCIrkaHq'awQIaayj;Odu\nGtsObAoA-&IT:sFRpX?$" + +``` + +## 训练模型 + +此时,这个问题可以被视为一个标准的分类问题:给定先前的 RNN 状态和这一时间步的输入,预测下一个字符的类别。 + +### 添加优化器和损失函数 + +标准的 [`tf.keras.losses.sparse_categorical_crossentropy`](https://tensorflow.google.cn/api_docs/python/tf/keras/losses/sparse_categorical_crossentropy) 损失函数在这里适用,因为它被应用于预测的最后一个维度。 + +因为我们的模型返回逻辑回归,所以我们需要设定命令行参数 `from_logits`。 + +```py +def loss(labels, logits): + return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True) + +example_batch_loss = loss(target_example_batch, example_batch_predictions) +print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)") +print("scalar_loss: ", example_batch_loss.numpy().mean()) +``` + +```py +Prediction shape: (64, 100, 65) # (batch_size, sequence_length, vocab_size) +scalar_loss: 4.1736827 + +``` + +使用 [`tf.keras.Model.compile`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#compile) 方法配置训练步骤。我们将使用 [`tf.keras.optimizers.Adam`](https://tensorflow.google.cn/api_docs/python/tf/keras/optimizers/Adam) 并采用默认参数,以及损失函数。 + +```py +model.compile(optimizer='adam', loss=loss) +``` + +### 配置检查点 + +使用 [`tf.keras.callbacks.ModelCheckpoint`](https://tensorflow.google.cn/api_docs/python/tf/keras/callbacks/ModelCheckpoint) 来确保训练过程中保存检查点。 + +```py +# 检查点保存至的目录 +checkpoint_dir = './training_checkpoints' + +# 检查点的文件名 +checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}") + +checkpoint_callback=tf.keras.callbacks.ModelCheckpoint( + filepath=checkpoint_prefix, + save_weights_only=True) +``` + +### 执行训练 + +为保持训练时间合理,使用 10 个周期来训练模型。在 Colab 中,将运行时设置为 GPU 以加速训练。 + +```py +EPOCHS=10 +``` + +```py +history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback]) +``` + +```py +Epoch 1/10 +172/172 [==============================] - 5s 27ms/step - loss: 2.6663 +Epoch 2/10 +172/172 [==============================] - 5s 27ms/step - loss: 1.9452 +Epoch 3/10 +172/172 [==============================] - 5s 27ms/step - loss: 1.6797 +Epoch 4/10 +172/172 [==============================] - 5s 27ms/step - loss: 1.5355 +Epoch 5/10 +172/172 [==============================] - 5s 27ms/step - loss: 1.4493 +Epoch 6/10 +172/172 [==============================] - 5s 27ms/step - loss: 1.3900 +Epoch 7/10 +172/172 [==============================] - 5s 27ms/step - loss: 1.3457 +Epoch 8/10 +172/172 [==============================] - 5s 26ms/step - loss: 1.3076 +Epoch 9/10 +172/172 [==============================] - 5s 27ms/step - loss: 1.2732 +Epoch 10/10 +172/172 [==============================] - 5s 27ms/step - loss: 1.2412 + +``` + +## 生成文本 + +### 恢复最新的检查点 + +为保持此次预测步骤简单,将批大小设定为 1。 + +由于 RNN 状态从时间步传递到时间步的方式,模型建立好之后只接受固定的批大小。 + +若要使用不同的 `batch_size` 来运行模型,我们需要重建模型并从检查点中恢复权重。 + +```py +tf.train.latest_checkpoint(checkpoint_dir) +``` + +```py +'./training_checkpoints/ckpt_10' + +``` + +```py +model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1) + +model.load_weights(tf.train.latest_checkpoint(checkpoint_dir)) + +model.build(tf.TensorShape([1, None])) +``` + +```py +model.summary() +``` + +```py +Model: "sequential_1" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +embedding_1 (Embedding) (1, None, 256) 16640 +_________________________________________________________________ +gru_1 (GRU) 
(1, None, 1024) 3938304 +_________________________________________________________________ +dense_1 (Dense) (1, None, 65) 66625 +================================================================= +Total params: 4,021,569 +Trainable params: 4,021,569 +Non-trainable params: 0 +_________________________________________________________________ + +``` + +### 预测循环 + +下面的代码块生成文本: + +* 首先设置起始字符串,初始化 RNN 状态并设置要生成的字符个数。 + +* 用起始字符串和 RNN 状态,获取下一个字符的预测分布。 + +* 然后,用分类分布计算预测字符的索引。把这个预测字符当作模型的下一个输入。 + +* 模型返回的 RNN 状态被输送回模型。现在,模型有更多上下文可以学习,而非只有一个字符。在预测出下一个字符后,更改过的 RNN 状态被再次输送回模型。模型就是这样,通过不断从前面预测的字符获得更多上下文,进行学习。 + +![为生成文本,模型的输出被输送回模型作为输入](img/6ae78bb4c1ad3a2e0ade4489d4fdf706.png) + +查看生成的文本,你会发现这个模型知道什么时候使用大写字母,什么时候分段,而且模仿出了莎士比亚式的词汇。由于训练的周期小,模型尚未学会生成连贯的句子。 + +```py +def generate_text(model, start_string): + # 评估步骤(用学习过的模型生成文本) + + # 要生成的字符个数 + num_generate = 1000 + + # 将起始字符串转换为数字(向量化) + input_eval = [char2idx[s] for s in start_string] + input_eval = tf.expand_dims(input_eval, 0) + + # 空字符串用于存储结果 + text_generated = [] + + # 低温度会生成更可预测的文本 + # 较高温度会生成更令人惊讶的文本 + # 可以通过试验以找到最好的设定 + temperature = 1.0 + + # 这里批大小为 1 + model.reset_states() + for i in range(num_generate): + predictions = model(input_eval) + # 删除批次的维度 + predictions = tf.squeeze(predictions, 0) + + # 用分类分布预测模型返回的字符 + predictions = predictions / temperature + predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy() + + # 把预测字符和前面的隐藏状态一起传递给模型作为下一个输入 + input_eval = tf.expand_dims([predicted_id], 0) + + text_generated.append(idx2char[predicted_id]) + + return (start_string + ''.join(text_generated)) +``` + +```py +print(generate_text(model, start_string=u"ROMEO: ")) +``` + +```py +ROMEO: in't, Romeo rather +say, bid me not say, the adden, and you man for all. +Now, good Cart, or do held. Well, leaving her son, +Some stomacame, brother, Edommen. + +PROSPERO: +My lord Hastings, for death, +Or as believell you be accoment. + +TRANIO: +Mistraising? come, get abseng house: +The that was a life upon none of the equard sud, +Great Aufidius any joy; +For well a fool, and loveth one stay, +To whom Gare his moved me of Marcius shoulded. +Pite o'erposens to him. + +KING RICHARD II: +Come, civil and live, if wet to help and raisen fellow. + +CORIOLANUS: +Mark, here, sir. But the palace-hate will be at him in +some wondering danger, my bestilent. + +DUKE OF AUMERLE: +You, my lord? my dearly uncles for, +If't be fown'd for truth enough not him, +He talk of youngest young princely sake. + +ROMEO: +This let me have a still before the queen +First worthy angel. Would yes, by return. + +BAPTISTA: +You have dan, +Dies, renown awrifes; I'll say you. + +Provost: +And, come, make it out. 
+
+LEONTES:
+They call thee, hangions,
+Not
+
+```
+
+若想改进结果,最简单的方式是延长训练时间 (试试 `EPOCHS=30`)。
+
+你还可以试验使用不同的起始字符串,或者尝试增加另一个 RNN 层以提高模型的准确率,抑或调整温度参数,以生成随机性更强或更弱的预测。
+
+## 高级:自定义训练
+
+上面的训练步骤简单,但是能控制的地方不多。
+
+至此,你已经知道如何手动运行模型。现在,让我们打开训练循环,并自己实现它。这是一些任务的起点,例如实现 *课程学习* 以帮助稳定模型的开环输出。
+
+你将使用 [`tf.GradientTape`](https://tensorflow.google.cn/api_docs/python/tf/GradientTape) 跟踪梯度。关于此方法的更多信息请参阅 [eager execution 指南](https://tensorflow.google.cn/guide/eager)。
+
+步骤如下:
+
+* 首先,使用 [`tf.keras.Model.reset_states`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#reset_states) 方法初始化 RNN 状态。
+
+* 然后,迭代数据集(逐批次)并计算每次迭代对应的 *预测*。
+
+* 打开一个 [`tf.GradientTape`](https://tensorflow.google.cn/api_docs/python/tf/GradientTape),并在该上下文中计算预测和损失。
+
+* 使用 `tf.GradientTape.gradient` 方法,计算损失关于当前模型变量的梯度。
+
+* 最后,使用优化器的 `tf.keras.optimizers.Optimizer.apply_gradients` 方法,沿梯度方向更新模型变量。
+
+```py
+model = build_model(
+  vocab_size = len(vocab),
+  embedding_dim=embedding_dim,
+  rnn_units=rnn_units,
+  batch_size=BATCH_SIZE)
+```
+
+```py
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.iter
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_1
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.beta_2
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.decay
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer.learning_rate
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'm' for (root).layer_with_weights-0.embeddings
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'm' for (root).layer_with_weights-2.kernel
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'm' for (root).layer_with_weights-2.bias
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'm' for (root).layer_with_weights-1.cell.kernel
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'm' for (root).layer_with_weights-1.cell.recurrent_kernel
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'm' for (root).layer_with_weights-1.cell.bias
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'v' for (root).layer_with_weights-0.embeddings
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'v' for (root).layer_with_weights-2.kernel
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'v' for (root).layer_with_weights-2.bias
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'v' for (root).layer_with_weights-1.cell.kernel
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'v' for (root).layer_with_weights-1.cell.recurrent_kernel
+WARNING:tensorflow:Unresolved object in checkpoint: (root).optimizer's state 'v' for (root).layer_with_weights-1.cell.bias
+WARNING:tensorflow:A checkpoint was restored (e.g. tf.train.Checkpoint.restore or tf.keras.Model.load_weights) but not all checkpointed values were used. See above for specific issues. Use expect_partial() on the load status object, e.g. tf.train.Checkpoint.restore(...).expect_partial(), to silence these warnings, or use assert_consumed() to make the check explicit. See https://www.tensorflow.org/guide/checkpoint#loading_mechanics for details.
+ +``` + +```py +optimizer = tf.keras.optimizers.Adam() +``` + +```py +@tf.function +def train_step(inp, target): + with tf.GradientTape() as tape: + predictions = model(inp) + loss = tf.reduce_mean( + tf.keras.losses.sparse_categorical_crossentropy( + target, predictions, from_logits=True)) + grads = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(grads, model.trainable_variables)) + + return loss +``` + +```py +# 训练步骤 +EPOCHS = 10 + +for epoch in range(EPOCHS): + start = time.time() + + # 在每个训练周期开始时,初始化隐藏状态 + # 隐藏状态最初为 None + hidden = model.reset_states() + + for (batch_n, (inp, target)) in enumerate(dataset): + loss = train_step(inp, target) + + if batch_n % 100 == 0: + template = 'Epoch {} Batch {} Loss {}' + print(template.format(epoch+1, batch_n, loss)) + + # 每 5 个训练周期,保存(检查点)1 次模型 + if (epoch + 1) % 5 == 0: + model.save_weights(checkpoint_prefix.format(epoch=epoch)) + + print ('Epoch {} Loss {:.4f}'.format(epoch+1, loss)) + print ('Time taken for 1 epoch {} sec\n'.format(time.time() - start)) + +model.save_weights(checkpoint_prefix.format(epoch=epoch)) +``` + +```py +Epoch 1 Batch 0 Loss 4.173541069030762 +Epoch 1 Batch 100 Loss 2.3451342582702637 +Epoch 1 Loss 2.1603 +Time taken for 1 epoch 6.5293896198272705 sec + +Epoch 2 Batch 0 Loss 2.1137943267822266 +Epoch 2 Batch 100 Loss 1.9266924858093262 +Epoch 2 Loss 1.7417 +Time taken for 1 epoch 5.6192779541015625 sec + +Epoch 3 Batch 0 Loss 1.775771975517273 +Epoch 3 Batch 100 Loss 1.657868504524231 +Epoch 3 Loss 1.5520 +Time taken for 1 epoch 5.231291770935059 sec + +Epoch 4 Batch 0 Loss 1.543768048286438 +Epoch 4 Batch 100 Loss 1.5487240552902222 +Epoch 4 Loss 1.4920 +Time taken for 1 epoch 5.363192319869995 sec + +Epoch 5 Batch 0 Loss 1.4550749063491821 +Epoch 5 Batch 100 Loss 1.4589751958847046 +Epoch 5 Loss 1.4171 +Time taken for 1 epoch 5.297640085220337 sec + +Epoch 6 Batch 0 Loss 1.376267671585083 +Epoch 6 Batch 100 Loss 1.3637677431106567 +Epoch 6 Loss 1.3818 +Time taken for 1 epoch 5.299052476882935 sec + +Epoch 7 Batch 0 Loss 1.2916797399520874 +Epoch 7 Batch 100 Loss 1.3284915685653687 +Epoch 7 Loss 1.3983 +Time taken for 1 epoch 5.277729749679565 sec + +Epoch 8 Batch 0 Loss 1.2573177814483643 +Epoch 8 Batch 100 Loss 1.2979872226715088 +Epoch 8 Loss 1.3120 +Time taken for 1 epoch 5.250093460083008 sec + +Epoch 9 Batch 0 Loss 1.3046417236328125 +Epoch 9 Batch 100 Loss 1.2858468294143677 +Epoch 9 Loss 1.3266 +Time taken for 1 epoch 5.280868291854858 sec + +Epoch 10 Batch 0 Loss 1.1859409809112549 +Epoch 10 Batch 100 Loss 1.2690430879592896 +Epoch 10 Loss 1.2733 +Time taken for 1 epoch 5.34737491607666 sec + +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/051.md b/Tensorflow/TensorFlow2.0/051.md new file mode 100644 index 00000000..33131cb0 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/051.md @@ -0,0 +1,716 @@ +# 基于注意力的神经机器翻译 + +> 原文:[https://tensorflow.google.cn/tutorials/text/nmt_with_attention](https://tensorflow.google.cn/tutorials/text/nmt_with_attention) + +**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。 + +此笔记本训练一个将西班牙语翻译为英语的序列到序列(sequence to sequence,简写为 seq2seq)模型。此例子难度较高,需要对序列到序列模型的知识有一定了解。 + +训练完此笔记本中的模型后,你将能够输入一个西班牙语句子,例如 *"¿todavia estan en casa?"*,并返回其英语翻译 *"are you still 
at home?"* + +对于一个简单的例子来说,翻译质量令人满意。但是更有趣的可能是生成的注意力图:它显示在翻译过程中,输入句子的哪些部分受到了模型的注意。 + +![spanish-english attention plot](img/295a20785cb201af0f19ee7414550082.png) + +请注意:运行这个例子用一个 P100 GPU 需要花大约 10 分钟。 + +```py +import tensorflow as tf + +import matplotlib.pyplot as plt +import matplotlib.ticker as ticker +from sklearn.model_selection import train_test_split + +import unicodedata +import re +import numpy as np +import os +import io +import time +``` + +## 下载和准备数据集 + +我们将使用 [http://www.manythings.org/anki/](http://www.manythings.org/anki/) 提供的一个语言数据集。这个数据集包含如下格式的语言翻译对: + +```py +May I borrow this book? ¿Puedo tomar prestado este libro? +``` + +这个数据集中有很多种语言可供选择。我们将使用英语 - 西班牙语数据集。为方便使用,我们在谷歌云上提供了此数据集的一份副本。但是你也可以自己下载副本。下载完数据集后,我们将采取下列步骤准备数据: + +1. 给每个句子添加一个 *开始* 和一个 *结束* 标记(token)。 +2. 删除特殊字符以清理句子。 +3. 创建一个单词索引和一个反向单词索引(即一个从单词映射至 id 的词典和一个从 id 映射至单词的词典)。 +4. 将每个句子填充(pad)到最大长度。 + +```py +# 下载文件 +path_to_zip = tf.keras.utils.get_file( + 'spa-eng.zip', origin='http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip', + extract=True) + +path_to_file = os.path.dirname(path_to_zip)+"/spa-eng/spa.txt" +``` + +```py +Downloading data from http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip +2646016/2638744 [==============================] - 0s 0us/step + +``` + +```py +# 将 unicode 文件转换为 ascii +def unicode_to_ascii(s): + return ''.join(c for c in unicodedata.normalize('NFD', s) + if unicodedata.category(c) != 'Mn') + +def preprocess_sentence(w): + w = unicode_to_ascii(w.lower().strip()) + + # 在单词与跟在其后的标点符号之间插入一个空格 + # 例如: "he is a boy." => "he is a boy ." + # 参考:https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation + w = re.sub(r"([?.!,¿])", r" \1 ", w) + w = re.sub(r'[" "]+', " ", w) + + # 除了 (a-z, A-Z, ".", "?", "!", ","),将所有字符替换为空格 + w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w) + + w = w.rstrip().strip() + + # 给句子加上开始和结束标记 + # 以便模型知道何时开始和结束预测 + w = ' ' + w + ' ' + return w +``` + +```py +en_sentence = u"May I borrow this book?" +sp_sentence = u"¿Puedo tomar prestado este libro?" +print(preprocess_sentence(en_sentence)) +print(preprocess_sentence(sp_sentence).encode('utf-8')) +``` + +```py + may i borrow this book ? +b' \xc2\xbf puedo tomar prestado este libro ? ' + +``` + +```py +# 1\. 去除重音符号 +# 2\. 清理句子 +# 3\. 返回这样格式的单词对:[ENGLISH, SPANISH] +def create_dataset(path, num_examples): + lines = io.open(path, encoding='UTF-8').read().strip().split('\n') + + word_pairs = [[preprocess_sentence(w) for w in l.split('\t')] for l in lines[:num_examples]] + + return zip(*word_pairs) +``` + +```py +en, sp = create_dataset(path_to_file, None) +print(en[-1]) +print(sp[-1]) +``` + +```py + if you want to sound like a native speaker , you must be willing to practice saying the same sentence over and over in the same way that banjo players practice the same phrase over and over until they can play it correctly and at the desired tempo . + si quieres sonar como un hablante nativo , debes estar dispuesto a practicar diciendo la misma frase una y otra vez de la misma manera en que un musico de banjo practica el mismo fraseo una y otra vez hasta que lo puedan tocar correctamente y en el tiempo esperado . 
+ +``` + +```py +def max_length(tensor): + return max(len(t) for t in tensor) +``` + +```py +def tokenize(lang): + lang_tokenizer = tf.keras.preprocessing.text.Tokenizer( + filters='') + lang_tokenizer.fit_on_texts(lang) + + tensor = lang_tokenizer.texts_to_sequences(lang) + + tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, + padding='post') + + return tensor, lang_tokenizer +``` + +```py +def load_dataset(path, num_examples=None): + # 创建清理过的输入输出对 + targ_lang, inp_lang = create_dataset(path, num_examples) + + input_tensor, inp_lang_tokenizer = tokenize(inp_lang) + target_tensor, targ_lang_tokenizer = tokenize(targ_lang) + + return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer +``` + +### 限制数据集的大小以加快实验速度(可选) + +在超过 10 万个句子的完整数据集上训练需要很长时间。为了更快地训练,我们可以将数据集的大小限制为 3 万个句子(当然,翻译质量也会随着数据的减少而降低): + +```py +# 尝试实验不同大小的数据集 +num_examples = 30000 +input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(path_to_file, num_examples) + +# 计算目标张量的最大长度 (max_length) +max_length_targ, max_length_inp = max_length(target_tensor), max_length(input_tensor) +``` + +```py +# 采用 80 - 20 的比例切分训练集和验证集 +input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2) + +# 显示长度 +print(len(input_tensor_train), len(target_tensor_train), len(input_tensor_val), len(target_tensor_val)) +``` + +```py +24000 24000 6000 6000 + +``` + +```py +def convert(lang, tensor): + for t in tensor: + if t!=0: + print ("%d ----> %s" % (t, lang.index_word[t])) +``` + +```py +print ("Input Language; index to word mapping") +convert(inp_lang, input_tensor_train[0]) +print () +print ("Target Language; index to word mapping") +convert(targ_lang, target_tensor_train[0]) +``` + +```py +Input Language; index to word mapping +1 ----> +13 ----> la +1999 ----> belleza +7 ----> es +8096 ----> subjetiva +3 ----> . +2 ----> + +Target Language; index to word mapping +1 ----> +1148 ----> beauty +8 ----> is +4299 ----> subjective +3 ----> . 
+2 ----> + +``` + +### 创建一个 tf.data 数据集 + +```py +BUFFER_SIZE = len(input_tensor_train) +BATCH_SIZE = 64 +steps_per_epoch = len(input_tensor_train)//BATCH_SIZE +embedding_dim = 256 +units = 1024 +vocab_inp_size = len(inp_lang.word_index)+1 +vocab_tar_size = len(targ_lang.word_index)+1 + +dataset = tf.data.Dataset.from_tensor_slices((input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE) +dataset = dataset.batch(BATCH_SIZE, drop_remainder=True) +``` + +```py +example_input_batch, example_target_batch = next(iter(dataset)) +example_input_batch.shape, example_target_batch.shape +``` + +```py +(TensorShape([64, 16]), TensorShape([64, 11])) + +``` + +## 编写编码器 (encoder) 和解码器 (decoder) 模型 + +实现一个基于注意力的编码器 - 解码器模型。关于这种模型,你可以阅读 TensorFlow 的 [神经机器翻译 (序列到序列) 教程](https://github.com/tensorflow/nmt)。本示例采用一组更新的 API。此笔记本实现了上述序列到序列教程中的 [注意力方程式](https://github.com/tensorflow/nmt#background-on-the-attention-mechanism)。下图显示了注意力机制为每个输入单词分配一个权重,然后解码器将这个权重用于预测句子中的下一个单词。下图和公式是 [Luong 的论文](https://arxiv.org/abs/1508.04025v5)中注意力机制的一个例子。 + +![attention mechanism](img/b8397a070205f9293fbc989d8421eec5.png) + +输入经过编码器模型,编码器模型为我们提供形状为 *(批大小,最大长度,隐藏层大小)* 的编码器输出和形状为 *(批大小,隐藏层大小)* 的编码器隐藏层状态。 + +下面是所实现的方程式: + +![attention equation 0](img/20fad379e19d0355132a97db41137f4b.png) ![attention equation 1](img/9c9248a99f6346e02b6be5c21e5ab7be.png) + +本教程的编码器采用 [Bahdanau 注意力](https://arxiv.org/pdf/1409.0473.pdf)。在用简化形式编写之前,让我们先决定符号: + +* FC = 完全连接(密集)层 +* EO = 编码器输出 +* H = 隐藏层状态 +* X = 解码器输入 + +以及伪代码: + +* `score = FC(tanh(FC(EO) + FC(H)))` +* `attention weights = softmax(score, axis = 1)`。 Softmax 默认被应用于最后一个轴,但是这里我们想将它应用于 *第一个轴*, 因为分数 (score) 的形状是 *(批大小,最大长度,隐藏层大小)*。最大长度 (`max_length`) 是我们的输入的长度。因为我们想为每个输入分配一个权重,所以 softmax 应该用在这个轴上。 +* `context vector = sum(attention weights * EO, axis = 1)`。选择第一个轴的原因同上。 +* `embedding output` = 解码器输入 X 通过一个嵌入层。 +* `merged vector = concat(embedding output, context vector)` +* 此合并后的向量随后被传送到 GRU + +每个步骤中所有向量的形状已在代码的注释中阐明: + +```py +class Encoder(tf.keras.Model): + def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz): + super(Encoder, self).__init__() + self.batch_sz = batch_sz + self.enc_units = enc_units + self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim) + self.gru = tf.keras.layers.GRU(self.enc_units, + return_sequences=True, + return_state=True, + recurrent_initializer='glorot_uniform') + + def call(self, x, hidden): + x = self.embedding(x) + output, state = self.gru(x, initial_state = hidden) + return output, state + + def initialize_hidden_state(self): + return tf.zeros((self.batch_sz, self.enc_units)) +``` + +```py +encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE) + +# 样本输入 +sample_hidden = encoder.initialize_hidden_state() +sample_output, sample_hidden = encoder(example_input_batch, sample_hidden) +print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape)) +print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape)) +``` + +```py +Encoder output shape: (batch size, sequence length, units) (64, 16, 1024) +Encoder Hidden state shape: (batch size, units) (64, 1024) + +``` + +```py +class BahdanauAttention(tf.keras.layers.Layer): + def __init__(self, units): + super(BahdanauAttention, self).__init__() + self.W1 = tf.keras.layers.Dense(units) + self.W2 = tf.keras.layers.Dense(units) + self.V = tf.keras.layers.Dense(1) + + def call(self, query, values): + # 隐藏层的形状 == (批大小,隐藏层大小) + # hidden_with_time_axis 的形状 == (批大小,1,隐藏层大小) + # 这样做是为了执行加法以计算分数 + hidden_with_time_axis = 
tf.expand_dims(query, 1) + + # 分数的形状 == (批大小,最大长度,1) + # 我们在最后一个轴上得到 1, 因为我们把分数应用于 self.V + # 在应用 self.V 之前,张量的形状是(批大小,最大长度,单位) + score = self.V(tf.nn.tanh( + self.W1(values) + self.W2(hidden_with_time_axis))) + + # 注意力权重 (attention_weights) 的形状 == (批大小,最大长度,1) + attention_weights = tf.nn.softmax(score, axis=1) + + # 上下文向量 (context_vector) 求和之后的形状 == (批大小,隐藏层大小) + context_vector = attention_weights * values + context_vector = tf.reduce_sum(context_vector, axis=1) + + return context_vector, attention_weights +``` + +```py +attention_layer = BahdanauAttention(10) +attention_result, attention_weights = attention_layer(sample_hidden, sample_output) + +print("Attention result shape: (batch size, units) {}".format(attention_result.shape)) +print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(attention_weights.shape)) +``` + +```py +Attention result shape: (batch size, units) (64, 1024) +Attention weights shape: (batch_size, sequence_length, 1) (64, 16, 1) + +``` + +```py +class Decoder(tf.keras.Model): + def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz): + super(Decoder, self).__init__() + self.batch_sz = batch_sz + self.dec_units = dec_units + self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim) + self.gru = tf.keras.layers.GRU(self.dec_units, + return_sequences=True, + return_state=True, + recurrent_initializer='glorot_uniform') + self.fc = tf.keras.layers.Dense(vocab_size) + + # 用于注意力 + self.attention = BahdanauAttention(self.dec_units) + + def call(self, x, hidden, enc_output): + # 编码器输出 (enc_output) 的形状 == (批大小,最大长度,隐藏层大小) + context_vector, attention_weights = self.attention(hidden, enc_output) + + # x 在通过嵌入层后的形状 == (批大小,1,嵌入维度) + x = self.embedding(x) + + # x 在拼接 (concatenation) 后的形状 == (批大小,1,嵌入维度 + 隐藏层大小) + x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1) + + # 将合并后的向量传送到 GRU + output, state = self.gru(x) + + # 输出的形状 == (批大小 * 1,隐藏层大小) + output = tf.reshape(output, (-1, output.shape[2])) + + # 输出的形状 == (批大小,vocab) + x = self.fc(output) + + return x, state, attention_weights +``` + +```py +decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE) + +sample_decoder_output, _, _ = decoder(tf.random.uniform((64, 1)), + sample_hidden, sample_output) + +print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape)) +``` + +```py +Decoder output shape: (batch_size, vocab size) (64, 4935) + +``` + +## 定义优化器和损失函数 + +```py +optimizer = tf.keras.optimizers.Adam() +loss_object = tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True, reduction='none') + +def loss_function(real, pred): + mask = tf.math.logical_not(tf.math.equal(real, 0)) + loss_ = loss_object(real, pred) + + mask = tf.cast(mask, dtype=loss_.dtype) + loss_ *= mask + + return tf.reduce_mean(loss_) +``` + +## 检查点(基于对象保存) + +```py +checkpoint_dir = './training_checkpoints' +checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") +checkpoint = tf.train.Checkpoint(optimizer=optimizer, + encoder=encoder, + decoder=decoder) +``` + +## 训练 + +1. 将 *输入* 传送至 *编码器*,编码器返回 *编码器输出* 和 *编码器隐藏层状态*。 +2. 将编码器输出、编码器隐藏层状态和解码器输入(即 *开始标记*)传送至解码器。 +3. 解码器返回 *预测* 和 *解码器隐藏层状态*。 +4. 解码器隐藏层状态被传送回模型,预测被用于计算损失。 +5. 使用 *教师强制 (teacher forcing)* 决定解码器的下一个输入。 +6. *教师强制* 是将 *目标词* 作为 *下一个输入* 传送至解码器的技术。 +7. 
最后一步是计算梯度,并将其应用于优化器和反向传播。 + +```py +@tf.function +def train_step(inp, targ, enc_hidden): + loss = 0 + + with tf.GradientTape() as tape: + enc_output, enc_hidden = encoder(inp, enc_hidden) + + dec_hidden = enc_hidden + + dec_input = tf.expand_dims([targ_lang.word_index['']] * BATCH_SIZE, 1) + + # 教师强制 - 将目标词作为下一个输入 + for t in range(1, targ.shape[1]): + # 将编码器输出 (enc_output) 传送至解码器 + predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output) + + loss += loss_function(targ[:, t], predictions) + + # 使用教师强制 + dec_input = tf.expand_dims(targ[:, t], 1) + + batch_loss = (loss / int(targ.shape[1])) + + variables = encoder.trainable_variables + decoder.trainable_variables + + gradients = tape.gradient(loss, variables) + + optimizer.apply_gradients(zip(gradients, variables)) + + return batch_loss +``` + +```py +EPOCHS = 10 + +for epoch in range(EPOCHS): + start = time.time() + + enc_hidden = encoder.initialize_hidden_state() + total_loss = 0 + + for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)): + batch_loss = train_step(inp, targ, enc_hidden) + total_loss += batch_loss + + if batch % 100 == 0: + print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, + batch, + batch_loss.numpy())) + # 每 2 个周期(epoch),保存(检查点)一次模型 + if (epoch + 1) % 2 == 0: + checkpoint.save(file_prefix = checkpoint_prefix) + + print('Epoch {} Loss {:.4f}'.format(epoch + 1, + total_loss / steps_per_epoch)) + print('Time taken for 1 epoch {} sec\n'.format(time.time() - start)) +``` + +```py +Epoch 1 Batch 0 Loss 4.6508 +Epoch 1 Batch 100 Loss 2.1923 +Epoch 1 Batch 200 Loss 1.7957 +Epoch 1 Batch 300 Loss 1.7889 +Epoch 1 Loss 2.0564 +Time taken for 1 epoch 28.358328819274902 sec + +Epoch 2 Batch 0 Loss 1.5558 +Epoch 2 Batch 100 Loss 1.5256 +Epoch 2 Batch 200 Loss 1.4604 +Epoch 2 Batch 300 Loss 1.3006 +Epoch 2 Loss 1.4770 +Time taken for 1 epoch 16.062172651290894 sec + +Epoch 3 Batch 0 Loss 1.1928 +Epoch 3 Batch 100 Loss 1.1909 +Epoch 3 Batch 200 Loss 1.0559 +Epoch 3 Batch 300 Loss 0.9279 +Epoch 3 Loss 1.1305 +Time taken for 1 epoch 15.620810270309448 sec + +Epoch 4 Batch 0 Loss 0.8910 +Epoch 4 Batch 100 Loss 0.7890 +Epoch 4 Batch 200 Loss 0.8234 +Epoch 4 Batch 300 Loss 0.8448 +Epoch 4 Loss 0.8080 +Time taken for 1 epoch 15.983836889266968 sec + +Epoch 5 Batch 0 Loss 0.4728 +Epoch 5 Batch 100 Loss 0.7090 +Epoch 5 Batch 200 Loss 0.6280 +Epoch 5 Batch 300 Loss 0.5421 +Epoch 5 Loss 0.5710 +Time taken for 1 epoch 15.588238716125488 sec + +Epoch 6 Batch 0 Loss 0.4209 +Epoch 6 Batch 100 Loss 0.3995 +Epoch 6 Batch 200 Loss 0.4426 +Epoch 6 Batch 300 Loss 0.4470 +Epoch 6 Loss 0.4063 +Time taken for 1 epoch 15.882423639297485 sec + +Epoch 7 Batch 0 Loss 0.2503 +Epoch 7 Batch 100 Loss 0.3373 +Epoch 7 Batch 200 Loss 0.3342 +Epoch 7 Batch 300 Loss 0.2955 +Epoch 7 Loss 0.2938 +Time taken for 1 epoch 15.601640939712524 sec + +Epoch 8 Batch 0 Loss 0.1662 +Epoch 8 Batch 100 Loss 0.1923 +Epoch 8 Batch 200 Loss 0.2131 +Epoch 8 Batch 300 Loss 0.2464 +Epoch 8 Loss 0.2175 +Time taken for 1 epoch 15.917790412902832 sec + +Epoch 9 Batch 0 Loss 0.1450 +Epoch 9 Batch 100 Loss 0.1351 +Epoch 9 Batch 200 Loss 0.2102 +Epoch 9 Batch 300 Loss 0.2188 +Epoch 9 Loss 0.1659 +Time taken for 1 epoch 15.727098941802979 sec + +Epoch 10 Batch 0 Loss 0.0995 +Epoch 10 Batch 100 Loss 0.1190 +Epoch 10 Batch 200 Loss 0.1444 +Epoch 10 Batch 300 Loss 0.1280 +Epoch 10 Loss 0.1294 +Time taken for 1 epoch 15.857161045074463 sec + +``` + +## 翻译 + +* 评估函数类似于训练循环,不同之处在于在这里我们不使用 *教师强制*。每个时间步的解码器输入是其先前的预测、隐藏层状态和编码器输出。 +* 当模型预测 *结束标记* 时停止预测。 +* 存储 *每个时间步的注意力权重*。 
+ +请注意:对于一个输入,编码器输出仅计算一次。 + +```py +def evaluate(sentence): + attention_plot = np.zeros((max_length_targ, max_length_inp)) + + sentence = preprocess_sentence(sentence) + + inputs = [inp_lang.word_index[i] for i in sentence.split(' ')] + inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs], + maxlen=max_length_inp, + padding='post') + inputs = tf.convert_to_tensor(inputs) + + result = '' + + hidden = [tf.zeros((1, units))] + enc_out, enc_hidden = encoder(inputs, hidden) + + dec_hidden = enc_hidden + dec_input = tf.expand_dims([targ_lang.word_index['']], 0) + + for t in range(max_length_targ): + predictions, dec_hidden, attention_weights = decoder(dec_input, + dec_hidden, + enc_out) + + # 存储注意力权重以便后面制图 + attention_weights = tf.reshape(attention_weights, (-1, )) + attention_plot[t] = attention_weights.numpy() + + predicted_id = tf.argmax(predictions[0]).numpy() + + result += targ_lang.index_word[predicted_id] + ' ' + + if targ_lang.index_word[predicted_id] == '': + return result, sentence, attention_plot + + # 预测的 ID 被输送回模型 + dec_input = tf.expand_dims([predicted_id], 0) + + return result, sentence, attention_plot +``` + +```py +# 注意力权重制图函数 +def plot_attention(attention, sentence, predicted_sentence): + fig = plt.figure(figsize=(10,10)) + ax = fig.add_subplot(1, 1, 1) + ax.matshow(attention, cmap='viridis') + + fontdict = {'fontsize': 14} + + ax.set_xticklabels([''] + sentence, fontdict=fontdict, rotation=90) + ax.set_yticklabels([''] + predicted_sentence, fontdict=fontdict) + + ax.xaxis.set_major_locator(ticker.MultipleLocator(1)) + ax.yaxis.set_major_locator(ticker.MultipleLocator(1)) + + plt.show() +``` + +```py +def translate(sentence): + result, sentence, attention_plot = evaluate(sentence) + + print('Input: %s' % (sentence)) + print('Predicted translation: {}'.format(result)) + + attention_plot = attention_plot[:len(result.split(' ')), :len(sentence.split(' '))] + plot_attention(attention_plot, sentence.split(' '), result.split(' ')) +``` + +## 恢复最新的检查点并验证 + +```py +# 恢复检查点目录 (checkpoint_dir) 中最新的检查点 +checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir)) +``` + +```py + + +``` + +```py +translate(u'hace mucho frio aqui.') +``` + +```py +Input: hace mucho frio aqui . +Predicted translation: it s very cold here . + +``` + +![png](img/86f4e22b402c9e48d76da7068ace2175.png) + +```py +translate(u'esta es mi vida.') +``` + +```py +Input: esta es mi vida . +Predicted translation: this is my life . + +``` + +![png](img/5ae7b3b0f94a71db86b4168d116179ff.png) + +```py +translate(u'¿todavia estan en casa?') +``` + +```py +Input: ¿ todavia estan en casa ? +Predicted translation: are you still at home ? + +``` + +![png](img/3e8e9f9ba0ac0f802575b228ffa360c0.png) + +```py +# 错误的翻译 +translate(u'trata de averiguarlo.') +``` + +```py +Input: trata de averiguarlo . +Predicted translation: try to be coming . + +``` + +![png](img/996d41e44b9998dc439ec88b9b370cec.png) + +## 下一步 + +* [下载一个不同的数据集](http://www.manythings.org/anki/)实验翻译,例如英语到德语或者英语到法语。 +* 实验在更大的数据集上训练,或者增加训练周期。 \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/052.md b/Tensorflow/TensorFlow2.0/052.md new file mode 100644 index 00000000..2ab62756 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/052.md @@ -0,0 +1,819 @@ +# Image captioning with visual attention + +> 原文:[https://tensorflow.google.cn/tutorials/text/image_captioning](https://tensorflow.google.cn/tutorials/text/image_captioning) + + + +Given an image like the example below, our goal is to generate a caption such as "a surfer riding on a wave". 
+ +![Man Surfing](img/72fcb6a7bcc602106e2c60268d3642c5.png) + +*[Image Source](https://commons.wikimedia.org/wiki/Surfing#/media/File:Surfing_in_Hawaii.jpg); License: Public Domain* + +To accomplish this, you'll use an attention-based model, which enables us to see what parts of the image the model focuses on as it generates a caption. + +![Prediction](img/7534c154062dc8f522f01d83838f3161.png) + +The model architecture is similar to [Show, Attend and Tell: Neural Image Caption Generation with Visual Attention](https://arxiv.org/abs/1502.03044). + +This notebook is an end-to-end example. When you run the notebook, it downloads the [MS-COCO](http://cocodataset.org/#home) dataset, preprocesses and caches a subset of images using Inception V3, trains an encoder-decoder model, and generates captions on new images using the trained model. + +In this example, you will train a model on a relatively small amount of data—the first 30,000 captions for about 20,000 images (because there are multiple captions per image in the dataset). + +```py +import tensorflow as tf + +# You'll generate plots of attention in order to see which parts of an image +# our model focuses on during captioning +import matplotlib.pyplot as plt + +# Scikit-learn includes many helpful utilities +from sklearn.model_selection import train_test_split +from sklearn.utils import shuffle + +import collections +import random +import re +import numpy as np +import os +import time +import json +from glob import glob +from PIL import Image +import pickle +``` + +## Download and prepare the MS-COCO dataset + +You will use the [MS-COCO dataset](http://cocodataset.org/#home) to train our model. The dataset contains over 82,000 images, each of which has at least 5 different caption annotations. The code below downloads and extracts the dataset automatically. + +**Caution:** large download ahead**. You'll use the training set, which is a 13GB file. + +```py +# Download caption annotation files +annotation_folder = '/annotations/' +if not os.path.exists(os.path.abspath('.') + annotation_folder): + annotation_zip = tf.keras.utils.get_file('captions.zip', + cache_subdir=os.path.abspath('.'), + origin = 'http://images.cocodataset.org/annotations/annotations_trainval2014.zip', + extract = True) + annotation_file = os.path.dirname(annotation_zip)+'/annotations/captions_train2014.json' + os.remove(annotation_zip) + +# Download image files +image_folder = '/train2014/' +if not os.path.exists(os.path.abspath('.') + image_folder): + image_zip = tf.keras.utils.get_file('train2014.zip', + cache_subdir=os.path.abspath('.'), + origin = 'http://images.cocodataset.org/zips/train2014.zip', + extract = True) + PATH = os.path.dirname(image_zip) + image_folder + os.remove(image_zip) +else: + PATH = os.path.abspath('.') + image_folder +``` + +```py +Downloading data from http://images.cocodataset.org/annotations/annotations_trainval2014.zip +252878848/252872794 [==============================] - 7s 0us/step +Downloading data from http://images.cocodataset.org/zips/train2014.zip +13510574080/13510573713 [==============================] - 374s 0us/step + +``` + +## Optional: limit the size of the training set + +To speed up training for this tutorial, you'll use a subset of 30,000 captions and their corresponding images to train our model. Choosing to use more data would result in improved captioning quality. + +```py +with open(annotation_file, 'r') as f: + annotations = json.load(f) +``` + +```py +# Group all captions together having the same image ID. 
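+# (Each key is an image path; its value is the list of all captions for that
+# image, so one image can contribute several training examples.)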
+image_path_to_caption = collections.defaultdict(list)
+for val in annotations['annotations']:
+  caption = f"<start> {val['caption']} <end>"
+  image_path = PATH + 'COCO_train2014_' + '%012d.jpg' % (val['image_id'])
+  image_path_to_caption[image_path].append(caption)
+```
+
+```py
+image_paths = list(image_path_to_caption.keys())
+random.shuffle(image_paths)
+
+# Select the first 6000 image_paths from the shuffled set.
+# Approximately each image id has 5 captions associated with it, so that will
+# lead to 30,000 examples.
+train_image_paths = image_paths[:6000]
+print(len(train_image_paths))
+```
+
+```py
+6000
+
+```
+
+```py
+train_captions = []
+img_name_vector = []
+
+for image_path in train_image_paths:
+  caption_list = image_path_to_caption[image_path]
+  train_captions.extend(caption_list)
+  img_name_vector.extend([image_path] * len(caption_list))
+```
+
+```py
+print(train_captions[0])
+Image.open(img_name_vector[0])
+```
+
+```py
+<start> a woman in a blue dress is playing tennis <end>
+
+```
+
+![png](img/77a9a1e4b542e966076c493155a71253.png)
+
+## Preprocess the images using InceptionV3
+
+Next, you will use InceptionV3 (which is pretrained on Imagenet) to extract features from each image, taking the output of the last convolutional layer.
+
+First, you will convert the images into InceptionV3's expected format by:
+
+* Resizing the image to 299px by 299px
+* [Preprocessing the images](https://cloud.google.com/tpu/docs/inception-v3-advanced#preprocessing_stage) using the [preprocess_input](https://tensorflow.google.cn/api_docs/python/tf/keras/applications/inception_v3/preprocess_input) method to normalize the image so that it contains pixels in the range of -1 to 1, which matches the format of the images used to train InceptionV3.
+
+```py
+def load_image(image_path):
+  img = tf.io.read_file(image_path)
+  img = tf.image.decode_jpeg(img, channels=3)
+  img = tf.image.resize(img, (299, 299))
+  img = tf.keras.applications.inception_v3.preprocess_input(img)
+  return img, image_path
+```
+
+## Initialize InceptionV3 and load the pretrained Imagenet weights
+
+Now you'll create a tf.keras model where the output layer is the last convolutional layer in the InceptionV3 architecture. The shape of the output of this layer is `8x8x2048`. You use the last convolutional layer because you are using attention in this example. You don't perform this initialization during training because it could become a bottleneck.
+
+* You forward each image through the network and store the resulting vector in a dictionary (image_name --> feature_vector).
+* After all the images are passed through the network, you save the features to disk (one `.npy` file per image).
+
+```py
+image_model = tf.keras.applications.InceptionV3(include_top=False,
+                                                weights='imagenet')
+new_input = image_model.input
+hidden_layer = image_model.layers[-1].output
+
+image_features_extract_model = tf.keras.Model(new_input, hidden_layer)
+```
+
+```py
+Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
+87916544/87910968 [==============================] - 1s 0us/step
+
+```
+
+## Caching the features extracted from InceptionV3
+
+You will pre-process each image with InceptionV3 and cache the output to disk. Caching the output in RAM would be faster but also memory intensive, requiring 8 * 8 * 2048 floats per image. At the time of writing, this exceeds the memory limitations of Colab (currently 12GB of memory).
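+
+As a rough back-of-the-envelope check of that estimate (a sketch only: float32 features and the roughly 82,000 images of the full training set are assumed):
+
+```py
+# Rough RAM estimate for holding every extracted feature map in memory.
+# Assumes float32 (4 bytes) and ~82,000 images; illustrative numbers only.
+floats_per_image = 8 * 8 * 2048               # 131,072 floats per image
+mib_per_image = floats_per_image * 4 / 2**20  # ~0.5 MiB per image
+total_gib = mib_per_image * 82000 / 1024      # ~40 GiB for the full set
+print('%.2f MiB/image, ~%.0f GiB total' % (mib_per_image, total_gib))
+```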
+
+Performance could be improved with a more sophisticated caching strategy (for example, by sharding the images to reduce random access disk I/O), but that would require more code.
+
+The caching will take about 10 minutes to run in Colab with a GPU. If you'd like to see a progress bar, you can:
+
+1. Install [tqdm](https://github.com/tqdm/tqdm):
+
+    `!pip install -q tqdm`
+
+2. Import tqdm:
+
+    `from tqdm import tqdm`
+
+3. Change the following line:
+
+    `for img, path in image_dataset:`
+
+    to:
+
+    `for img, path in tqdm(image_dataset):`
+
+```py
+# Get unique images
+encode_train = sorted(set(img_name_vector))
+
+# Feel free to change batch_size according to your system configuration
+image_dataset = tf.data.Dataset.from_tensor_slices(encode_train)
+image_dataset = image_dataset.map(
+  load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(16)
+
+for img, path in image_dataset:
+  batch_features = image_features_extract_model(img)
+  batch_features = tf.reshape(batch_features,
+                              (batch_features.shape[0], -1, batch_features.shape[3]))
+
+  for bf, p in zip(batch_features, path):
+    path_of_feature = p.numpy().decode("utf-8")
+    np.save(path_of_feature, bf.numpy())
+```
+
+## Preprocess and tokenize the captions
+
+* First, you'll tokenize the captions (for example, by splitting on spaces). This gives us a vocabulary of all of the unique words in the data (for example, "surfing", "football", and so on).
+* Next, you'll limit the vocabulary size to the top 5,000 words (to save memory). You'll replace all other words with the token "<unk>" (unknown).
+* You then create word-to-index and index-to-word mappings.
+* Finally, you pad all sequences to be the same length as the longest one.
+
+```py
+# Find the maximum length of any caption in our dataset
+def calc_max_length(tensor):
+  return max(len(t) for t in tensor)
+```
+
+```py
+# Choose the top 5000 words from the vocabulary
+top_k = 5000
+tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=top_k,
+                                                  oov_token="<unk>",
+                                                  filters='!"#$%&()*+.,-/:;=?@[\]^_`{|}~ ')
+tokenizer.fit_on_texts(train_captions)
+train_seqs = tokenizer.texts_to_sequences(train_captions)
+```
+
+```py
+tokenizer.word_index['<pad>'] = 0
+tokenizer.index_word[0] = '<pad>'
+```
+
+```py
+# Create the tokenized vectors
+train_seqs = tokenizer.texts_to_sequences(train_captions)
+```
+
+```py
+# Pad each vector to the max_length of the captions
+# If you do not provide a max_length value, pad_sequences calculates it automatically
+cap_vector = tf.keras.preprocessing.sequence.pad_sequences(train_seqs, padding='post')
+```
+
+```py
+# Calculates the max_length, which is used to store the attention weights
+max_length = calc_max_length(train_seqs)
+```
+
+## Split the data into training and testing
+
+```py
+img_to_cap_vector = collections.defaultdict(list)
+for img, cap in zip(img_name_vector, cap_vector):
+  img_to_cap_vector[img].append(cap)
+
+# Create training and validation sets using an 80-20 split randomly.
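+# (The split is over image keys rather than individual captions, so all captions
+# for a given image end up on the same side and cannot leak across the split.)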
+img_keys = list(img_to_cap_vector.keys()) +random.shuffle(img_keys) + +slice_index = int(len(img_keys)*0.8) +img_name_train_keys, img_name_val_keys = img_keys[:slice_index], img_keys[slice_index:] + +img_name_train = [] +cap_train = [] +for imgt in img_name_train_keys: + capt_len = len(img_to_cap_vector[imgt]) + img_name_train.extend([imgt] * capt_len) + cap_train.extend(img_to_cap_vector[imgt]) + +img_name_val = [] +cap_val = [] +for imgv in img_name_val_keys: + capv_len = len(img_to_cap_vector[imgv]) + img_name_val.extend([imgv] * capv_len) + cap_val.extend(img_to_cap_vector[imgv]) +``` + +```py +len(img_name_train), len(cap_train), len(img_name_val), len(cap_val) +``` + +```py +(24009, 24009, 6001, 6001) + +``` + +## Create a tf.data dataset for training + +Our images and captions are ready! Next, let's create a tf.data dataset to use for training our model. + +```py +# Feel free to change these parameters according to your system's configuration + +BATCH_SIZE = 64 +BUFFER_SIZE = 1000 +embedding_dim = 256 +units = 512 +vocab_size = top_k + 1 +num_steps = len(img_name_train) // BATCH_SIZE +# Shape of the vector extracted from InceptionV3 is (64, 2048) +# These two variables represent that vector shape +features_shape = 2048 +attention_features_shape = 64 +``` + +```py +# Load the numpy files +def map_func(img_name, cap): + img_tensor = np.load(img_name.decode('utf-8')+'.npy') + return img_tensor, cap +``` + +```py +dataset = tf.data.Dataset.from_tensor_slices((img_name_train, cap_train)) + +# Use map to load the numpy files in parallel +dataset = dataset.map(lambda item1, item2: tf.numpy_function( + map_func, [item1, item2], [tf.float32, tf.int32]), + num_parallel_calls=tf.data.experimental.AUTOTUNE) + +# Shuffle and batch +dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE) +dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) +``` + +## Model + +Fun fact: the decoder below is identical to the one in the example for [Neural Machine Translation with Attention](https://tensorflow.google.cn/tutorials/sequences/nmt_with_attention). + +The model architecture is inspired by the [Show, Attend and Tell](https://arxiv.org/pdf/1502.03044.pdf) paper. + +* In this example, you extract the features from the lower convolutional layer of InceptionV3 giving us a vector of shape (8, 8, 2048). +* You squash that to a shape of (64, 2048). +* This vector is then passed through the CNN Encoder (which consists of a single Fully connected layer). +* The RNN (here GRU) attends over the image to predict the next word. + +```py +class BahdanauAttention(tf.keras.Model): + def __init__(self, units): + super(BahdanauAttention, self).__init__() + self.W1 = tf.keras.layers.Dense(units) + self.W2 = tf.keras.layers.Dense(units) + self.V = tf.keras.layers.Dense(1) + + def call(self, features, hidden): + # features(CNN_encoder output) shape == (batch_size, 64, embedding_dim) + + # hidden shape == (batch_size, hidden_size) + # hidden_with_time_axis shape == (batch_size, 1, hidden_size) + hidden_with_time_axis = tf.expand_dims(hidden, 1) + + # attention_hidden_layer shape == (batch_size, 64, units) + attention_hidden_layer = (tf.nn.tanh(self.W1(features) + + self.W2(hidden_with_time_axis))) + + # score shape == (batch_size, 64, 1) + # This gives you an unnormalized score for each image feature. 
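+    # (self.V maps the `units`-dim vector at each of the 64 locations to a
+    # single logit; the softmax below normalizes these over axis=1, i.e.
+    # across locations rather than across the vocabulary.)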
+ score = self.V(attention_hidden_layer) + + # attention_weights shape == (batch_size, 64, 1) + attention_weights = tf.nn.softmax(score, axis=1) + + # context_vector shape after sum == (batch_size, hidden_size) + context_vector = attention_weights * features + context_vector = tf.reduce_sum(context_vector, axis=1) + + return context_vector, attention_weights +``` + +```py +class CNN_Encoder(tf.keras.Model): + # Since you have already extracted the features and dumped it using pickle + # This encoder passes those features through a Fully connected layer + def __init__(self, embedding_dim): + super(CNN_Encoder, self).__init__() + # shape after fc == (batch_size, 64, embedding_dim) + self.fc = tf.keras.layers.Dense(embedding_dim) + + def call(self, x): + x = self.fc(x) + x = tf.nn.relu(x) + return x +``` + +```py +class RNN_Decoder(tf.keras.Model): + def __init__(self, embedding_dim, units, vocab_size): + super(RNN_Decoder, self).__init__() + self.units = units + + self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim) + self.gru = tf.keras.layers.GRU(self.units, + return_sequences=True, + return_state=True, + recurrent_initializer='glorot_uniform') + self.fc1 = tf.keras.layers.Dense(self.units) + self.fc2 = tf.keras.layers.Dense(vocab_size) + + self.attention = BahdanauAttention(self.units) + + def call(self, x, features, hidden): + # defining attention as a separate model + context_vector, attention_weights = self.attention(features, hidden) + + # x shape after passing through embedding == (batch_size, 1, embedding_dim) + x = self.embedding(x) + + # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size) + x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1) + + # passing the concatenated vector to the GRU + output, state = self.gru(x) + + # shape == (batch_size, max_length, hidden_size) + x = self.fc1(output) + + # x shape == (batch_size * max_length, hidden_size) + x = tf.reshape(x, (-1, x.shape[2])) + + # output shape == (batch_size * max_length, vocab) + x = self.fc2(x) + + return x, state, attention_weights + + def reset_state(self, batch_size): + return tf.zeros((batch_size, self.units)) +``` + +```py +encoder = CNN_Encoder(embedding_dim) +decoder = RNN_Decoder(embedding_dim, units, vocab_size) +``` + +```py +optimizer = tf.keras.optimizers.Adam() +loss_object = tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True, reduction='none') + +def loss_function(real, pred): + mask = tf.math.logical_not(tf.math.equal(real, 0)) + loss_ = loss_object(real, pred) + + mask = tf.cast(mask, dtype=loss_.dtype) + loss_ *= mask + + return tf.reduce_mean(loss_) +``` + +## Checkpoint + +```py +checkpoint_path = "./checkpoints/train" +ckpt = tf.train.Checkpoint(encoder=encoder, + decoder=decoder, + optimizer = optimizer) +ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5) +``` + +```py +start_epoch = 0 +if ckpt_manager.latest_checkpoint: + start_epoch = int(ckpt_manager.latest_checkpoint.split('-')[-1]) + # restoring the latest checkpoint in checkpoint_path + ckpt.restore(ckpt_manager.latest_checkpoint) +``` + +## Training + +* You extract the features stored in the respective `.npy` files and then pass those features through the encoder. +* The encoder output, hidden state(initialized to 0) and the decoder input (which is the start token) is passed to the decoder. +* The decoder returns the predictions and the decoder hidden state. 
+* The decoder hidden state is then passed back into the model and the predictions are used to calculate the loss. +* Use teacher forcing to decide the next input to the decoder. +* Teacher forcing is the technique where the target word is passed as the next input to the decoder. +* The final step is to calculate the gradients and apply it to the optimizer and backpropagate. + +```py +# adding this in a separate cell because if you run the training cell +# many times, the loss_plot array will be reset +loss_plot = [] +``` + +```py +@tf.function +def train_step(img_tensor, target): + loss = 0 + + # initializing the hidden state for each batch + # because the captions are not related from image to image + hidden = decoder.reset_state(batch_size=target.shape[0]) + + dec_input = tf.expand_dims([tokenizer.word_index['']] * target.shape[0], 1) + + with tf.GradientTape() as tape: + features = encoder(img_tensor) + + for i in range(1, target.shape[1]): + # passing the features through the decoder + predictions, hidden, _ = decoder(dec_input, features, hidden) + + loss += loss_function(target[:, i], predictions) + + # using teacher forcing + dec_input = tf.expand_dims(target[:, i], 1) + + total_loss = (loss / int(target.shape[1])) + + trainable_variables = encoder.trainable_variables + decoder.trainable_variables + + gradients = tape.gradient(loss, trainable_variables) + + optimizer.apply_gradients(zip(gradients, trainable_variables)) + + return loss, total_loss +``` + +```py +EPOCHS = 20 + +for epoch in range(start_epoch, EPOCHS): + start = time.time() + total_loss = 0 + + for (batch, (img_tensor, target)) in enumerate(dataset): + batch_loss, t_loss = train_step(img_tensor, target) + total_loss += t_loss + + if batch % 100 == 0: + print ('Epoch {} Batch {} Loss {:.4f}'.format( + epoch + 1, batch, batch_loss.numpy() / int(target.shape[1]))) + # storing the epoch end loss value to plot later + loss_plot.append(total_loss / num_steps) + + if epoch % 5 == 0: + ckpt_manager.save() + + print ('Epoch {} Loss {:.6f}'.format(epoch + 1, + total_loss/num_steps)) + print ('Time taken for 1 epoch {} sec\n'.format(time.time() - start)) +``` + +```py +Epoch 1 Batch 0 Loss 2.0618 +Epoch 1 Batch 100 Loss 1.1516 +Epoch 1 Batch 200 Loss 0.9201 +Epoch 1 Batch 300 Loss 0.8922 +Epoch 1 Loss 1.040854 +Time taken for 1 epoch 100.07987594604492 sec + +Epoch 2 Batch 0 Loss 0.8678 +Epoch 2 Batch 100 Loss 0.8257 +Epoch 2 Batch 200 Loss 0.8268 +Epoch 2 Batch 300 Loss 0.7109 +Epoch 2 Loss 0.786627 +Time taken for 1 epoch 36.52699089050293 sec + +Epoch 3 Batch 0 Loss 0.7747 +Epoch 3 Batch 100 Loss 0.7220 +Epoch 3 Batch 200 Loss 0.7071 +Epoch 3 Batch 300 Loss 0.7065 +Epoch 3 Loss 0.708941 +Time taken for 1 epoch 36.67209577560425 sec + +Epoch 4 Batch 0 Loss 0.7542 +Epoch 4 Batch 100 Loss 0.6422 +Epoch 4 Batch 200 Loss 0.6024 +Epoch 4 Batch 300 Loss 0.7107 +Epoch 4 Loss 0.657265 +Time taken for 1 epoch 36.70520520210266 sec + +Epoch 5 Batch 0 Loss 0.6684 +Epoch 5 Batch 100 Loss 0.6549 +Epoch 5 Batch 200 Loss 0.6364 +Epoch 5 Batch 300 Loss 0.6250 +Epoch 5 Loss 0.616459 +Time taken for 1 epoch 36.51219129562378 sec + +Epoch 6 Batch 0 Loss 0.6531 +Epoch 6 Batch 100 Loss 0.5622 +Epoch 6 Batch 200 Loss 0.5688 +Epoch 6 Batch 300 Loss 0.6302 +Epoch 6 Loss 0.581336 +Time taken for 1 epoch 37.36966156959534 sec + +Epoch 7 Batch 0 Loss 0.5335 +Epoch 7 Batch 100 Loss 0.5362 +Epoch 7 Batch 200 Loss 0.5960 +Epoch 7 Batch 300 Loss 0.5382 +Epoch 7 Loss 0.558110 +Time taken for 1 epoch 36.8504319190979 sec + +Epoch 8 Batch 0 Loss 0.5242 +Epoch 8 
Batch 100 Loss 0.5142 +Epoch 8 Batch 200 Loss 0.5458 +Epoch 8 Batch 300 Loss 0.4814 +Epoch 8 Loss 0.523847 +Time taken for 1 epoch 36.90491819381714 sec + +Epoch 9 Batch 0 Loss 0.5318 +Epoch 9 Batch 100 Loss 0.4869 +Epoch 9 Batch 200 Loss 0.4791 +Epoch 9 Batch 300 Loss 0.4719 +Epoch 9 Loss 0.496363 +Time taken for 1 epoch 36.52782845497131 sec + +Epoch 10 Batch 0 Loss 0.4707 +Epoch 10 Batch 100 Loss 0.4642 +Epoch 10 Batch 200 Loss 0.4685 +Epoch 10 Batch 300 Loss 0.4659 +Epoch 10 Loss 0.470341 +Time taken for 1 epoch 36.24022054672241 sec + +Epoch 11 Batch 0 Loss 0.4530 +Epoch 11 Batch 100 Loss 0.4947 +Epoch 11 Batch 200 Loss 0.4457 +Epoch 11 Batch 300 Loss 0.4617 +Epoch 11 Loss 0.447154 +Time taken for 1 epoch 36.481024980545044 sec + +Epoch 12 Batch 0 Loss 0.4359 +Epoch 12 Batch 100 Loss 0.4257 +Epoch 12 Batch 200 Loss 0.4124 +Epoch 12 Batch 300 Loss 0.4302 +Epoch 12 Loss 0.424052 +Time taken for 1 epoch 37.11701226234436 sec + +Epoch 13 Batch 0 Loss 0.4531 +Epoch 13 Batch 100 Loss 0.4064 +Epoch 13 Batch 200 Loss 0.3677 +Epoch 13 Batch 300 Loss 0.3942 +Epoch 13 Loss 0.402709 +Time taken for 1 epoch 36.868356466293335 sec + +Epoch 14 Batch 0 Loss 0.3967 +Epoch 14 Batch 100 Loss 0.3455 +Epoch 14 Batch 200 Loss 0.3742 +Epoch 14 Batch 300 Loss 0.3905 +Epoch 14 Loss 0.382572 +Time taken for 1 epoch 36.95557117462158 sec + +Epoch 15 Batch 0 Loss 0.3754 +Epoch 15 Batch 100 Loss 0.3721 +Epoch 15 Batch 200 Loss 0.3633 +Epoch 15 Batch 300 Loss 0.3830 +Epoch 15 Loss 0.364831 +Time taken for 1 epoch 36.37884545326233 sec + +Epoch 16 Batch 0 Loss 0.3873 +Epoch 16 Batch 100 Loss 0.3499 +Epoch 16 Batch 200 Loss 0.3437 +Epoch 16 Batch 300 Loss 0.3232 +Epoch 16 Loss 0.346227 +Time taken for 1 epoch 36.44292426109314 sec + +Epoch 17 Batch 0 Loss 0.3250 +Epoch 17 Batch 100 Loss 0.3218 +Epoch 17 Batch 200 Loss 0.3703 +Epoch 17 Batch 300 Loss 0.2849 +Epoch 17 Loss 0.328413 +Time taken for 1 epoch 36.11301136016846 sec + +Epoch 18 Batch 0 Loss 0.3032 +Epoch 18 Batch 100 Loss 0.3321 +Epoch 18 Batch 200 Loss 0.3112 +Epoch 18 Batch 300 Loss 0.3129 +Epoch 18 Loss 0.315071 +Time taken for 1 epoch 36.2520546913147 sec + +Epoch 19 Batch 0 Loss 0.3005 +Epoch 19 Batch 100 Loss 0.3190 +Epoch 19 Batch 200 Loss 0.3243 +Epoch 19 Batch 300 Loss 0.2861 +Epoch 19 Loss 0.301502 +Time taken for 1 epoch 36.188610553741455 sec + +Epoch 20 Batch 0 Loss 0.3263 +Epoch 20 Batch 100 Loss 0.3182 +Epoch 20 Batch 200 Loss 0.2885 +Epoch 20 Batch 300 Loss 0.2923 +Epoch 20 Loss 0.285932 +Time taken for 1 epoch 36.192723989486694 sec + +``` + +```py +plt.plot(loss_plot) +plt.xlabel('Epochs') +plt.ylabel('Loss') +plt.title('Loss Plot') +plt.show() +``` + +![png](img/f40a6da0d8471d4b9b979d456cb09d0d.png) + +## Caption! + +* The evaluate function is similar to the training loop, except you don't use teacher forcing here. The input to the decoder at each time step is its previous predictions along with the hidden state and the encoder output. +* Stop predicting when the model predicts the end token. +* And store the attention weights for every time step. 
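+
+The `evaluate` function below draws each next token with `tf.random.categorical`, so repeated runs can produce different captions. A deterministic greedy alternative is `tf.argmax`; here is a minimal, self-contained comparison (illustrative only, not part of the original notebook):
+
+```py
+# Toy logits over a 5-token vocabulary, shape (1, vocab_size).
+toy_predictions = tf.constant([[0.1, 2.3, 0.7, 1.1, 0.2]])
+
+sampled_id = tf.random.categorical(toy_predictions, 1)[0][0].numpy()  # varies from run to run
+greedy_id = tf.argmax(toy_predictions, axis=-1)[0].numpy()            # always 1 here
+print(sampled_id, greedy_id)
+```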
+
+```py
+def evaluate(image):
+    attention_plot = np.zeros((max_length, attention_features_shape))
+
+    hidden = decoder.reset_state(batch_size=1)
+
+    temp_input = tf.expand_dims(load_image(image)[0], 0)
+    img_tensor_val = image_features_extract_model(temp_input)
+    img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))
+
+    features = encoder(img_tensor_val)
+
+    dec_input = tf.expand_dims([tokenizer.word_index['<start>']], 0)
+    result = []
+
+    for i in range(max_length):
+        predictions, hidden, attention_weights = decoder(dec_input, features, hidden)
+
+        attention_plot[i] = tf.reshape(attention_weights, (-1, )).numpy()
+
+        predicted_id = tf.random.categorical(predictions, 1)[0][0].numpy()
+        result.append(tokenizer.index_word[predicted_id])
+
+        if tokenizer.index_word[predicted_id] == '<end>':
+            return result, attention_plot
+
+        dec_input = tf.expand_dims([predicted_id], 0)
+
+    attention_plot = attention_plot[:len(result), :]
+    return result, attention_plot
+```
+
+```py
+def plot_attention(image, result, attention_plot):
+    temp_image = np.array(Image.open(image))
+
+    fig = plt.figure(figsize=(10, 10))
+
+    len_result = len(result)
+    for l in range(len_result):
+        temp_att = np.resize(attention_plot[l], (8, 8))
+        ax = fig.add_subplot(len_result//2, len_result//2, l+1)
+        ax.set_title(result[l])
+        img = ax.imshow(temp_image)
+        ax.imshow(temp_att, cmap='gray', alpha=0.6, extent=img.get_extent())
+
+    plt.tight_layout()
+    plt.show()
+```
+
+```py
+# captions on the validation set
+rid = np.random.randint(0, len(img_name_val))
+image = img_name_val[rid]
+real_caption = ' '.join([tokenizer.index_word[i] for i in cap_val[rid] if i not in [0]])
+result, attention_plot = evaluate(image)
+
+print ('Real Caption:', real_caption)
+print ('Prediction Caption:', ' '.join(result))
+plot_attention(image, result, attention_plot)
+```
+
+```py
+Real Caption: a clock is on display on the surface of a building
+Prediction Caption: a metal wall with a brick in the middle is that has some brown wall that looks out the ground
+
+```
+
+![png](img/9cada0d075f4e1a104766ddd3754aba4.png)
+
+## Try it on your own images
+
+For fun, below we've provided a method you can use to caption your own images with the model we've just trained. Keep in mind that it was trained on a relatively small amount of data, and your images may be different from the training data (so be prepared for weird results!).
+
+```py
+image_url = 'https://tensorflow.org/images/surf.jpg'
+image_extension = image_url[-4:]
+image_path = tf.keras.utils.get_file('image'+image_extension,
+                                     origin=image_url)
+
+result, attention_plot = evaluate(image_path)
+print ('Prediction Caption:', ' '.join(result))
+plot_attention(image_path, result, attention_plot)
+# opening the image
+Image.open(image_path)
+```
+
+```py
+Downloading data from https://tensorflow.org/images/surf.jpg
+65536/64400 [==============================] - 0s 2us/step
+Prediction Caption: a kid in their best to fall
+
+```
+
+![png](img/e3e3424830f874b566c07a0e86696a13.png)
+
+![png](img/17877a5940e1f7245c707d3ecf9783e3.png)
+
+# Next steps
+
+Congrats! You've just trained an image captioning model with attention. Next, take a look at this example, [Neural Machine Translation with Attention](https://tensorflow.google.cn/tutorials/sequences/nmt_with_attention), which uses a similar architecture to translate between Spanish and English sentences. You can also experiment with training the code in this notebook on a different dataset.
\ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/053.md b/Tensorflow/TensorFlow2.0/053.md new file mode 100644 index 00000000..c49b1a16 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/053.md @@ -0,0 +1,1591 @@
+# Transformer model for language understanding
+
+> Original: [https://tensorflow.google.cn/tutorials/text/transformer](https://tensorflow.google.cn/tutorials/text/transformer)
+
+**Note:** The TensorFlow community translated these documents. Since community translations are best-effort, there is no guarantee that they are accurate or reflect the latest [official English documentation](https://tensorflow.google.cn/?hl=en). If you have suggestions to improve this translation, please submit a pull request to the [tensorflow/docs](https://github.com/tensorflow/docs) GitHub repository. To volunteer to write or review translations, please join the [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn).
+
+This tutorial trains a [Transformer model](https://arxiv.org/abs/1706.03762) to translate Portuguese to English. This is an advanced example that assumes knowledge of [text generation](/tutorials/text/text_generation) and [attention](/tutorials/text/nmt_with_attention).
+
+The core idea behind the Transformer model is *self-attention*: the ability to attend to different positions of the input sequence to compute a representation of that sequence. The Transformer creates stacks of self-attention layers, explained below in the sections *Scaled dot product attention* and *Multi-head attention*.
+
+A transformer model handles variable-sized input using stacks of self-attention layers instead of [RNNs](/tutorials/text/text_classification_rnn) or [CNNs](https://tensorflow.google.cn/tutorials/images/intro_to_cnns). This general architecture has a number of advantages:
+
+* It makes no assumptions about the temporal/spatial relationships across the data. This is ideal for processing a set of objects (for example, [StarCraft units](https://deepmind.com/blog/alphastar-mastering-real-time-strategy-game-starcraft-ii/#block-8)).
+* Layer outputs can be calculated in parallel, instead of as a series like an RNN.
+* Distant items can affect each other's output without passing through many RNN steps or convolution layers (see the [Scene Memory Transformer](https://arxiv.org/pdf/1903.03878.pdf), for example).
+* It can learn long-range dependencies. This is a challenge in many sequence tasks.
+
+The downsides of this architecture are:
+
+* For a time series, the output for a time step is calculated from the *entire history* instead of only the inputs and current hidden state. This *may* be less efficient.
+* If the input *does* have a temporal/spatial relationship, like text, some positional encoding must be added, or the model will effectively see a bag of words.
+
+After training the model in this notebook, you will be able to input a Portuguese sentence and get back the English translation.
+
+![Attention heatmap](img/f8876684e2b6e5576c9f4dc1029bb237.png)
+
+```py
+import tensorflow_datasets as tfds
+import tensorflow as tf
+
+import time
+import numpy as np
+import matplotlib.pyplot as plt
+```
+
+## Setup input pipeline
+
+Use [TFDS](https://tensorflow.google.cn/datasets) to load the [Portuguese-English translation dataset](https://github.com/neulab/word-embeddings-for-nmt) from the [TED Talks Open Translation Project](https://www.ted.com/participate/translate).
+
+This dataset contains approximately 50000 training examples, 1100 validation examples, and 2000 test examples.
+
+```py
+examples, metadata = tfds.load('ted_hrlr_translate/pt_to_en', with_info=True,
+                               as_supervised=True)
+train_examples, val_examples = examples['train'], examples['validation']
+```
+
+```py
+Downloading and preparing dataset ted_hrlr_translate/pt_to_en/1.0.0 (download: 124.94 MiB, generated: Unknown size, total: 124.94 MiB) to /home/kbuilder/tensorflow_datasets/ted_hrlr_translate/pt_to_en/1.0.0...
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/ted_hrlr_translate/pt_to_en/1.0.0.incomplete3YLR59/ted_hrlr_translate-train.tfrecord
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/ted_hrlr_translate/pt_to_en/1.0.0.incomplete3YLR59/ted_hrlr_translate-validation.tfrecord
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/ted_hrlr_translate/pt_to_en/1.0.0.incomplete3YLR59/ted_hrlr_translate-test.tfrecord
+Dataset ted_hrlr_translate downloaded and prepared to /home/kbuilder/tensorflow_datasets/ted_hrlr_translate/pt_to_en/1.0.0\. Subsequent calls will reuse this data.
+
+```
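+
+To sanity-check what was loaded, you can peek at one raw (Portuguese, English) pair. This cell is an illustrative addition, not part of the original notebook:
+
+```py
+# Each element of the dataset is a (pt, en) pair of string tensors.
+for pt, en in train_examples.take(1):
+    print(pt.numpy().decode('utf-8'))
+    print(en.numpy().decode('utf-8'))
+```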
+
+Create a custom subwords tokenizer from the training dataset.
+
+```py
+tokenizer_en = tfds.features.text.SubwordTextEncoder.build_from_corpus(
+    (en.numpy() for pt, en in train_examples), target_vocab_size=2**13)
+
+tokenizer_pt = tfds.features.text.SubwordTextEncoder.build_from_corpus(
+    (pt.numpy() for pt, en in train_examples), target_vocab_size=2**13)
+```
+
+```py
+sample_string = 'Transformer is awesome.'
+
+tokenized_string = tokenizer_en.encode(sample_string)
+print ('Tokenized string is {}'.format(tokenized_string))
+
+original_string = tokenizer_en.decode(tokenized_string)
+print ('The original string: {}'.format(original_string))
+
+assert original_string == sample_string
+```
+
+```py
+Tokenized string is [7915, 1248, 7946, 7194, 13, 2799, 7877]
+The original string: Transformer is awesome.
+
+```
+
+The tokenizer encodes the string by breaking it into subwords if the word is not in its dictionary.
+
+```py
+for ts in tokenized_string:
+    print ('{} ----> {}'.format(ts, tokenizer_en.decode([ts])))
+```
+
+```py
+7915 ----> T
+1248 ----> ran
+7946 ----> s
+7194 ----> former
+13 ----> is
+2799 ----> awesome
+7877 ----> .
+
+```
+
+```py
+BUFFER_SIZE = 20000
+BATCH_SIZE = 64
+```
+
+Add a start and end token to the input and target.
+
+```py
+def encode(lang1, lang2):
+    lang1 = [tokenizer_pt.vocab_size] + tokenizer_pt.encode(
+        lang1.numpy()) + [tokenizer_pt.vocab_size+1]
+
+    lang2 = [tokenizer_en.vocab_size] + tokenizer_en.encode(
+        lang2.numpy()) + [tokenizer_en.vocab_size+1]
+
+    return lang1, lang2
+```
+
+Note: To keep this example small and relatively fast, drop examples with a length of more than 40 tokens.
+
+```py
+MAX_LENGTH = 40
+```
+
+```py
+def filter_max_length(x, y, max_length=MAX_LENGTH):
+    return tf.logical_and(tf.size(x) <= max_length,
+                          tf.size(y) <= max_length)
+```
+
+Operations inside `.map()` run in graph mode and receive a graph tensor that does not have a `numpy` attribute. The `tokenizer` expects a string or Unicode symbol to encode into integers. Hence, you need to run the encoding inside a [`tf.py_function`](https://tensorflow.google.cn/api_docs/python/tf/py_function), which receives an eager tensor having a numpy attribute that contains the string value.
+
+```py
+def tf_encode(pt, en):
+    result_pt, result_en = tf.py_function(encode, [pt, en], [tf.int64, tf.int64])
+    result_pt.set_shape([None])
+    result_en.set_shape([None])
+
+    return result_pt, result_en
+```
+
+```py
+train_dataset = train_examples.map(tf_encode)
+train_dataset = train_dataset.filter(filter_max_length)
+# cache the dataset to memory to get a speedup while reading from it
+train_dataset = train_dataset.cache()
+train_dataset = train_dataset.shuffle(BUFFER_SIZE).padded_batch(BATCH_SIZE)
+train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)
+
+val_dataset = val_examples.map(tf_encode)
+val_dataset = val_dataset.filter(filter_max_length).padded_batch(BATCH_SIZE)
+```
+
+```py
+pt_batch, en_batch = next(iter(val_dataset))
+pt_batch, en_batch
+```
+
+```py
+(<tf.Tensor: shape=(64, ...), dtype=int64, numpy=...>,
+ <tf.Tensor: shape=(64, ...), dtype=int64, numpy=...>)
+
+```
+
+## Positional encoding
+
+Since this model doesn't contain any recurrence or convolution, positional encoding is added to give the model some information about the relative position of the words in the sentence.
+
+The positional encoding vector is added to the embedding vector. Embeddings represent a token in a d-dimensional space, where tokens with similar meaning are closer to each other. But the embeddings do not encode the relative position of words in a sentence. So after adding the positional encoding, words will be closer to each other based on *the similarity of their meaning and their position in the sentence*, in the d-dimensional space.
+
+See the notebook on [positional encoding](https://github.com/tensorflow/examples/blob/master/community/en/position_encoding.ipynb) to learn more about it. The formula for calculating the positional encoding is as follows:
+
+$$\Large{PE_{(pos, 2i)} = \sin(pos / 10000^{2i / d_{model}})}$$
+
+$$\Large{PE_{(pos, 2i+1)} = \cos(pos / 10000^{2i / d_{model}})}$$
+
+```py
+def get_angles(pos, i, d_model):
+    angle_rates = 1 / np.power(10000, (2 * (i//2)) / np.float32(d_model))
+    return pos * angle_rates
+```
+
+```py
+def positional_encoding(position, d_model):
+    angle_rads = get_angles(np.arange(position)[:, np.newaxis],
+                            np.arange(d_model)[np.newaxis, :],
+                            d_model)
+
+    # apply sin to even indices in the array; 2i
+    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
+
+    # apply cos to odd indices in the array; 2i+1
+    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
+
+    pos_encoding = angle_rads[np.newaxis, ...]
+
+    return tf.cast(pos_encoding, dtype=tf.float32)
+```
+
+```py
+pos_encoding = positional_encoding(50, 512)
+print (pos_encoding.shape)
+
+plt.pcolormesh(pos_encoding[0], cmap='RdBu')
+plt.xlabel('Depth')
+plt.xlim((0, 512))
+plt.ylabel('Position')
+plt.colorbar()
+plt.show()
+```
+
+```py
+(1, 50, 512)
+
+```
+
+![png](img/0dda76c01237658213cec93698233a22.png)
+
+## Masking
+
+Mask all the pad tokens in the batch of sequences. This ensures that the model does not treat padding as input. The mask indicates where the pad value `0` is present: it outputs a `1` at those locations, and a `0` otherwise.
+
+```py
+def create_padding_mask(seq):
+    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
+
+    # add extra dimensions to add the padding
+    # to the attention logits.
+    return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)
+```
+
+```py
+x = tf.constant([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]])
+create_padding_mask(x)
+```
+
+```py
+<tf.Tensor: shape=(3, 1, 1, 5), dtype=float32, numpy=
+array([[[[0., 0., 1., 1., 0.]]],
+       [[[0., 0., 0., 1., 1.]]],
+       [[[1., 1., 1., 0., 0.]]]], dtype=float32)>
+
+```
+
+The look-ahead mask is used to mask the future tokens in a sequence. In other words, the mask indicates which entries should not be used.
+
+This means that to predict the third word, only the first and second word will be used. Similarly, to predict the fourth word, only the first, second and third word will be used, and so on.
+
+```py
+def create_look_ahead_mask(size):
+    mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
+    return mask  # (seq_len, seq_len)
+```
+
+```py
+x = tf.random.uniform((1, 3))
+temp = create_look_ahead_mask(x.shape[1])
+temp
+```
+
+```py
+<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
+array([[0., 1., 1.],
+       [0., 0., 1.],
+       [0., 0., 0.]], dtype=float32)>
+
+```
+
+## Scaled dot product attention
+
+![scaled_dot_product_attention](img/0eda1b45396cd1c02a76bd76397b9a76.png)
+
+The attention function used by the transformer takes three inputs: Q (query), K (key), V (value). The equation used to calculate the attention weights is:
+
+$$\Large{Attention(Q, K, V) = softmax_k(\frac{QK^T}{\sqrt{d_k}}) V}$$
+
+The dot-product attention is scaled by a factor of the square root of the depth. This is done because for large values of depth, the dot product grows large in magnitude, pushing the softmax function into regions where it has very small gradients and producing a very hard softmax.
+
+For example, consider that `Q` and `K` have a mean of 0 and a variance of 1. Their matrix multiplication will have a mean of 0 and a variance of `dk`. Hence, the *square root of `dk`* is used for scaling (and not any other number), because the matmul of `Q` and `K` should then again have a mean of 0 and a variance of 1, which yields a gentler softmax.
+
+The mask is multiplied by -1e9 (close to negative infinity). This is done because the mask is summed with the scaled matrix multiplication of Q and K, and is applied immediately before a softmax. The goal is to zero these cells out, since large negative inputs to softmax end up near zero in the output.
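+
+A quick numeric check of the variance claim above (an illustrative sketch, not part of the original notebook): the standard deviation of the raw dot products grows like the square root of the depth, and dividing by it brings it back to roughly 1.
+
+```py
+dk = 512
+q = tf.random.normal((1000, dk))  # mean 0, variance 1
+k = tf.random.normal((1000, dk))
+qk = tf.matmul(q, k, transpose_b=True)
+print(tf.math.reduce_std(qk).numpy())                       # roughly sqrt(512), about 22.6
+print(tf.math.reduce_std(qk / tf.sqrt(float(dk))).numpy())  # roughly 1.0
+```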
+
+```py
+def scaled_dot_product_attention(q, k, v, mask):
+    """Calculate the attention weights.
+    q, k, v must have matching leading dimensions.
+    k, v must have a matching penultimate dimension, i.e.: seq_len_k = seq_len_v.
+    The mask has different shapes depending on its type (padding or look ahead)
+    but it must be broadcastable for addition.
+
+    Args:
+      q: query shape == (..., seq_len_q, depth)
+      k: key shape == (..., seq_len_k, depth)
+      v: value shape == (..., seq_len_v, depth_v)
+      mask: Float tensor with shape broadcastable
+            to (..., seq_len_q, seq_len_k). Defaults to None.
+
+    Returns:
+      output, attention_weights
+    """
+
+    matmul_qk = tf.matmul(q, k, transpose_b=True)  # (..., seq_len_q, seq_len_k)
+
+    # scale matmul_qk
+    dk = tf.cast(tf.shape(k)[-1], tf.float32)
+    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)
+
+    # add the mask to the scaled tensor.
+    if mask is not None:
+        scaled_attention_logits += (mask * -1e9)
+
+    # softmax is normalized on the last axis (seq_len_k) so that the scores
+    # add up to 1.
+    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)  # (..., seq_len_q, seq_len_k)
+
+    output = tf.matmul(attention_weights, v)  # (..., seq_len_q, depth_v)
+
+    return output, attention_weights
+```
+
+As the softmax normalization is done along K, its values decide how much importance is given to each position for a given Q.
+
+The output represents the multiplication of the attention weights and the V (value) vector. This ensures that the words you want to focus on are kept as-is and the irrelevant words are flushed out.
+
+```py
+def print_out(q, k, v):
+    temp_out, temp_attn = scaled_dot_product_attention(
+        q, k, v, None)
+    print ('Attention weights are:')
+    print (temp_attn)
+    print ('Output is:')
+    print (temp_out)
+```
+
+```py
+np.set_printoptions(suppress=True)
+
+temp_k = tf.constant([[10,0,0],
+                      [0,10,0],
+                      [0,0,10],
+                      [0,0,10]], dtype=tf.float32)  # (4, 3)
+
+temp_v = tf.constant([[   1,0],
+                      [  10,0],
+                      [ 100,5],
+                      [1000,6]], dtype=tf.float32)  # (4, 2)
+
+# This `query` aligns with the second `key`,
+# so the second `value` is returned.
+temp_q = tf.constant([[0, 10, 0]], dtype=tf.float32)  # (1, 3)
+print_out(temp_q, temp_k, temp_v)
+```
+
+```py
+Attention weights are:
+tf.Tensor([[0\. 1\. 0\. 0.]], shape=(1, 4), dtype=float32)
+Output is:
+tf.Tensor([[10\. 0.]], shape=(1, 2), dtype=float32)
+
+```
+
+```py
+# This query aligns with a repeated key (the third and fourth),
+# so all associated values get averaged.
+temp_q = tf.constant([[0, 0, 10]], dtype=tf.float32)  # (1, 3)
+print_out(temp_q, temp_k, temp_v)
+```
+
+```py
+Attention weights are:
+tf.Tensor([[0\. 0\. 0.5 0.5]], shape=(1, 4), dtype=float32)
+Output is:
+tf.Tensor([[550\. 5.5]], shape=(1, 2), dtype=float32)
+
+```
+
+```py
+# This query aligns equally with the first and second key,
+# so their values get averaged.
+temp_q = tf.constant([[10, 10, 0]], dtype=tf.float32)  # (1, 3)
+print_out(temp_q, temp_k, temp_v)
+```
+
+```py
+Attention weights are:
+tf.Tensor([[0.5 0.5 0\. 0\. ]], shape=(1, 4), dtype=float32)
+Output is:
+tf.Tensor([[5.5 0\. ]], shape=(1, 2), dtype=float32)
+
+```
+
+Pass all the queries together.
+
+```py
+temp_q = tf.constant([[0, 0, 10], [0, 10, 0], [10, 10, 0]], dtype=tf.float32)  # (3, 3)
+print_out(temp_q, temp_k, temp_v)
+```
+
+```py
+Attention weights are:
+tf.Tensor(
+[[0\. 0\. 0.5 0.5]
+ [0\. 1\. 0\. 0\. ]
+ [0.5 0.5 0\. 0\. ]], shape=(3, 4), dtype=float32)
+Output is:
+tf.Tensor(
+[[550\. 5.5]
+ [ 10\. 0\. ]
+ [ 5.5 0\. ]], shape=(3, 2), dtype=float32)
+
+```
+
+## Multi-head attention
+
+![multi-head attention](img/2f1f2fc54135afd798139d45c013ef1f.png)
+
+Multi-head attention consists of four parts:
+
+* Linear layers, split into heads.
+* Scaled dot-product attention.
+* Concatenation of heads.
+* A final linear layer.
+
+Each multi-head attention block gets three inputs: Q (query), K (key), V (value). These are put through linear (Dense) layers and split up into multiple heads.
+
+The `scaled_dot_product_attention` defined above is applied to each head (broadcasted for efficiency). An appropriate mask must be used in the attention step. The attention output for each head is then concatenated (using [`tf.transpose`](https://tensorflow.google.cn/api_docs/python/tf/transpose) and [`tf.reshape`](https://tensorflow.google.cn/api_docs/python/tf/reshape)) and put through a final `Dense` layer.
+
+Instead of one single attention head, Q, K, and V are split into multiple heads because this allows the model to jointly attend to information at different positions from different representational spaces. After the split, each head has a reduced dimensionality, so the total computation cost is the same as a single attention head with full dimensionality.
+
+```py
+class MultiHeadAttention(tf.keras.layers.Layer):
+    def __init__(self, d_model, num_heads):
+        super(MultiHeadAttention, self).__init__()
+        self.num_heads = num_heads
+        self.d_model = d_model
+
+        assert d_model % self.num_heads == 0
+
+        self.depth = d_model // self.num_heads
+
+        self.wq = tf.keras.layers.Dense(d_model)
+        self.wk = tf.keras.layers.Dense(d_model)
+        self.wv = tf.keras.layers.Dense(d_model)
+
+        self.dense = tf.keras.layers.Dense(d_model)
+
+    def split_heads(self, x, batch_size):
+        """Split the last dimension into (num_heads, depth).
+        Transpose the result so the shape is (batch_size, num_heads, seq_len, depth).
+        """
+        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
+        return tf.transpose(x, perm=[0, 2, 1, 3])
+
+    def call(self, v, k, q, mask):
+        batch_size = tf.shape(q)[0]
+
+        q = self.wq(q)  # (batch_size, seq_len, d_model)
+        k = self.wk(k)  # (batch_size, seq_len, d_model)
+        v = self.wv(v)  # (batch_size, seq_len, d_model)
+
+        q = self.split_heads(q, batch_size)  # (batch_size, num_heads, seq_len_q, depth)
+        k = self.split_heads(k, batch_size)  # (batch_size, num_heads, seq_len_k, depth)
+        v = self.split_heads(v, batch_size)  # (batch_size, num_heads, seq_len_v, depth)
+
+        # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth)
+        # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k)
+        scaled_attention, attention_weights = scaled_dot_product_attention(
+            q, k, v, mask)
+
+        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])  # (batch_size, seq_len_q, num_heads, depth)
+
+        concat_attention = tf.reshape(scaled_attention,
+                                      (batch_size, -1, self.d_model))  # (batch_size, seq_len_q, d_model)
+
+        output = self.dense(concat_attention)  # (batch_size, seq_len_q, d_model)
+
+        return output, attention_weights
+```
+
+Create a `MultiHeadAttention` layer to try out. At each location in the sequence, `y`, the `MultiHeadAttention` runs all 8 attention heads across all other locations in the sequence, returning a new vector of the same length at each location.
+
+```py
+temp_mha = MultiHeadAttention(d_model=512, num_heads=8)
+y = tf.random.uniform((1, 60, 512))  # (batch_size, encoder_sequence, d_model)
+out, attn = temp_mha(y, k=y, q=y, mask=None)
+out.shape, attn.shape
+```
+
+```py
+(TensorShape([1, 60, 512]), TensorShape([1, 8, 60, 60]))
+
+```
+
+## Point wise feed forward network
+
+The point wise feed forward network consists of two fully-connected layers with a ReLU activation in between.
+
+```py
+def point_wise_feed_forward_network(d_model, dff):
+    return tf.keras.Sequential([
+        tf.keras.layers.Dense(dff, activation='relu'),  # (batch_size, seq_len, dff)
+        tf.keras.layers.Dense(d_model)  # (batch_size, seq_len, d_model)
+    ])
+```
+
+```py
+sample_ffn = point_wise_feed_forward_network(512, 2048)
+sample_ffn(tf.random.uniform((64, 50, 512))).shape
+```
+
+```py
+TensorShape([64, 50, 512])
+
+```
+
+## Encoder and decoder
+
+![transformer](img/9c9dba05c6503363ec77df2a3d25a70b.png)
+
+The transformer model follows the same general pattern as a standard [sequence to sequence with attention model](/tutorials/text/nmt_with_attention).
+
+* The input sentence is passed through `N` encoder layers that generate an output for each word/token in the sequence.
+* The decoder attends to the encoder's output and its own input (self-attention) to predict the next word.
+
+### Encoder layer
+
+Each encoder layer consists of the following sublayers:
+
+1. Multi-head attention (with a padding mask)
+2. Point wise feed forward networks
+
+Each of these sublayers has a residual connection around it, followed by a layer normalization. Residual connections help avoid the vanishing gradient problem in deep networks.
+
+The output of each sublayer is `LayerNorm(x + Sublayer(x))`, as implemented in the `EncoderLayer` below. The normalization is done on the `d_model` (last) axis. There are N encoder layers in the transformer.
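+
+As a standalone illustration of that wiring (a hypothetical helper, not code from this tutorial), each sublayer application reduces to:
+
+```py
+def residual_norm(x, sublayer_out, norm, dropout, training):
+    # LayerNorm(x + Sublayer(x)), with dropout applied to the sublayer output
+    return norm(x + dropout(sublayer_out, training=training))
+```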
+
+```py
+class EncoderLayer(tf.keras.layers.Layer):
+    def __init__(self, d_model, num_heads, dff, rate=0.1):
+        super(EncoderLayer, self).__init__()
+
+        self.mha = MultiHeadAttention(d_model, num_heads)
+        self.ffn = point_wise_feed_forward_network(d_model, dff)
+
+        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+
+        self.dropout1 = tf.keras.layers.Dropout(rate)
+        self.dropout2 = tf.keras.layers.Dropout(rate)
+
+    def call(self, x, training, mask):
+
+        attn_output, _ = self.mha(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
+        attn_output = self.dropout1(attn_output, training=training)
+        out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)
+
+        ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
+        ffn_output = self.dropout2(ffn_output, training=training)
+        out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)
+
+        return out2
+```
+
+```py
+sample_encoder_layer = EncoderLayer(512, 8, 2048)
+
+sample_encoder_layer_output = sample_encoder_layer(
+    tf.random.uniform((64, 43, 512)), False, None)
+
+sample_encoder_layer_output.shape  # (batch_size, input_seq_len, d_model)
+```
+
+```py
+TensorShape([64, 43, 512])
+
+```
+
+### Decoder layer
+
+Each decoder layer consists of the following sublayers:
+
+1. Masked multi-head attention (with a look-ahead mask and a padding mask)
+2. Multi-head attention (with a padding mask). V (value) and K (key) receive the *encoder output* as inputs. Q (query) receives the *output from the masked multi-head attention sublayer*.
+3. Point wise feed forward networks
+
+Each of these sublayers has a residual connection around it, followed by a layer normalization. The output of each sublayer is `LayerNorm(x + Sublayer(x))`. The normalization is done on the `d_model` (last) axis.
+
+There are N decoder layers in the transformer.
+
+As Q receives the output from the decoder's first attention block and K receives the encoder output, the attention weights represent the importance given to the decoder's input based on the encoder's output. In other words, the decoder predicts the next word by looking at the encoder output and self-attending to its own output. See the demonstration above in the scaled dot product attention section.
+
+```py
+class DecoderLayer(tf.keras.layers.Layer):
+    def __init__(self, d_model, num_heads, dff, rate=0.1):
+        super(DecoderLayer, self).__init__()
+
+        self.mha1 = MultiHeadAttention(d_model, num_heads)
+        self.mha2 = MultiHeadAttention(d_model, num_heads)
+
+        self.ffn = point_wise_feed_forward_network(d_model, dff)
+
+        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+        self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
+
+        self.dropout1 = tf.keras.layers.Dropout(rate)
+        self.dropout2 = tf.keras.layers.Dropout(rate)
+        self.dropout3 = tf.keras.layers.Dropout(rate)
+
+    def call(self, x, enc_output, training,
+             look_ahead_mask, padding_mask):
+        # enc_output.shape == (batch_size, input_seq_len, d_model)
+
+        attn1, attn_weights_block1 = self.mha1(x, x, x, look_ahead_mask)  # (batch_size, target_seq_len, d_model)
+        attn1 = self.dropout1(attn1, training=training)
+        out1 = self.layernorm1(attn1 + x)
+
+        attn2, attn_weights_block2 = self.mha2(
+            enc_output, enc_output, out1, padding_mask)  # (batch_size, target_seq_len, d_model)
+        attn2 = self.dropout2(attn2, training=training)
+        out2 = self.layernorm2(attn2 + out1)  # (batch_size, target_seq_len, d_model)
+
+        ffn_output = self.ffn(out2)  # (batch_size, target_seq_len, d_model)
+        ffn_output = self.dropout3(ffn_output, training=training)
+        out3 = self.layernorm3(ffn_output + out2)  # (batch_size, target_seq_len, d_model)
+
+        return out3, attn_weights_block1, attn_weights_block2
+```
+
+```py
+sample_decoder_layer = DecoderLayer(512, 8, 2048)
+
+sample_decoder_layer_output, _, _ = sample_decoder_layer(
+    tf.random.uniform((64, 50, 512)), sample_encoder_layer_output,
+    False, None, None)
+
+sample_decoder_layer_output.shape  # (batch_size, target_seq_len, d_model)
+```
+
+```py
+TensorShape([64, 50, 512])
+
+```
+
+### Encoder
+
+The `Encoder` consists of:
+
+1. Input Embedding
+2. Positional Encoding
+3. N encoder layers
+
+The input is put through an embedding, which is summed with the positional encoding. The output of this summation is the input to the encoder layers. The output of the encoder is the input to the decoder.
+
+```py
+class Encoder(tf.keras.layers.Layer):
+    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
+                 maximum_position_encoding, rate=0.1):
+        super(Encoder, self).__init__()
+
+        self.d_model = d_model
+        self.num_layers = num_layers
+
+        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)
+        self.pos_encoding = positional_encoding(maximum_position_encoding,
+                                                self.d_model)
+
+        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)
+                           for _ in range(num_layers)]
+
+        self.dropout = tf.keras.layers.Dropout(rate)
+
+    def call(self, x, training, mask):
+
+        seq_len = tf.shape(x)[1]
+
+        # adding the embedding and the position encoding.
+        x = self.embedding(x)  # (batch_size, input_seq_len, d_model)
+        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
+        x += self.pos_encoding[:, :seq_len, :]
+
+        x = self.dropout(x, training=training)
+
+        for i in range(self.num_layers):
+            x = self.enc_layers[i](x, training, mask)
+
+        return x  # (batch_size, input_seq_len, d_model)
+```
+
+```py
+sample_encoder = Encoder(num_layers=2, d_model=512, num_heads=8,
+                         dff=2048, input_vocab_size=8500,
+                         maximum_position_encoding=10000)
+
+sample_encoder_output = sample_encoder(tf.random.uniform((64, 62)),
+                                       training=False, mask=None)
+
+print (sample_encoder_output.shape)  # (batch_size, input_seq_len, d_model)
+```
+
+```py
+(64, 62, 512)
+
+```
+
+### Decoder
+
+The `Decoder` consists of:
+
+1. Output Embedding
+2. Positional Encoding
+3. N decoder layers
+
+The target is put through an embedding, which is summed with the positional encoding. The output of this summation is the input to the decoder layers. The output of the decoder is the input to the final linear layer.
+
+```py
+class Decoder(tf.keras.layers.Layer):
+    def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size,
+                 maximum_position_encoding, rate=0.1):
+        super(Decoder, self).__init__()
+
+        self.d_model = d_model
+        self.num_layers = num_layers
+
+        self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)
+        self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)
+
+        self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate)
+                           for _ in range(num_layers)]
+        self.dropout = tf.keras.layers.Dropout(rate)
+
+    def call(self, x, enc_output, training,
+             look_ahead_mask, padding_mask):
+
+        seq_len = tf.shape(x)[1]
+        attention_weights = {}
+
+        x = self.embedding(x)  # (batch_size, target_seq_len, d_model)
+        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
+        x += self.pos_encoding[:, :seq_len, :]
+
+        x = self.dropout(x, training=training)
+
+        for i in range(self.num_layers):
+            x, block1, block2 = self.dec_layers[i](x, enc_output, training,
+                                                   look_ahead_mask, padding_mask)
+
+            attention_weights['decoder_layer{}_block1'.format(i+1)] = block1
+            attention_weights['decoder_layer{}_block2'.format(i+1)] = block2
+
+        # x.shape == (batch_size, target_seq_len, d_model)
+        return x, attention_weights
+```
+
+```py
+sample_decoder = Decoder(num_layers=2, d_model=512, num_heads=8,
+                         dff=2048, target_vocab_size=8000,
+                         maximum_position_encoding=5000)
+
+output, attn = sample_decoder(tf.random.uniform((64, 26)),
+                              enc_output=sample_encoder_output,
+                              training=False, look_ahead_mask=None,
+                              padding_mask=None)
+
+output.shape, attn['decoder_layer2_block2'].shape
+```
+
+```py
+(TensorShape([64, 26, 512]), TensorShape([64, 8, 26, 62]))
+
+```
+
+## Create the Transformer
+
+The Transformer consists of the encoder, the decoder, and a final linear layer. The output of the decoder is the input to the linear layer, and the linear layer's output is returned.
+
+```py
+class Transformer(tf.keras.Model):
+    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,
+                 target_vocab_size, pe_input, pe_target, rate=0.1):
+        super(Transformer, self).__init__()
+
+        self.encoder = Encoder(num_layers, d_model, num_heads, dff,
+                               input_vocab_size, pe_input, rate)
+
+        self.decoder = Decoder(num_layers, d_model, num_heads, dff,
+                               target_vocab_size, pe_target, rate)
+
+        self.final_layer = tf.keras.layers.Dense(target_vocab_size)
+
+    def call(self, inp, tar, training, enc_padding_mask,
+             look_ahead_mask, dec_padding_mask):
+
+        enc_output = self.encoder(inp, training, enc_padding_mask)  # (batch_size, inp_seq_len, d_model)
+
+        # dec_output.shape == (batch_size, tar_seq_len, d_model)
+        dec_output, attention_weights = self.decoder(
+            tar, enc_output, training, look_ahead_mask, dec_padding_mask)
+
+        final_output = self.final_layer(dec_output)  # (batch_size, tar_seq_len, target_vocab_size)
+
+        return final_output, attention_weights
+```
+
+```py
+sample_transformer = Transformer(
+    num_layers=2, d_model=512, num_heads=8, dff=2048,
+    input_vocab_size=8500, target_vocab_size=8000,
+    pe_input=10000, pe_target=6000)
+
+temp_input = tf.random.uniform((64, 62))
+temp_target = tf.random.uniform((64, 26))
+
+fn_out, _ = sample_transformer(temp_input, temp_target, training=False,
+                               enc_padding_mask=None,
+                               look_ahead_mask=None,
+                               dec_padding_mask=None)
+
+fn_out.shape  # (batch_size, tar_seq_len, target_vocab_size)
+```
+
+```py
+TensorShape([64, 26, 8000])
+
+```
+
+## Set hyperparameters
+
+To keep this example small and relatively fast, the values for *num_layers, d_model, and dff* have been reduced.
+
+The values used in the base model of the transformer were *num_layers=6*, *d_model=512*, *dff=2048*. See the [paper](https://arxiv.org/abs/1706.03762) for all the other versions of the transformer.
+
+Note: By changing the values below, you can get the model that achieves state of the art on many tasks.
+
+```py
+num_layers = 4
+d_model = 128
+dff = 512
+num_heads = 8
+
+input_vocab_size = tokenizer_pt.vocab_size + 2
+target_vocab_size = tokenizer_en.vocab_size + 2
+dropout_rate = 0.1
+```
+
+## Optimizer
+
+Use the Adam optimizer with a custom learning rate scheduler, according to the formula in the [paper](https://arxiv.org/abs/1706.03762).
+
+$$\Large{lrate = d_{model}^{-0.5} * min(step{\_}num^{-0.5}, step{\_}num * warmup{\_}steps^{-1.5})}$$
+
+```py
+class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
+    def __init__(self, d_model, warmup_steps=4000):
+        super(CustomSchedule, self).__init__()
+
+        self.d_model = d_model
+        self.d_model = tf.cast(self.d_model, tf.float32)
+
+        self.warmup_steps = warmup_steps
+
+    def __call__(self, step):
+        arg1 = tf.math.rsqrt(step)
+        arg2 = step * (self.warmup_steps ** -1.5)
+
+        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)
+```
+
+```py
+learning_rate = CustomSchedule(d_model)
+
+optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98,
+                                     epsilon=1e-9)
+```
+
+```py
+temp_learning_rate_schedule = CustomSchedule(d_model)
+
+plt.plot(temp_learning_rate_schedule(tf.range(40000, dtype=tf.float32)))
+plt.ylabel("Learning Rate")
+plt.xlabel("Train Step")
+```
+
+```py
+Text(0.5, 0, 'Train Step')
+
+```
+
+![png](img/852e0228b5aebca16dfadf758d11e902.png)
+
+## Loss and metrics
+
+Since the target sequences are padded, it is important to apply a padding mask when calculating the loss.
+
+```py
+loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
+    from_logits=True, reduction='none')
+```
+
+```py
+def loss_function(real, pred):
+    mask = tf.math.logical_not(tf.math.equal(real, 0))
+    loss_ = loss_object(real, pred)
+
+    mask = tf.cast(mask, dtype=loss_.dtype)
+    loss_ *= mask
+
+    return tf.reduce_mean(loss_)
+```
+
+```py
+train_loss = tf.keras.metrics.Mean(name='train_loss')
+train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
+    name='train_accuracy')
+```
+
+## Training and checkpointing
+
+```py
+transformer = Transformer(num_layers, d_model, num_heads, dff,
+                          input_vocab_size, target_vocab_size,
+                          pe_input=input_vocab_size,
+                          pe_target=target_vocab_size,
+                          rate=dropout_rate)
+```
+
+```py
+def create_masks(inp, tar):
+    # Encoder padding mask
+    enc_padding_mask = create_padding_mask(inp)
+
+    # Used in the 2nd attention block in the decoder.
+    # This padding mask is used to mask the encoder outputs.
+    dec_padding_mask = create_padding_mask(inp)
+
+    # Used in the 1st attention block in the decoder.
+    # It is used to pad and mask future tokens in the input received by
+    # the decoder.
+    look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
+    dec_target_padding_mask = create_padding_mask(tar)
+    combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
+
+    return enc_padding_mask, combined_mask, dec_padding_mask
+```
+
+Create the checkpoint path and the checkpoint manager. This will be used to save checkpoints every `n` epochs.
+
+```py
+checkpoint_path = "./checkpoints/train"
+
+ckpt = tf.train.Checkpoint(transformer=transformer,
+                           optimizer=optimizer)
+
+ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)
+
+# if a checkpoint exists, restore the latest checkpoint.
+if ckpt_manager.latest_checkpoint:
+    ckpt.restore(ckpt_manager.latest_checkpoint)
+    print ('Latest checkpoint restored!!')
+```
+
+The target is divided into tar_inp and tar_real. tar_inp is passed as an input to the decoder. `tar_real` is that same input shifted by 1: at each location in `tar_inp`, `tar_real` contains the next token that should be predicted.
+
+For example, `sentence` = "SOS A lion in the jungle is sleeping EOS"
+
+`tar_inp` = "SOS A lion in the jungle is sleeping"
+
+`tar_real` = "A lion in the jungle is sleeping EOS"
+
+The transformer is an auto-regressive model: it makes predictions one part at a time, and uses its output so far to decide what to do next.
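+
+To make the shift concrete, here is a tiny illustration with made-up token ids (not from the tutorial):
+
+```py
+tar = tf.constant([[8000, 15, 23, 7, 8001]])  # hypothetical [SOS, A, lion, sleeping, EOS]
+tar_inp = tar[:, :-1]   # [[8000, 15, 23, 7]]: what the decoder sees
+tar_real = tar[:, 1:]   # [[15, 23, 7, 8001]]: what it should predict
+```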
+
+During training, this example uses teacher-forcing (as in the [text generation tutorial](https://tensorflow.google.cn/tutorials/text/text_generation)). Teacher forcing passes the true output to the next time step, regardless of what the model predicts at the current time step.
+
+As the transformer predicts each word, *self-attention* allows it to look at the previous words in the input sequence to better predict the next word.
+
+To prevent the model from peeking at the expected output, the model uses a look-ahead mask.
+
+```py
+EPOCHS = 20
+```
+
+```py
+# The @tf.function trace-compiles train_step into a TF graph for faster
+# execution. The function specializes to the precise shape of the argument
+# tensors. To avoid re-tracing due to the variable sequence lengths or
+# variable batch sizes (the last batch is smaller), use input_signature
+# to specify more generic shapes.
+
+train_step_signature = [
+    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
+    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
+]
+
+@tf.function(input_signature=train_step_signature)
+def train_step(inp, tar):
+    tar_inp = tar[:, :-1]
+    tar_real = tar[:, 1:]
+
+    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)
+
+    with tf.GradientTape() as tape:
+        predictions, _ = transformer(inp, tar_inp,
+                                     True,
+                                     enc_padding_mask,
+                                     combined_mask,
+                                     dec_padding_mask)
+        loss = loss_function(tar_real, predictions)
+
+    gradients = tape.gradient(loss, transformer.trainable_variables)
+    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
+
+    train_loss(loss)
+    train_accuracy(tar_real, predictions)
+```
+
+Portuguese is used as the input language and English is the target language.
+
+```py
+for epoch in range(EPOCHS):
+    start = time.time()
+
+    train_loss.reset_states()
+    train_accuracy.reset_states()
+
+    # inp -> portuguese, tar -> english
+    for (batch, (inp, tar)) in enumerate(train_dataset):
+        train_step(inp, tar)
+
+        if batch % 50 == 0:
+            print ('Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}'.format(
+                epoch + 1, batch, train_loss.result(), train_accuracy.result()))
+
+    if (epoch + 1) % 5 == 0:
+        ckpt_save_path = ckpt_manager.save()
+        print ('Saving checkpoint for epoch {} at {}'.format(epoch+1,
+                                                             ckpt_save_path))
+
+    print ('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(epoch + 1,
+                                                         train_loss.result(),
+                                                         train_accuracy.result()))
+
+    print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))
+```
+
+```py
+Epoch 1 Batch 0 Loss 4.3163 Accuracy 0.0004
+Epoch 1 Batch 50 Loss 4.2825 Accuracy 0.0018
+Epoch 1 Batch 100 Loss 4.2022 Accuracy 0.0134
+Epoch 1 Batch 150 Loss 4.1526 Accuracy 0.0180
+Epoch 1 Batch 200 Loss 4.0717 Accuracy 0.0204
+Epoch 1 Batch 250 Loss 3.9939 Accuracy 0.0224
+Epoch 1 Batch 300 Loss 3.9193 Accuracy 0.0242
+Epoch 1 Batch 350 Loss 3.8341 Accuracy 0.0264
+Epoch 1 Batch 400 Loss 3.7555 Accuracy 0.0301
+Epoch 1 Batch 450 Loss 3.6772 Accuracy 0.0335
+Epoch 1 Batch 500 Loss 3.6066 Accuracy 0.0367
+Epoch 1 Batch 550 Loss 3.5419 Accuracy 0.0403
+Epoch 1 Batch 600 Loss 3.4774 Accuracy 0.0440
+Epoch 1 Batch 650 Loss 3.4262 Accuracy 0.0476
+Epoch 1 Batch 700 Loss 3.3708 Accuracy 0.0510
+Epoch 1 Loss 3.3693 Accuracy 0.0512
+Time taken for 1 epoch: 60.16580581665039 secs
+
+Epoch 2 Batch 0 Loss 2.5117 Accuracy 0.1003
+Epoch 2 Batch 50 Loss 2.5981 Accuracy 0.1028
+Epoch 2 Batch 100 Loss 2.5820 Accuracy 0.1058
+Epoch 2 Batch 150 Loss 2.5412 Accuracy 0.1078
+Epoch 2 Batch 200 Loss 2.5172 Accuracy 0.1099
+Epoch 2 Batch 250 Loss 2.4793 Accuracy 0.1118
+Epoch 2 Batch 300 Loss 2.4630 Accuracy 0.1138
+Epoch 2 Batch 350 Loss 2.4430 Accuracy 0.1156
+Epoch 2 Batch 400 Loss 2.4333 Accuracy 0.1174
+Epoch 2 Batch 450 Loss 2.4241 Accuracy 0.1191
+Epoch 2 Batch 500 Loss 2.4140 Accuracy 0.1207
+Epoch 2 Batch 550 Loss 2.4004 Accuracy 0.1220
+Epoch 2 Batch 600 Loss 2.3853 Accuracy 0.1232
+Epoch 2 Batch 650 Loss 2.3757 Accuracy 0.1244
+Epoch 2 Batch 700 Loss 2.3634 Accuracy 0.1255
+Epoch 2 Loss 2.3630 Accuracy 0.1255
+Time taken for 1
epoch: 31.393303871154785 secs + +Epoch 3 Batch 0 Loss 2.0532 Accuracy 0.1424 +Epoch 3 Batch 50 Loss 2.1493 Accuracy 0.1433 +Epoch 3 Batch 100 Loss 2.1437 Accuracy 0.1437 +Epoch 3 Batch 150 Loss 2.1445 Accuracy 0.1448 +Epoch 3 Batch 200 Loss 2.1471 Accuracy 0.1451 +Epoch 3 Batch 250 Loss 2.1426 Accuracy 0.1458 +Epoch 3 Batch 300 Loss 2.1416 Accuracy 0.1466 +Epoch 3 Batch 350 Loss 2.1400 Accuracy 0.1476 +Epoch 3 Batch 400 Loss 2.1315 Accuracy 0.1480 +Epoch 3 Batch 450 Loss 2.1271 Accuracy 0.1483 +Epoch 3 Batch 500 Loss 2.1217 Accuracy 0.1488 +Epoch 3 Batch 550 Loss 2.1167 Accuracy 0.1492 +Epoch 3 Batch 600 Loss 2.1111 Accuracy 0.1497 +Epoch 3 Batch 650 Loss 2.1069 Accuracy 0.1502 +Epoch 3 Batch 700 Loss 2.1020 Accuracy 0.1509 +Epoch 3 Loss 2.1011 Accuracy 0.1509 +Time taken for 1 epoch: 31.126026153564453 secs + +Epoch 4 Batch 0 Loss 1.8764 Accuracy 0.1534 +Epoch 4 Batch 50 Loss 1.9276 Accuracy 0.1609 +Epoch 4 Batch 100 Loss 1.9371 Accuracy 0.1636 +Epoch 4 Batch 150 Loss 1.9392 Accuracy 0.1644 +Epoch 4 Batch 200 Loss 1.9391 Accuracy 0.1654 +Epoch 4 Batch 250 Loss 1.9351 Accuracy 0.1660 +Epoch 4 Batch 300 Loss 1.9323 Accuracy 0.1669 +Epoch 4 Batch 350 Loss 1.9289 Accuracy 0.1675 +Epoch 4 Batch 400 Loss 1.9238 Accuracy 0.1684 +Epoch 4 Batch 450 Loss 1.9193 Accuracy 0.1692 +Epoch 4 Batch 500 Loss 1.9146 Accuracy 0.1704 +Epoch 4 Batch 550 Loss 1.9124 Accuracy 0.1714 +Epoch 4 Batch 600 Loss 1.9038 Accuracy 0.1726 +Epoch 4 Batch 650 Loss 1.8971 Accuracy 0.1735 +Epoch 4 Batch 700 Loss 1.8924 Accuracy 0.1747 +Epoch 4 Loss 1.8911 Accuracy 0.1746 +Time taken for 1 epoch: 31.81874442100525 secs + +Epoch 5 Batch 0 Loss 1.7212 Accuracy 0.1915 +Epoch 5 Batch 50 Loss 1.7288 Accuracy 0.1948 +Epoch 5 Batch 100 Loss 1.7079 Accuracy 0.1925 +Epoch 5 Batch 150 Loss 1.7059 Accuracy 0.1940 +Epoch 5 Batch 200 Loss 1.7098 Accuracy 0.1947 +Epoch 5 Batch 250 Loss 1.7009 Accuracy 0.1953 +Epoch 5 Batch 300 Loss 1.7049 Accuracy 0.1967 +Epoch 5 Batch 350 Loss 1.7009 Accuracy 0.1975 +Epoch 5 Batch 400 Loss 1.6954 Accuracy 0.1979 +Epoch 5 Batch 450 Loss 1.6900 Accuracy 0.1985 +Epoch 5 Batch 500 Loss 1.6858 Accuracy 0.1993 +Epoch 5 Batch 550 Loss 1.6856 Accuracy 0.2002 +Epoch 5 Batch 600 Loss 1.6810 Accuracy 0.2009 +Epoch 5 Batch 650 Loss 1.6746 Accuracy 0.2016 +Epoch 5 Batch 700 Loss 1.6698 Accuracy 0.2022 +Saving checkpoint for epoch 5 at ./checkpoints/train/ckpt-1 +Epoch 5 Loss 1.6697 Accuracy 0.2022 +Time taken for 1 epoch: 31.367265462875366 secs + +Epoch 6 Batch 0 Loss 1.4858 Accuracy 0.2155 +Epoch 6 Batch 50 Loss 1.4763 Accuracy 0.2153 +Epoch 6 Batch 100 Loss 1.4967 Accuracy 0.2173 +Epoch 6 Batch 150 Loss 1.4930 Accuracy 0.2183 +Epoch 6 Batch 200 Loss 1.4938 Accuracy 0.2190 +Epoch 6 Batch 250 Loss 1.5026 Accuracy 0.2205 +Epoch 6 Batch 300 Loss 1.5025 Accuracy 0.2208 +Epoch 6 Batch 350 Loss 1.4974 Accuracy 0.2208 +Epoch 6 Batch 400 Loss 1.4984 Accuracy 0.2214 +Epoch 6 Batch 450 Loss 1.4972 Accuracy 0.2220 +Epoch 6 Batch 500 Loss 1.4940 Accuracy 0.2222 +Epoch 6 Batch 550 Loss 1.4911 Accuracy 0.2223 +Epoch 6 Batch 600 Loss 1.4896 Accuracy 0.2227 +Epoch 6 Batch 650 Loss 1.4849 Accuracy 0.2231 +Epoch 6 Batch 700 Loss 1.4819 Accuracy 0.2237 +Epoch 6 Loss 1.4821 Accuracy 0.2237 +Time taken for 1 epoch: 31.269275426864624 secs + +Epoch 7 Batch 0 Loss 1.5062 Accuracy 0.2517 +Epoch 7 Batch 50 Loss 1.3157 Accuracy 0.2398 +Epoch 7 Batch 100 Loss 1.3275 Accuracy 0.2426 +Epoch 7 Batch 150 Loss 1.3256 Accuracy 0.2413 +Epoch 7 Batch 200 Loss 1.3212 Accuracy 0.2413 +Epoch 7 Batch 250 Loss 1.3213 Accuracy 0.2424 +Epoch 7 Batch 300 
Loss 1.3195 Accuracy 0.2424 +Epoch 7 Batch 350 Loss 1.3158 Accuracy 0.2425 +Epoch 7 Batch 400 Loss 1.3112 Accuracy 0.2430 +Epoch 7 Batch 450 Loss 1.3050 Accuracy 0.2432 +Epoch 7 Batch 500 Loss 1.3025 Accuracy 0.2436 +Epoch 7 Batch 550 Loss 1.3013 Accuracy 0.2441 +Epoch 7 Batch 600 Loss 1.2981 Accuracy 0.2447 +Epoch 7 Batch 650 Loss 1.2952 Accuracy 0.2447 +Epoch 7 Batch 700 Loss 1.2947 Accuracy 0.2452 +Epoch 7 Loss 1.2947 Accuracy 0.2453 +Time taken for 1 epoch: 31.002289056777954 secs + +Epoch 8 Batch 0 Loss 1.0162 Accuracy 0.2484 +Epoch 8 Batch 50 Loss 1.1310 Accuracy 0.2597 +Epoch 8 Batch 100 Loss 1.1398 Accuracy 0.2611 +Epoch 8 Batch 150 Loss 1.1428 Accuracy 0.2607 +Epoch 8 Batch 200 Loss 1.1476 Accuracy 0.2613 +Epoch 8 Batch 250 Loss 1.1415 Accuracy 0.2614 +Epoch 8 Batch 300 Loss 1.1396 Accuracy 0.2620 +Epoch 8 Batch 350 Loss 1.1410 Accuracy 0.2625 +Epoch 8 Batch 400 Loss 1.1397 Accuracy 0.2627 +Epoch 8 Batch 450 Loss 1.1400 Accuracy 0.2629 +Epoch 8 Batch 500 Loss 1.1387 Accuracy 0.2629 +Epoch 8 Batch 550 Loss 1.1365 Accuracy 0.2632 +Epoch 8 Batch 600 Loss 1.1360 Accuracy 0.2632 +Epoch 8 Batch 650 Loss 1.1364 Accuracy 0.2636 +Epoch 8 Batch 700 Loss 1.1345 Accuracy 0.2638 +Epoch 8 Loss 1.1345 Accuracy 0.2638 +Time taken for 1 epoch: 31.10622811317444 secs + +Epoch 9 Batch 0 Loss 1.0940 Accuracy 0.3234 +Epoch 9 Batch 50 Loss 1.0242 Accuracy 0.2814 +Epoch 9 Batch 100 Loss 1.0237 Accuracy 0.2809 +Epoch 9 Batch 150 Loss 1.0214 Accuracy 0.2797 +Epoch 9 Batch 200 Loss 1.0180 Accuracy 0.2790 +Epoch 9 Batch 250 Loss 1.0185 Accuracy 0.2787 +Epoch 9 Batch 300 Loss 1.0184 Accuracy 0.2786 +Epoch 9 Batch 350 Loss 1.0166 Accuracy 0.2780 +Epoch 9 Batch 400 Loss 1.0162 Accuracy 0.2780 +Epoch 9 Batch 450 Loss 1.0146 Accuracy 0.2775 +Epoch 9 Batch 500 Loss 1.0163 Accuracy 0.2775 +Epoch 9 Batch 550 Loss 1.0183 Accuracy 0.2778 +Epoch 9 Batch 600 Loss 1.0188 Accuracy 0.2780 +Epoch 9 Batch 650 Loss 1.0199 Accuracy 0.2781 +Epoch 9 Batch 700 Loss 1.0234 Accuracy 0.2785 +Epoch 9 Loss 1.0232 Accuracy 0.2785 +Time taken for 1 epoch: 30.981273889541626 secs + +Epoch 10 Batch 0 Loss 0.8119 Accuracy 0.2732 +Epoch 10 Batch 50 Loss 0.9071 Accuracy 0.2880 +Epoch 10 Batch 100 Loss 0.9160 Accuracy 0.2894 +Epoch 10 Batch 150 Loss 0.9165 Accuracy 0.2896 +Epoch 10 Batch 200 Loss 0.9201 Accuracy 0.2897 +Epoch 10 Batch 250 Loss 0.9230 Accuracy 0.2895 +Epoch 10 Batch 300 Loss 0.9252 Accuracy 0.2900 +Epoch 10 Batch 350 Loss 0.9270 Accuracy 0.2900 +Epoch 10 Batch 400 Loss 0.9254 Accuracy 0.2899 +Epoch 10 Batch 450 Loss 0.9267 Accuracy 0.2895 +Epoch 10 Batch 500 Loss 0.9302 Accuracy 0.2890 +Epoch 10 Batch 550 Loss 0.9307 Accuracy 0.2889 +Epoch 10 Batch 600 Loss 0.9322 Accuracy 0.2890 +Epoch 10 Batch 650 Loss 0.9352 Accuracy 0.2892 +Epoch 10 Batch 700 Loss 0.9375 Accuracy 0.2890 +Saving checkpoint for epoch 10 at ./checkpoints/train/ckpt-2 +Epoch 10 Loss 0.9379 Accuracy 0.2891 +Time taken for 1 epoch: 31.26957678794861 secs + +Epoch 11 Batch 0 Loss 0.8713 Accuracy 0.2925 +Epoch 11 Batch 50 Loss 0.8490 Accuracy 0.3006 +Epoch 11 Batch 100 Loss 0.8531 Accuracy 0.3021 +Epoch 11 Batch 150 Loss 0.8443 Accuracy 0.3002 +Epoch 11 Batch 200 Loss 0.8487 Accuracy 0.3003 +Epoch 11 Batch 250 Loss 0.8535 Accuracy 0.2998 +Epoch 11 Batch 300 Loss 0.8542 Accuracy 0.2999 +Epoch 11 Batch 350 Loss 0.8569 Accuracy 0.2995 +Epoch 11 Batch 400 Loss 0.8586 Accuracy 0.2991 +Epoch 11 Batch 450 Loss 0.8596 Accuracy 0.2987 +Epoch 11 Batch 500 Loss 0.8608 Accuracy 0.2984 +Epoch 11 Batch 550 Loss 0.8628 Accuracy 0.2982 +Epoch 11 Batch 600 Loss 0.8642 Accuracy 
0.2980 +Epoch 11 Batch 650 Loss 0.8671 Accuracy 0.2981 +Epoch 11 Batch 700 Loss 0.8693 Accuracy 0.2982 +Epoch 11 Loss 0.8693 Accuracy 0.2982 +Time taken for 1 epoch: 32.054973125457764 secs + +Epoch 12 Batch 0 Loss 0.7781 Accuracy 0.3277 +Epoch 12 Batch 50 Loss 0.7705 Accuracy 0.3061 +Epoch 12 Batch 100 Loss 0.7835 Accuracy 0.3090 +Epoch 12 Batch 150 Loss 0.7882 Accuracy 0.3070 +Epoch 12 Batch 200 Loss 0.7926 Accuracy 0.3069 +Epoch 12 Batch 250 Loss 0.7952 Accuracy 0.3075 +Epoch 12 Batch 300 Loss 0.7989 Accuracy 0.3073 +Epoch 12 Batch 350 Loss 0.8016 Accuracy 0.3069 +Epoch 12 Batch 400 Loss 0.8045 Accuracy 0.3073 +Epoch 12 Batch 450 Loss 0.8063 Accuracy 0.3070 +Epoch 12 Batch 500 Loss 0.8068 Accuracy 0.3066 +Epoch 12 Batch 550 Loss 0.8099 Accuracy 0.3064 +Epoch 12 Batch 600 Loss 0.8112 Accuracy 0.3060 +Epoch 12 Batch 650 Loss 0.8122 Accuracy 0.3058 +Epoch 12 Batch 700 Loss 0.8144 Accuracy 0.3057 +Epoch 12 Loss 0.8140 Accuracy 0.3056 +Time taken for 1 epoch: 30.988539934158325 secs + +Epoch 13 Batch 0 Loss 0.6429 Accuracy 0.3097 +Epoch 13 Batch 50 Loss 0.7236 Accuracy 0.3075 +Epoch 13 Batch 100 Loss 0.7323 Accuracy 0.3110 +Epoch 13 Batch 150 Loss 0.7314 Accuracy 0.3106 +Epoch 13 Batch 200 Loss 0.7346 Accuracy 0.3115 +Epoch 13 Batch 250 Loss 0.7388 Accuracy 0.3119 +Epoch 13 Batch 300 Loss 0.7461 Accuracy 0.3129 +Epoch 13 Batch 350 Loss 0.7477 Accuracy 0.3129 +Epoch 13 Batch 400 Loss 0.7488 Accuracy 0.3127 +Epoch 13 Batch 450 Loss 0.7526 Accuracy 0.3126 +Epoch 13 Batch 500 Loss 0.7566 Accuracy 0.3129 +Epoch 13 Batch 550 Loss 0.7606 Accuracy 0.3127 +Epoch 13 Batch 600 Loss 0.7626 Accuracy 0.3124 +Epoch 13 Batch 650 Loss 0.7644 Accuracy 0.3121 +Epoch 13 Batch 700 Loss 0.7657 Accuracy 0.3122 +Epoch 13 Loss 0.7658 Accuracy 0.3122 +Time taken for 1 epoch: 31.11562490463257 secs + +Epoch 14 Batch 0 Loss 0.6303 Accuracy 0.3125 +Epoch 14 Batch 50 Loss 0.6793 Accuracy 0.3209 +Epoch 14 Batch 100 Loss 0.6850 Accuracy 0.3220 +Epoch 14 Batch 150 Loss 0.6885 Accuracy 0.3205 +Epoch 14 Batch 200 Loss 0.6975 Accuracy 0.3208 +Epoch 14 Batch 250 Loss 0.7003 Accuracy 0.3199 +Epoch 14 Batch 300 Loss 0.7045 Accuracy 0.3197 +Epoch 14 Batch 350 Loss 0.7081 Accuracy 0.3205 +Epoch 14 Batch 400 Loss 0.7095 Accuracy 0.3197 +Epoch 14 Batch 450 Loss 0.7120 Accuracy 0.3189 +Epoch 14 Batch 500 Loss 0.7135 Accuracy 0.3186 +Epoch 14 Batch 550 Loss 0.7167 Accuracy 0.3186 +Epoch 14 Batch 600 Loss 0.7191 Accuracy 0.3183 +Epoch 14 Batch 650 Loss 0.7215 Accuracy 0.3182 +Epoch 14 Batch 700 Loss 0.7234 Accuracy 0.3179 +Epoch 14 Loss 0.7238 Accuracy 0.3179 +Time taken for 1 epoch: 30.928674936294556 secs + +Epoch 15 Batch 0 Loss 0.5921 Accuracy 0.3298 +Epoch 15 Batch 50 Loss 0.6525 Accuracy 0.3265 +Epoch 15 Batch 100 Loss 0.6511 Accuracy 0.3255 +Epoch 15 Batch 150 Loss 0.6618 Accuracy 0.3250 +Epoch 15 Batch 200 Loss 0.6659 Accuracy 0.3244 +Epoch 15 Batch 250 Loss 0.6658 Accuracy 0.3243 +Epoch 15 Batch 300 Loss 0.6666 Accuracy 0.3241 +Epoch 15 Batch 350 Loss 0.6695 Accuracy 0.3236 +Epoch 15 Batch 400 Loss 0.6723 Accuracy 0.3237 +Epoch 15 Batch 450 Loss 0.6759 Accuracy 0.3235 +Epoch 15 Batch 500 Loss 0.6783 Accuracy 0.3238 +Epoch 15 Batch 550 Loss 0.6800 Accuracy 0.3230 +Epoch 15 Batch 600 Loss 0.6826 Accuracy 0.3226 +Epoch 15 Batch 650 Loss 0.6850 Accuracy 0.3226 +Epoch 15 Batch 700 Loss 0.6884 Accuracy 0.3225 +Saving checkpoint for epoch 15 at ./checkpoints/train/ckpt-3 +Epoch 15 Loss 0.6887 Accuracy 0.3225 +Time taken for 1 epoch: 31.319037675857544 secs + +Epoch 16 Batch 0 Loss 0.5955 Accuracy 0.3319 +Epoch 16 Batch 50 Loss 0.6032 
Accuracy 0.3275 +Epoch 16 Batch 100 Loss 0.6082 Accuracy 0.3308 +Epoch 16 Batch 150 Loss 0.6231 Accuracy 0.3322 +Epoch 16 Batch 200 Loss 0.6284 Accuracy 0.3314 +Epoch 16 Batch 250 Loss 0.6318 Accuracy 0.3313 +Epoch 16 Batch 300 Loss 0.6353 Accuracy 0.3306 +Epoch 16 Batch 350 Loss 0.6387 Accuracy 0.3304 +Epoch 16 Batch 400 Loss 0.6411 Accuracy 0.3302 +Epoch 16 Batch 450 Loss 0.6445 Accuracy 0.3294 +Epoch 16 Batch 500 Loss 0.6467 Accuracy 0.3288 +Epoch 16 Batch 550 Loss 0.6497 Accuracy 0.3289 +Epoch 16 Batch 600 Loss 0.6527 Accuracy 0.3286 +Epoch 16 Batch 650 Loss 0.6545 Accuracy 0.3283 +Epoch 16 Batch 700 Loss 0.6580 Accuracy 0.3280 +Epoch 16 Loss 0.6579 Accuracy 0.3280 +Time taken for 1 epoch: 31.29152202606201 secs + +Epoch 17 Batch 0 Loss 0.6259 Accuracy 0.3560 +Epoch 17 Batch 50 Loss 0.5865 Accuracy 0.3384 +Epoch 17 Batch 100 Loss 0.5974 Accuracy 0.3386 +Epoch 17 Batch 150 Loss 0.5999 Accuracy 0.3366 +Epoch 17 Batch 200 Loss 0.6053 Accuracy 0.3362 +Epoch 17 Batch 250 Loss 0.6058 Accuracy 0.3349 +Epoch 17 Batch 300 Loss 0.6092 Accuracy 0.3351 +Epoch 17 Batch 350 Loss 0.6106 Accuracy 0.3350 +Epoch 17 Batch 400 Loss 0.6127 Accuracy 0.3342 +Epoch 17 Batch 450 Loss 0.6141 Accuracy 0.3338 +Epoch 17 Batch 500 Loss 0.6171 Accuracy 0.3333 +Epoch 17 Batch 550 Loss 0.6198 Accuracy 0.3327 +Epoch 17 Batch 600 Loss 0.6224 Accuracy 0.3323 +Epoch 17 Batch 650 Loss 0.6248 Accuracy 0.3319 +Epoch 17 Batch 700 Loss 0.6276 Accuracy 0.3318 +Epoch 17 Loss 0.6277 Accuracy 0.3319 +Time taken for 1 epoch: 31.021020889282227 secs + +Epoch 18 Batch 0 Loss 0.5560 Accuracy 0.3509 +Epoch 18 Batch 50 Loss 0.5549 Accuracy 0.3416 +Epoch 18 Batch 100 Loss 0.5651 Accuracy 0.3412 +Epoch 18 Batch 150 Loss 0.5679 Accuracy 0.3397 +Epoch 18 Batch 200 Loss 0.5743 Accuracy 0.3396 +Epoch 18 Batch 250 Loss 0.5788 Accuracy 0.3399 +Epoch 18 Batch 300 Loss 0.5830 Accuracy 0.3405 +Epoch 18 Batch 350 Loss 0.5842 Accuracy 0.3387 +Epoch 18 Batch 400 Loss 0.5885 Accuracy 0.3385 +Epoch 18 Batch 450 Loss 0.5893 Accuracy 0.3379 +Epoch 18 Batch 500 Loss 0.5921 Accuracy 0.3380 +Epoch 18 Batch 550 Loss 0.5953 Accuracy 0.3371 +Epoch 18 Batch 600 Loss 0.5979 Accuracy 0.3365 +Epoch 18 Batch 650 Loss 0.6012 Accuracy 0.3363 +Epoch 18 Batch 700 Loss 0.6036 Accuracy 0.3358 +Epoch 18 Loss 0.6037 Accuracy 0.3358 +Time taken for 1 epoch: 31.022820949554443 secs + +Epoch 19 Batch 0 Loss 0.5412 Accuracy 0.3710 +Epoch 19 Batch 50 Loss 0.5427 Accuracy 0.3460 +Epoch 19 Batch 100 Loss 0.5431 Accuracy 0.3452 +Epoch 19 Batch 150 Loss 0.5443 Accuracy 0.3421 +Epoch 19 Batch 200 Loss 0.5474 Accuracy 0.3419 +Epoch 19 Batch 250 Loss 0.5527 Accuracy 0.3418 +Epoch 19 Batch 300 Loss 0.5561 Accuracy 0.3414 +Epoch 19 Batch 350 Loss 0.5596 Accuracy 0.3413 +Epoch 19 Batch 400 Loss 0.5640 Accuracy 0.3412 +Epoch 19 Batch 450 Loss 0.5658 Accuracy 0.3407 +Epoch 19 Batch 500 Loss 0.5694 Accuracy 0.3403 +Epoch 19 Batch 550 Loss 0.5720 Accuracy 0.3404 +Epoch 19 Batch 600 Loss 0.5744 Accuracy 0.3400 +Epoch 19 Batch 650 Loss 0.5780 Accuracy 0.3402 +Epoch 19 Batch 700 Loss 0.5803 Accuracy 0.3403 +Epoch 19 Loss 0.5802 Accuracy 0.3403 +Time taken for 1 epoch: 30.98587203025818 secs + +Epoch 20 Batch 0 Loss 0.5481 Accuracy 0.3458 +Epoch 20 Batch 50 Loss 0.5221 Accuracy 0.3459 +Epoch 20 Batch 100 Loss 0.5201 Accuracy 0.3470 +Epoch 20 Batch 150 Loss 0.5273 Accuracy 0.3471 +Epoch 20 Batch 200 Loss 0.5294 Accuracy 0.3463 +Epoch 20 Batch 250 Loss 0.5363 Accuracy 0.3454 +Epoch 20 Batch 300 Loss 0.5384 Accuracy 0.3446 +Epoch 20 Batch 350 Loss 0.5398 Accuracy 0.3441 +Epoch 20 Batch 400 Loss 
0.5422 Accuracy 0.3441
+Epoch 20 Batch 450 Loss 0.5454 Accuracy 0.3439
+Epoch 20 Batch 500 Loss 0.5480 Accuracy 0.3440
+Epoch 20 Batch 550 Loss 0.5516 Accuracy 0.3441
+Epoch 20 Batch 600 Loss 0.5544 Accuracy 0.3434
+Epoch 20 Batch 650 Loss 0.5572 Accuracy 0.3432
+Epoch 20 Batch 700 Loss 0.5595 Accuracy 0.3428
+Saving checkpoint for epoch 20 at ./checkpoints/train/ckpt-4
+Epoch 20 Loss 0.5597 Accuracy 0.3427
+Time taken for 1 epoch: 31.170108795166016 secs
+
+```
+
+## Evaluate
+
+The following steps are used for evaluation:
+
+* Encode the input sentence using the Portuguese tokenizer (`tokenizer_pt`). Moreover, add the start and end token so the input is equivalent to what the model was trained with. This is the encoder input.
+* The decoder input is the `start token == tokenizer_en.vocab_size`.
+* Calculate the padding masks and the look-ahead masks.
+* The `decoder` then outputs the predictions by looking at the `encoder output` and its own output (self-attention).
+* Select the last word and calculate its argmax.
+* Concatenate the predicted word to the decoder input and pass it to the decoder.
+* In this approach, the decoder predicts the next word based on the previous words it predicted.
+
+Note: The model used here has less capacity, to keep it relatively fast, so the predictions may be less correct. To reproduce the results in the paper, use the entire dataset and the base transformer model or transformer XL, by changing the hyperparameters above.
+
+```py
+def evaluate(inp_sentence):
+    start_token = [tokenizer_pt.vocab_size]
+    end_token = [tokenizer_pt.vocab_size + 1]
+
+    # the input sentence is Portuguese; add the start and end tokens
+    inp_sentence = start_token + tokenizer_pt.encode(inp_sentence) + end_token
+    encoder_input = tf.expand_dims(inp_sentence, 0)
+
+    # as the target is English, the first word passed to the transformer
+    # should be the English start token.
+    decoder_input = [tokenizer_en.vocab_size]
+    output = tf.expand_dims(decoder_input, 0)
+
+    for i in range(MAX_LENGTH):
+        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
+            encoder_input, output)
+
+        # predictions.shape == (batch_size, seq_len, vocab_size)
+        predictions, attention_weights = transformer(encoder_input,
+                                                     output,
+                                                     False,
+                                                     enc_padding_mask,
+                                                     combined_mask,
+                                                     dec_padding_mask)
+
+        # select the last word from the seq_len dimension
+        predictions = predictions[:, -1:, :]  # (batch_size, 1, vocab_size)
+
+        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
+
+        # return the result if predicted_id is equal to the end token
+        if predicted_id == tokenizer_en.vocab_size+1:
+            return tf.squeeze(output, axis=0), attention_weights
+
+        # concatenate predicted_id to the output, which is given to the
+        # decoder as its input.
+        output = tf.concat([output, predicted_id], axis=-1)
+
+    return tf.squeeze(output, axis=0), attention_weights
+```
+
+```py
+def plot_attention_weights(attention, sentence, result, layer):
+    fig = plt.figure(figsize=(16, 8))
+
+    sentence = tokenizer_pt.encode(sentence)
+
+    attention = tf.squeeze(attention[layer], axis=0)
+
+    for head in range(attention.shape[0]):
+        ax = fig.add_subplot(2, 4, head+1)
+
+        # plot the attention weights
+        ax.matshow(attention[head][:-1, :], cmap='viridis')
+
+        fontdict = {'fontsize': 10}
+
+        ax.set_xticks(range(len(sentence)+2))
+        ax.set_yticks(range(len(result)))
+
+        ax.set_ylim(len(result)-1.5, -0.5)
+
+        ax.set_xticklabels(
+            ['<start>']+[tokenizer_pt.decode([i]) for i in sentence]+['<end>'],
+            fontdict=fontdict, rotation=90)
+
+        ax.set_yticklabels([tokenizer_en.decode([i]) for i in result
+                            if i < tokenizer_en.vocab_size],
+                           fontdict=fontdict)
+
+        ax.set_xlabel('Head {}'.format(head+1))
+
+    plt.tight_layout()
+    plt.show()
+```
+
+```py
+def translate(sentence, plot=''):
+    result, attention_weights = evaluate(sentence)
+
+    predicted_sentence = tokenizer_en.decode([i for i in result
+                                              if i < tokenizer_en.vocab_size])
+
+    print('Input: {}'.format(sentence))
+    print('Predicted translation: {}'.format(predicted_sentence))
+
+    if plot:
+        plot_attention_weights(attention_weights, sentence, result, plot)
+```
+
+```py
+translate("este é um problema que temos que resolver.")
+print ("Real translation: this is a problem we have to solve .")
+```
+
+```py
+Input: este é um problema que temos que resolver.
+Predicted translation: this is a problem that we have to solve the united states is that we have to solve the world .
+Real translation: this is a problem we have to solve .
+
+```
+
+```py
+translate("os meus vizinhos ouviram sobre esta ideia.")
+print ("Real translation: and my neighboring homes heard about this idea .")
+```
+
+```py
+Input: os meus vizinhos ouviram sobre esta ideia.
+Predicted translation: my neighbors heard about this idea .
+Real translation: and my neighboring homes heard about this idea .
+
+```
+
+```py
+translate("vou então muito rapidamente partilhar convosco algumas histórias de algumas coisas mágicas que aconteceram.")
+print ("Real translation: so i 'll just share with you some stories very quickly of some magical things that have happened .")
+```
+
+```py
+Input: vou então muito rapidamente partilhar convosco algumas histórias de algumas coisas mágicas que aconteceram.
+Predicted translation: so i 'm going to share with you a couple of exciting stories of some magical things that happened .
+Real translation: so i 'll just share with you some stories very quickly of some magical things that have happened .
+
+```
+
+You can pass different layers and attention blocks of the decoder to the `plot` parameter.
+
+```py
+translate("este é o primeiro livro que eu fiz.", plot='decoder_layer4_block2')
+print ("Real translation: this is the first book i've ever done.")
+```
+
+```py
+Input: este é o primeiro livro que eu fiz.
+Predicted translation: this is the first book that i made .
+
+```
+
+![png](img/8317a6a03bbf1bff4913755d5d89c9c6.png)
+
+```py
+Real translation: this is the first book i've ever done.
+
+```
+
+## Summary
+
+In this tutorial, you have learned about positional encoding, multi-head attention, the importance of masking, and how to create a transformer.
+
+Try using a different dataset to train the transformer. You can also create the base transformer or transformer XL by changing the hyperparameters above. You can also use the layers defined here to create [BERT](https://arxiv.org/abs/1810.04805) and train state of the art models. Furthermore, you can implement beam search to get better predictions.
\ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/054.md b/Tensorflow/TensorFlow2.0/054.md new file mode 100644 index 00000000..4fd3a519 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/054.md @@ -0,0 +1,996 @@
+# Fine-tuning a BERT model
+
+> Original: [https://tensorflow.google.cn/official_models/fine_tuning_bert](https://tensorflow.google.cn/official_models/fine_tuning_bert)
+
+In this example, we will work through fine-tuning a BERT model using the tensorflow-models PIP package.
+
+The pretrained BERT model this tutorial is based on is also available on [TensorFlow Hub](https://tensorflow.org/hub); to see how to use it, refer to the [Hub Appendix](#hub_bert).
+
+## Setup
+
+### Install the TensorFlow Model Garden pip package
+
+* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. To include the latest changes, you may install `tf-models-nightly`, which is the nightly Model Garden package created daily and automatically.
+* pip will install all models and dependencies automatically.
+
+```py
+pip install -q tf-models-official==2.3.0
+
+```
+
+```py
+WARNING: You are using pip version 20.2.3; however, version 20.2.4 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+ +``` + +### Imports + +```py +import os + +import numpy as np +import matplotlib.pyplot as plt + +import tensorflow as tf + +import tensorflow_hub as hub +import tensorflow_datasets as tfds +tfds.disable_progress_bar() + +from official.modeling import tf_utils +from official import nlp +from official.nlp import bert + +# Load the required submodules +import official.nlp.optimization +import official.nlp.bert.bert_models +import official.nlp.bert.configs +import official.nlp.bert.run_classifier +import official.nlp.bert.tokenization +import official.nlp.data.classifier_data_lib +import official.nlp.modeling.losses +import official.nlp.modeling.models +import official.nlp.modeling.networks +``` + +### Resources + +This directory contains the configuration, vocabulary, and a pre-trained checkpoint used in this tutorial: + +```py +gs_folder_bert = "gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12" +tf.io.gfile.listdir(gs_folder_bert) +``` + +```py +['bert_config.json', + 'bert_model.ckpt.data-00000-of-00001', + 'bert_model.ckpt.index', + 'vocab.txt'] + +``` + +You can get a pre-trained BERT encoder from [TensorFlow Hub](https://hub.tensorflow.google.cn/tensorflow/bert_en_uncased_L-12_H-768_A-12/2): + +```py +hub_url_bert = "https://hub.tensorflow.google.cn/tensorflow/bert_en_uncased_L-12_H-768_A-12/2" +``` + +## The data + +For this example we used the [GLUE MRPC dataset from TFDS](https://tensorflow.google.cn/datasets/catalog/glue#gluemrpc). + +This dataset is not set up so that it can be directly fed into the BERT model, so this section also handles the necessary preprocessing. + +### Get the dataset from TensorFlow Datasets + +The Microsoft Research Paraphrase Corpus (Dolan & Brockett, 2005) is a corpus of sentence pairs automatically extracted from online news sources, with human annotations for whether the sentences in the pair are semantically equivalent. + +* Number of labels: 2. +* Size of training dataset: 3668. +* Size of evaluation dataset: 408. +* Maximum sequence length of training and evaluation dataset: 128. + +```py +glue, info = tfds.load('glue/mrpc', with_info=True, + # It's small, load the whole dataset + batch_size=-1) +``` + +```py +Downloading and preparing dataset glue/mrpc/1.0.0 (download: 1.43 MiB, generated: Unknown size, total: 1.43 MiB) to /home/kbuilder/tensorflow_datasets/glue/mrpc/1.0.0... +Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/glue/mrpc/1.0.0.incompleteKZIBN9/glue-train.tfrecord +Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/glue/mrpc/1.0.0.incompleteKZIBN9/glue-validation.tfrecord +Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/glue/mrpc/1.0.0.incompleteKZIBN9/glue-test.tfrecord +Dataset glue downloaded and prepared to /home/kbuilder/tensorflow_datasets/glue/mrpc/1.0.0\. Subsequent calls will reuse this data. 
+
+```
+
+```py
+list(glue.keys())
+```
+
+```py
+['test', 'train', 'validation']
+
+```
+
+The `info` object describes the dataset and its features:
+
+```py
+info.features
+```
+
+```py
+FeaturesDict({
+    'idx': tf.int32,
+    'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
+    'sentence1': Text(shape=(), dtype=tf.string),
+    'sentence2': Text(shape=(), dtype=tf.string),
+})
+
+```
+
+The two classes are:
+
+```py
+info.features['label'].names
+```
+
+```py
+['not_equivalent', 'equivalent']
+
+```
+
+Here is one example from the training set:
+
+```py
+glue_train = glue['train']
+
+for key, value in glue_train.items():
+  print(f"{key:9s}: {value[0].numpy()}")
+```
+
+```py
+idx      : 1680
+label    : 0
+sentence1: b'The identical rovers will act as robotic geologists , searching for evidence of past water .'
+sentence2: b'The rovers act as robotic geologists , moving on six wheels .'
+
+```
+
+### The BERT tokenizer
+
+To fine-tune a pre-trained model you need to be sure that you're using exactly the same tokenization, vocabulary, and index mapping as you used during training.
+
+The BERT tokenizer used in this tutorial is written in pure Python (it's not built out of TensorFlow ops). So you can't just plug it into your model as a `keras.layer` like you can with [`preprocessing.TextVectorization`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/experimental/preprocessing/TextVectorization).
+
+The following code rebuilds the tokenizer that was used by the base model:
+
+```py
+# Set up tokenizer to generate Tensorflow dataset
+tokenizer = bert.tokenization.FullTokenizer(
+    vocab_file=os.path.join(gs_folder_bert, "vocab.txt"),
+    do_lower_case=True)
+
+print("Vocab size:", len(tokenizer.vocab))
+```
+
+```py
+Vocab size: 30522
+
+```
+
+Tokenize a sentence:
+
+```py
+tokens = tokenizer.tokenize("Hello TensorFlow!")
+print(tokens)
+ids = tokenizer.convert_tokens_to_ids(tokens)
+print(ids)
+```
+
+```py
+['hello', 'tensor', '##flow', '!']
+[7592, 23435, 12314, 999]
+
+```
+
+### Preprocess the data
+
+This section manually preprocesses the dataset into the format expected by the model.
+
+This dataset is small, so preprocessing can be done quickly and easily in memory. For larger datasets the `tf_models` library includes some tools for preprocessing and re-serializing a dataset. See [Appendix: Re-encoding a large dataset](#re_encoding_tools) for details.
+
+#### Encode the sentences
+
+The model expects its two input sentences to be concatenated together. This input is expected to start with a `[CLS]` "This is a classification problem" token, and each sentence should end with a `[SEP]` "Separator" token:
+
+```py
+tokenizer.convert_tokens_to_ids(['[CLS]', '[SEP]'])
+```
+
+```py
+[101, 102]
+
+```
+
+Start by encoding all the sentences while appending a `[SEP]` token, and packing them into ragged tensors:
+
+```py
+def encode_sentence(s):
+   tokens = list(tokenizer.tokenize(s.numpy()))
+   tokens.append('[SEP]')
+   return tokenizer.convert_tokens_to_ids(tokens)
+
+sentence1 = tf.ragged.constant([
+    encode_sentence(s) for s in glue_train["sentence1"]])
+sentence2 = tf.ragged.constant([
+    encode_sentence(s) for s in glue_train["sentence2"]])
+```
+
+```py
+print("Sentence1 shape:", sentence1.shape.as_list())
+print("Sentence2 shape:", sentence2.shape.as_list())
+```
+
+```py
+Sentence1 shape: [3668, None]
+Sentence2 shape: [3668, None]
+
+```
+
+Now prepend a `[CLS]` token, and concatenate the ragged tensors to form a single `input_word_ids` tensor for each example.
[`RaggedTensor.to_tensor()`](https://tensorflow.google.cn/api_docs/python/tf/RaggedTensor#to_tensor) zero pads to the longest sequence. + +```py +cls = [tokenizer.convert_tokens_to_ids(['[CLS]'])]*sentence1.shape[0] +input_word_ids = tf.concat([cls, sentence1, sentence2], axis=-1) +_ = plt.pcolormesh(input_word_ids.to_tensor()) +``` + +![png](img/10d71bce93ec45ba7076ef15a37bcb28.png) + +#### Mask and input type + +The model expects two additional inputs: + +* The input mask +* The input type + +The mask allows the model to cleanly differentiate between the content and the padding. The mask has the same shape as the `input_word_ids`, and contains a `1` anywhere the `input_word_ids` is not padding. + +```py +input_mask = tf.ones_like(input_word_ids).to_tensor() + +plt.pcolormesh(input_mask) +``` + +```py + + +``` + +![png](img/1f9a0765029471b20952ac80887f73a4.png) + +The "input type" also has the same shape, but inside the non-padded region, contains a `0` or a `1` indicating which sentence the token is a part of. + +```py +type_cls = tf.zeros_like(cls) +type_s1 = tf.zeros_like(sentence1) +type_s2 = tf.ones_like(sentence2) +input_type_ids = tf.concat([type_cls, type_s1, type_s2], axis=-1).to_tensor() + +plt.pcolormesh(input_type_ids) +``` + +```py + + +``` + +![png](img/e06760b4112e8fd989cdb1f7a948bc17.png) + +#### Put it all together + +Collect the above text parsing code into a single function, and apply it to each split of the `glue/mrpc` dataset. + +```py +def encode_sentence(s, tokenizer): + tokens = list(tokenizer.tokenize(s)) + tokens.append('[SEP]') + return tokenizer.convert_tokens_to_ids(tokens) + +def bert_encode(glue_dict, tokenizer): + num_examples = len(glue_dict["sentence1"]) + + sentence1 = tf.ragged.constant([ + encode_sentence(s, tokenizer) + for s in np.array(glue_dict["sentence1"])]) + sentence2 = tf.ragged.constant([ + encode_sentence(s, tokenizer) + for s in np.array(glue_dict["sentence2"])]) + + cls = [tokenizer.convert_tokens_to_ids(['[CLS]'])]*sentence1.shape[0] + input_word_ids = tf.concat([cls, sentence1, sentence2], axis=-1) + + input_mask = tf.ones_like(input_word_ids).to_tensor() + + type_cls = tf.zeros_like(cls) + type_s1 = tf.zeros_like(sentence1) + type_s2 = tf.ones_like(sentence2) + input_type_ids = tf.concat( + [type_cls, type_s1, type_s2], axis=-1).to_tensor() + + inputs = { + 'input_word_ids': input_word_ids.to_tensor(), + 'input_mask': input_mask, + 'input_type_ids': input_type_ids} + + return inputs +``` + +```py +glue_train = bert_encode(glue['train'], tokenizer) +glue_train_labels = glue['train']['label'] + +glue_validation = bert_encode(glue['validation'], tokenizer) +glue_validation_labels = glue['validation']['label'] + +glue_test = bert_encode(glue['test'], tokenizer) +glue_test_labels = glue['test']['label'] +``` + +Each subset of the data has been converted to a dictionary of features, and a set of labels. Each feature in the input dictionary has the same shape, and the number of labels should match: + +```py +for key, value in glue_train.items(): + print(f'{key:15s} shape: {value.shape}') + +print(f'glue_train_labels shape: {glue_train_labels.shape}') +``` + +```py +input_word_ids shape: (3668, 103) +input_mask shape: (3668, 103) +input_type_ids shape: (3668, 103) +glue_train_labels shape: (3668,) + +``` + +## The model + +### Build the model + +The first step is to download the configuration for the pre-trained model. 
+
+```py
+import json
+
+bert_config_file = os.path.join(gs_folder_bert, "bert_config.json")
+config_dict = json.loads(tf.io.gfile.GFile(bert_config_file).read())
+
+bert_config = bert.configs.BertConfig.from_dict(config_dict)
+
+config_dict
+```
+
+```py
+{'attention_probs_dropout_prob': 0.1,
+ 'hidden_act': 'gelu',
+ 'hidden_dropout_prob': 0.1,
+ 'hidden_size': 768,
+ 'initializer_range': 0.02,
+ 'intermediate_size': 3072,
+ 'max_position_embeddings': 512,
+ 'num_attention_heads': 12,
+ 'num_hidden_layers': 12,
+ 'type_vocab_size': 2,
+ 'vocab_size': 30522}
+
+```
+
+This `config` defines the core BERT model, a Keras model that predicts `num_classes` outputs from inputs with maximum sequence length `max_seq_length`.
+
+The `classifier_model` function below returns both the encoder and the classifier.
+
+```py
+bert_classifier, bert_encoder = bert.bert_models.classifier_model(
+    bert_config, num_labels=2)
+```
+
+The classifier has three inputs and one output:
+
+```py
+tf.keras.utils.plot_model(bert_classifier, show_shapes=True, dpi=48)
+```
+
+![png](img/906a04e5434908ec33033e39f2e83f6b.png)
+
+Run it on a test batch of 10 examples from the training set. The output is the logits for the two classes:
+
+```py
+glue_batch = {key: val[:10] for key, val in glue_train.items()}
+
+bert_classifier(
+    glue_batch, training=True
+).numpy()
+```
+
+```py
+array([[ 0.08382261,  0.34465584],
+       [ 0.02057236,  0.24053624],
+       [ 0.04930754,  0.1117427 ],
+       [ 0.17041089,  0.20810834],
+       [ 0.21667874,  0.2840511 ],
+       [ 0.02325345,  0.33799925],
+       [-0.06198866,  0.13532838],
+       [ 0.084592  ,  0.20711854],
+       [-0.04323687,  0.17096342],
+       [ 0.23759182,  0.16801538]], dtype=float32)
+
+```
+
+The `TransformerEncoder` in the center of the classifier above **is** the `bert_encoder`.
+
+Inspecting the encoder, we see its stack of `Transformer` layers connected to those same three inputs:
+
+```py
+tf.keras.utils.plot_model(bert_encoder, show_shapes=True, dpi=48)
+```
+
+![png](img/6d5e829de3a867f7bb56dff003b7e217.png)
+
+### Restore the encoder weights
+
+When built, the encoder is randomly initialized. Restore the encoder's weights from the checkpoint:
+
+```py
+checkpoint = tf.train.Checkpoint(model=bert_encoder)
+checkpoint.restore(
+  os.path.join(gs_folder_bert, 'bert_model.ckpt')).assert_consumed()
+```
+
+```py
+
+
+```
+
+**Note:** The pretrained `TransformerEncoder` is also available on [TensorFlow Hub](https://tensorflow.org/hub). See the [Hub appendix](#hub_bert) for details.
+
+### Set up the optimizer
+
+BERT adopts the Adam optimizer with weight decay (aka "[AdamW](https://arxiv.org/abs/1711.05101)"). It also employs a learning rate schedule that first warms up from 0 and then decays to 0.
+
+```py
+# Set up epochs and steps
+epochs = 3
+batch_size = 32
+eval_batch_size = 32
+
+train_data_size = len(glue_train_labels)
+steps_per_epoch = int(train_data_size / batch_size)
+num_train_steps = steps_per_epoch * epochs
+warmup_steps = int(epochs * train_data_size * 0.1 / batch_size)
+
+# Creates an optimizer with a learning rate schedule.
+optimizer = nlp.optimization.create_optimizer(
+    2e-5, num_train_steps=num_train_steps, num_warmup_steps=warmup_steps)
+```
+
+This returns an `AdamWeightDecay` optimizer with the learning rate schedule set:
+
+```py
+type(optimizer)
+```
+
+```py
+official.nlp.optimization.AdamWeightDecay
+
+```
+
+To see an example of how to customize the optimizer and its schedule, see the [Optimizer schedule appendix](#optiizer_schedule).
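+As a quick sanity check (a sketch, not part of the original notebook), you can verify the step counts these settings produce, assuming the MRPC training-set size of 3,668 examples shown earlier:
+
+```py
+# A small sanity check, assuming train_data_size = 3668 as shown earlier.
+train_data_size = 3668
+batch_size = 32
+epochs = 3
+
+steps_per_epoch = int(train_data_size / batch_size)              # 114
+num_train_steps = steps_per_epoch * epochs                       # 342
+warmup_steps = int(epochs * train_data_size * 0.1 / batch_size)  # 34
+
+# Roughly the first 10% of the 342 training steps warm the learning rate up.
+print(steps_per_epoch, num_train_steps, warmup_steps)  # 114 342 34
+```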
+
+### Train the model
+
+The metric is accuracy, and we use sparse categorical cross-entropy as the loss.
+
+```py
+metrics = [tf.keras.metrics.SparseCategoricalAccuracy('accuracy', dtype=tf.float32)]
+loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+
+bert_classifier.compile(
+    optimizer=optimizer,
+    loss=loss,
+    metrics=metrics)
+
+bert_classifier.fit(
+      glue_train, glue_train_labels,
+      validation_data=(glue_validation, glue_validation_labels),
+      batch_size=32,
+      epochs=epochs)
+```
+
+```py
+Epoch 1/3
+115/115 [==============================] - 26s 222ms/step - loss: 0.6151 - accuracy: 0.6611 - val_loss: 0.5462 - val_accuracy: 0.7451
+Epoch 2/3
+115/115 [==============================] - 24s 212ms/step - loss: 0.4447 - accuracy: 0.8010 - val_loss: 0.4150 - val_accuracy: 0.8309
+Epoch 3/3
+115/115 [==============================] - 24s 213ms/step - loss: 0.2830 - accuracy: 0.8964 - val_loss: 0.3697 - val_accuracy: 0.8480
+
+```
+
+Now run the fine-tuned model on a custom example to see that it works.
+
+Start by encoding some sentence pairs:
+
+```py
+my_examples = bert_encode(
+    glue_dict = {
+        'sentence1':[
+            'The rain in Spain falls mainly on the plain.',
+            'Look I fine tuned BERT.'],
+        'sentence2':[
+            'It mostly rains on the flat lands of Spain.',
+            'Is it working? This does not match.']
+    },
+    tokenizer=tokenizer)
+```
+
+The model should report class `1` "match" for the first example and class `0` "no-match" for the second:
+
+```py
+result = bert_classifier(my_examples, training=False)
+
+result = tf.argmax(result).numpy()
+result
+```
+
+```py
+array([1, 0])
+
+```
+
+```py
+np.array(info.features['label'].names)[result]
+```
+
+```py
+array(['equivalent', 'not_equivalent'], dtype='<U14')
+
+```
+
+## Appendix
+
+### Rebuilding the encoder and classifier
+
+The `classifier_model` function used earlier is a thin wrapper around the lower-level `nlp.modeling` components; the encoder can be rebuilt directly from a `transformer_config` dictionary derived from the BERT configuration, whose contents look like this:
+
+```py
+{'activation': <function gelu>,
+ 'dropout_rate': 0.1,
+ 'initializer': <TruncatedNormal initializer>,
+ 'max_sequence_length': 512,
+ 'num_layers': 12}
+
+```
+
+```py
+manual_encoder = nlp.modeling.networks.TransformerEncoder(**transformer_config)
+```
+
+Restore the weights:
+
+```py
+checkpoint = tf.train.Checkpoint(model=manual_encoder)
+checkpoint.restore(
+  os.path.join(gs_folder_bert, 'bert_model.ckpt')).assert_consumed()
+```
+
+```py
+
+
+```
+
+Test run it:
+
+```py
+result = manual_encoder(my_examples, training=True)
+
+print("Sequence output shape:", result[0].shape)
+print("Pooled output shape:", result[1].shape)
+```
+
+```py
+Sequence output shape: (2, 23, 768)
+Pooled output shape: (2, 768)
+
+```
+
+Wrap it in a classifier:
+
+```py
+manual_classifier = nlp.modeling.models.BertClassifier(
+        bert_encoder,
+        num_classes=2,
+        dropout_rate=transformer_config['dropout_rate'],
+        initializer=tf.keras.initializers.TruncatedNormal(
+          stddev=bert_config.initializer_range))
+```
+
+```py
+manual_classifier(my_examples, training=True).numpy()
+```
+
+```py
+array([[ 0.07863025, -0.02940944],
+       [ 0.30274656,  0.27299827]], dtype=float32)
+
+```
+
+### Optimizers and schedules
+
+The optimizer used to train the model was created using the `nlp.optimization.create_optimizer` function:
+
+```py
+optimizer = nlp.optimization.create_optimizer(
+    2e-5, num_train_steps=num_train_steps, num_warmup_steps=warmup_steps)
+```
+
+That high-level wrapper sets up the learning rate schedule and the optimizer.
+The base learning rate schedule used here is a linear decay to zero over the training run:
+
+```py
+epochs = 3
+batch_size = 32
+eval_batch_size = 32
+
+train_data_size = len(glue_train_labels)
+steps_per_epoch = int(train_data_size / batch_size)
+num_train_steps = steps_per_epoch * epochs
+```
+
+```py
+decay_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
+      initial_learning_rate=2e-5,
+      decay_steps=num_train_steps,
+      end_learning_rate=0)
+
+plt.plot([decay_schedule(n) for n in range(num_train_steps)])
+```
+
+```py
+[<matplotlib.lines.Line2D>]
+
+```
+
+![png](img/868f946086995ef931b7b454d904e14b.png)
+
+This, in turn, is wrapped in a `WarmUp` schedule that linearly increases the learning rate to the target value over the first 10% of training:
+
+```py
+warmup_steps = num_train_steps * 0.1
+
+warmup_schedule = nlp.optimization.WarmUp(
+        initial_learning_rate=2e-5,
+        decay_schedule_fn=decay_schedule,
+        warmup_steps=warmup_steps)
+
+# The warmup overshoots, because it warms up to the `initial_learning_rate`
+# following the original implementation. You can set
+# `initial_learning_rate=decay_schedule(warmup_steps)` if you don't like the
+# overshoot.
+plt.plot([warmup_schedule(n) for n in range(num_train_steps)])
+```
+
+```py
+[<matplotlib.lines.Line2D>]
+
+```
+
+![png](img/c542bc6784512a8abdc2e3a85a1e1905.png)
+
+Then create the `nlp.optimization.AdamWeightDecay` using that schedule, configured for the BERT model:
+
+```py
+optimizer = nlp.optimization.AdamWeightDecay(
+        learning_rate=warmup_schedule,
+        weight_decay_rate=0.01,
+        epsilon=1e-6,
+        exclude_from_weight_decay=['LayerNorm', 'layer_norm', 'bias'])
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/055.md b/Tensorflow/TensorFlow2.0/055.md
new file mode 100644
index 00000000..26d06b71
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/055.md
@@ -0,0 +1 @@
+# 结构化数据
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/056.md b/Tensorflow/TensorFlow2.0/056.md
new file mode 100644
index 00000000..a6ffa462
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/056.md
@@ -0,0 +1,16973 @@
+# 对结构化数据进行分类
+
+> 原文:[https://tensorflow.google.cn/tutorials/structured_data/feature_columns](https://tensorflow.google.cn/tutorials/structured_data/feature_columns)
+
+**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为,所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议,请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。
+
+本教程演示了如何对结构化数据进行分类(例如,CSV 中的表格数据)。我们将使用 [Keras](https://tensorflow.google.cn/guide/keras) 来定义模型,将[特征列(feature columns)](https://tensorflow.google.cn/guide/feature_columns) 作为从 CSV 中的列(columns)映射到用于训练模型的特征(features)的桥梁。本教程包括了以下内容的完整代码:
+
+* 用 [Pandas](https://pandas.pydata.org/) 导入 CSV 文件。
+* 用 [tf.data](https://tensorflow.google.cn/guide/datasets) 建立一个输入流水线(pipeline),用于对行进行分批(batch)和随机排序(shuffle)。
+* 用特征列将 CSV 中的列映射到用于训练模型的特征。
+* 用 Keras 构建、训练并评估模型。
+
+## 数据集
+
+我们将使用一个小型 [数据集](https://archive.ics.uci.edu/ml/datasets/heart+Disease),该数据集由克利夫兰心脏病诊所基金会(Cleveland Clinic Foundation for Heart Disease)提供。CSV 中有几百行数据。每行描述了一个病人(patient),每列描述了一个属性(attribute)。我们将使用这些信息来预测一位病人是否患有心脏病,这是在该数据集上的二分类任务。
+
+下面是该数据集的[描述](https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/heart-disease.names)。请注意,有数值(numeric)和类别(categorical)类型的列。
+
+> | 列 | 描述 | 特征类型 | 数据类型 |
> | --- | --- | --- | --- |
> | Age | 年龄以年为单位 | Numerical | integer |
> | Sex | (1 = 男;0 = 女) |
Categorical | integer | +> | CP | 胸痛类型(0,1,2,3,4) | Categorical | integer | +> | Trestbpd | 静息血压(入院时,以 mm Hg 计) | Numerical | integer | +> | Chol | 血清胆固醇(mg/dl) | Numerical | integer | +> | FBS | (空腹血糖> 120 mg/dl)(1 = true;0 = false) | Categorical | integer | +> | RestECG | 静息心电图结果(0,1,2) | Categorical | integer | +> | Thalach | 达到的最大心率 | Numerical | integer | +> | Exang | 运动诱发心绞痛(1 =是;0 =否) | Categorical | integer | +> | Oldpeak | 与休息时相比由运动引起的 ST 节段下降 | Numerical | integer | +> | Slope | 在运动高峰 ST 段的斜率 | Numerical | float | +> | CA | 荧光透视法染色的大血管动脉(0-3)的数量 | Numerical | integer | +> | Thal | 3 =正常;6 =固定缺陷;7 =可逆缺陷 | Categorical | string | +> | Target | 心脏病诊断(1 = true;0 = false) | Classification | integer | + +## 导入 TensorFlow 和其他库 + +```py +pip install -q sklearn + +``` + +```py +WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available. +You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command. + +``` + +```py +import numpy as np +import pandas as pd + +import tensorflow as tf + +from tensorflow import feature_column +from tensorflow.keras import layers +from sklearn.model_selection import train_test_split +``` + +## 使用 Pandas 创建一个 dataframe + +[Pandas](https://pandas.pydata.org/) 是一个 Python 库,它有许多有用的实用程序,用于加载和处理结构化数据。我们将使用 Pandas 从 URL 下载数据集,并将其加载到 dataframe 中。 + +```py +URL = 'https://storage.googleapis.com/applied-dl/heart.csv' +dataframe = pd.read_csv(URL) +dataframe.head() +``` + + + +## 将 dataframe 拆分为训练、验证和测试集 + +我们下载的数据集是一个 CSV 文件。 我们将其拆分为训练、验证和测试集。 + +```py +train, test = train_test_split(dataframe, test_size=0.2) +train, val = train_test_split(train, test_size=0.2) +print(len(train), 'train examples') +print(len(val), 'validation examples') +print(len(test), 'test examples') +``` + +```py +193 train examples +49 validation examples +61 test examples + +``` + +## 用 tf.data 创建输入流水线 + +接下来,我们将使用 [tf.data](https://tensorflow.google.cn/guide/datasets) 包装 dataframe。这让我们能将特征列作为一座桥梁,该桥梁将 Pandas dataframe 中的列映射到用于训练模型的特征。如果我们使用一个非常大的 CSV 文件(非常大以至于它不能放入内存),我们将使用 tf.data 直接从磁盘读取它。本教程不涉及这一点。 + +```py +# 一种从 Pandas Dataframe 创建 tf.data 数据集的实用程序方法(utility method) +def df_to_dataset(dataframe, shuffle=True, batch_size=32): + dataframe = dataframe.copy() + labels = dataframe.pop('target') + ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels)) + if shuffle: + ds = ds.shuffle(buffer_size=len(dataframe)) + ds = ds.batch(batch_size) + return ds +``` + +```py +batch_size = 5 # 小批量大小用于演示 +train_ds = df_to_dataset(train, batch_size=batch_size) +val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size) +test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size) +``` + +## 理解输入流水线 + +现在我们已经创建了输入流水线,让我们调用它来查看它返回的数据的格式。 我们使用了一小批量大小来保持输出的可读性。 + +```py +for feature_batch, label_batch in train_ds.take(1): + print('Every feature:', list(feature_batch.keys())) + print('A batch of ages:', feature_batch['age']) + print('A batch of targets:', label_batch ) +``` + +```py +Every feature: ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal'] +A batch of ages: tf.Tensor([58 44 44 50 54], shape=(5,), dtype=int64) +A batch of targets: tf.Tensor([0 1 0 0 1], shape=(5,), dtype=int64) + +``` + +我们可以看到数据集返回了一个字典,该字典从列名称(来自 dataframe)映射到 dataframe 中行的列值。 + +## 演示几种特征列 + +TensorFlow 提供了多种特征列。本节中,我们将创建几类特征列,并演示特征列如何转换 dataframe 中的列。 + +```py +# 我们将使用该批数据演示几种特征列 +example_batch = next(iter(train_ds))[0] +``` + +```py +# 用于创建一个特征列 +# 并转换一批次数据的一个实用程序方法 +def 
demo(feature_column): + feature_layer = layers.DenseFeatures(feature_column) + print(feature_layer(example_batch).numpy()) +``` + +### 数值列 + +一个特征列的输出将成为模型的输入(使用上面定义的 demo 函数,我们将能准确地看到 dataframe 中的每列的转换方式)。 [数值列(numeric column)](https://tensorflow.google.cn/api_docs/python/tf/feature_column/numeric_column) 是最简单的列类型。它用于表示实数特征。使用此列时,模型将从 dataframe 中接收未更改的列值。 + +```py +age = feature_column.numeric_column("age") +demo(age) +``` + +```py +WARNING:tensorflow:Layer dense_features is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +[[63.] + [62.] + [59.] + [74.] + [68.]] + +``` + +在这个心脏病数据集中,dataframe 中的大多数列都是数值列。 + +### 分桶列 + +通常,您不希望将数字直接输入模型,而是根据数值范围将其值分成不同的类别。考虑代表一个人年龄的原始数据。我们可以用 [分桶列(bucketized column)](https://tensorflow.google.cn/api_docs/python/tf/feature_column/bucketized_column)将年龄分成几个分桶(buckets),而不是将年龄表示成数值列。请注意下面的 one-hot 数值表示每行匹配的年龄范围。 + +```py +age_buckets = feature_column.bucketized_column(age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) +demo(age_buckets) +``` + +```py +WARNING:tensorflow:Layer dense_features_1 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +[[0\. 0\. 0\. 0\. 0\. 0\. 0\. 0\. 0\. 1\. 0.] + [0\. 0\. 0\. 0\. 0\. 0\. 0\. 0\. 0\. 1\. 0.] + [0\. 0\. 0\. 0\. 0\. 0\. 0\. 0\. 1\. 0\. 0.] + [0\. 0\. 0\. 0\. 0\. 0\. 0\. 0\. 0\. 0\. 1.] + [0\. 0\. 0\. 0\. 0\. 0\. 0\. 0\. 0\. 0\. 1.]] + +``` + +### 分类列 + +在此数据集中,thal 用字符串表示(如 'fixed','normal',或 'reversible')。我们无法直接将字符串提供给模型。相反,我们必须首先将它们映射到数值。分类词汇列(categorical vocabulary columns)提供了一种用 one-hot 向量表示字符串的方法(就像您在上面看到的年龄分桶一样)。词汇表可以用 [categorical_column_with_vocabulary_list](https://tensorflow.google.cn/api_docs/python/tf/feature_column/categorical_column_with_vocabulary_list) 作为 list 传递,或者用 [categorical_column_with_vocabulary_file](https://tensorflow.google.cn/api_docs/python/tf/feature_column/categorical_column_with_vocabulary_file) 从文件中加载。 + +```py +thal = feature_column.categorical_column_with_vocabulary_list( + 'thal', ['fixed', 'normal', 'reversible']) + +thal_one_hot = feature_column.indicator_column(thal) +demo(thal_one_hot) +``` + +```py +WARNING:tensorflow:Layer dense_features_2 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx. 
+ +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +[[0\. 0\. 1.] + [0\. 0\. 1.] + [1\. 0\. 0.] + [0\. 1\. 0.] + [0\. 0\. 1.]] + +``` + +在更复杂的数据集中,许多列都是分类列(如 strings)。在处理分类数据时,特征列最有价值。尽管在该数据集中只有一列分类列,但我们将使用它来演示在处理其他数据集时,可以使用的几种重要的特征列。 + +### 嵌入列 + +假设我们不是只有几个可能的字符串,而是每个类别有数千(或更多)值。 由于多种原因,随着类别数量的增加,使用 one-hot 编码训练神经网络变得不可行。我们可以使用嵌入列来克服此限制。[嵌入列(embedding column)](https://tensorflow.google.cn/api_docs/python/tf/feature_column/embedding_column)将数据表示为一个低维度密集向量,而非多维的 one-hot 向量,该低维度密集向量可以包含任何数,而不仅仅是 0 或 1。嵌入的大小(在下面的示例中为 8)是必须调整的参数。 + +关键点:当分类列具有许多可能的值时,最好使用嵌入列。我们在这里使用嵌入列用于演示目的,为此您有一个完整的示例,以在将来可以修改用于其他数据集。 + +```py +# 注意到嵌入列的输入是我们之前创建的类别列 +thal_embedding = feature_column.embedding_column(thal, dimension=8) +demo(thal_embedding) +``` + +```py +WARNING:tensorflow:Layer dense_features_3 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +[[ 0.00543996 -0.5059579 0.0389499 -0.20236802 0.11128058 0.59121 + 0.14891742 -0.11942385] + [ 0.00543996 -0.5059579 0.0389499 -0.20236802 0.11128058 0.59121 + 0.14891742 -0.11942385] + [ 0.09787773 -0.5861865 -0.3713007 -0.1747458 -0.01538717 0.55458224 + 0.12537968 -0.11748305] + [-0.00701649 0.28291813 0.23547529 -0.5102454 -0.5388726 0.5154376 + 0.12235989 0.44484815] + [ 0.00543996 -0.5059579 0.0389499 -0.20236802 0.11128058 0.59121 + 0.14891742 -0.11942385]] + +``` + +### 经过哈希处理的特征列 + +表示具有大量数值的分类列的另一种方法是使用 [categorical_column_with_hash_bucket](https://tensorflow.google.cn/api_docs/python/tf/feature_column/categorical_column_with_hash_bucket)。该特征列计算输入的一个哈希值,然后选择一个 `hash_bucket_size` 分桶来编码字符串。使用此列时,您不需要提供词汇表,并且可以选择使 hash_buckets 的数量远远小于实际类别的数量以节省空间。 + +关键点:该技术的一个重要缺点是可能存在冲突,不同的字符串被映射到同一个范围。实际上,无论如何,经过哈希处理的特征列对某些数据集都有效。 + +```py +thal_hashed = feature_column.categorical_column_with_hash_bucket( + 'thal', hash_bucket_size=1000) +demo(feature_column.indicator_column(thal_hashed)) +``` + +```py +WARNING:tensorflow:Layer dense_features_4 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. 
If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +[[0\. 0\. 0\. ... 0\. 0\. 0.] + [0\. 0\. 0\. ... 0\. 0\. 0.] + [0\. 0\. 0\. ... 0\. 0\. 0.] + [0\. 0\. 0\. ... 0\. 0\. 0.] + [0\. 0\. 0\. ... 0\. 0\. 0.]] + +``` + +### 组合的特征列 + +将多种特征组合到一个特征中,称为[特征组合(feature crosses)](https://developers.google.cn/machine-learning/glossary/#feature_cross),它让模型能够为每种特征组合学习单独的权重。此处,我们将创建一个 age 和 thal 组合的新特征。请注意,`crossed_column` 不会构建所有可能组合的完整列表(可能非常大)。相反,它由 `hashed_column` 支持,因此您可以选择表的大小。 + +```py +crossed_feature = feature_column.crossed_column([age_buckets, thal], hash_bucket_size=1000) +demo(feature_column.indicator_column(crossed_feature)) +``` + +```py +WARNING:tensorflow:Layer dense_features_5 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2\. The layer has dtype float32 because its dtype defaults to floatx. + +If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2. + +To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor. + +[[0\. 0\. 0\. ... 0\. 0\. 0.] + [0\. 0\. 0\. ... 0\. 0\. 0.] + [0\. 0\. 0\. ... 0\. 0\. 0.] + [0\. 0\. 0\. ... 0\. 0\. 0.] + [0\. 0\. 0\. ... 0\. 0\. 0.]] + +``` + +## 选择要使用的列 + +我们已经了解了如何使用几种类型的特征列。 现在我们将使用它们来训练模型。本教程的目标是向您展示使用特征列所需的完整代码(例如,机制)。我们任意地选择了几列来训练我们的模型。 + +关键点:如果您的目标是建立一个准确的模型,请尝试使用您自己的更大的数据集,并仔细考虑哪些特征最有意义,以及如何表示它们。 + +```py +feature_columns = [] + +# 数值列 +for header in ['age', 'trestbps', 'chol', 'thalach', 'oldpeak', 'slope', 'ca']: + feature_columns.append(feature_column.numeric_column(header)) + +# 分桶列 +age_buckets = feature_column.bucketized_column(age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) +feature_columns.append(age_buckets) + +# 分类列 +thal = feature_column.categorical_column_with_vocabulary_list( + 'thal', ['fixed', 'normal', 'reversible']) +thal_one_hot = feature_column.indicator_column(thal) +feature_columns.append(thal_one_hot) + +# 嵌入列 +thal_embedding = feature_column.embedding_column(thal, dimension=8) +feature_columns.append(thal_embedding) + +# 组合列 +crossed_feature = feature_column.crossed_column([age_buckets, thal], hash_bucket_size=1000) +crossed_feature = feature_column.indicator_column(crossed_feature) +feature_columns.append(crossed_feature) +``` + +### 建立一个新的特征层 + +现在我们已经定义了我们的特征列,我们将使用[密集特征(DenseFeatures)](https://tensorflow.google.cn/versions/r2.0/api_docs/python/tf/keras/layers/DenseFeatures)层将特征列输入到我们的 Keras 模型中。 + +```py +feature_layer = tf.keras.layers.DenseFeatures(feature_columns) +``` + +之前,我们使用一个小批量大小来演示特征列如何运转。我们将创建一个新的更大批量的输入流水线。 + +```py +batch_size = 32 +train_ds = df_to_dataset(train, batch_size=batch_size) +val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size) +test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size) +``` + +## 创建,编译和训练模型 + +```py +model = tf.keras.Sequential([ + feature_layer, + layers.Dense(128, activation='relu'), + layers.Dense(128, activation='relu'), + layers.Dense(1, activation='sigmoid') +]) + +model.compile(optimizer='adam', + loss='binary_crossentropy', + metrics=['accuracy'], + run_eagerly=True) + +model.fit(train_ds, + validation_data=val_ds, + epochs=5) +``` 
+
+```py
+Epoch 1/5
+WARNING:tensorflow:Layers in a Sequential model should only have a single input tensor, but we receive a <class 'dict'> input: {'age': <tf.Tensor>, 'sex': <tf.Tensor>, 'cp': <tf.Tensor>, 'trestbps': <tf.Tensor>, 'chol': <tf.Tensor>, 'fbs': <tf.Tensor>, 'restecg': <tf.Tensor>, 'thalach': <tf.Tensor>, 'exang': <tf.Tensor>, 'oldpeak': <tf.Tensor>, 'slope': <tf.Tensor>, 'ca': <tf.Tensor>, 'thal': <tf.Tensor>}
+Consider rewriting this model with the Functional API.
+7/7 [==============================] - 0s 44ms/step - loss: 2.0670 - accuracy: 0.6062 - val_loss: 1.8843 - val_accuracy: 0.7347
+Epoch 2/5
+7/7 [==============================] - 0s 42ms/step - loss: 1.5046 - accuracy: 0.5803 - val_loss: 0.5387 - val_accuracy: 0.7551
+Epoch 3/5
+7/7 [==============================] - 0s 42ms/step - loss: 1.0386 - accuracy: 0.6995 - val_loss: 0.6039 - val_accuracy: 0.6531
+Epoch 4/5
+7/7 [==============================] - 0s 43ms/step - loss: 0.6209 - accuracy: 0.6943 - val_loss: 0.6867 - val_accuracy: 0.7347
+Epoch 5/5
+7/7 [==============================] - 0s 41ms/step - loss: 0.6719 - accuracy: 0.7098 - val_loss: 0.5845 - val_accuracy: 0.7347
+
+```
+
+```py
+loss, accuracy = model.evaluate(test_ds)
+print("Accuracy", accuracy)
+```
+
+```py
+WARNING:tensorflow:Layers in a Sequential model should only have a single input tensor, but we receive a <class 'dict'> input: {'age': <tf.Tensor>, 'sex': <tf.Tensor>, 'cp': <tf.Tensor>, 'trestbps': <tf.Tensor>, 'chol': <tf.Tensor>, 'fbs': <tf.Tensor>, 'restecg': <tf.Tensor>, 'thalach': <tf.Tensor>, 'exang': <tf.Tensor>, 'oldpeak': <tf.Tensor>, 'slope': <tf.Tensor>, 'ca': <tf.Tensor>, 'thal': <tf.Tensor>}
+Consider rewriting this model with the Functional API.
+2/2 [==============================] - 0s 14ms/step - loss: 0.4201 - accuracy: 0.8197
+Accuracy 0.8196721076965332
+
+```
+
+关键点:通常使用更大更复杂的数据集进行深度学习,您将看到最佳结果。使用像这样的小数据集时,我们建议使用决策树或随机森林作为强有力的基准。本教程的目的不是训练一个准确的模型,而是演示处理结构化数据的机制,这样,在将来使用自己的数据集时,您就有可以用作起点的代码。
+
+### 下一步
+
+了解有关分类结构化数据的更多信息的最佳方法是亲自尝试。我们建议寻找另一个可以使用的数据集,并使用和上面相似的代码训练一个模型并对其分类。要提高准确率,请仔细考虑模型中包含哪些特征,以及如何表示这些特征。
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/057.md b/Tensorflow/TensorFlow2.0/057.md
new file mode 100644
index 00000000..5f42c422
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/057.md
@@ -0,0 +1,1278 @@
+# Classification on imbalanced data
+
+> 原文:[https://tensorflow.google.cn/tutorials/structured_data/imbalanced_data](https://tensorflow.google.cn/tutorials/structured_data/imbalanced_data)
+
+This tutorial demonstrates how to classify a highly imbalanced dataset in which the number of examples in one class greatly outnumbers the examples in another. You will work with the [Credit Card Fraud Detection](https://www.kaggle.com/mlg-ulb/creditcardfraud) dataset hosted on Kaggle. The aim is to detect a mere 492 fraudulent transactions from 284,807 transactions in total. You will use [Keras](https://tensorflow.google.cn/guide/keras/overview) to define the model and [class weights](https://tensorflow.google.cn/versions/r2.0/api_docs/python/tf/keras/Model) to help the model learn from the imbalanced data.
+
+This tutorial contains complete code to:
+
+* Load a CSV file using Pandas.
+* Create train, validation, and test sets.
+* Define and train a model using Keras (including setting class weights).
+* Evaluate the model using various metrics (including precision and recall).
+* Try common techniques for dealing with imbalanced data like:
+    * Class weighting
+    * Oversampling
+
+## Setup
+
+```py
+import tensorflow as tf
+from tensorflow import keras
+
+import os
+import tempfile
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+
+import sklearn
+from sklearn.metrics import confusion_matrix
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+```
+
+```py
+mpl.rcParams['figure.figsize'] = (12, 10)
+colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
+```
+
+## Data processing and exploration
+
+### Download the Kaggle Credit Card Fraud data set
+
+Pandas is a Python library with many helpful utilities for loading and working with structured data and can be used to download CSVs into a dataframe.
+ +**Note:** This dataset has been collected and analysed during a research collaboration of Worldline and the [Machine Learning Group](http://mlg.ulb.ac.be) of ULB (Université Libre de Bruxelles) on big data mining and fraud detection. More details on current and past projects on related topics are available [here](https://www.researchgate.net/project/Fraud-detection-5) and the page of the [DefeatFraud](https://mlg.ulb.ac.be/wordpress/portfolio_page/defeatfraud-assessment-and-validation-of-deep-feature-engineering-and-learning-solutions-for-fraud-detection/) project + +```py +file = tf.keras.utils +raw_df = pd.read_csv('https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv') +raw_df.head() +``` + + + +```py +raw_df[['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V26', 'V27', 'V28', 'Amount', 'Class']].describe() +``` + + + +### Examine the class label imbalance + +Let's look at the dataset imbalance: + +```py +neg, pos = np.bincount(raw_df['Class']) +total = neg + pos +print('Examples:\n Total: {}\n Positive: {} ({:.2f}% of total)\n'.format( + total, pos, 100 * pos / total)) +``` + +```py +Examples: + Total: 284807 + Positive: 492 (0.17% of total) + +``` + +This shows the small fraction of positive samples. + +### Clean, split and normalize the data + +The raw data has a few issues. First the `Time` and `Amount` columns are too variable to use directly. Drop the `Time` column (since it's not clear what it means) and take the log of the `Amount` column to reduce its range. + +```py +cleaned_df = raw_df.copy() + +# You don't want the `Time` column. +cleaned_df.pop('Time') + +# The `Amount` column covers a huge range. Convert to log-space. +eps = 0.001 # 0 => 0.1¢ +cleaned_df['Log Ammount'] = np.log(cleaned_df.pop('Amount')+eps) +``` + +Split the dataset into train, validation, and test sets. The validation set is used during the model fitting to evaluate the loss and any metrics, however the model is not fit with this data. The test set is completely unused during the training phase and is only used at the end to evaluate how well the model generalizes to new data. This is especially important with imbalanced datasets where [overfitting](https://developers.google.cn/machine-learning/crash-course/generalization/peril-of-overfitting) is a significant concern from the lack of training data. + +```py +# Use a utility from sklearn to split and shuffle our dataset. +train_df, test_df = train_test_split(cleaned_df, test_size=0.2) +train_df, val_df = train_test_split(train_df, test_size=0.2) + +# Form np arrays of labels and features. +train_labels = np.array(train_df.pop('Class')) +bool_train_labels = train_labels != 0 +val_labels = np.array(val_df.pop('Class')) +test_labels = np.array(test_df.pop('Class')) + +train_features = np.array(train_df) +val_features = np.array(val_df) +test_features = np.array(test_df) +``` + +Normalize the input features using the sklearn StandardScaler. This will set the mean to 0 and standard deviation to 1. + +**Note:** The `StandardScaler` is only fit using the `train_features` to be sure the model is not peeking at the validation or test sets. 
+
+```py
+scaler = StandardScaler()
+train_features = scaler.fit_transform(train_features)
+
+val_features = scaler.transform(val_features)
+test_features = scaler.transform(test_features)
+
+train_features = np.clip(train_features, -5, 5)
+val_features = np.clip(val_features, -5, 5)
+test_features = np.clip(test_features, -5, 5)
+
+print('Training labels shape:', train_labels.shape)
+print('Validation labels shape:', val_labels.shape)
+print('Test labels shape:', test_labels.shape)
+
+print('Training features shape:', train_features.shape)
+print('Validation features shape:', val_features.shape)
+print('Test features shape:', test_features.shape)
+```
+
+```py
+Training labels shape: (182276,)
+Validation labels shape: (45569,)
+Test labels shape: (56962,)
+Training features shape: (182276, 29)
+Validation features shape: (45569, 29)
+Test features shape: (56962, 29)
+
+```
+
+**Caution:** If you want to deploy a model, it's critical that you preserve the preprocessing calculations. The easiest way is to implement them as layers and attach them to your model before export.
+
+### Look at the data distribution
+
+Next, compare the distributions of the positive and negative examples over a few features. Good questions to ask yourself at this point are:
+
+* Do these distributions make sense?
+    * Yes. You've normalized the input and these are mostly concentrated in the `+/- 2` range.
+* Can you see the difference between the distributions?
+    * Yes, the positive examples contain a much higher rate of extreme values.
+
+```py
+pos_df = pd.DataFrame(train_features[ bool_train_labels], columns=train_df.columns)
+neg_df = pd.DataFrame(train_features[~bool_train_labels], columns=train_df.columns)
+
+sns.jointplot(pos_df['V5'], pos_df['V6'],
+              kind='hex', xlim=(-5,5), ylim=(-5,5))
+plt.suptitle("Positive distribution")
+
+sns.jointplot(neg_df['V5'], neg_df['V6'],
+              kind='hex', xlim=(-5,5), ylim=(-5,5))
+_ = plt.suptitle("Negative distribution")
+```
+
+```py
+/home/kbuilder/.local/lib/python3.6/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
+  FutureWarning
+/home/kbuilder/.local/lib/python3.6/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
+  FutureWarning
+
+```
+
+![png](img/72de951dda8439ddee9fe72a64a9ed2e.png)
+
+![png](img/67c5e6fec9fd9b11f792765822fbb538.png)
+
+## Define the model and metrics
+
+Define a function that creates a simple neural network with a densely connected hidden layer, a [dropout](https://developers.google.cn/machine-learning/glossary/#dropout_regularization) layer to reduce overfitting, and an output sigmoid layer that returns the probability of a transaction being fraudulent:
+
+```py
+METRICS = [
+      keras.metrics.TruePositives(name='tp'),
+      keras.metrics.FalsePositives(name='fp'),
+      keras.metrics.TrueNegatives(name='tn'),
+      keras.metrics.FalseNegatives(name='fn'),
+      keras.metrics.BinaryAccuracy(name='accuracy'),
+      keras.metrics.Precision(name='precision'),
+      keras.metrics.Recall(name='recall'),
+      keras.metrics.AUC(name='auc'),
+]
+
+def make_model(metrics=METRICS, output_bias=None):
+  if output_bias is not None:
+    output_bias = tf.keras.initializers.Constant(output_bias)
+  model = keras.Sequential([
+      keras.layers.Dense(
+          16, activation='relu',
+          input_shape=(train_features.shape[-1],)),
+      keras.layers.Dropout(0.5),
+      keras.layers.Dense(1, activation='sigmoid',
+                         bias_initializer=output_bias),
+  ])
+
+  model.compile(
+      optimizer=keras.optimizers.Adam(lr=1e-3),
+      loss=keras.losses.BinaryCrossentropy(),
+      metrics=metrics)
+
+  return model
+```
+
+### Understanding useful metrics
+
+Notice that there are a few metrics defined above that can be computed by the model and that will be helpful when evaluating performance.
+
+* **False** negatives and **false** positives are samples that were **incorrectly** classified
+* **True** negatives and **true** positives are samples that were **correctly** classified
+* **Accuracy** is the percentage of examples correctly classified > $\frac{\text{true samples} }{\text{total samples} }$
+* **Precision** is the percentage of **predicted** positives that were correctly classified > $\frac{\text{true positives} }{\text{true positives + false positives} }$
+* **Recall** is the percentage of **actual** positives that were correctly classified > $\frac{\text{true positives} }{\text{true positives + false negatives} }$
+* **AUC** refers to the Area Under the Curve of a Receiver Operating Characteristic curve (ROC-AUC). This metric is equal to the probability that a classifier will rank a random positive sample higher than a random negative sample.
+
+A short worked example that computes accuracy, precision, and recall from raw counts appears just before the training code below.
+
+**Note:** Accuracy is not a helpful metric for this task. You can get 99.8%+ accuracy on this task by predicting False all the time.
+
+Read more:
+
+* [True vs. False and Positive vs. Negative](https://developers.google.cn/machine-learning/crash-course/classification/true-false-positive-negative)
+* [Accuracy](https://developers.google.cn/machine-learning/crash-course/classification/accuracy)
+* [Precision and Recall](https://developers.google.cn/machine-learning/crash-course/classification/precision-and-recall)
+* [ROC-AUC](https://developers.google.cn/machine-learning/crash-course/classification/roc-and-auc)
+
+## Baseline model
+
+### Build the model
+
+Now create and train your model using the function that was defined earlier. Notice that the model is fit using a larger-than-default batch size of 2048; this is important to ensure that each batch has a decent chance of containing a few positive samples. If the batch size were too small, some batches would likely have no fraudulent transactions to learn from.
+
+**Note:** this model will not handle the class imbalance well. You will improve it later in this tutorial.
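+
+Before training, here is the worked example of the metrics promised above. It is a minimal sketch, not part of the original notebook; the counts are taken from the baseline model's test-set results reported later in this tutorial:
+
+```py
+# Worked example: computing accuracy, precision, and recall from raw counts.
+# These counts (tp, fp, fn, tn) match the baseline model's test results below.
+tp, fp, fn, tn = 68, 3, 31, 56860
+
+accuracy = (tp + tn) / (tp + fp + fn + tn)  # fraction of all samples classified correctly
+precision = tp / (tp + fp)                  # fraction of predicted positives that are real positives
+recall = tp / (tp + fn)                     # fraction of actual positives that were found
+
+print('accuracy: {:.4f}'.format(accuracy))    # accuracy: 0.9994
+print('precision: {:.4f}'.format(precision))  # precision: 0.9577
+print('recall: {:.4f}'.format(recall))        # recall: 0.6869
+```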
+
+```py
+EPOCHS = 100
+BATCH_SIZE = 2048
+
+early_stopping = tf.keras.callbacks.EarlyStopping(
+    monitor='val_auc',
+    verbose=1,
+    patience=10,
+    mode='max',
+    restore_best_weights=True)
+```
+
+```py
+model = make_model()
+model.summary()
+```
+
+```py
+Model: "sequential"
+_________________________________________________________________
+Layer (type)                 Output Shape              Param #
+=================================================================
+dense (Dense)                (None, 16)                480
+_________________________________________________________________
+dropout (Dropout)            (None, 16)                0
+_________________________________________________________________
+dense_1 (Dense)              (None, 1)                 17
+=================================================================
+Total params: 497
+Trainable params: 497
+Non-trainable params: 0
+_________________________________________________________________
+
+```
+
+Test run the model:
+
+```py
+model.predict(train_features[:10])
+```
+
+```py
+array([[0.14155667],
+       [0.08245954],
+       [0.14218365],
+       [0.09784216],
+       [0.15547438],
+       [0.04696527],
+       [0.20200476],
+       [0.16137381],
+       [0.08730176],
+       [0.04467227]], dtype=float32)
+
+```
+
+### Optional: Set the correct initial bias
+
+These initial guesses are not great. You know the dataset is imbalanced. Set the output layer's bias to reflect that (see: [A Recipe for Training Neural Networks: "init well"](http://karpathy.github.io/2019/04/25/recipe/#2-set-up-the-end-to-end-trainingevaluation-skeleton--get-dumb-baselines)). This can help with initial convergence.
+
+With the default bias initialization the loss should be about [`math.log(2) = 0.69314`](https://tensorflow.google.cn/api_docs/python/tf/math/log).
+
+```py
+results = model.evaluate(train_features, train_labels, batch_size=BATCH_SIZE, verbose=0)
+print("Loss: {:0.4f}".format(results[0]))
+```
+
+```py
+Loss: 0.1159
+
+```
+
+The correct bias to set can be derived from:
+
+$$ p_0 = pos/(pos + neg) = 1/(1+e^{-b_0}) $$
+$$ b_0 = -\log_e(1/p_0 - 1) $$
+$$ b_0 = \log_e(pos/neg) $$
+
+(A quick numeric check of this derivation appears just before the checkpointing code below.)
+
+```py
+initial_bias = np.log([pos/neg])
+initial_bias
+```
+
+```py
+array([-6.35935934])
+
+```
+
+Set that as the initial bias, and the model will give much more reasonable initial guesses.
+
+It should be near: `pos/total = 0.0018`
+
+```py
+model = make_model(output_bias=initial_bias)
+model.predict(train_features[:10])
+```
+
+```py
+array([[0.00172629],
+       [0.00338373],
+       [0.00397264],
+       [0.00406079],
+       [0.00449285],
+       [0.00492807],
+       [0.00254243],
+       [0.00370109],
+       [0.01771316],
+       [0.0022068 ]], dtype=float32)
+
+```
+
+With this initialization the initial loss should be approximately:
+
+$$-p_0\log(p_0)-(1-p_0)\log(1-p_0) = 0.01317$$
+
+```py
+results = model.evaluate(train_features, train_labels, batch_size=BATCH_SIZE, verbose=0)
+print("Loss: {:0.4f}".format(results[0]))
+```
+
+```py
+Loss: 0.0141
+
+```
+
+This initial loss is about 50 times less than it would have been with naive initialization.
+
+This way the model doesn't need to spend the first few epochs just learning that positive examples are unlikely. This also makes it easier to read plots of the loss during training.
+
+### Checkpoint the initial weights
+
+To make the various training runs more comparable, keep this initial model's weights in a checkpoint file, and load them into each model before training.
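+
+As referenced above, here is a quick numeric check of the bias derivation (a minimal sketch added here, using the class counts computed earlier, pos = 492 and neg = 284315):
+
+```py
+# Check: b0 = log_e(pos/neg), and sigmoid(b0) recovers p0 = pos/total.
+b0 = np.log(492 / 284315)   # ~ -6.3594, matching `initial_bias` above
+p0 = 1 / (1 + np.exp(-b0))  # ~ 0.0017, close to pos/total = 0.0018
+print('b0: {:.4f}  p0: {:.4f}'.format(b0, p0))
+```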
+
+```py
+initial_weights = os.path.join(tempfile.mkdtemp(), 'initial_weights')
+model.save_weights(initial_weights)
+```
+
+### Confirm that the bias fix helps
+
+Before moving on, quickly confirm that the careful bias initialization actually helped.
+
+Train the model for 20 epochs, with and without this careful initialization, and compare the losses:
+
+```py
+model = make_model()
+model.load_weights(initial_weights)
+model.layers[-1].bias.assign([0.0])
+zero_bias_history = model.fit(
+    train_features,
+    train_labels,
+    batch_size=BATCH_SIZE,
+    epochs=20,
+    validation_data=(val_features, val_labels),
+    verbose=0)
+```
+
+```py
+model = make_model()
+model.load_weights(initial_weights)
+careful_bias_history = model.fit(
+    train_features,
+    train_labels,
+    batch_size=BATCH_SIZE,
+    epochs=20,
+    validation_data=(val_features, val_labels),
+    verbose=0)
+```
+
+```py
+def plot_loss(history, label, n):
+  # Use a log scale to show the wide range of values.
+  plt.semilogy(history.epoch, history.history['loss'],
+               color=colors[n], label='Train '+label)
+  plt.semilogy(history.epoch, history.history['val_loss'],
+               color=colors[n], label='Val '+label,
+               linestyle="--")
+  plt.xlabel('Epoch')
+  plt.ylabel('Loss')
+
+  plt.legend()
+```
+
+```py
+plot_loss(zero_bias_history, "Zero Bias", 0)
+plot_loss(careful_bias_history, "Careful Bias", 1)
+```
+
+![png](img/9ec1c5121631fabdd2734d5a380b07fc.png)
+
+The above figure makes it plain: in terms of validation loss, this careful initialization gives a clear advantage on this problem.
+
+### Train the model
+
+```py
+model = make_model()
+model.load_weights(initial_weights)
+baseline_history = model.fit(
+    train_features,
+    train_labels,
+    batch_size=BATCH_SIZE,
+    epochs=EPOCHS,
+    callbacks=[early_stopping],
+    validation_data=(val_features, val_labels))
+```
+
+```py
+Epoch 1/100
+90/90 [==============================] - 1s 14ms/step - loss: 0.0127 - tp: 93.0000 - fp: 90.0000 - tn: 227362.0000 - fn: 300.0000 - accuracy: 0.9983 - precision: 0.5082 - recall: 0.2366 - auc: 0.8002 - val_loss: 0.0066 - val_tp: 8.0000 - val_fp: 4.0000 - val_tn: 45489.0000 - val_fn: 68.0000 - val_accuracy: 0.9984 - val_precision: 0.6667 - val_recall: 0.1053 - val_auc: 0.9394
+Epoch 2/100
+90/90 [==============================] - 1s 6ms/step - loss: 0.0081 - tp: 99.0000 - fp: 25.0000 - tn: 181934.0000 - fn: 218.0000 - accuracy: 0.9987 - precision: 0.7984 - recall: 0.3123 - auc: 0.8446 - val_loss: 0.0044 - val_tp: 40.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 36.0000 - val_accuracy: 0.9991 - val_precision: 0.8696 - val_recall: 0.5263 - val_auc: 0.9471
+Epoch 3/100
+90/90 [==============================] - 1s 6ms/step - loss: 0.0067 - tp: 147.0000 - fp: 29.0000 - tn: 181930.0000 - fn: 170.0000 - accuracy: 0.9989 - precision: 0.8352 - recall: 0.4637 - auc: 0.8739 - val_loss: 0.0036 - val_tp: 45.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 31.0000 - val_accuracy: 0.9992 - val_precision: 0.8824 - val_recall: 0.5921 - val_auc: 0.9472
+Epoch 4/100
+90/90 [==============================] - 1s 6ms/step - loss: 0.0061 - tp: 151.0000 - fp: 32.0000 - tn: 181927.0000 - fn: 166.0000 - accuracy: 0.9989 - precision: 0.8251 - recall: 0.4763 - auc: 0.8867 - val_loss: 0.0031 - val_tp: 50.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 26.0000 - val_accuracy: 0.9993 - val_precision: 0.8929 - val_recall: 0.6579 - val_auc: 0.9472
+Epoch 5/100
+90/90 [==============================] - 1s 6ms/step - loss: 0.0059 - tp: 162.0000 - fp: 35.0000 - tn: 181924.0000 - fn: 155.0000 - accuracy:
0.9990 - precision: 0.8223 - recall: 0.5110 - auc: 0.8970 - val_loss: 0.0028 - val_tp: 53.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 23.0000 - val_accuracy: 0.9994 - val_precision: 0.8983 - val_recall: 0.6974 - val_auc: 0.9538 +Epoch 6/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0056 - tp: 167.0000 - fp: 34.0000 - tn: 181925.0000 - fn: 150.0000 - accuracy: 0.9990 - precision: 0.8308 - recall: 0.5268 - auc: 0.8942 - val_loss: 0.0026 - val_tp: 53.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 23.0000 - val_accuracy: 0.9994 - val_precision: 0.8983 - val_recall: 0.6974 - val_auc: 0.9670 +Epoch 7/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0051 - tp: 179.0000 - fp: 32.0000 - tn: 181927.0000 - fn: 138.0000 - accuracy: 0.9991 - precision: 0.8483 - recall: 0.5647 - auc: 0.9023 - val_loss: 0.0024 - val_tp: 58.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 18.0000 - val_accuracy: 0.9995 - val_precision: 0.9062 - val_recall: 0.7632 - val_auc: 0.9669 +Epoch 8/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0049 - tp: 181.0000 - fp: 29.0000 - tn: 181930.0000 - fn: 136.0000 - accuracy: 0.9991 - precision: 0.8619 - recall: 0.5710 - auc: 0.9040 - val_loss: 0.0023 - val_tp: 59.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 17.0000 - val_accuracy: 0.9995 - val_precision: 0.9077 - val_recall: 0.7763 - val_auc: 0.9735 +Epoch 9/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0050 - tp: 181.0000 - fp: 36.0000 - tn: 181923.0000 - fn: 136.0000 - accuracy: 0.9991 - precision: 0.8341 - recall: 0.5710 - auc: 0.9025 - val_loss: 0.0022 - val_tp: 56.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 20.0000 - val_accuracy: 0.9994 - val_precision: 0.9032 - val_recall: 0.7368 - val_auc: 0.9735 +Epoch 10/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0048 - tp: 179.0000 - fp: 32.0000 - tn: 181927.0000 - fn: 138.0000 - accuracy: 0.9991 - precision: 0.8483 - recall: 0.5647 - auc: 0.9041 - val_loss: 0.0021 - val_tp: 57.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 19.0000 - val_accuracy: 0.9995 - val_precision: 0.9048 - val_recall: 0.7500 - val_auc: 0.9735 +Epoch 11/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0044 - tp: 188.0000 - fp: 29.0000 - tn: 181930.0000 - fn: 129.0000 - accuracy: 0.9991 - precision: 0.8664 - recall: 0.5931 - auc: 0.9246 - val_loss: 0.0020 - val_tp: 57.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 19.0000 - val_accuracy: 0.9995 - val_precision: 0.9048 - val_recall: 0.7500 - val_auc: 0.9735 +Epoch 12/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0047 - tp: 168.0000 - fp: 34.0000 - tn: 181925.0000 - fn: 149.0000 - accuracy: 0.9990 - precision: 0.8317 - recall: 0.5300 - auc: 0.9184 - val_loss: 0.0019 - val_tp: 63.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 13.0000 - val_accuracy: 0.9996 - val_precision: 0.9130 - val_recall: 0.8289 - val_auc: 0.9735 +Epoch 13/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0045 - tp: 186.0000 - fp: 32.0000 - tn: 181927.0000 - fn: 131.0000 - accuracy: 0.9991 - precision: 0.8532 - recall: 0.5868 - auc: 0.9105 - val_loss: 0.0019 - val_tp: 63.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 13.0000 - val_accuracy: 0.9996 - val_precision: 0.9130 - val_recall: 0.8289 - val_auc: 0.9735 +Epoch 14/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0043 - tp: 199.0000 - fp: 37.0000 - tn: 181922.0000 - fn: 118.0000 - accuracy: 0.9991 - 
precision: 0.8432 - recall: 0.6278 - auc: 0.9217 - val_loss: 0.0019 - val_tp: 59.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 17.0000 - val_accuracy: 0.9995 - val_precision: 0.9077 - val_recall: 0.7763 - val_auc: 0.9735 +Epoch 15/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0045 - tp: 180.0000 - fp: 28.0000 - tn: 181931.0000 - fn: 137.0000 - accuracy: 0.9991 - precision: 0.8654 - recall: 0.5678 - auc: 0.9216 - val_loss: 0.0019 - val_tp: 63.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 13.0000 - val_accuracy: 0.9996 - val_precision: 0.9130 - val_recall: 0.8289 - val_auc: 0.9801 +Epoch 16/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0044 - tp: 188.0000 - fp: 41.0000 - tn: 181918.0000 - fn: 129.0000 - accuracy: 0.9991 - precision: 0.8210 - recall: 0.5931 - auc: 0.9200 - val_loss: 0.0019 - val_tp: 52.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 24.0000 - val_accuracy: 0.9994 - val_precision: 0.9455 - val_recall: 0.6842 - val_auc: 0.9735 +Epoch 17/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0045 - tp: 178.0000 - fp: 29.0000 - tn: 181930.0000 - fn: 139.0000 - accuracy: 0.9991 - precision: 0.8599 - recall: 0.5615 - auc: 0.9153 - val_loss: 0.0018 - val_tp: 66.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 10.0000 - val_accuracy: 0.9996 - val_precision: 0.9167 - val_recall: 0.8684 - val_auc: 0.9801 +Epoch 18/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0043 - tp: 186.0000 - fp: 33.0000 - tn: 181926.0000 - fn: 131.0000 - accuracy: 0.9991 - precision: 0.8493 - recall: 0.5868 - auc: 0.9248 - val_loss: 0.0018 - val_tp: 65.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 11.0000 - val_accuracy: 0.9996 - val_precision: 0.9155 - val_recall: 0.8553 - val_auc: 0.9735 +Epoch 19/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0044 - tp: 188.0000 - fp: 28.0000 - tn: 181931.0000 - fn: 129.0000 - accuracy: 0.9991 - precision: 0.8704 - recall: 0.5931 - auc: 0.9091 - val_loss: 0.0018 - val_tp: 66.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 10.0000 - val_accuracy: 0.9996 - val_precision: 0.9167 - val_recall: 0.8684 - val_auc: 0.9801 +Epoch 20/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0042 - tp: 189.0000 - fp: 30.0000 - tn: 181929.0000 - fn: 128.0000 - accuracy: 0.9991 - precision: 0.8630 - recall: 0.5962 - auc: 0.9249 - val_loss: 0.0018 - val_tp: 63.0000 - val_fp: 5.0000 - val_tn: 45488.0000 - val_fn: 13.0000 - val_accuracy: 0.9996 - val_precision: 0.9265 - val_recall: 0.8289 - val_auc: 0.9735 +Epoch 21/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0042 - tp: 185.0000 - fp: 35.0000 - tn: 181924.0000 - fn: 132.0000 - accuracy: 0.9991 - precision: 0.8409 - recall: 0.5836 - auc: 0.9248 - val_loss: 0.0017 - val_tp: 66.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 10.0000 - val_accuracy: 0.9996 - val_precision: 0.9167 - val_recall: 0.8684 - val_auc: 0.9801 +Epoch 22/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0042 - tp: 188.0000 - fp: 31.0000 - tn: 181928.0000 - fn: 129.0000 - accuracy: 0.9991 - precision: 0.8584 - recall: 0.5931 - auc: 0.9249 - val_loss: 0.0017 - val_tp: 64.0000 - val_fp: 5.0000 - val_tn: 45488.0000 - val_fn: 12.0000 - val_accuracy: 0.9996 - val_precision: 0.9275 - val_recall: 0.8421 - val_auc: 0.9801 +Epoch 23/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0045 - tp: 175.0000 - fp: 39.0000 - tn: 181920.0000 - fn: 142.0000 - accuracy: 0.9990 - 
precision: 0.8178 - recall: 0.5521 - auc: 0.9169 - val_loss: 0.0017 - val_tp: 65.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 11.0000 - val_accuracy: 0.9996 - val_precision: 0.9155 - val_recall: 0.8553 - val_auc: 0.9801 +Epoch 24/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0043 - tp: 188.0000 - fp: 31.0000 - tn: 181928.0000 - fn: 129.0000 - accuracy: 0.9991 - precision: 0.8584 - recall: 0.5931 - auc: 0.9122 - val_loss: 0.0017 - val_tp: 64.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 12.0000 - val_accuracy: 0.9996 - val_precision: 0.9143 - val_recall: 0.8421 - val_auc: 0.9801 +Epoch 25/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0043 - tp: 179.0000 - fp: 29.0000 - tn: 181930.0000 - fn: 138.0000 - accuracy: 0.9991 - precision: 0.8606 - recall: 0.5647 - auc: 0.9311 - val_loss: 0.0017 - val_tp: 64.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 12.0000 - val_accuracy: 0.9996 - val_precision: 0.9143 - val_recall: 0.8421 - val_auc: 0.9801 +Epoch 26/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0041 - tp: 189.0000 - fp: 32.0000 - tn: 181927.0000 - fn: 128.0000 - accuracy: 0.9991 - precision: 0.8552 - recall: 0.5962 - auc: 0.9218 - val_loss: 0.0017 - val_tp: 66.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 10.0000 - val_accuracy: 0.9996 - val_precision: 0.9167 - val_recall: 0.8684 - val_auc: 0.9801 +Epoch 27/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0040 - tp: 193.0000 - fp: 27.0000 - tn: 181932.0000 - fn: 124.0000 - accuracy: 0.9992 - precision: 0.8773 - recall: 0.6088 - auc: 0.9202 - val_loss: 0.0017 - val_tp: 66.0000 - val_fp: 6.0000 - val_tn: 45487.0000 - val_fn: 10.0000 - val_accuracy: 0.9996 - val_precision: 0.9167 - val_recall: 0.8684 - val_auc: 0.9801 +Epoch 28/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0041 - tp: 189.0000 - fp: 31.0000 - tn: 181928.0000 - fn: 128.0000 - accuracy: 0.9991 - precision: 0.8591 - recall: 0.5962 - auc: 0.9187 - val_loss: 0.0017 - val_tp: 62.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 14.0000 - val_accuracy: 0.9996 - val_precision: 0.9538 - val_recall: 0.8158 - val_auc: 0.9801 +Epoch 29/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0041 - tp: 173.0000 - fp: 35.0000 - tn: 181924.0000 - fn: 144.0000 - accuracy: 0.9990 - precision: 0.8317 - recall: 0.5457 - auc: 0.9233 - val_loss: 0.0017 - val_tp: 64.0000 - val_fp: 4.0000 - val_tn: 45489.0000 - val_fn: 12.0000 - val_accuracy: 0.9996 - val_precision: 0.9412 - val_recall: 0.8421 - val_auc: 0.9801 +Epoch 30/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0041 - tp: 193.0000 - fp: 36.0000 - tn: 181923.0000 - fn: 124.0000 - accuracy: 0.9991 - precision: 0.8428 - recall: 0.6088 - auc: 0.9218 - val_loss: 0.0017 - val_tp: 62.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 14.0000 - val_accuracy: 0.9996 - val_precision: 0.9538 - val_recall: 0.8158 - val_auc: 0.9801 +Epoch 31/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0044 - tp: 190.0000 - fp: 35.0000 - tn: 181924.0000 - fn: 127.0000 - accuracy: 0.9991 - precision: 0.8444 - recall: 0.5994 - auc: 0.9122 - val_loss: 0.0017 - val_tp: 60.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 16.0000 - val_accuracy: 0.9996 - val_precision: 0.9524 - val_recall: 0.7895 - val_auc: 0.9801 +Epoch 32/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0043 - tp: 184.0000 - fp: 33.0000 - tn: 181926.0000 - fn: 133.0000 - accuracy: 0.9991 - 
precision: 0.8479 - recall: 0.5804 - auc: 0.9186 - val_loss: 0.0017 - val_tp: 61.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 15.0000 - val_accuracy: 0.9996 - val_precision: 0.9531 - val_recall: 0.8026 - val_auc: 0.9801 +Epoch 33/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0041 - tp: 188.0000 - fp: 36.0000 - tn: 181923.0000 - fn: 129.0000 - accuracy: 0.9991 - precision: 0.8393 - recall: 0.5931 - auc: 0.9218 - val_loss: 0.0016 - val_tp: 62.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 14.0000 - val_accuracy: 0.9996 - val_precision: 0.9538 - val_recall: 0.8158 - val_auc: 0.9801 +Epoch 34/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0039 - tp: 196.0000 - fp: 37.0000 - tn: 181922.0000 - fn: 121.0000 - accuracy: 0.9991 - precision: 0.8412 - recall: 0.6183 - auc: 0.9297 - val_loss: 0.0017 - val_tp: 62.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 14.0000 - val_accuracy: 0.9996 - val_precision: 0.9538 - val_recall: 0.8158 - val_auc: 0.9801 +Epoch 35/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0040 - tp: 189.0000 - fp: 39.0000 - tn: 181920.0000 - fn: 128.0000 - accuracy: 0.9991 - precision: 0.8289 - recall: 0.5962 - auc: 0.9281 - val_loss: 0.0017 - val_tp: 60.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 16.0000 - val_accuracy: 0.9996 - val_precision: 0.9524 - val_recall: 0.7895 - val_auc: 0.9801 +Epoch 36/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0039 - tp: 197.0000 - fp: 29.0000 - tn: 181930.0000 - fn: 120.0000 - accuracy: 0.9992 - precision: 0.8717 - recall: 0.6215 - auc: 0.9203 - val_loss: 0.0016 - val_tp: 65.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 11.0000 - val_accuracy: 0.9997 - val_precision: 0.9559 - val_recall: 0.8553 - val_auc: 0.9801 +Epoch 37/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0039 - tp: 195.0000 - fp: 26.0000 - tn: 181933.0000 - fn: 122.0000 - accuracy: 0.9992 - precision: 0.8824 - recall: 0.6151 - auc: 0.9234 - val_loss: 0.0016 - val_tp: 62.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 14.0000 - val_accuracy: 0.9996 - val_precision: 0.9538 - val_recall: 0.8158 - val_auc: 0.9801 +Epoch 38/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0038 - tp: 197.0000 - fp: 40.0000 - tn: 181919.0000 - fn: 120.0000 - accuracy: 0.9991 - precision: 0.8312 - recall: 0.6215 - auc: 0.9329 - val_loss: 0.0017 - val_tp: 59.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 17.0000 - val_accuracy: 0.9996 - val_precision: 0.9516 - val_recall: 0.7763 - val_auc: 0.9801 +Epoch 39/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0040 - tp: 195.0000 - fp: 34.0000 - tn: 181925.0000 - fn: 122.0000 - accuracy: 0.9991 - precision: 0.8515 - recall: 0.6151 - auc: 0.9343 - val_loss: 0.0016 - val_tp: 62.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 14.0000 - val_accuracy: 0.9996 - val_precision: 0.9538 - val_recall: 0.8158 - val_auc: 0.9801 +Epoch 40/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.0039 - tp: 199.0000 - fp: 35.0000 - tn: 181924.0000 - fn: 118.0000 - accuracy: 0.9992 - precision: 0.8504 - recall: 0.6278 - auc: 0.9250 - val_loss: 0.0016 - val_tp: 60.0000 - val_fp: 3.0000 - val_tn: 45490.0000 - val_fn: 16.0000 - val_accuracy: 0.9996 - val_precision: 0.9524 - val_recall: 0.7895 - val_auc: 0.9801 +Epoch 41/100 +88/90 [============================>.] 
- ETA: 0s - loss: 0.0041 - tp: 185.0000 - fp: 35.0000 - tn: 179875.0000 - fn: 129.0000 - accuracy: 0.9991 - precision: 0.8409 - recall: 0.5892 - auc: 0.9305Restoring model weights from the end of the best epoch.
+90/90 [==============================] - 1s 6ms/step - loss: 0.0041 - tp: 186.0000 - fp: 35.0000 - tn: 181924.0000 - fn: 131.0000 - accuracy: 0.9991 - precision: 0.8416 - recall: 0.5868 - auc: 0.9295 - val_loss: 0.0016 - val_tp: 65.0000 - val_fp: 5.0000 - val_tn: 45488.0000 - val_fn: 11.0000 - val_accuracy: 0.9996 - val_precision: 0.9286 - val_recall: 0.8553 - val_auc: 0.9801
+Epoch 00041: early stopping
+
+```
+
+### Check training history
+
+In this section, you will produce plots of your model's accuracy and loss on the training and validation sets. These are useful to check for overfitting, which you can learn more about in this [tutorial](https://tensorflow.google.cn/tutorials/keras/overfit_and_underfit).
+
+Additionally, you can produce these plots for any of the metrics you created above. False negatives are included as an example.
+
+```py
+def plot_metrics(history):
+  metrics = ['loss', 'auc', 'precision', 'recall']
+  for n, metric in enumerate(metrics):
+    name = metric.replace("_"," ").capitalize()
+    plt.subplot(2,2,n+1)
+    plt.plot(history.epoch, history.history[metric], color=colors[0], label='Train')
+    plt.plot(history.epoch, history.history['val_'+metric],
+             color=colors[0], linestyle="--", label='Val')
+    plt.xlabel('Epoch')
+    plt.ylabel(name)
+    if metric == 'loss':
+      plt.ylim([0, plt.ylim()[1]])
+    elif metric == 'auc':
+      plt.ylim([0.8,1])
+    else:
+      plt.ylim([0,1])
+
+    plt.legend()
+```
+
+```py
+plot_metrics(baseline_history)
+```
+
+![png](img/f021b204e92d0e77d8439a03a43bb21e.png)
+
+**Note:** The validation metrics generally look better than the training metrics. This is mainly because the dropout layer is not active when evaluating the model.
+
+### Evaluate metrics
+
+You can use a [confusion matrix](https://developers.google.cn/machine-learning/glossary/#confusion_matrix) to summarize the actual vs. predicted labels, where the X axis is the predicted label and the Y axis is the actual label.
+
+```py
+train_predictions_baseline = model.predict(train_features, batch_size=BATCH_SIZE)
+test_predictions_baseline = model.predict(test_features, batch_size=BATCH_SIZE)
+```
+
+```py
+def plot_cm(labels, predictions, p=0.5):
+  cm = confusion_matrix(labels, predictions > p)
+  plt.figure(figsize=(5,5))
+  sns.heatmap(cm, annot=True, fmt="d")
+  plt.title('Confusion matrix @{:.2f}'.format(p))
+  plt.ylabel('Actual label')
+  plt.xlabel('Predicted label')
+
+  print('Legitimate Transactions Detected (True Negatives): ', cm[0][0])
+  print('Legitimate Transactions Incorrectly Detected (False Positives): ', cm[0][1])
+  print('Fraudulent Transactions Missed (False Negatives): ', cm[1][0])
+  print('Fraudulent Transactions Detected (True Positives): ', cm[1][1])
+  print('Total Fraudulent Transactions: ', np.sum(cm[1]))
+```
+
+Evaluate your model on the test dataset and display the results for the metrics you created above.
+
+```py
+baseline_results = model.evaluate(test_features, test_labels,
+                                  batch_size=BATCH_SIZE, verbose=0)
+for name, value in zip(model.metrics_names, baseline_results):
+  print(name, ': ', value)
+print()
+
+plot_cm(test_labels, test_predictions_baseline)
+```
+
+```py
+loss :  0.002797449706122279
+tp :  68.0
+fp :  3.0
+tn :  56860.0
+fn :  31.0
+accuracy :  0.9994031190872192
+precision :  0.9577465057373047
+recall :  0.6868686676025391
+auc :  0.949228823184967
+
+Legitimate Transactions Detected (True Negatives):  56860
+Legitimate Transactions Incorrectly Detected (False Positives):  3
+Fraudulent Transactions Missed (False Negatives):  31
+Fraudulent Transactions Detected (True Positives):  68
+Total Fraudulent Transactions:  99
+
+```
+
+![png](img/85d63bf8a53bc6d25baa38c0e3e2dde0.png)
+
+If the model had predicted everything perfectly, this would be a [diagonal matrix](https://en.wikipedia.org/wiki/Diagonal_matrix) where values off the main diagonal, indicating incorrect predictions, would be zero. In this case the matrix shows that you have relatively few false positives, meaning that there were relatively few legitimate transactions that were incorrectly flagged. However, you would likely want to have even fewer false negatives despite the cost of increasing the number of false positives. This trade-off may be preferable because false negatives would allow fraudulent transactions to go through, whereas false positives may cause an email to be sent to a customer to ask them to verify their card activity.
+
+### Plot the ROC
+
+Now plot the [ROC](https://developers.google.cn/machine-learning/glossary#ROC). This plot is useful because it shows, at a glance, the range of performance the model can reach just by tuning the output threshold.
+
+```py
+def plot_roc(name, labels, predictions, **kwargs):
+  fp, tp, _ = sklearn.metrics.roc_curve(labels, predictions)
+
+  plt.plot(100*fp, 100*tp, label=name, linewidth=2, **kwargs)
+  plt.xlabel('False positives [%]')
+  plt.ylabel('True positives [%]')
+  plt.xlim([-0.5,20])
+  plt.ylim([80,100.5])
+  plt.grid(True)
+  ax = plt.gca()
+  ax.set_aspect('equal')
+```
+
+```py
+plot_roc("Train Baseline", train_labels, train_predictions_baseline, color=colors[0])
+plot_roc("Test Baseline", test_labels, test_predictions_baseline, color=colors[0], linestyle='--')
+plt.legend(loc='lower right')
+```
+
+![png](img/74badb10696c0ffbba886121ce004be0.png)
+
+It looks like the precision is relatively high, but the recall and the area under the ROC curve (AUC) aren't as high as you might like. Classifiers often face challenges when trying to maximize both precision and recall, which is especially true when working with imbalanced datasets. It is important to consider the costs of different types of errors in the context of the problem you care about. In this example, a false negative (a fraudulent transaction is missed) may have a financial cost, while a false positive (a transaction is incorrectly flagged as fraudulent) may decrease user happiness.
+
+## Class weights
+
+### Calculate class weights
+
+The goal is to identify fraudulent transactions, but you don't have very many of those positive samples to work with, so you would want the classifier to heavily weight the few examples that are available. You can do this by passing Keras weights for each class through a parameter. These will cause the model to "pay more attention" to examples from an under-represented class.
+
+```py
+# Scaling by total/2 helps keep the loss to a similar magnitude.
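+# (As a numeric check with the counts computed earlier -- pos = 492,
+# neg = 284315, total = 284807 -- this gives weight_for_1 = (1/492) *
+# (284807/2.0) ~ 289.44 and weight_for_0 = (1/284315) * (284807/2.0) ~ 0.50,
+# matching the output below.)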
+# The sum of the weights of all examples stays the same. +weight_for_0 = (1 / neg)*(total)/2.0 +weight_for_1 = (1 / pos)*(total)/2.0 + +class_weight = {0: weight_for_0, 1: weight_for_1} + +print('Weight for class 0: {:.2f}'.format(weight_for_0)) +print('Weight for class 1: {:.2f}'.format(weight_for_1)) +``` + +```py +Weight for class 0: 0.50 +Weight for class 1: 289.44 + +``` + +### Train a model with class weights + +Now try re-training and evaluating the model with class weights to see how that affects the predictions. + +**Note:** Using `class_weights` changes the range of the loss. This may affect the stability of the training depending on the optimizer. Optimizers whose step size is dependent on the magnitude of the gradient, like [`optimizers.SGD`](https://tensorflow.google.cn/api_docs/python/tf/keras/optimizers/SGD), may fail. The optimizer used here, [`optimizers.Adam`](https://tensorflow.google.cn/api_docs/python/tf/keras/optimizers/Adam), is unaffected by the scaling change. Also note that because of the weighting, the total losses are not comparable between the two models. + +```py +weighted_model = make_model() +weighted_model.load_weights(initial_weights) + +weighted_history = weighted_model.fit( + train_features, + train_labels, + batch_size=BATCH_SIZE, + epochs=EPOCHS, + callbacks=[early_stopping], + validation_data=(val_features, val_labels), + # The class weights go here + class_weight=class_weight) +``` + +```py +Epoch 1/100 +90/90 [==============================] - 1s 14ms/step - loss: 2.0102 - tp: 135.0000 - fp: 420.0000 - tn: 238402.0000 - fn: 281.0000 - accuracy: 0.9971 - precision: 0.2432 - recall: 0.3245 - auc: 0.8079 - val_loss: 0.0111 - val_tp: 45.0000 - val_fp: 51.0000 - val_tn: 45442.0000 - val_fn: 31.0000 - val_accuracy: 0.9982 - val_precision: 0.4688 - val_recall: 0.5921 - val_auc: 0.9314 +Epoch 2/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.9256 - tp: 178.0000 - fp: 793.0000 - tn: 181166.0000 - fn: 139.0000 - accuracy: 0.9949 - precision: 0.1833 - recall: 0.5615 - auc: 0.8662 - val_loss: 0.0164 - val_tp: 66.0000 - val_fp: 108.0000 - val_tn: 45385.0000 - val_fn: 10.0000 - val_accuracy: 0.9974 - val_precision: 0.3793 - val_recall: 0.8684 - val_auc: 0.9468 +Epoch 3/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.7136 - tp: 215.0000 - fp: 1377.0000 - tn: 180582.0000 - fn: 102.0000 - accuracy: 0.9919 - precision: 0.1351 - recall: 0.6782 - auc: 0.8811 - val_loss: 0.0238 - val_tp: 68.0000 - val_fp: 174.0000 - val_tn: 45319.0000 - val_fn: 8.0000 - val_accuracy: 0.9960 - val_precision: 0.2810 - val_recall: 0.8947 - val_auc: 0.9866 +Epoch 4/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.5158 - tp: 240.0000 - fp: 2141.0000 - tn: 179818.0000 - fn: 77.0000 - accuracy: 0.9878 - precision: 0.1008 - recall: 0.7571 - auc: 0.9124 - val_loss: 0.0334 - val_tp: 69.0000 - val_fp: 257.0000 - val_tn: 45236.0000 - val_fn: 7.0000 - val_accuracy: 0.9942 - val_precision: 0.2117 - val_recall: 0.9079 - val_auc: 0.9951 +Epoch 5/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.4060 - tp: 256.0000 - fp: 3039.0000 - tn: 178920.0000 - fn: 61.0000 - accuracy: 0.9830 - precision: 0.0777 - recall: 0.8076 - auc: 0.9329 - val_loss: 0.0439 - val_tp: 72.0000 - val_fp: 364.0000 - val_tn: 45129.0000 - val_fn: 4.0000 - val_accuracy: 0.9919 - val_precision: 0.1651 - val_recall: 0.9474 - val_auc: 0.9965 +Epoch 6/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.4045 - tp: 255.0000 - fp: 4034.0000 - tn: 
177925.0000 - fn: 62.0000 - accuracy: 0.9775 - precision: 0.0595 - recall: 0.8044 - auc: 0.9289 - val_loss: 0.0557 - val_tp: 73.0000 - val_fp: 572.0000 - val_tn: 44921.0000 - val_fn: 3.0000 - val_accuracy: 0.9874 - val_precision: 0.1132 - val_recall: 0.9605 - val_auc: 0.9969 +Epoch 7/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2990 - tp: 267.0000 - fp: 5024.0000 - tn: 176935.0000 - fn: 50.0000 - accuracy: 0.9722 - precision: 0.0505 - recall: 0.8423 - auc: 0.9544 - val_loss: 0.0723 - val_tp: 74.0000 - val_fp: 825.0000 - val_tn: 44668.0000 - val_fn: 2.0000 - val_accuracy: 0.9819 - val_precision: 0.0823 - val_recall: 0.9737 - val_auc: 0.9971 +Epoch 8/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.3124 - tp: 267.0000 - fp: 6321.0000 - tn: 175638.0000 - fn: 50.0000 - accuracy: 0.9650 - precision: 0.0405 - recall: 0.8423 - auc: 0.9493 - val_loss: 0.0886 - val_tp: 74.0000 - val_fp: 1043.0000 - val_tn: 44450.0000 - val_fn: 2.0000 - val_accuracy: 0.9771 - val_precision: 0.0662 - val_recall: 0.9737 - val_auc: 0.9971 +Epoch 9/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2856 - tp: 276.0000 - fp: 6879.0000 - tn: 175080.0000 - fn: 41.0000 - accuracy: 0.9620 - precision: 0.0386 - recall: 0.8707 - auc: 0.9536 - val_loss: 0.0963 - val_tp: 74.0000 - val_fp: 1120.0000 - val_tn: 44373.0000 - val_fn: 2.0000 - val_accuracy: 0.9754 - val_precision: 0.0620 - val_recall: 0.9737 - val_auc: 0.9972 +Epoch 10/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2871 - tp: 272.0000 - fp: 7610.0000 - tn: 174349.0000 - fn: 45.0000 - accuracy: 0.9580 - precision: 0.0345 - recall: 0.8580 - auc: 0.9545 - val_loss: 0.1053 - val_tp: 74.0000 - val_fp: 1219.0000 - val_tn: 44274.0000 - val_fn: 2.0000 - val_accuracy: 0.9732 - val_precision: 0.0572 - val_recall: 0.9737 - val_auc: 0.9972 +Epoch 11/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2646 - tp: 273.0000 - fp: 8061.0000 - tn: 173898.0000 - fn: 44.0000 - accuracy: 0.9555 - precision: 0.0328 - recall: 0.8612 - auc: 0.9602 - val_loss: 0.1079 - val_tp: 74.0000 - val_fp: 1242.0000 - val_tn: 44251.0000 - val_fn: 2.0000 - val_accuracy: 0.9727 - val_precision: 0.0562 - val_recall: 0.9737 - val_auc: 0.9973 +Epoch 12/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2574 - tp: 277.0000 - fp: 8239.0000 - tn: 173720.0000 - fn: 40.0000 - accuracy: 0.9546 - precision: 0.0325 - recall: 0.8738 - auc: 0.9621 - val_loss: 0.1086 - val_tp: 74.0000 - val_fp: 1223.0000 - val_tn: 44270.0000 - val_fn: 2.0000 - val_accuracy: 0.9731 - val_precision: 0.0571 - val_recall: 0.9737 - val_auc: 0.9973 +Epoch 13/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2525 - tp: 279.0000 - fp: 7925.0000 - tn: 174034.0000 - fn: 38.0000 - accuracy: 0.9563 - precision: 0.0340 - recall: 0.8801 - auc: 0.9604 - val_loss: 0.1032 - val_tp: 74.0000 - val_fp: 1153.0000 - val_tn: 44340.0000 - val_fn: 2.0000 - val_accuracy: 0.9747 - val_precision: 0.0603 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 14/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2712 - tp: 273.0000 - fp: 8290.0000 - tn: 173669.0000 - fn: 44.0000 - accuracy: 0.9543 - precision: 0.0319 - recall: 0.8612 - auc: 0.9572 - val_loss: 0.1083 - val_tp: 74.0000 - val_fp: 1193.0000 - val_tn: 44300.0000 - val_fn: 2.0000 - val_accuracy: 0.9738 - val_precision: 0.0584 - val_recall: 0.9737 - val_auc: 0.9973 +Epoch 15/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2324 - tp: 283.0000 - fp: 
8182.0000 - tn: 173777.0000 - fn: 34.0000 - accuracy: 0.9549 - precision: 0.0334 - recall: 0.8927 - auc: 0.9668 - val_loss: 0.1017 - val_tp: 74.0000 - val_fp: 1131.0000 - val_tn: 44362.0000 - val_fn: 2.0000 - val_accuracy: 0.9751 - val_precision: 0.0614 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 16/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2605 - tp: 277.0000 - fp: 7798.0000 - tn: 174161.0000 - fn: 40.0000 - accuracy: 0.9570 - precision: 0.0343 - recall: 0.8738 - auc: 0.9585 - val_loss: 0.1030 - val_tp: 74.0000 - val_fp: 1134.0000 - val_tn: 44359.0000 - val_fn: 2.0000 - val_accuracy: 0.9751 - val_precision: 0.0613 - val_recall: 0.9737 - val_auc: 0.9973 +Epoch 17/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2235 - tp: 281.0000 - fp: 7994.0000 - tn: 173965.0000 - fn: 36.0000 - accuracy: 0.9559 - precision: 0.0340 - recall: 0.8864 - auc: 0.9696 - val_loss: 0.1029 - val_tp: 74.0000 - val_fp: 1127.0000 - val_tn: 44366.0000 - val_fn: 2.0000 - val_accuracy: 0.9752 - val_precision: 0.0616 - val_recall: 0.9737 - val_auc: 0.9973 +Epoch 18/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2455 - tp: 278.0000 - fp: 7706.0000 - tn: 174253.0000 - fn: 39.0000 - accuracy: 0.9575 - precision: 0.0348 - recall: 0.8770 - auc: 0.9633 - val_loss: 0.1010 - val_tp: 74.0000 - val_fp: 1099.0000 - val_tn: 44394.0000 - val_fn: 2.0000 - val_accuracy: 0.9758 - val_precision: 0.0631 - val_recall: 0.9737 - val_auc: 0.9973 +Epoch 19/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1759 - tp: 285.0000 - fp: 7766.0000 - tn: 174193.0000 - fn: 32.0000 - accuracy: 0.9572 - precision: 0.0354 - recall: 0.8991 - auc: 0.9813 - val_loss: 0.1001 - val_tp: 74.0000 - val_fp: 1079.0000 - val_tn: 44414.0000 - val_fn: 2.0000 - val_accuracy: 0.9763 - val_precision: 0.0642 - val_recall: 0.9737 - val_auc: 0.9973 +Epoch 20/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2220 - tp: 283.0000 - fp: 7554.0000 - tn: 174405.0000 - fn: 34.0000 - accuracy: 0.9584 - precision: 0.0361 - recall: 0.8927 - auc: 0.9669 - val_loss: 0.0944 - val_tp: 74.0000 - val_fp: 1009.0000 - val_tn: 44484.0000 - val_fn: 2.0000 - val_accuracy: 0.9778 - val_precision: 0.0683 - val_recall: 0.9737 - val_auc: 0.9973 +Epoch 21/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2329 - tp: 282.0000 - fp: 7369.0000 - tn: 174590.0000 - fn: 35.0000 - accuracy: 0.9594 - precision: 0.0369 - recall: 0.8896 - auc: 0.9657 - val_loss: 0.0942 - val_tp: 74.0000 - val_fp: 1011.0000 - val_tn: 44482.0000 - val_fn: 2.0000 - val_accuracy: 0.9778 - val_precision: 0.0682 - val_recall: 0.9737 - val_auc: 0.9973 +Epoch 22/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2317 - tp: 281.0000 - fp: 7563.0000 - tn: 174396.0000 - fn: 36.0000 - accuracy: 0.9583 - precision: 0.0358 - recall: 0.8864 - auc: 0.9658 - val_loss: 0.0936 - val_tp: 74.0000 - val_fp: 993.0000 - val_tn: 44500.0000 - val_fn: 2.0000 - val_accuracy: 0.9782 - val_precision: 0.0694 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 23/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2109 - tp: 284.0000 - fp: 7520.0000 - tn: 174439.0000 - fn: 33.0000 - accuracy: 0.9586 - precision: 0.0364 - recall: 0.8959 - auc: 0.9702 - val_loss: 0.0940 - val_tp: 74.0000 - val_fp: 1003.0000 - val_tn: 44490.0000 - val_fn: 2.0000 - val_accuracy: 0.9779 - val_precision: 0.0687 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 24/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2089 - 
tp: 283.0000 - fp: 7119.0000 - tn: 174840.0000 - fn: 34.0000 - accuracy: 0.9608 - precision: 0.0382 - recall: 0.8927 - auc: 0.9731 - val_loss: 0.0898 - val_tp: 74.0000 - val_fp: 939.0000 - val_tn: 44554.0000 - val_fn: 2.0000 - val_accuracy: 0.9793 - val_precision: 0.0731 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 25/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2062 - tp: 280.0000 - fp: 7392.0000 - tn: 174567.0000 - fn: 37.0000 - accuracy: 0.9592 - precision: 0.0365 - recall: 0.8833 - auc: 0.9749 - val_loss: 0.0969 - val_tp: 74.0000 - val_fp: 1035.0000 - val_tn: 44458.0000 - val_fn: 2.0000 - val_accuracy: 0.9772 - val_precision: 0.0667 - val_recall: 0.9737 - val_auc: 0.9973 +Epoch 26/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2006 - tp: 284.0000 - fp: 7149.0000 - tn: 174810.0000 - fn: 33.0000 - accuracy: 0.9606 - precision: 0.0382 - recall: 0.8959 - auc: 0.9754 - val_loss: 0.0949 - val_tp: 74.0000 - val_fp: 1004.0000 - val_tn: 44489.0000 - val_fn: 2.0000 - val_accuracy: 0.9779 - val_precision: 0.0686 - val_recall: 0.9737 - val_auc: 0.9973 +Epoch 27/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2169 - tp: 281.0000 - fp: 7592.0000 - tn: 174367.0000 - fn: 36.0000 - accuracy: 0.9582 - precision: 0.0357 - recall: 0.8864 - auc: 0.9727 - val_loss: 0.0969 - val_tp: 74.0000 - val_fp: 1038.0000 - val_tn: 44455.0000 - val_fn: 2.0000 - val_accuracy: 0.9772 - val_precision: 0.0665 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 28/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2110 - tp: 285.0000 - fp: 6957.0000 - tn: 175002.0000 - fn: 32.0000 - accuracy: 0.9617 - precision: 0.0394 - recall: 0.8991 - auc: 0.9697 - val_loss: 0.0922 - val_tp: 74.0000 - val_fp: 988.0000 - val_tn: 44505.0000 - val_fn: 2.0000 - val_accuracy: 0.9783 - val_precision: 0.0697 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 29/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2234 - tp: 280.0000 - fp: 7030.0000 - tn: 174929.0000 - fn: 37.0000 - accuracy: 0.9612 - precision: 0.0383 - recall: 0.8833 - auc: 0.9679 - val_loss: 0.0942 - val_tp: 74.0000 - val_fp: 1024.0000 - val_tn: 44469.0000 - val_fn: 2.0000 - val_accuracy: 0.9775 - val_precision: 0.0674 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 30/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1889 - tp: 288.0000 - fp: 7228.0000 - tn: 174731.0000 - fn: 29.0000 - accuracy: 0.9602 - precision: 0.0383 - recall: 0.9085 - auc: 0.9771 - val_loss: 0.0895 - val_tp: 74.0000 - val_fp: 954.0000 - val_tn: 44539.0000 - val_fn: 2.0000 - val_accuracy: 0.9790 - val_precision: 0.0720 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 31/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1824 - tp: 286.0000 - fp: 6703.0000 - tn: 175256.0000 - fn: 31.0000 - accuracy: 0.9631 - precision: 0.0409 - recall: 0.9022 - auc: 0.9789 - val_loss: 0.0898 - val_tp: 74.0000 - val_fp: 957.0000 - val_tn: 44536.0000 - val_fn: 2.0000 - val_accuracy: 0.9790 - val_precision: 0.0718 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 32/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2014 - tp: 279.0000 - fp: 6918.0000 - tn: 175041.0000 - fn: 38.0000 - accuracy: 0.9618 - precision: 0.0388 - recall: 0.8801 - auc: 0.9756 - val_loss: 0.0933 - val_tp: 74.0000 - val_fp: 993.0000 - val_tn: 44500.0000 - val_fn: 2.0000 - val_accuracy: 0.9782 - val_precision: 0.0694 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 33/100 +90/90 [==============================] - 1s 6ms/step - 
loss: 0.2153 - tp: 283.0000 - fp: 6885.0000 - tn: 175074.0000 - fn: 34.0000 - accuracy: 0.9620 - precision: 0.0395 - recall: 0.8927 - auc: 0.9708 - val_loss: 0.0905 - val_tp: 74.0000 - val_fp: 949.0000 - val_tn: 44544.0000 - val_fn: 2.0000 - val_accuracy: 0.9791 - val_precision: 0.0723 - val_recall: 0.9737 - val_auc: 0.9974 +Epoch 34/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1970 - tp: 283.0000 - fp: 6969.0000 - tn: 174990.0000 - fn: 34.0000 - accuracy: 0.9616 - precision: 0.0390 - recall: 0.8927 - auc: 0.9769 - val_loss: 0.0958 - val_tp: 74.0000 - val_fp: 1018.0000 - val_tn: 44475.0000 - val_fn: 2.0000 - val_accuracy: 0.9776 - val_precision: 0.0678 - val_recall: 0.9737 - val_auc: 0.9975 +Epoch 35/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1737 - tp: 290.0000 - fp: 7040.0000 - tn: 174919.0000 - fn: 27.0000 - accuracy: 0.9612 - precision: 0.0396 - recall: 0.9148 - auc: 0.9806 - val_loss: 0.0907 - val_tp: 74.0000 - val_fp: 948.0000 - val_tn: 44545.0000 - val_fn: 2.0000 - val_accuracy: 0.9792 - val_precision: 0.0724 - val_recall: 0.9737 - val_auc: 0.9975 +Epoch 36/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1906 - tp: 288.0000 - fp: 6663.0000 - tn: 175296.0000 - fn: 29.0000 - accuracy: 0.9633 - precision: 0.0414 - recall: 0.9085 - auc: 0.9764 - val_loss: 0.0872 - val_tp: 74.0000 - val_fp: 904.0000 - val_tn: 44589.0000 - val_fn: 2.0000 - val_accuracy: 0.9801 - val_precision: 0.0757 - val_recall: 0.9737 - val_auc: 0.9975 +Epoch 37/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2071 - tp: 284.0000 - fp: 6688.0000 - tn: 175271.0000 - fn: 33.0000 - accuracy: 0.9631 - precision: 0.0407 - recall: 0.8959 - auc: 0.9727 - val_loss: 0.0905 - val_tp: 74.0000 - val_fp: 939.0000 - val_tn: 44554.0000 - val_fn: 2.0000 - val_accuracy: 0.9793 - val_precision: 0.0731 - val_recall: 0.9737 - val_auc: 0.9975 +Epoch 38/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1813 - tp: 283.0000 - fp: 6784.0000 - tn: 175175.0000 - fn: 34.0000 - accuracy: 0.9626 - precision: 0.0400 - recall: 0.8927 - auc: 0.9817 - val_loss: 0.0913 - val_tp: 74.0000 - val_fp: 951.0000 - val_tn: 44542.0000 - val_fn: 2.0000 - val_accuracy: 0.9791 - val_precision: 0.0722 - val_recall: 0.9737 - val_auc: 0.9975 +Epoch 39/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1897 - tp: 289.0000 - fp: 6872.0000 - tn: 175087.0000 - fn: 28.0000 - accuracy: 0.9621 - precision: 0.0404 - recall: 0.9117 - auc: 0.9767 - val_loss: 0.0916 - val_tp: 74.0000 - val_fp: 959.0000 - val_tn: 44534.0000 - val_fn: 2.0000 - val_accuracy: 0.9789 - val_precision: 0.0716 - val_recall: 0.9737 - val_auc: 0.9975 +Epoch 40/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1546 - tp: 295.0000 - fp: 6584.0000 - tn: 175375.0000 - fn: 22.0000 - accuracy: 0.9638 - precision: 0.0429 - recall: 0.9306 - auc: 0.9858 - val_loss: 0.0904 - val_tp: 74.0000 - val_fp: 948.0000 - val_tn: 44545.0000 - val_fn: 2.0000 - val_accuracy: 0.9792 - val_precision: 0.0724 - val_recall: 0.9737 - val_auc: 0.9976 +Epoch 41/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2263 - tp: 278.0000 - fp: 6677.0000 - tn: 175282.0000 - fn: 39.0000 - accuracy: 0.9632 - precision: 0.0400 - recall: 0.8770 - auc: 0.9676 - val_loss: 0.0908 - val_tp: 74.0000 - val_fp: 955.0000 - val_tn: 44538.0000 - val_fn: 2.0000 - val_accuracy: 0.9790 - val_precision: 0.0719 - val_recall: 0.9737 - val_auc: 0.9975 +Epoch 42/100 +90/90 [==============================] - 1s 
6ms/step - loss: 0.2033 - tp: 281.0000 - fp: 6777.0000 - tn: 175182.0000 - fn: 36.0000 - accuracy: 0.9626 - precision: 0.0398 - recall: 0.8864 - auc: 0.9756 - val_loss: 0.0953 - val_tp: 74.0000 - val_fp: 1029.0000 - val_tn: 44464.0000 - val_fn: 2.0000 - val_accuracy: 0.9774 - val_precision: 0.0671 - val_recall: 0.9737 - val_auc: 0.9975 +Epoch 43/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1865 - tp: 283.0000 - fp: 6824.0000 - tn: 175135.0000 - fn: 34.0000 - accuracy: 0.9624 - precision: 0.0398 - recall: 0.8927 - auc: 0.9792 - val_loss: 0.0945 - val_tp: 74.0000 - val_fp: 1019.0000 - val_tn: 44474.0000 - val_fn: 2.0000 - val_accuracy: 0.9776 - val_precision: 0.0677 - val_recall: 0.9737 - val_auc: 0.9976 +Epoch 44/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1857 - tp: 286.0000 - fp: 6655.0000 - tn: 175304.0000 - fn: 31.0000 - accuracy: 0.9633 - precision: 0.0412 - recall: 0.9022 - auc: 0.9781 - val_loss: 0.0927 - val_tp: 74.0000 - val_fp: 975.0000 - val_tn: 44518.0000 - val_fn: 2.0000 - val_accuracy: 0.9786 - val_precision: 0.0705 - val_recall: 0.9737 - val_auc: 0.9977 +Epoch 45/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1874 - tp: 282.0000 - fp: 6595.0000 - tn: 175364.0000 - fn: 35.0000 - accuracy: 0.9636 - precision: 0.0410 - recall: 0.8896 - auc: 0.9810 - val_loss: 0.0934 - val_tp: 74.0000 - val_fp: 974.0000 - val_tn: 44519.0000 - val_fn: 2.0000 - val_accuracy: 0.9786 - val_precision: 0.0706 - val_recall: 0.9737 - val_auc: 0.9977 +Epoch 46/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1641 - tp: 291.0000 - fp: 6532.0000 - tn: 175427.0000 - fn: 26.0000 - accuracy: 0.9640 - precision: 0.0426 - recall: 0.9180 - auc: 0.9837 - val_loss: 0.0868 - val_tp: 74.0000 - val_fp: 885.0000 - val_tn: 44608.0000 - val_fn: 2.0000 - val_accuracy: 0.9805 - val_precision: 0.0772 - val_recall: 0.9737 - val_auc: 0.9976 +Epoch 47/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1909 - tp: 285.0000 - fp: 6662.0000 - tn: 175297.0000 - fn: 32.0000 - accuracy: 0.9633 - precision: 0.0410 - recall: 0.8991 - auc: 0.9780 - val_loss: 0.0874 - val_tp: 74.0000 - val_fp: 899.0000 - val_tn: 44594.0000 - val_fn: 2.0000 - val_accuracy: 0.9802 - val_precision: 0.0761 - val_recall: 0.9737 - val_auc: 0.9978 +Epoch 48/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1687 - tp: 287.0000 - fp: 6627.0000 - tn: 175332.0000 - fn: 30.0000 - accuracy: 0.9635 - precision: 0.0415 - recall: 0.9054 - auc: 0.9836 - val_loss: 0.0875 - val_tp: 74.0000 - val_fp: 899.0000 - val_tn: 44594.0000 - val_fn: 2.0000 - val_accuracy: 0.9802 - val_precision: 0.0761 - val_recall: 0.9737 - val_auc: 0.9977 +Epoch 49/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1740 - tp: 291.0000 - fp: 6522.0000 - tn: 175437.0000 - fn: 26.0000 - accuracy: 0.9641 - precision: 0.0427 - recall: 0.9180 - auc: 0.9794 - val_loss: 0.0848 - val_tp: 74.0000 - val_fp: 866.0000 - val_tn: 44627.0000 - val_fn: 2.0000 - val_accuracy: 0.9810 - val_precision: 0.0787 - val_recall: 0.9737 - val_auc: 0.9978 +Epoch 50/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1773 - tp: 292.0000 - fp: 6370.0000 - tn: 175589.0000 - fn: 25.0000 - accuracy: 0.9649 - precision: 0.0438 - recall: 0.9211 - auc: 0.9785 - val_loss: 0.0794 - val_tp: 74.0000 - val_fp: 791.0000 - val_tn: 44702.0000 - val_fn: 2.0000 - val_accuracy: 0.9826 - val_precision: 0.0855 - val_recall: 0.9737 - val_auc: 0.9978 +Epoch 51/100 +90/90 
[==============================] - 1s 6ms/step - loss: 0.1828 - tp: 289.0000 - fp: 5870.0000 - tn: 176089.0000 - fn: 28.0000 - accuracy: 0.9676 - precision: 0.0469 - recall: 0.9117 - auc: 0.9789 - val_loss: 0.0791 - val_tp: 74.0000 - val_fp: 807.0000 - val_tn: 44686.0000 - val_fn: 2.0000 - val_accuracy: 0.9822 - val_precision: 0.0840 - val_recall: 0.9737 - val_auc: 0.9980 +Epoch 52/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1567 - tp: 290.0000 - fp: 5704.0000 - tn: 176255.0000 - fn: 27.0000 - accuracy: 0.9686 - precision: 0.0484 - recall: 0.9148 - auc: 0.9860 - val_loss: 0.0772 - val_tp: 74.0000 - val_fp: 765.0000 - val_tn: 44728.0000 - val_fn: 2.0000 - val_accuracy: 0.9832 - val_precision: 0.0882 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 53/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1752 - tp: 290.0000 - fp: 6278.0000 - tn: 175681.0000 - fn: 27.0000 - accuracy: 0.9654 - precision: 0.0442 - recall: 0.9148 - auc: 0.9808 - val_loss: 0.0854 - val_tp: 74.0000 - val_fp: 873.0000 - val_tn: 44620.0000 - val_fn: 2.0000 - val_accuracy: 0.9808 - val_precision: 0.0781 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 54/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1644 - tp: 289.0000 - fp: 6298.0000 - tn: 175661.0000 - fn: 28.0000 - accuracy: 0.9653 - precision: 0.0439 - recall: 0.9117 - auc: 0.9833 - val_loss: 0.0875 - val_tp: 74.0000 - val_fp: 904.0000 - val_tn: 44589.0000 - val_fn: 2.0000 - val_accuracy: 0.9801 - val_precision: 0.0757 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 55/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1721 - tp: 290.0000 - fp: 6318.0000 - tn: 175641.0000 - fn: 27.0000 - accuracy: 0.9652 - precision: 0.0439 - recall: 0.9148 - auc: 0.9816 - val_loss: 0.0827 - val_tp: 74.0000 - val_fp: 844.0000 - val_tn: 44649.0000 - val_fn: 2.0000 - val_accuracy: 0.9814 - val_precision: 0.0806 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 56/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1841 - tp: 284.0000 - fp: 6052.0000 - tn: 175907.0000 - fn: 33.0000 - accuracy: 0.9666 - precision: 0.0448 - recall: 0.8959 - auc: 0.9798 - val_loss: 0.0872 - val_tp: 74.0000 - val_fp: 911.0000 - val_tn: 44582.0000 - val_fn: 2.0000 - val_accuracy: 0.9800 - val_precision: 0.0751 - val_recall: 0.9737 - val_auc: 0.9977 +Epoch 57/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1710 - tp: 289.0000 - fp: 6593.0000 - tn: 175366.0000 - fn: 28.0000 - accuracy: 0.9637 - precision: 0.0420 - recall: 0.9117 - auc: 0.9824 - val_loss: 0.0856 - val_tp: 74.0000 - val_fp: 890.0000 - val_tn: 44603.0000 - val_fn: 2.0000 - val_accuracy: 0.9804 - val_precision: 0.0768 - val_recall: 0.9737 - val_auc: 0.9978 +Epoch 58/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1649 - tp: 287.0000 - fp: 6478.0000 - tn: 175481.0000 - fn: 30.0000 - accuracy: 0.9643 - precision: 0.0424 - recall: 0.9054 - auc: 0.9836 - val_loss: 0.0797 - val_tp: 74.0000 - val_fp: 817.0000 - val_tn: 44676.0000 - val_fn: 2.0000 - val_accuracy: 0.9820 - val_precision: 0.0831 - val_recall: 0.9737 - val_auc: 0.9980 +Epoch 59/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1752 - tp: 285.0000 - fp: 5981.0000 - tn: 175978.0000 - fn: 32.0000 - accuracy: 0.9670 - precision: 0.0455 - recall: 0.8991 - auc: 0.9827 - val_loss: 0.0813 - val_tp: 74.0000 - val_fp: 842.0000 - val_tn: 44651.0000 - val_fn: 2.0000 - val_accuracy: 0.9815 - val_precision: 0.0808 - val_recall: 0.9737 - val_auc: 0.9980 +Epoch 
60/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1374 - tp: 298.0000 - fp: 5910.0000 - tn: 176049.0000 - fn: 19.0000 - accuracy: 0.9675 - precision: 0.0480 - recall: 0.9401 - auc: 0.9884 - val_loss: 0.0760 - val_tp: 74.0000 - val_fp: 764.0000 - val_tn: 44729.0000 - val_fn: 2.0000 - val_accuracy: 0.9832 - val_precision: 0.0883 - val_recall: 0.9737 - val_auc: 0.9980 +Epoch 61/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.1655 - tp: 288.0000 - fp: 5872.0000 - tn: 176087.0000 - fn: 29.0000 - accuracy: 0.9676 - precision: 0.0468 - recall: 0.9085 - auc: 0.9838 - val_loss: 0.0795 - val_tp: 74.0000 - val_fp: 819.0000 - val_tn: 44674.0000 - val_fn: 2.0000 - val_accuracy: 0.9820 - val_precision: 0.0829 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 62/100 +90/90 [==============================] - 1s 6ms/step - loss: 0.2024 - tp: 281.0000 - fp: 6087.0000 - tn: 175872.0000 - fn: 36.0000 - accuracy: 0.9664 - precision: 0.0441 - recall: 0.8864 - auc: 0.9758 - val_loss: 0.0841 - val_tp: 74.0000 - val_fp: 872.0000 - val_tn: 44621.0000 - val_fn: 2.0000 - val_accuracy: 0.9808 - val_precision: 0.0782 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 63/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1624 - tp: 288.0000 - fp: 6407.0000 - tn: 175552.0000 - fn: 29.0000 - accuracy: 0.9647 - precision: 0.0430 - recall: 0.9085 - auc: 0.9855 - val_loss: 0.0836 - val_tp: 74.0000 - val_fp: 876.0000 - val_tn: 44617.0000 - val_fn: 2.0000 - val_accuracy: 0.9807 - val_precision: 0.0779 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 64/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1757 - tp: 287.0000 - fp: 6419.0000 - tn: 175540.0000 - fn: 30.0000 - accuracy: 0.9646 - precision: 0.0428 - recall: 0.9054 - auc: 0.9812 - val_loss: 0.0803 - val_tp: 74.0000 - val_fp: 832.0000 - val_tn: 44661.0000 - val_fn: 2.0000 - val_accuracy: 0.9817 - val_precision: 0.0817 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 65/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1609 - tp: 287.0000 - fp: 6008.0000 - tn: 175951.0000 - fn: 30.0000 - accuracy: 0.9669 - precision: 0.0456 - recall: 0.9054 - auc: 0.9854 - val_loss: 0.0805 - val_tp: 74.0000 - val_fp: 838.0000 - val_tn: 44655.0000 - val_fn: 2.0000 - val_accuracy: 0.9816 - val_precision: 0.0811 - val_recall: 0.9737 - val_auc: 0.9980 +Epoch 66/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1593 - tp: 290.0000 - fp: 6104.0000 - tn: 175855.0000 - fn: 27.0000 - accuracy: 0.9664 - precision: 0.0454 - recall: 0.9148 - auc: 0.9838 - val_loss: 0.0801 - val_tp: 74.0000 - val_fp: 822.0000 - val_tn: 44671.0000 - val_fn: 2.0000 - val_accuracy: 0.9819 - val_precision: 0.0826 - val_recall: 0.9737 - val_auc: 0.9980 +Epoch 67/100 +90/90 [==============================] - 1s 8ms/step - loss: 0.1600 - tp: 292.0000 - fp: 5888.0000 - tn: 176071.0000 - fn: 25.0000 - accuracy: 0.9676 - precision: 0.0472 - recall: 0.9211 - auc: 0.9823 - val_loss: 0.0766 - val_tp: 74.0000 - val_fp: 786.0000 - val_tn: 44707.0000 - val_fn: 2.0000 - val_accuracy: 0.9827 - val_precision: 0.0860 - val_recall: 0.9737 - val_auc: 0.9981 +Epoch 68/100 +90/90 [==============================] - 1s 8ms/step - loss: 0.1803 - tp: 286.0000 - fp: 5871.0000 - tn: 176088.0000 - fn: 31.0000 - accuracy: 0.9676 - precision: 0.0465 - recall: 0.9022 - auc: 0.9792 - val_loss: 0.0785 - val_tp: 74.0000 - val_fp: 788.0000 - val_tn: 44705.0000 - val_fn: 2.0000 - val_accuracy: 0.9827 - val_precision: 0.0858 - val_recall: 0.9737 - val_auc: 
0.9980 +Epoch 69/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1873 - tp: 284.0000 - fp: 5685.0000 - tn: 176274.0000 - fn: 33.0000 - accuracy: 0.9686 - precision: 0.0476 - recall: 0.8959 - auc: 0.9773 - val_loss: 0.0779 - val_tp: 74.0000 - val_fp: 786.0000 - val_tn: 44707.0000 - val_fn: 2.0000 - val_accuracy: 0.9827 - val_precision: 0.0860 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 70/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1525 - tp: 289.0000 - fp: 6048.0000 - tn: 175911.0000 - fn: 28.0000 - accuracy: 0.9667 - precision: 0.0456 - recall: 0.9117 - auc: 0.9857 - val_loss: 0.0791 - val_tp: 74.0000 - val_fp: 803.0000 - val_tn: 44690.0000 - val_fn: 2.0000 - val_accuracy: 0.9823 - val_precision: 0.0844 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 71/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1971 - tp: 284.0000 - fp: 5980.0000 - tn: 175979.0000 - fn: 33.0000 - accuracy: 0.9670 - precision: 0.0453 - recall: 0.8959 - auc: 0.9753 - val_loss: 0.0811 - val_tp: 74.0000 - val_fp: 837.0000 - val_tn: 44656.0000 - val_fn: 2.0000 - val_accuracy: 0.9816 - val_precision: 0.0812 - val_recall: 0.9737 - val_auc: 0.9980 +Epoch 72/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1695 - tp: 286.0000 - fp: 6341.0000 - tn: 175618.0000 - fn: 31.0000 - accuracy: 0.9650 - precision: 0.0432 - recall: 0.9022 - auc: 0.9845 - val_loss: 0.0846 - val_tp: 74.0000 - val_fp: 896.0000 - val_tn: 44597.0000 - val_fn: 2.0000 - val_accuracy: 0.9803 - val_precision: 0.0763 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 73/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1766 - tp: 287.0000 - fp: 6297.0000 - tn: 175662.0000 - fn: 30.0000 - accuracy: 0.9653 - precision: 0.0436 - recall: 0.9054 - auc: 0.9818 - val_loss: 0.0824 - val_tp: 74.0000 - val_fp: 865.0000 - val_tn: 44628.0000 - val_fn: 2.0000 - val_accuracy: 0.9810 - val_precision: 0.0788 - val_recall: 0.9737 - val_auc: 0.9980 +Epoch 74/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1623 - tp: 289.0000 - fp: 6086.0000 - tn: 175873.0000 - fn: 28.0000 - accuracy: 0.9665 - precision: 0.0453 - recall: 0.9117 - auc: 0.9851 - val_loss: 0.0805 - val_tp: 74.0000 - val_fp: 829.0000 - val_tn: 44664.0000 - val_fn: 2.0000 - val_accuracy: 0.9818 - val_precision: 0.0819 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 75/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1746 - tp: 286.0000 - fp: 6430.0000 - tn: 175529.0000 - fn: 31.0000 - accuracy: 0.9646 - precision: 0.0426 - recall: 0.9022 - auc: 0.9826 - val_loss: 0.0851 - val_tp: 74.0000 - val_fp: 920.0000 - val_tn: 44573.0000 - val_fn: 2.0000 - val_accuracy: 0.9798 - val_precision: 0.0744 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 76/100 +90/90 [==============================] - 1s 7ms/step - loss: 0.1439 - tp: 294.0000 - fp: 6075.0000 - tn: 175884.0000 - fn: 23.0000 - accuracy: 0.9665 - precision: 0.0462 - recall: 0.9274 - auc: 0.9877 - val_loss: 0.0818 - val_tp: 74.0000 - val_fp: 872.0000 - val_tn: 44621.0000 - val_fn: 2.0000 - val_accuracy: 0.9808 - val_precision: 0.0782 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 77/100 +82/90 [==========================>...] - ETA: 0s - loss: 0.1624 - tp: 266.0000 - fp: 5807.0000 - tn: 161834.0000 - fn: 29.0000 - accuracy: 0.9652 - precision: 0.0438 - recall: 0.9017 - auc: 0.9853Restoring model weights from the end of the best epoch. 
+90/90 [==============================] - 1s 7ms/step - loss: 0.1554 - tp: 288.0000 - fp: 6332.0000 - tn: 175627.0000 - fn: 29.0000 - accuracy: 0.9651 - precision: 0.0435 - recall: 0.9085 - auc: 0.9862 - val_loss: 0.0818 - val_tp: 74.0000 - val_fp: 875.0000 - val_tn: 44618.0000 - val_fn: 2.0000 - val_accuracy: 0.9808 - val_precision: 0.0780 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 00077: early stopping + +``` + +### Check training history + +```py +plot_metrics(weighted_history) +``` + +![png](img/677f173984390980c5f4af3a22313c24.png) + +### Evaluate metrics + +```py +train_predictions_weighted = weighted_model.predict(train_features, batch_size=BATCH_SIZE) +test_predictions_weighted = weighted_model.predict(test_features, batch_size=BATCH_SIZE) +``` + +```py +weighted_results = weighted_model.evaluate(test_features, test_labels, + batch_size=BATCH_SIZE, verbose=0) +for name, value in zip(weighted_model.metrics_names, weighted_results): + print(name, ': ', value) +print() + +plot_cm(test_labels, test_predictions_weighted) +``` + +```py +loss : 0.07622280716896057 +tp : 90.0 +fp : 998.0 +tn : 55865.0 +fn : 9.0 +accuracy : 0.982321560382843 +precision : 0.08272058516740799 +recall : 0.9090909361839294 +auc : 0.9769566059112549 + +Legitimate Transactions Detected (True Negatives): 55865 +Legitimate Transactions Incorrectly Detected (False Positives): 998 +Fraudulent Transactions Missed (False Negatives): 9 +Fraudulent Transactions Detected (True Positives): 90 +Total Fraudulent Transactions: 99 + +``` + +![png](img/00fbc45eaf75c4132a6ea862403f4be4.png) + +Here you can see that with class weights the accuracy and precision are lower because there are more false positives, but conversely the recall and AUC are higher because the model also found more true positives. Despite having lower accuracy, this model has higher recall (and identifies more fraudulent transactions). Of course, there is a cost to both types of error (you wouldn't want to bug users by flagging too many legitimate transactions as fraudulent, either). Carefully consider the trade-offs between these different types of errors for your application. + +### Plot the ROC + +```py +plot_roc("Train Baseline", train_labels, train_predictions_baseline, color=colors[0]) +plot_roc("Test Baseline", test_labels, test_predictions_baseline, color=colors[0], linestyle='--') + +plot_roc("Train Weighted", train_labels, train_predictions_weighted, color=colors[1]) +plot_roc("Test Weighted", test_labels, test_predictions_weighted, color=colors[1], linestyle='--') + +plt.legend(loc='lower right') +``` + +```py + + +``` + +![png](img/13a6ef1c7f66c4208c56677c5ddd6506.png) + +## Oversampling + +### Oversample the minority class + +A related approach would be to resample the dataset by oversampling the minority class. 
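+ +The split below relies on `bool_train_labels`, a boolean mask of the positive class defined earlier in this tutorial. As a reminder, a minimal sketch of how such a mask can be built (assuming `train_labels` is a flat NumPy array of 0/1 labels, as elsewhere in this tutorial): + +```py +# Mark the positive (fraud) rows; `~` inverts the mask to select the rest. +bool_train_labels = train_labels != 0 +```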
+ +```py +pos_features = train_features[bool_train_labels] +neg_features = train_features[~bool_train_labels] + +pos_labels = train_labels[bool_train_labels] +neg_labels = train_labels[~bool_train_labels] +``` + +#### Using NumPy + +You can balance the dataset manually by choosing the right number of random indices from the positive examples: + +```py +ids = np.arange(len(pos_features)) +choices = np.random.choice(ids, len(neg_features)) + +res_pos_features = pos_features[choices] +res_pos_labels = pos_labels[choices] + +res_pos_features.shape +``` + +```py +(181959, 29) + +``` + +```py +resampled_features = np.concatenate([res_pos_features, neg_features], axis=0) +resampled_labels = np.concatenate([res_pos_labels, neg_labels], axis=0) + +order = np.arange(len(resampled_labels)) +np.random.shuffle(order) +resampled_features = resampled_features[order] +resampled_labels = resampled_labels[order] + +resampled_features.shape +``` + +```py +(363918, 29) + +``` + +#### Using [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data) + +If you're using [`tf.data`](https://tensorflow.google.cn/api_docs/python/tf/data), the easiest way to produce balanced examples is to start with a `positive` and a `negative` dataset, and merge them. See [the tf.data guide](https://tensorflow.google.cn/guide/data) for more examples. + +```py +BUFFER_SIZE = 100000 + +def make_ds(features, labels): + ds = tf.data.Dataset.from_tensor_slices((features, labels))#.cache() + ds = ds.shuffle(BUFFER_SIZE).repeat() + return ds + +pos_ds = make_ds(pos_features, pos_labels) +neg_ds = make_ds(neg_features, neg_labels) +``` + +Each dataset provides `(feature, label)` pairs: + +```py +for features, label in pos_ds.take(1): + print("Features:\n", features.numpy()) + print() + print("Label: ", label.numpy()) +``` + +```py +Features: + [-1.72731925 1.77656615 -3.74269876 2.74253414 -1.32668397 -1.34677584 + -4.46175762 2.0139002 -2.59309618 -5. 4.12736453 -5. + 0.02274489 -5. -0.32786349 -5. -5. -2.96360886 + 2.89835815 0.75463714 1.2022707 -0.14114195 -0.95544067 -1.22935903 + 0.18671861 -0.27928716 3.04376109 0.29779937 -1.45688482] + +Label: 1 + +``` + +Merge the two together using [`experimental.sample_from_datasets`](https://tensorflow.google.cn/api_docs/python/tf/data/experimental/sample_from_datasets): + +```py +resampled_ds = tf.data.experimental.sample_from_datasets([pos_ds, neg_ds], weights=[0.5, 0.5]) +resampled_ds = resampled_ds.batch(BATCH_SIZE).prefetch(2) +``` + +```py +for features, label in resampled_ds.take(1): + print(label.numpy().mean()) +``` + +```py +0.50537109375 + +``` + +To use this dataset, you'll need the number of steps per epoch. + +The definition of "epoch" in this case is less clear. Say it's the number of batches required to see each negative example once: + +```py +resampled_steps_per_epoch = np.ceil(2.0*neg/BATCH_SIZE) +resampled_steps_per_epoch +``` + +```py +278.0 + +``` + +### Train on the oversampled data + +Now try training the model with the resampled dataset instead of using class weights to see how these methods compare. + +**Note:** Because the data was balanced by replicating the positive examples, the total dataset size is larger, and each epoch runs for more training steps. + +```py +resampled_model = make_model() +resampled_model.load_weights(initial_weights) + +# Reset the bias to zero, since this dataset is balanced.
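+# (Earlier in this tutorial, `initial_weights` was saved from a model whose +# output bias was initialized to log(pos/neg) to match the class imbalance; +# that offset no longer applies to the 50/50 resampled data.)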
+output_layer = resampled_model.layers[-1] +output_layer.bias.assign([0]) + +val_ds = tf.data.Dataset.from_tensor_slices((val_features, val_labels)).cache() +val_ds = val_ds.batch(BATCH_SIZE).prefetch(2) + +resampled_history = resampled_model.fit( + resampled_ds, + epochs=EPOCHS, + steps_per_epoch=resampled_steps_per_epoch, + callbacks=[early_stopping], + validation_data=val_ds) +``` + +```py +Epoch 1/100 +278/278 [==============================] - 7s 25ms/step - loss: 0.4377 - tp: 243925.0000 - fp: 88309.0000 - tn: 253494.0000 - fn: 40578.0000 - accuracy: 0.7942 - precision: 0.7342 - recall: 0.8574 - auc: 0.9062 - val_loss: 0.2355 - val_tp: 75.0000 - val_fp: 1532.0000 - val_tn: 43961.0000 - val_fn: 1.0000 - val_accuracy: 0.9664 - val_precision: 0.0467 - val_recall: 0.9868 - val_auc: 0.9984 +Epoch 2/100 +278/278 [==============================] - 7s 24ms/step - loss: 0.2055 - tp: 258676.0000 - fp: 18235.0000 - tn: 266800.0000 - fn: 25633.0000 - accuracy: 0.9229 - precision: 0.9341 - recall: 0.9098 - auc: 0.9741 - val_loss: 0.1247 - val_tp: 74.0000 - val_fp: 1074.0000 - val_tn: 44419.0000 - val_fn: 2.0000 - val_accuracy: 0.9764 - val_precision: 0.0645 - val_recall: 0.9737 - val_auc: 0.9981 +Epoch 3/100 +278/278 [==============================] - 7s 24ms/step - loss: 0.1640 - tp: 263104.0000 - fp: 12413.0000 - tn: 272291.0000 - fn: 21536.0000 - accuracy: 0.9404 - precision: 0.9549 - recall: 0.9243 - auc: 0.9834 - val_loss: 0.0981 - val_tp: 74.0000 - val_fp: 937.0000 - val_tn: 44556.0000 - val_fn: 2.0000 - val_accuracy: 0.9794 - val_precision: 0.0732 - val_recall: 0.9737 - val_auc: 0.9980 +Epoch 4/100 +278/278 [==============================] - 7s 24ms/step - loss: 0.1417 - tp: 265623.0000 - fp: 10539.0000 - tn: 274390.0000 - fn: 18792.0000 - accuracy: 0.9485 - precision: 0.9618 - recall: 0.9339 - auc: 0.9882 - val_loss: 0.0842 - val_tp: 74.0000 - val_fp: 866.0000 - val_tn: 44627.0000 - val_fn: 2.0000 - val_accuracy: 0.9810 - val_precision: 0.0787 - val_recall: 0.9737 - val_auc: 0.9979 +Epoch 5/100 +278/278 [==============================] - 7s 24ms/step - loss: 0.1275 - tp: 267112.0000 - fp: 9551.0000 - tn: 275482.0000 - fn: 17199.0000 - accuracy: 0.9530 - precision: 0.9655 - recall: 0.9395 - auc: 0.9909 - val_loss: 0.0745 - val_tp: 74.0000 - val_fp: 822.0000 - val_tn: 44671.0000 - val_fn: 2.0000 - val_accuracy: 0.9819 - val_precision: 0.0826 - val_recall: 0.9737 - val_auc: 0.9978 +Epoch 6/100 +278/278 [==============================] - 7s 24ms/step - loss: 0.1175 - tp: 268432.0000 - fp: 8918.0000 - tn: 276042.0000 - fn: 15952.0000 - accuracy: 0.9563 - precision: 0.9678 - recall: 0.9439 - auc: 0.9926 - val_loss: 0.0663 - val_tp: 74.0000 - val_fp: 750.0000 - val_tn: 44743.0000 - val_fn: 2.0000 - val_accuracy: 0.9835 - val_precision: 0.0898 - val_recall: 0.9737 - val_auc: 0.9972 +Epoch 7/100 +278/278 [==============================] - 7s 24ms/step - loss: 0.1088 - tp: 269482.0000 - fp: 8208.0000 - tn: 276504.0000 - fn: 15150.0000 - accuracy: 0.9590 - precision: 0.9704 - recall: 0.9468 - auc: 0.9938 - val_loss: 0.0595 - val_tp: 74.0000 - val_fp: 703.0000 - val_tn: 44790.0000 - val_fn: 2.0000 - val_accuracy: 0.9845 - val_precision: 0.0952 - val_recall: 0.9737 - val_auc: 0.9969 +Epoch 8/100 +278/278 [==============================] - 7s 24ms/step - loss: 0.1040 - tp: 269902.0000 - fp: 7829.0000 - tn: 276891.0000 - fn: 14722.0000 - accuracy: 0.9604 - precision: 0.9718 - recall: 0.9483 - auc: 0.9943 - val_loss: 0.0527 - val_tp: 74.0000 - val_fp: 634.0000 - val_tn: 44859.0000 - val_fn: 2.0000 - 
val_accuracy: 0.9860 - val_precision: 0.1045 - val_recall: 0.9737 - val_auc: 0.9962 +Epoch 9/100 +278/278 [==============================] - 7s 24ms/step - loss: 0.0992 - tp: 271117.0000 - fp: 7567.0000 - tn: 276382.0000 - fn: 14278.0000 - accuracy: 0.9616 - precision: 0.9728 - recall: 0.9500 - auc: 0.9948 - val_loss: 0.0509 - val_tp: 74.0000 - val_fp: 651.0000 - val_tn: 44842.0000 - val_fn: 2.0000 - val_accuracy: 0.9857 - val_precision: 0.1021 - val_recall: 0.9737 - val_auc: 0.9959 +Epoch 10/100 +278/278 [==============================] - 7s 24ms/step - loss: 0.0960 - tp: 270783.0000 - fp: 7337.0000 - tn: 277379.0000 - fn: 13845.0000 - accuracy: 0.9628 - precision: 0.9736 - recall: 0.9514 - auc: 0.9950 - val_loss: 0.0467 - val_tp: 74.0000 - val_fp: 612.0000 - val_tn: 44881.0000 - val_fn: 2.0000 - val_accuracy: 0.9865 - val_precision: 0.1079 - val_recall: 0.9737 - val_auc: 0.9959 +Epoch 11/100 +278/278 [==============================] - ETA: 0s - loss: 0.0927 - tp: 271368.0000 - fp: 7017.0000 - tn: 277337.0000 - fn: 13622.0000 - accuracy: 0.9637 - precision: 0.9748 - recall: 0.9522 - auc: 0.9954Restoring model weights from the end of the best epoch. +278/278 [==============================] - 7s 25ms/step - loss: 0.0927 - tp: 271368.0000 - fp: 7017.0000 - tn: 277337.0000 - fn: 13622.0000 - accuracy: 0.9637 - precision: 0.9748 - recall: 0.9522 - auc: 0.9954 - val_loss: 0.0434 - val_tp: 74.0000 - val_fp: 555.0000 - val_tn: 44938.0000 - val_fn: 2.0000 - val_accuracy: 0.9878 - val_precision: 0.1176 - val_recall: 0.9737 - val_auc: 0.9957 +Epoch 00011: early stopping + +``` + +If the training process were considering the whole dataset on each gradient update, this oversampling would be basically identical to the class weighting. + +But when training the model batch-wise, as you did here, the oversampled data provides a smoother gradient signal: instead of each positive example being shown in one batch with a large weight, it is shown in many different batches, each time with a small weight. + +This smoother gradient signal makes it easier to train the model. + +### Check training history + +Note that the distributions of metrics will be different here, because the training data has a totally different distribution from the validation and test data. + +```py +plot_metrics(resampled_history) +``` + +![png](img/6e877844629f2888009486ac2f5ece0b.png) + +### Re-train + +Because training is easier on the balanced data, the above training procedure may overfit quickly. + +So break up the epochs to give the [`callbacks.EarlyStopping`](https://tensorflow.google.cn/api_docs/python/tf/keras/callbacks/EarlyStopping) finer control over when to stop training. + +```py +resampled_model = make_model() +resampled_model.load_weights(initial_weights) + +# Reset the bias to zero, since this dataset is balanced.
+output_layer = resampled_model.layers[-1] +output_layer.bias.assign([0]) + +resampled_history = resampled_model.fit( + resampled_ds, + # These are not real epochs + steps_per_epoch=20, + epochs=10*EPOCHS, + callbacks=[early_stopping], + validation_data=(val_ds)) +``` + +```py +Epoch 1/1000 +20/20 [==============================] - 1s 56ms/step - loss: 0.9282 - tp: 14181.0000 - fp: 14018.0000 - tn: 51871.0000 - fn: 6459.0000 - accuracy: 0.7634 - precision: 0.5029 - recall: 0.6871 - auc: 0.8612 - val_loss: 0.9177 - val_tp: 68.0000 - val_fp: 32432.0000 - val_tn: 13061.0000 - val_fn: 8.0000 - val_accuracy: 0.2881 - val_precision: 0.0021 - val_recall: 0.8947 - val_auc: 0.8450 +Epoch 2/1000 +20/20 [==============================] - 1s 26ms/step - loss: 0.7143 - tp: 16471.0000 - fp: 12416.0000 - tn: 7841.0000 - fn: 4232.0000 - accuracy: 0.5936 - precision: 0.5702 - recall: 0.7956 - auc: 0.7514 - val_loss: 0.8173 - val_tp: 70.0000 - val_fp: 28250.0000 - val_tn: 17243.0000 - val_fn: 6.0000 - val_accuracy: 0.3799 - val_precision: 0.0025 - val_recall: 0.9211 - val_auc: 0.9056 +Epoch 3/1000 +20/20 [==============================] - 1s 26ms/step - loss: 0.5979 - tp: 17119.0000 - fp: 11055.0000 - tn: 9528.0000 - fn: 3258.0000 - accuracy: 0.6506 - precision: 0.6076 - recall: 0.8401 - auc: 0.8194 - val_loss: 0.7158 - val_tp: 70.0000 - val_fp: 22377.0000 - val_tn: 23116.0000 - val_fn: 6.0000 - val_accuracy: 0.5088 - val_precision: 0.0031 - val_recall: 0.9211 - val_auc: 0.9283 +Epoch 4/1000 +20/20 [==============================] - 1s 28ms/step - loss: 0.5265 - tp: 17496.0000 - fp: 9591.0000 - tn: 10955.0000 - fn: 2918.0000 - accuracy: 0.6946 - precision: 0.6459 - recall: 0.8571 - auc: 0.8542 - val_loss: 0.6276 - val_tp: 72.0000 - val_fp: 16226.0000 - val_tn: 29267.0000 - val_fn: 4.0000 - val_accuracy: 0.6438 - val_precision: 0.0044 - val_recall: 0.9474 - val_auc: 0.9576 +Epoch 5/1000 +20/20 [==============================] - 1s 28ms/step - loss: 0.4682 - tp: 17653.0000 - fp: 8002.0000 - tn: 12532.0000 - fn: 2773.0000 - accuracy: 0.7369 - precision: 0.6881 - recall: 0.8642 - auc: 0.8793 - val_loss: 0.5534 - val_tp: 75.0000 - val_fp: 10889.0000 - val_tn: 34604.0000 - val_fn: 1.0000 - val_accuracy: 0.7610 - val_precision: 0.0068 - val_recall: 0.9868 - val_auc: 0.9848 +Epoch 6/1000 +20/20 [==============================] - 1s 28ms/step - loss: 0.4174 - tp: 17837.0000 - fp: 6635.0000 - tn: 13890.0000 - fn: 2598.0000 - accuracy: 0.7746 - precision: 0.7289 - recall: 0.8729 - auc: 0.9003 - val_loss: 0.4905 - val_tp: 75.0000 - val_fp: 6493.0000 - val_tn: 39000.0000 - val_fn: 1.0000 - val_accuracy: 0.8575 - val_precision: 0.0114 - val_recall: 0.9868 - val_auc: 0.9922 +Epoch 7/1000 +20/20 [==============================] - 1s 28ms/step - loss: 0.3787 - tp: 17939.0000 - fp: 5276.0000 - tn: 15251.0000 - fn: 2494.0000 - accuracy: 0.8103 - precision: 0.7727 - recall: 0.8779 - auc: 0.9162 - val_loss: 0.4380 - val_tp: 75.0000 - val_fp: 4314.0000 - val_tn: 41179.0000 - val_fn: 1.0000 - val_accuracy: 0.9053 - val_precision: 0.0171 - val_recall: 0.9868 - val_auc: 0.9954 +Epoch 8/1000 +20/20 [==============================] - 1s 28ms/step - loss: 0.3537 - tp: 18025.0000 - fp: 4431.0000 - tn: 15994.0000 - fn: 2510.0000 - accuracy: 0.8305 - precision: 0.8027 - recall: 0.8778 - auc: 0.9242 - val_loss: 0.3942 - val_tp: 75.0000 - val_fp: 3206.0000 - val_tn: 42287.0000 - val_fn: 1.0000 - val_accuracy: 0.9296 - val_precision: 0.0229 - val_recall: 0.9868 - val_auc: 0.9972 +Epoch 9/1000 +20/20 [==============================] - 1s 
27ms/step - loss: 0.3290 - tp: 17975.0000 - fp: 3746.0000 - tn: 16823.0000 - fn: 2416.0000 - accuracy: 0.8496 - precision: 0.8275 - recall: 0.8815 - auc: 0.9341 - val_loss: 0.3560 - val_tp: 75.0000 - val_fp: 2593.0000 - val_tn: 42900.0000 - val_fn: 1.0000 - val_accuracy: 0.9431 - val_precision: 0.0281 - val_recall: 0.9868 - val_auc: 0.9979 +Epoch 10/1000 +20/20 [==============================] - 1s 27ms/step - loss: 0.3121 - tp: 18157.0000 - fp: 3263.0000 - tn: 17154.0000 - fn: 2386.0000 - accuracy: 0.8621 - precision: 0.8477 - recall: 0.8839 - auc: 0.9397 - val_loss: 0.3239 - val_tp: 75.0000 - val_fp: 2212.0000 - val_tn: 43281.0000 - val_fn: 1.0000 - val_accuracy: 0.9514 - val_precision: 0.0328 - val_recall: 0.9868 - val_auc: 0.9982 +Epoch 11/1000 +20/20 [==============================] - 1s 28ms/step - loss: 0.2934 - tp: 18151.0000 - fp: 2867.0000 - tn: 17641.0000 - fn: 2301.0000 - accuracy: 0.8738 - precision: 0.8636 - recall: 0.8875 - auc: 0.9471 - val_loss: 0.2972 - val_tp: 75.0000 - val_fp: 1970.0000 - val_tn: 43523.0000 - val_fn: 1.0000 - val_accuracy: 0.9567 - val_precision: 0.0367 - val_recall: 0.9868 - val_auc: 0.9983 +Epoch 12/1000 +20/20 [==============================] - 1s 28ms/step - loss: 0.2765 - tp: 18454.0000 - fp: 2498.0000 - tn: 17838.0000 - fn: 2170.0000 - accuracy: 0.8860 - precision: 0.8808 - recall: 0.8948 - auc: 0.9537 - val_loss: 0.2727 - val_tp: 75.0000 - val_fp: 1763.0000 - val_tn: 43730.0000 - val_fn: 1.0000 - val_accuracy: 0.9613 - val_precision: 0.0408 - val_recall: 0.9868 - val_auc: 0.9984 +Epoch 13/1000 +20/20 [==============================] - 1s 27ms/step - loss: 0.2649 - tp: 18355.0000 - fp: 2240.0000 - tn: 18272.0000 - fn: 2093.0000 - accuracy: 0.8942 - precision: 0.8912 - recall: 0.8976 - auc: 0.9577 - val_loss: 0.2528 - val_tp: 75.0000 - val_fp: 1638.0000 - val_tn: 43855.0000 - val_fn: 1.0000 - val_accuracy: 0.9640 - val_precision: 0.0438 - val_recall: 0.9868 - val_auc: 0.9984 +Epoch 14/1000 +20/20 [==============================] - 1s 28ms/step - loss: 0.2547 - tp: 18293.0000 - fp: 2013.0000 - tn: 18577.0000 - fn: 2077.0000 - accuracy: 0.9001 - precision: 0.9009 - recall: 0.8980 - auc: 0.9609 - val_loss: 0.2338 - val_tp: 75.0000 - val_fp: 1516.0000 - val_tn: 43977.0000 - val_fn: 1.0000 - val_accuracy: 0.9667 - val_precision: 0.0471 - val_recall: 0.9868 - val_auc: 0.9985 +Epoch 15/1000 +20/20 [==============================] - 1s 27ms/step - loss: 0.2414 - tp: 18414.0000 - fp: 1709.0000 - tn: 18760.0000 - fn: 2077.0000 - accuracy: 0.9076 - precision: 0.9151 - recall: 0.8986 - auc: 0.9641 - val_loss: 0.2187 - val_tp: 75.0000 - val_fp: 1462.0000 - val_tn: 44031.0000 - val_fn: 1.0000 - val_accuracy: 0.9679 - val_precision: 0.0488 - val_recall: 0.9868 - val_auc: 0.9984 +Epoch 16/1000 +20/20 [==============================] - 1s 27ms/step - loss: 0.2386 - tp: 18465.0000 - fp: 1792.0000 - tn: 18740.0000 - fn: 1963.0000 - accuracy: 0.9083 - precision: 0.9115 - recall: 0.9039 - auc: 0.9660 - val_loss: 0.2044 - val_tp: 75.0000 - val_fp: 1383.0000 - val_tn: 44110.0000 - val_fn: 1.0000 - val_accuracy: 0.9696 - val_precision: 0.0514 - val_recall: 0.9868 - val_auc: 0.9984 +Epoch 17/1000 +20/20 [==============================] - 1s 27ms/step - loss: 0.2290 - tp: 18685.0000 - fp: 1578.0000 - tn: 18675.0000 - fn: 2022.0000 - accuracy: 0.9121 - precision: 0.9221 - recall: 0.9024 - auc: 0.9673 - val_loss: 0.1913 - val_tp: 75.0000 - val_fp: 1297.0000 - val_tn: 44196.0000 - val_fn: 1.0000 - val_accuracy: 0.9715 - val_precision: 0.0547 - val_recall: 0.9868 - val_auc: 
0.9983 +Epoch 18/1000 +20/20 [==============================] - 1s 27ms/step - loss: 0.2170 - tp: 18526.0000 - fp: 1453.0000 - tn: 19039.0000 - fn: 1942.0000 - accuracy: 0.9171 - precision: 0.9273 - recall: 0.9051 - auc: 0.9714 - val_loss: 0.1812 - val_tp: 75.0000 - val_fp: 1266.0000 - val_tn: 44227.0000 - val_fn: 1.0000 - val_accuracy: 0.9722 - val_precision: 0.0559 - val_recall: 0.9868 - val_auc: 0.9983 +Epoch 19/1000 +20/20 [==============================] - 1s 28ms/step - loss: 0.2160 - tp: 18362.0000 - fp: 1402.0000 - tn: 19310.0000 - fn: 1886.0000 - accuracy: 0.9197 - precision: 0.9291 - recall: 0.9069 - auc: 0.9715 - val_loss: 0.1716 - val_tp: 75.0000 - val_fp: 1219.0000 - val_tn: 44274.0000 - val_fn: 1.0000 - val_accuracy: 0.9732 - val_precision: 0.0580 - val_recall: 0.9868 - val_auc: 0.9982 +Epoch 20/1000 +20/20 [==============================] - 1s 28ms/step - loss: 0.2081 - tp: 18722.0000 - fp: 1297.0000 - tn: 19031.0000 - fn: 1910.0000 - accuracy: 0.9217 - precision: 0.9352 - recall: 0.9074 - auc: 0.9735 - val_loss: 0.1633 - val_tp: 75.0000 - val_fp: 1192.0000 - val_tn: 44301.0000 - val_fn: 1.0000 - val_accuracy: 0.9738 - val_precision: 0.0592 - val_recall: 0.9868 - val_auc: 0.9981 +Epoch 21/1000 +20/20 [==============================] - 1s 27ms/step - loss: 0.2041 - tp: 18929.0000 - fp: 1263.0000 - tn: 18884.0000 - fn: 1884.0000 - accuracy: 0.9232 - precision: 0.9375 - recall: 0.9095 - auc: 0.9742 - val_loss: 0.1571 - val_tp: 75.0000 - val_fp: 1187.0000 - val_tn: 44306.0000 - val_fn: 1.0000 - val_accuracy: 0.9739 - val_precision: 0.0594 - val_recall: 0.9868 - val_auc: 0.9980 +Epoch 22/1000 +20/20 [==============================] - 1s 27ms/step - loss: 0.2017 - tp: 18834.0000 - fp: 1224.0000 - tn: 19064.0000 - fn: 1838.0000 - accuracy: 0.9252 - precision: 0.9390 - recall: 0.9111 - auc: 0.9752 - val_loss: 0.1508 - val_tp: 75.0000 - val_fp: 1162.0000 - val_tn: 44331.0000 - val_fn: 1.0000 - val_accuracy: 0.9745 - val_precision: 0.0606 - val_recall: 0.9868 - val_auc: 0.9980 +Epoch 23/1000 +20/20 [==============================] - 1s 28ms/step - loss: 0.1951 - tp: 18612.0000 - fp: 1127.0000 - tn: 19424.0000 - fn: 1797.0000 - accuracy: 0.9286 - precision: 0.9429 - recall: 0.9120 - auc: 0.9764 - val_loss: 0.1458 - val_tp: 75.0000 - val_fp: 1158.0000 - val_tn: 44335.0000 - val_fn: 1.0000 - val_accuracy: 0.9746 - val_precision: 0.0608 - val_recall: 0.9868 - val_auc: 0.9979 +Epoch 24/1000 +18/20 [==========================>...] - ETA: 0s - loss: 0.1945 - tp: 16932.0000 - fp: 1065.0000 - tn: 17301.0000 - fn: 1566.0000 - accuracy: 0.9286 - precision: 0.9408 - recall: 0.9153 - auc: 0.9765Restoring model weights from the end of the best epoch. 
+20/20 [==============================] - 1s 27ms/step - loss: 0.1943 - tp: 18796.0000 - fp: 1179.0000 - tn: 19226.0000 - fn: 1759.0000 - accuracy: 0.9283 - precision: 0.9410 - recall: 0.9144 - auc: 0.9765 - val_loss: 0.1401 - val_tp: 75.0000 - val_fp: 1136.0000 - val_tn: 44357.0000 - val_fn: 1.0000 - val_accuracy: 0.9750 - val_precision: 0.0619 - val_recall: 0.9868 - val_auc: 0.9979 +Epoch 00024: early stopping + +``` + +### Re-check training history + +```py +plot_metrics(resampled_history) +``` + +![png](img/217c4e55f89d4a55a78dc082bbdc1e2f.png) + +### Evaluate metrics + +```py +train_predictions_resampled = resampled_model.predict(train_features, batch_size=BATCH_SIZE) +test_predictions_resampled = resampled_model.predict(test_features, batch_size=BATCH_SIZE) +``` + +```py +resampled_results = resampled_model.evaluate(test_features, test_labels, + batch_size=BATCH_SIZE, verbose=0) +for name, value in zip(resampled_model.metrics_names, resampled_results): + print(name, ': ', value) +print() + +plot_cm(test_labels, test_predictions_resampled) +``` + +```py +loss : 0.23386089503765106 +tp : 91.0 +fp : 1892.0 +tn : 54971.0 +fn : 8.0 +accuracy : 0.9666444063186646 +precision : 0.045890066772699356 +recall : 0.9191918969154358 +auc : 0.977620542049408 + +Legitimate Transactions Detected (True Negatives): 54971 +Legitimate Transactions Incorrectly Detected (False Positives): 1892 +Fraudulent Transactions Missed (False Negatives): 8 +Fraudulent Transactions Detected (True Positives): 91 +Total Fraudulent Transactions: 99 + +``` + +![png](img/5224f36bbf08c7f64fa501011640042b.png) + +### Plot the ROC + +```py +plot_roc("Train Baseline", train_labels, train_predictions_baseline, color=colors[0]) +plot_roc("Test Baseline", test_labels, test_predictions_baseline, color=colors[0], linestyle='--') + +plot_roc("Train Weighted", train_labels, train_predictions_weighted, color=colors[1]) +plot_roc("Test Weighted", test_labels, test_predictions_weighted, color=colors[1], linestyle='--') + +plot_roc("Train Resampled", train_labels, train_predictions_resampled, color=colors[2]) +plot_roc("Test Resampled", test_labels, test_predictions_resampled, color=colors[2], linestyle='--') +plt.legend(loc='lower right') +``` + +```py + + +``` + +![png](img/f1fa73dd5b685549afd6264592919903.png) + +## Applying this tutorial to your problem + +Imbalanced data classification is an inherently difficult task since there are so few samples to learn from. You should always start with the data first and do your best to collect as many samples as possible and give substantial thought to what features may be relevant so the model can get the most out of your minority class. At some point your model may struggle to improve and yield the results you want, so it is important to keep in mind the context of your problem and the trade offs between different types of errors. \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/058.md b/Tensorflow/TensorFlow2.0/058.md new file mode 100644 index 00000000..0bb37132 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/058.md @@ -0,0 +1,1843 @@ +# Time series forecasting + +> 原文:[https://tensorflow.google.cn/tutorials/structured_data/time_series](https://tensorflow.google.cn/tutorials/structured_data/time_series) + +This tutorial is an introduction to time series forecasting using TensorFlow. It builds a few different styles of models including Convolutional and Recurrent Neural Networks (CNNs and RNNs). 
+ +This is covered in two main parts, with subsections: + +* Forecast for a single timestep: + * A single feature. + * All features. +* Forecast multiple steps: + * Single-shot: Make the predictions all at once. + * Autoregressive: Make one prediction at a time and feed the output back to the model. + +## Setup + +```py +import os +import datetime + +import IPython +import IPython.display +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import tensorflow as tf + +mpl.rcParams['figure.figsize'] = (8, 6) +mpl.rcParams['axes.grid'] = False +``` + +## The weather dataset + +This tutorial uses a [weather time series dataset](https://www.bgc-jena.mpg.de/wetter/) recorded by the [Max Planck Institute for Biogeochemistry](https://www.bgc-jena.mpg.de). + +This dataset contains 14 different features such as air temperature, atmospheric pressure, and humidity. These were collected every 10 minutes, beginning in 2003\. For efficiency, you will use only the data collected between 2009 and 2016\. This section of the dataset was prepared by François Chollet for his book [Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python). + +```py +zip_path = tf.keras.utils.get_file( + origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip', + fname='jena_climate_2009_2016.csv.zip', + extract=True) +csv_path, _ = os.path.splitext(zip_path) +``` + +```py +Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip +13574144/13568290 [==============================] - 0s 0us/step + +``` + +This tutorial will just deal with **hourly predictions**, so start by sub-sampling the data from 10 minute intervals to 1h: + +```py +df = pd.read_csv(csv_path) +# slice [start:stop:step], starting from index 5 take every 6th record. +df = df[5::6] + +date_time = pd.to_datetime(df.pop('Date Time'), format='%d.%m.%Y %H:%M:%S') +``` + +Let's take a glance at the data. Here are the first few rows: + +```py +df.head() +``` + + + +Here is the evolution of a few features over time. + +```py +plot_cols = ['T (degC)', 'p (mbar)', 'rho (g/m**3)'] +plot_features = df[plot_cols] +plot_features.index = date_time +_ = plot_features.plot(subplots=True) + +plot_features = df[plot_cols][:480] +plot_features.index = date_time[:480] +_ = plot_features.plot(subplots=True) +``` + +![png](img/78576e063fbd26107e5efc5a23a8ec2d.png) + +![png](img/147de2a992900105434d95a527a869c8.png) + +### Inspect and cleanup + +Next look at the statistics of the dataset: + +```py +df.describe().transpose() +``` + + + +#### Wind velocity + +One thing that should stand out is the `min` value of the wind velocity, `wv (m/s)` and `max. wv (m/s)` columns. This `-9999` is likely erroneous. There's a separate wind direction column, so the velocity should be `>=0`. Replace it with zeros: + +```py +wv = df['wv (m/s)'] +bad_wv = wv == -9999.0 +wv[bad_wv] = 0.0 + +max_wv = df['max. wv (m/s)'] +bad_max_wv = max_wv == -9999.0 +max_wv[bad_max_wv] = 0.0 + +# The above inplace edits are reflected in the DataFrame +df['wv (m/s)'].min() +``` + +```py +0.0 + +``` + +### Feature engineering + +Before diving in to build a model it's important to understand your data, and be sure that you're passing the model appropriately formatted data. + +#### Wind + +The last column of the data, `wd (deg)`, gives the wind direction in units of degrees. 
Angles do not make good model inputs: 360° and 0° should be close to each other and wrap around smoothly. Direction shouldn't matter if the wind is not blowing. + +Right now the distribution of wind data looks like this: + +```py +plt.hist2d(df['wd (deg)'], df['wv (m/s)'], bins=(50, 50), vmax=400) +plt.colorbar() +plt.xlabel('Wind Direction [deg]') +plt.ylabel('Wind Velocity [m/s]') +``` + +```py +Text(0, 0.5, 'Wind Velocity [m/s]') + +``` + +![png](img/4f82813c049f4e0039986833c582376d.png) + +But this will be easier for the model to interpret if you convert the wind direction and velocity columns to a wind **vector**: + +```py +wv = df.pop('wv (m/s)') +max_wv = df.pop('max. wv (m/s)') + +# Convert to radians. +wd_rad = df.pop('wd (deg)')*np.pi / 180 + +# Calculate the wind x and y components. +df['Wx'] = wv*np.cos(wd_rad) +df['Wy'] = wv*np.sin(wd_rad) + +# Calculate the max wind x and y components. +df['max Wx'] = max_wv*np.cos(wd_rad) +df['max Wy'] = max_wv*np.sin(wd_rad) +``` + +The distribution of wind vectors is much simpler for the model to correctly interpret. + +```py +plt.hist2d(df['Wx'], df['Wy'], bins=(50, 50), vmax=400) +plt.colorbar() +plt.xlabel('Wind X [m/s]') +plt.ylabel('Wind Y [m/s]') +ax = plt.gca() +ax.axis('tight') +``` + +```py +(-11.305513973134667, 8.24469928549079, -8.27438540335515, 7.7338312955467785) + +``` + +![png](img/df0f8cf3d780ab200f3e674a67324435.png) + +#### Time + +Similarly, the `Date Time` column is very useful, but not in this string form. Start by converting it to seconds: + +```py +timestamp_s = date_time.map(datetime.datetime.timestamp) +``` + +Similar to the wind direction, the time in seconds is not a useful model input. Being weather data, it has clear daily and yearly periodicity. There are many ways you could deal with periodicity. + +A simple approach to convert it to a usable signal is to use `sin` and `cos` to convert the time to clear "Time of day" and "Time of year" signals: + +```py +day = 24*60*60 +year = (365.2425)*day + +df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day)) +df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day)) +df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year)) +df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year)) +``` + +```py +plt.plot(np.array(df['Day sin'])[:25]) +plt.plot(np.array(df['Day cos'])[:25]) +plt.xlabel('Time [h]') +plt.title('Time of day signal') +``` + +```py +Text(0.5, 1.0, 'Time of day signal') + +``` + +![png](img/70c92a3e3d823d4469d81b23380fef63.png) + +This gives the model access to the most important frequency features. In this case you knew ahead of time which frequencies were important. + +If you didn't know, you could determine which frequencies are important using an `fft`. To check these assumptions, here is the [`tf.signal.rfft`](https://tensorflow.google.cn/api_docs/python/tf/signal/rfft) of the temperature over time.
Note the obvious peaks at frequencies near `1/year` and `1/day`: + +```py +fft = tf.signal.rfft(df['T (degC)']) +f_per_dataset = np.arange(0, len(fft)) + +n_samples_h = len(df['T (degC)']) +hours_per_year = 24*365.2524 +years_per_dataset = n_samples_h/(hours_per_year) + +f_per_year = f_per_dataset/years_per_dataset +plt.step(f_per_year, np.abs(fft)) +plt.xscale('log') +plt.ylim(0, 400000) +plt.xlim([0.1, max(plt.xlim())]) +plt.xticks([1, 365.2524], labels=['1/Year', '1/day']) +_ = plt.xlabel('Frequency (log scale)') +``` + +![png](img/e8229311b22645eacfe9d45893aa40bc.png) + +### Split the data + +We'll use a `(70%, 20%, 10%)` split for the training, validation, and test sets. Note the data is **not** being randomly shuffled before splitting. This is for two reasons. + +1. It ensures that chopping the data into windows of consecutive samples is still possible. +2. It ensures that the validation/test results are more realistic, being evaluated on data collected after the model was trained. + +```py +column_indices = {name: i for i, name in enumerate(df.columns)} + +n = len(df) +train_df = df[0:int(n*0.7)] +val_df = df[int(n*0.7):int(n*0.9)] +test_df = df[int(n*0.9):] + +num_features = df.shape[1] +``` + +### Normalize the data + +It is important to scale features before training a neural network. Normalization is a common way of doing this scaling. Subtract the mean and divide by the standard deviation of each feature. + +The mean and standard deviation should only be computed using the training data so that the models have no access to the values in the validation and test sets. + +It's also arguable that the model shouldn't have access to future values in the training set when training, and that this normalization should be done using moving averages. That's not the focus of this tutorial, and the validation and test sets ensure that you get (somewhat) honest metrics. So in the interest of simplicity this tutorial uses a simple average. + +```py +train_mean = train_df.mean() +train_std = train_df.std() + +train_df = (train_df - train_mean) / train_std +val_df = (val_df - train_mean) / train_std +test_df = (test_df - train_mean) / train_std +``` + +Now peek at the distribution of the features. Some features do have long tails, but there are no obvious errors like the `-9999` wind velocity value. + +```py +df_std = (df - train_mean) / train_std +df_std = df_std.melt(var_name='Column', value_name='Normalized') +plt.figure(figsize=(12, 6)) +ax = sns.violinplot(x='Column', y='Normalized', data=df_std) +_ = ax.set_xticklabels(df.keys(), rotation=90) +``` + +![png](img/a50df54eefaa2e0b41728c5e66685f3a.png) + +## Data windowing + +The models in this tutorial will make a set of predictions based on a window of consecutive samples from the data. + +The main features of the input windows are: + +* The width (number of time steps) of the input and label windows +* The time offset between them. +* Which features are used as inputs, labels, or both. + +This tutorial builds a variety of models (including Linear, DNN, CNN and RNN models), and uses them for both: + +* *Single-output*, and *multi-output* predictions. +* *Single-time-step* and *multi-time-step* predictions. + +This section focuses on implementing the data windowing so that it can be reused for all of those models. + +Depending on the task and type of model you may want to generate a variety of data windows. Here are some examples: + +1. 
For example, to make a single prediction 24h into the future, given 24h of history, you might define a window like this: + + ![One prediction 24h into the future.](img/e2f893946253a84a1410e69624e915bc.png) + +2. A model that makes a prediction 1h into the future, given 6h of history, would need a window like this: + + ![One prediction 1h into the future.](img/5fc218fbba095949ac4ca115de92329b.png) + +The rest of this section defines a `WindowGenerator` class. This class can: + +1. Handle the indexes and offsets as shown in the diagrams above. +2. Split windows of features into `(features, labels)` pairs. +3. Plot the content of the resulting windows. +4. Efficiently generate batches of these windows from the training, evaluation, and test data, using [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset)s. + +### 1\. Indexes and offsets + +Start by creating the `WindowGenerator` class. The `__init__` method includes all the necessary logic for the input and label indices. + +It also takes the train, eval, and test dataframes as input. These will be converted to [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset)s of windows later. + +```py +class WindowGenerator(): + def __init__(self, input_width, label_width, shift, + train_df=train_df, val_df=val_df, test_df=test_df, + label_columns=None): + # Store the raw data. + self.train_df = train_df + self.val_df = val_df + self.test_df = test_df + + # Work out the label column indices. + self.label_columns = label_columns + if label_columns is not None: + self.label_columns_indices = {name: i for i, name in + enumerate(label_columns)} + self.column_indices = {name: i for i, name in + enumerate(train_df.columns)} + + # Work out the window parameters. + self.input_width = input_width + self.label_width = label_width + self.shift = shift + + self.total_window_size = input_width + shift + + self.input_slice = slice(0, input_width) + self.input_indices = np.arange(self.total_window_size)[self.input_slice] + + self.label_start = self.total_window_size - self.label_width + self.labels_slice = slice(self.label_start, None) + self.label_indices = np.arange(self.total_window_size)[self.labels_slice] + + def __repr__(self): + return '\n'.join([ + f'Total window size: {self.total_window_size}', + f'Input indices: {self.input_indices}', + f'Label indices: {self.label_indices}', + f'Label column name(s): {self.label_columns}']) +``` + +Here is code to create the 2 windows shown in the diagrams at the start of this section: + +```py +w1 = WindowGenerator(input_width=24, label_width=1, shift=24, + label_columns=['T (degC)']) +w1 +``` + +```py +Total window size: 48 +Input indices: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23] +Label indices: [47] +Label column name(s): ['T (degC)'] + +``` + +```py +w2 = WindowGenerator(input_width=6, label_width=1, shift=1, + label_columns=['T (degC)']) +w2 +``` + +```py +Total window size: 7 +Input indices: [0 1 2 3 4 5] +Label indices: [6] +Label column name(s): ['T (degC)'] + +``` + +### 2\. Split + +Given a list of consecutive inputs, the `split_window` method will convert them to a window of inputs and a window of labels.
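+ +In index terms, this split is just two slices over the time axis. As an illustration (a toy NumPy array with `w2`'s sizes, not data from the tutorial): + +```py +import numpy as np + +# w2: input_width=6, label_width=1, shift=1, so total_window_size = 7. +window = np.arange(7) +inputs = window[slice(0, 6)] # input_slice +labels = window[slice(6, None)] # labels_slice +print(inputs, labels) # [0 1 2 3 4 5] [6] +```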
+ +The example `w2`, above, will be split like this: + +![The initial window is all consecutive samples; this splits it into an (inputs, labels) pair](img/b84cdc6273e2ce65876a86561af080a5.png) + +This diagram doesn't show the `features` axis of the data, but this `split_window` function also handles the `label_columns` so it can be used for both the single-output and multi-output examples. + +```py +def split_window(self, features): + inputs = features[:, self.input_slice, :] + labels = features[:, self.labels_slice, :] + if self.label_columns is not None: + labels = tf.stack( + [labels[:, :, self.column_indices[name]] for name in self.label_columns], + axis=-1) + + # Slicing doesn't preserve static shape information, so set the shapes + # manually. This way the `tf.data.Datasets` are easier to inspect. + inputs.set_shape([None, self.input_width, None]) + labels.set_shape([None, self.label_width, None]) + + return inputs, labels + +WindowGenerator.split_window = split_window +``` + +Try it out: + +```py +# Stack three slices, the length of the total window: +example_window = tf.stack([np.array(train_df[:w2.total_window_size]), + np.array(train_df[100:100+w2.total_window_size]), + np.array(train_df[200:200+w2.total_window_size])]) + +example_inputs, example_labels = w2.split_window(example_window) + +print('All shapes are: (batch, time, features)') +print(f'Window shape: {example_window.shape}') +print(f'Inputs shape: {example_inputs.shape}') +print(f'labels shape: {example_labels.shape}') +``` + +```py +All shapes are: (batch, time, features) +Window shape: (3, 7, 19) +Inputs shape: (3, 6, 19) +labels shape: (3, 1, 1) + +``` + +Typically data in TensorFlow is packed into arrays where the outermost index is across examples (the "batch" dimension). The middle indices are the "time" or "space" (width, height) dimension(s). The innermost indices are the features. + +The code above took a batch of three 7-timestep windows, with 19 features at each time step. It split them into a batch of 6-timestep, 19-feature inputs and a 1-timestep, 1-feature label. The label only has one feature because the `WindowGenerator` was initialized with `label_columns=['T (degC)']`. Initially this tutorial will build models that predict single output labels. + +### 3\. 
Plot + +Here is a plot method that allows a simple visualization of the split window: + +```py +w2.example = example_inputs, example_labels +``` + +```py +def plot(self, model=None, plot_col='T (degC)', max_subplots=3): + inputs, labels = self.example + plt.figure(figsize=(12, 8)) + plot_col_index = self.column_indices[plot_col] + max_n = min(max_subplots, len(inputs)) + for n in range(max_n): + plt.subplot(3, 1, n+1) + plt.ylabel(f'{plot_col} [normed]') + plt.plot(self.input_indices, inputs[n, :, plot_col_index], + label='Inputs', marker='.', zorder=-10) + + if self.label_columns: + label_col_index = self.label_columns_indices.get(plot_col, None) + else: + label_col_index = plot_col_index + + if label_col_index is None: + continue + + plt.scatter(self.label_indices, labels[n, :, label_col_index], + edgecolors='k', label='Labels', c='#2ca02c', s=64) + if model is not None: + predictions = model(inputs) + plt.scatter(self.label_indices, predictions[n, :, label_col_index], + marker='X', edgecolors='k', label='Predictions', + c='#ff7f0e', s=64) + + if n == 0: + plt.legend() + + plt.xlabel('Time [h]') + +WindowGenerator.plot = plot +``` + +This plot aligns inputs, labels, and (later) predictions based on the time that the item refers to: + +```py +w2.plot() +``` + +![png](img/9de9afda3f7352b4106f61a83025d8e1.png) + +You can plot the other columns, but the example window `w2` configuration only has labels for the `T (degC)` column. + +```py +w2.plot(plot_col='p (mbar)') +``` + +![png](img/651f6b2fbf396ac15fb7661af005bb0e.png) + +### 4\. Create [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset)s + +Finally this `make_dataset` method will take a time series `DataFrame` and convert it to a [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) of `(input_window, label_window)` pairs using the [`preprocessing.timeseries_dataset_from_array`](https://tensorflow.google.cn/api_docs/python/tf/keras/preprocessing/timeseries_dataset_from_array) function. + +```py +def make_dataset(self, data): + data = np.array(data, dtype=np.float32) + ds = tf.keras.preprocessing.timeseries_dataset_from_array( + data=data, + targets=None, + sequence_length=self.total_window_size, + sequence_stride=1, + shuffle=True, + batch_size=32,) + + ds = ds.map(self.split_window) + + return ds + +WindowGenerator.make_dataset = make_dataset +``` + +The `WindowGenerator` object holds training, validation and test data. Add properties for accessing them as `tf.data.Datasets` using the above `make_dataset` method. Also add a standard example batch for easy access and plotting: + +```py +@property +def train(self): + return self.make_dataset(self.train_df) + +@property +def val(self): + return self.make_dataset(self.val_df) + +@property +def test(self): + return self.make_dataset(self.test_df) + +@property +def example(self): + """Get and cache an example batch of `inputs, labels` for plotting.""" + result = getattr(self, '_example', None) + if result is None: + # No example batch was found, so get one from the `.train` dataset + result = next(iter(self.train)) + # And cache it for next time + self._example = result + return result + +WindowGenerator.train = train +WindowGenerator.val = val +WindowGenerator.test = test +WindowGenerator.example = example +``` + +Now the `WindowGenerator` object gives you access to the [`tf.data.Dataset`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset) objects, so you can easily iterate over the data. 
+ +The [`Dataset.element_spec`](https://tensorflow.google.cn/api_docs/python/tf/data/Dataset#element_spec) property tells you the structure, `dtypes` and shapes of the dataset elements. + +```py +# Each element is an (inputs, label) pair +w2.train.element_spec +``` + +```py +(TensorSpec(shape=(None, 6, 19), dtype=tf.float32, name=None), + TensorSpec(shape=(None, 1, 1), dtype=tf.float32, name=None)) + +``` + +Iterating over a `Dataset` yields concrete batches: + +```py +for example_inputs, example_labels in w2.train.take(1): + print(f'Inputs shape (batch, time, features): {example_inputs.shape}') + print(f'Labels shape (batch, time, features): {example_labels.shape}') +``` + +```py +Inputs shape (batch, time, features): (32, 6, 19) +Labels shape (batch, time, features): (32, 1, 1) + +``` + +## Single step models + +The simplest model you can build on this sort of data is one that predicts a single feature's value, 1 timestep (1h) in the future based only on the current conditions. + +So start by building models to predict the `T (degC)` value 1h into the future. + +![Predict the next time step](img/572dbe03f38bc28e6575b97b2ce91fda.png) + +Configure a `WindowGenerator` object to produce these single-step `(input, label)` pairs: + +```py +single_step_window = WindowGenerator( + input_width=1, label_width=1, shift=1, + label_columns=['T (degC)']) +single_step_window +``` + +```py +Total window size: 2 +Input indices: [0] +Label indices: [1] +Label column name(s): ['T (degC)'] + +``` + +The `window` object creates `tf.data.Datasets` from the training, validation, and test sets, allowing you to easily iterate over batches of data. + +```py +for example_inputs, example_labels in single_step_window.train.take(1): + print(f'Inputs shape (batch, time, features): {example_inputs.shape}') + print(f'Labels shape (batch, time, features): {example_labels.shape}') +``` + +```py +Inputs shape (batch, time, features): (32, 1, 19) +Labels shape (batch, time, features): (32, 1, 1) + +``` + +### Baseline + +Before building a trainable model it would be good to have a performance baseline as a point for comparison with the later more complicated models. + +This first task is to predict temperature 1h in the future given the current value of all features. The current values include the current temperature. + +So start with a model that just returns the current temperature as the prediction, predicting "No change". This is a reasonable baseline since temperature changes slowly. Of course, this baseline will work less well if you make a prediction further in the future. 
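+ +In other words, the prediction for the next hour is just the current temperature. As a quick illustration of this "persistence" idea (hypothetical toy values, not from the dataset): + +```py +import numpy as np + +temps = np.array([10.2, 10.0, 9.8, 9.9]) # hypothetical hourly T (degC) +preds = temps[:-1] # "no change": next hour = current hour +targets = temps[1:] # the actual next-hour values +print(np.abs(preds - targets).mean()) # persistence MAE, about 0.167 here +```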
+![Send the input to the output](img/1b0da93649f50a8108ee8ddd8b08bc04.png) + +```py +class Baseline(tf.keras.Model): + def __init__(self, label_index=None): + super().__init__() + self.label_index = label_index + + def call(self, inputs): + if self.label_index is None: + return inputs + result = inputs[:, :, self.label_index] + return result[:, :, tf.newaxis] +``` + +Instantiate and evaluate this model: + +```py +baseline = Baseline(label_index=column_indices['T (degC)']) + +baseline.compile(loss=tf.losses.MeanSquaredError(), + metrics=[tf.metrics.MeanAbsoluteError()]) + +val_performance = {} +performance = {} +val_performance['Baseline'] = baseline.evaluate(single_step_window.val) +performance['Baseline'] = baseline.evaluate(single_step_window.test, verbose=0) +``` + +```py +439/439 [==============================] - 1s 2ms/step - loss: 0.0128 - mean_absolute_error: 0.0785 + +``` + +That printed some performance metrics, but those don't give you a feeling for how well the model is doing. + +The `WindowGenerator` has a plot method, but the plots won't be very interesting with only a single sample. So, create a wider `WindowGenerator` that generates windows of 24h of consecutive inputs and labels at a time. + +The `wide_window` doesn't change the way the model operates. The model still makes predictions 1h into the future based on a single input time step. Here the `time` axis acts like the `batch` axis: Each prediction is made independently with no interaction between time steps. + +```py +wide_window = WindowGenerator( + input_width=24, label_width=24, shift=1, + label_columns=['T (degC)']) + +wide_window +``` + +```py +Total window size: 25 +Input indices: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23] +Label indices: [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24] +Label column name(s): ['T (degC)'] + +``` + +This expanded window can be passed directly to the same `baseline` model without any code changes. This is possible because the inputs and labels have the same number of timesteps, and the baseline just forwards the input to the output: + +![One prediction 1h into the future, every hour.](img/8c90a903ef2c498784c14d3c169e9ee6.png) + +```py +print('Input shape:', wide_window.example[0].shape) +print('Output shape:', baseline(wide_window.example[0]).shape) +``` + +```py +Input shape: (32, 24, 19) +Output shape: (32, 24, 1) + +``` + +Plotting the baseline model's predictions, you can see that they are simply the labels, shifted right by 1h. + +```py +wide_window.plot(baseline) +``` + +![png](img/89837448c36d73e735364038e7b3e689.png) + +In the above plots of three examples, the single step model is run over the course of 24h. This deserves some explanation: + +* The blue "Inputs" line shows the input temperature at each time step. The model receives all features; this plot only shows the temperature. +* The green "Labels" dots show the target prediction value. These dots are shown at the prediction time, not the input time. That is why the range of labels is shifted 1 step relative to the inputs. +* The orange "Predictions" crosses are the model's predictions for each output time step. If the model were predicting perfectly, the predictions would land directly on the "Labels". + +### Linear model + +The simplest **trainable** model you can apply to this task is to insert a linear transformation between the input and output.
In this case the output from a time step only depends on that step:
+
+![A single step prediction](img/572dbe03f38bc28e6575b97b2ce91fda.png)
+
+A [`layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense) with no `activation` set is a linear model. The layer only transforms the last axis of the data from `(batch, time, inputs)` to `(batch, time, units)`; it is applied independently to every item across the `batch` and `time` axes.
+
+```py
+linear = tf.keras.Sequential([
+    tf.keras.layers.Dense(units=1)
+])
+```
+
+```py
+print('Input shape:', single_step_window.example[0].shape)
+print('Output shape:', linear(single_step_window.example[0]).shape)
+```
+
+```py
+Input shape: (32, 1, 19)
+Output shape: (32, 1, 1)
+
+```
+
+This tutorial trains many models, so package the training procedure into a function:
+
+```py
+MAX_EPOCHS = 20
+
+def compile_and_fit(model, window, patience=2):
+  early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
+                                                    patience=patience,
+                                                    mode='min')
+
+  model.compile(loss=tf.losses.MeanSquaredError(),
+                optimizer=tf.optimizers.Adam(),
+                metrics=[tf.metrics.MeanAbsoluteError()])
+
+  history = model.fit(window.train, epochs=MAX_EPOCHS,
+                      validation_data=window.val,
+                      callbacks=[early_stopping])
+  return history
+```
+
+Train the model and evaluate its performance:
+
+```py
+history = compile_and_fit(linear, single_step_window)
+
+val_performance['Linear'] = linear.evaluate(single_step_window.val)
+performance['Linear'] = linear.evaluate(single_step_window.test, verbose=0)
+```
+
+```py
+Epoch 1/20
+1534/1534 [==============================] - 5s 3ms/step - loss: 0.2864 - mean_absolute_error: 0.2848 - val_loss: 0.0163 - val_mean_absolute_error: 0.0975
+Epoch 2/20
+1534/1534 [==============================] - 5s 3ms/step - loss: 0.0121 - mean_absolute_error: 0.0817 - val_loss: 0.0103 - val_mean_absolute_error: 0.0752
+Epoch 3/20
+1534/1534 [==============================] - 5s 3ms/step - loss: 0.0103 - mean_absolute_error: 0.0749 - val_loss: 0.0098 - val_mean_absolute_error: 0.0738
+Epoch 4/20
+1534/1534 [==============================] - 5s 3ms/step - loss: 0.0099 - mean_absolute_error: 0.0733 - val_loss: 0.0095 - val_mean_absolute_error: 0.0731
+Epoch 5/20
+1534/1534 [==============================] - 5s 3ms/step - loss: 0.0096 - mean_absolute_error: 0.0721 - val_loss: 0.0092 - val_mean_absolute_error: 0.0719
+Epoch 6/20
+1534/1534 [==============================] - 5s 3ms/step - loss: 0.0095 - mean_absolute_error: 0.0715 - val_loss: 0.0091 - val_mean_absolute_error: 0.0716
+Epoch 7/20
+1534/1534 [==============================] - 5s 3ms/step - loss: 0.0094 - mean_absolute_error: 0.0710 - val_loss: 0.0091 - val_mean_absolute_error: 0.0716
+Epoch 8/20
+1534/1534 [==============================] - 5s 3ms/step - loss: 0.0093 - mean_absolute_error: 0.0707 - val_loss: 0.0090 - val_mean_absolute_error: 0.0706
+Epoch 9/20
+1534/1534 [==============================] - 5s 3ms/step - loss: 0.0092 - mean_absolute_error: 0.0704 - val_loss: 0.0090 - val_mean_absolute_error: 0.0712
+Epoch 10/20
+1534/1534 [==============================] - 5s 3ms/step - loss: 0.0092 - mean_absolute_error: 0.0703 - val_loss: 0.0091 - val_mean_absolute_error: 0.0715
+439/439 [==============================] - 1s 2ms/step - loss: 0.0091 - mean_absolute_error: 0.0715
+
+```
+
+Like the `baseline` model, the linear model can be called on batches of wide windows. Used this way, the model makes a set of independent predictions on consecutive time steps.
The `time` axis acts like another `batch` axis. There are no interactions between the predictions at each time step.
+
+![A single step prediction](img/f8f7d813408efbfa9b764dfcfe54d2ad.png)
+
+```py
+print('Input shape:', wide_window.example[0].shape)
+print('Output shape:', linear(wide_window.example[0]).shape)
+```
+
+```py
+Input shape: (32, 24, 19)
+Output shape: (32, 24, 1)
+
+```
+
+Here is the plot of its example predictions on the `wide_window`. Note how in many cases the prediction is clearly better than just returning the input temperature, but in a few cases it's worse:
+
+```py
+wide_window.plot(linear)
+```
+
+![png](img/05dfb322f577170f0e9218f00221d363.png)
+
+One advantage of linear models is that they're relatively simple to interpret. You can pull out the layer's weights and see the weight assigned to each input:
+
+```py
+plt.bar(x = range(len(train_df.columns)),
+        height=linear.layers[0].kernel[:,0].numpy())
+axis = plt.gca()
+axis.set_xticks(range(len(train_df.columns)))
+_ = axis.set_xticklabels(train_df.columns, rotation=90)
+```
+
+![png](img/019d6a002d56ca3eff0330fbb68495d2.png)
+
+Sometimes the model doesn't even place the most weight on the input `T (degC)`. This is one of the risks of random initialization.
+
+### Dense
+
+Before applying models that actually operate on multiple time steps, it's worth checking the performance of deeper, more powerful single-input-step models.
+
+Here's a model similar to the `linear` model, except it stacks a few `Dense` layers between the input and the output:
+
+```py
+dense = tf.keras.Sequential([
+    tf.keras.layers.Dense(units=64, activation='relu'),
+    tf.keras.layers.Dense(units=64, activation='relu'),
+    tf.keras.layers.Dense(units=1)
+])
+
+history = compile_and_fit(dense, single_step_window)
+
+val_performance['Dense'] = dense.evaluate(single_step_window.val)
+performance['Dense'] = dense.evaluate(single_step_window.test, verbose=0)
+```
+
+```py
+Epoch 1/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0159 - mean_absolute_error: 0.0814 - val_loss: 0.0086 - val_mean_absolute_error: 0.0693
+Epoch 2/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0079 - mean_absolute_error: 0.0645 - val_loss: 0.0076 - val_mean_absolute_error: 0.0629
+Epoch 3/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0074 - mean_absolute_error: 0.0622 - val_loss: 0.0085 - val_mean_absolute_error: 0.0666
+Epoch 4/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0072 - mean_absolute_error: 0.0608 - val_loss: 0.0071 - val_mean_absolute_error: 0.0593
+Epoch 5/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0070 - mean_absolute_error: 0.0595 - val_loss: 0.0067 - val_mean_absolute_error: 0.0579
+Epoch 6/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0068 - mean_absolute_error: 0.0588 - val_loss: 0.0072 - val_mean_absolute_error: 0.0594
+Epoch 7/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0068 - mean_absolute_error: 0.0583 - val_loss: 0.0066 - val_mean_absolute_error: 0.0564
+Epoch 8/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0066 - mean_absolute_error: 0.0576 - val_loss: 0.0078 - val_mean_absolute_error: 0.0637
+Epoch 9/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0066 - mean_absolute_error: 0.0576 - val_loss: 0.0066 - val_mean_absolute_error: 0.0564
+Epoch 10/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0065 - mean_absolute_error: 0.0569 - val_loss: 0.0062 - val_mean_absolute_error: 0.0551
+Epoch 11/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0065 - mean_absolute_error: 0.0571 - val_loss: 0.0070 - val_mean_absolute_error: 0.0596
+Epoch 12/20
+1534/1534 [==============================] - 6s 4ms/step - loss: 0.0064 - mean_absolute_error: 0.0565 - val_loss: 0.0063 - val_mean_absolute_error: 0.0551
+439/439 [==============================] - 1s 3ms/step - loss: 0.0063 - mean_absolute_error: 0.0551
+
+```
+
+### Multi-step dense
+
+A single-time-step model has no context for the current values of its inputs. It can't see how the input features are changing over time. To address this issue, the model needs access to multiple time steps when making predictions:
+
+![Three time steps are used for each prediction.](img/b9f520a19b0bd493834819ddfc1ef26b.png)
+
+The `baseline`, `linear` and `dense` models handled each time step independently. Here the model will take multiple time steps as input to produce a single output.
+
+Create a `WindowGenerator` that will produce batches of 3h of inputs and 1h of labels:
+
+Note that the `Window`'s `shift` parameter is relative to the end of the two windows.
+
+```py
+CONV_WIDTH = 3
+conv_window = WindowGenerator(
+    input_width=CONV_WIDTH,
+    label_width=1,
+    shift=1,
+    label_columns=['T (degC)'])
+
+conv_window
+```
+
+```py
+Total window size: 4
+Input indices: [0 1 2]
+Label indices: [3]
+Label column name(s): ['T (degC)']
+
+```
+
+```py
+conv_window.plot()
+plt.title("Given 3h as input, predict 1h into the future.")
+```
+
+```py
+Text(0.5, 1.0, 'Given 3h as input, predict 1h into the future.')
+
+```
+
+![png](img/96e942cbf28db3cfff8c8f90167f2a8b.png)
+
+You could train a `dense` model on a multiple-input-step window by adding a [`layers.Flatten`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Flatten) as the first layer of the model:
+
+```py
+multi_step_dense = tf.keras.Sequential([
+    # Shape: (time, features) => (time*features)
+    tf.keras.layers.Flatten(),
+    tf.keras.layers.Dense(units=32, activation='relu'),
+    tf.keras.layers.Dense(units=32, activation='relu'),
+    tf.keras.layers.Dense(units=1),
+    # Add back the time dimension.
+    # Shape: (outputs) => (1, outputs)
+    tf.keras.layers.Reshape([1, -1]),
+])
+```
+
+```py
+print('Input shape:', conv_window.example[0].shape)
+print('Output shape:', multi_step_dense(conv_window.example[0]).shape)
+```
+
+```py
+Input shape: (32, 3, 19)
+Output shape: (32, 1, 1)
+
+```
+
+```py
+history = compile_and_fit(multi_step_dense, conv_window)
+
+IPython.display.clear_output()
+val_performance['Multi step dense'] = multi_step_dense.evaluate(conv_window.val)
+performance['Multi step dense'] = multi_step_dense.evaluate(conv_window.test, verbose=0)
+```
+
+```py
+438/438 [==============================] - 1s 2ms/step - loss: 0.0078 - mean_absolute_error: 0.0637
+
+```
+
+```py
+conv_window.plot(multi_step_dense)
+```
+
+![png](img/dcf39b63e54ff302e7e37e39b90facc7.png)
+
+The main downside of this approach is that the resulting model can only be executed on input windows of exactly this shape: the `Flatten` layer bakes `time*features` into the size of the first `Dense` kernel, so a window with a different width changes that flattened size and the matrix multiplication fails, as the next cell demonstrates.
+
+```py
+print('Input shape:', wide_window.example[0].shape)
+try:
+  print('Output shape:', multi_step_dense(wide_window.example[0]).shape)
+except Exception as e:
+  print(f'\n{type(e).__name__}:{e}')
+```
+
+```py
+Input shape: (32, 24, 19)
+
+InvalidArgumentError:Matrix size-incompatible: In[0]: [32,456], In[1]: [57,32] [Op:MatMul]
+
+```
+
+The convolutional models in the next section fix this problem.
+
+### Convolutional neural network
+
+A convolution layer ([`layers.Conv1D`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Conv1D)) also takes multiple time steps as input to each prediction.
+
+Below is the **same** model as `multi_step_dense`, re-written with a convolution.
+
+Note the changes:
+
+* The [`layers.Flatten`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Flatten) and the first [`layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense) are replaced by a [`layers.Conv1D`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Conv1D).
+* The [`layers.Reshape`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Reshape) is no longer necessary since the convolution keeps the time axis in its output.
+
+```py
+conv_model = tf.keras.Sequential([
+    tf.keras.layers.Conv1D(filters=32,
+                           kernel_size=(CONV_WIDTH,),
+                           activation='relu'),
+    tf.keras.layers.Dense(units=32, activation='relu'),
+    tf.keras.layers.Dense(units=1),
+])
+```
+
+Run it on an example batch to see that the model produces outputs with the expected shape:
+
+```py
+print("Conv model on `conv_window`")
+print('Input shape:', conv_window.example[0].shape)
+print('Output shape:', conv_model(conv_window.example[0]).shape)
+```
+
+```py
+Conv model on `conv_window`
+Input shape: (32, 3, 19)
+Output shape: (32, 1, 1)
+
+```
+
+Train and evaluate it on the `conv_window`; it should give performance similar to the `multi_step_dense` model.
+
+```py
+history = compile_and_fit(conv_model, conv_window)
+
+IPython.display.clear_output()
+val_performance['Conv'] = conv_model.evaluate(conv_window.val)
+performance['Conv'] = conv_model.evaluate(conv_window.test, verbose=0)
+```
+
+```py
+438/438 [==============================] - 1s 3ms/step - loss: 0.0063 - mean_absolute_error: 0.0556
+
+```
+
+The difference between this `conv_model` and the `multi_step_dense` model is that the `conv_model` can be run on inputs of any length. The convolutional layer is applied to a sliding window of inputs:
+
+![Executing a convolutional model on a sequence](img/4ae87c070d1160e16944305509636b57.png)
+
+If you run it on wider input, it produces wider output:
+
+```py
+print("Wide window")
+print('Input shape:', wide_window.example[0].shape)
+print('Labels shape:', wide_window.example[1].shape)
+print('Output shape:', conv_model(wide_window.example[0]).shape)
+```
+
+```py
+Wide window
+Input shape: (32, 24, 19)
+Labels shape: (32, 24, 1)
+Output shape: (32, 22, 1)
+
+```
+
+Note that the output is shorter than the input. To make training or plotting work, you need the labels and predictions to have the same length, as the quick check below illustrates.
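+
+For a `Conv1D` with no padding (the default `padding='valid'`), each output step consumes `CONV_WIDTH` input steps, so the output is always `CONV_WIDTH - 1` steps shorter than the input. The arithmetic (illustrative only) is:
+
+```py
+# 'valid' convolution: output_width = input_width - (kernel_size - 1)
+print(24 - (CONV_WIDTH - 1))                        # 22, matching the shapes above
+print((24 + (CONV_WIDTH - 1)) - (CONV_WIDTH - 1))   # widen the input to get 24 back
+```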
So build a `WindowGenerator` to produce wide windows with a few extra input time steps so the label and prediction lengths match: + +```py +LABEL_WIDTH = 24 +INPUT_WIDTH = LABEL_WIDTH + (CONV_WIDTH - 1) +wide_conv_window = WindowGenerator( + input_width=INPUT_WIDTH, + label_width=LABEL_WIDTH, + shift=1, + label_columns=['T (degC)']) + +wide_conv_window +``` + +```py +Total window size: 27 +Input indices: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 + 24 25] +Label indices: [ 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26] +Label column name(s): ['T (degC)'] + +``` + +```py +print("Wide conv window") +print('Input shape:', wide_conv_window.example[0].shape) +print('Labels shape:', wide_conv_window.example[1].shape) +print('Output shape:', conv_model(wide_conv_window.example[0]).shape) +``` + +```py +Wide conv window +Input shape: (32, 26, 19) +Labels shape: (32, 24, 1) +Output shape: (32, 24, 1) + +``` + +Now you can plot the model's predictions on a wider window. Note the 3 input time steps before the first prediction. Every prediction here is based on the 3 preceding timesteps: + +```py +wide_conv_window.plot(conv_model) +``` + +![png](img/7eb3066f3b0de91799a8bcae1606a337.png) + +### Recurrent neural network + +A Recurrent Neural Network (RNN) is a type of neural network well-suited to time series data. RNNs process a time series step-by-step, maintaining an internal state from time-step to time-step. + +For more details, read the [text generation tutorial](https://tensorflow.google.cn/tutorials/text/text_generation) or the [RNN guide](https://tensorflow.google.cn/guide/keras/rnn). + +In this tutorial, you will use an RNN layer called Long Short Term Memory ([LSTM](https://tensorflow.google.cn/versions/r2.0/api_docs/python/tf/keras/layers/LSTM)). + +An important constructor argument for all keras RNN layers is the `return_sequences` argument. This setting can configure the layer in one of two ways. + +1. If `False`, the default, the layer only returns the output of the final timestep, giving the model time to warm up its internal state before making a single prediction: + +![An lstm warming up and making a single prediction](img/e0df7b7876498420dcf8663d9c91a023.png) + +1. If `True` the layer returns an output for each input. This is useful for: + * Stacking RNN layers. + * Training a model on multiple timesteps simultaneously. + +![An lstm making a prediction after every timestep](img/45d997f6558f9b2c948978653a5112f5.png) + +```py +lstm_model = tf.keras.models.Sequential([ + # Shape [batch, time, features] => [batch, time, lstm_units] + tf.keras.layers.LSTM(32, return_sequences=True), + # Shape => [batch, time, features] + tf.keras.layers.Dense(units=1) +]) +``` + +With `return_sequences=True` the model can be trained on 24h of data at a time. + +**Note:** This will give a pessimistic view of the model's performance. On the first timestep the model has no access to previous steps, and so can't do any better than the simple `linear` and `dense` models shown earlier. 
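+
+If the effect of `return_sequences` is still unclear, a quick standalone shape check makes it concrete (a sketch on dummy data, not part of the original tutorial):
+
+```py
+# With return_sequences=True the LSTM emits one output per input time step;
+# with the default False it emits only the final step's output.
+demo = tf.zeros([32, 24, 19])  # (batch, time, features)
+print(tf.keras.layers.LSTM(16, return_sequences=True)(demo).shape)   # (32, 24, 16)
+print(tf.keras.layers.LSTM(16, return_sequences=False)(demo).shape)  # (32, 16)
+```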
+
+```py
+print('Input shape:', wide_window.example[0].shape)
+print('Output shape:', lstm_model(wide_window.example[0]).shape)
+```
+
+```py
+Input shape: (32, 24, 19)
+Output shape: (32, 24, 1)
+
+```
+
+```py
+history = compile_and_fit(lstm_model, wide_window)
+
+IPython.display.clear_output()
+val_performance['LSTM'] = lstm_model.evaluate(wide_window.val)
+performance['LSTM'] = lstm_model.evaluate(wide_window.test, verbose=0)
+```
+
+```py
+438/438 [==============================] - 1s 3ms/step - loss: 0.0057 - mean_absolute_error: 0.0523
+
+```
+
+```py
+wide_window.plot(lstm_model)
+```
+
+![png](img/fa2d5bf8aeffd46cc7559d7104f99ba2.png)
+
+### Performance
+
+On this dataset, each of the models typically does slightly better than the one before it.
+
+```py
+x = np.arange(len(performance))
+width = 0.3
+metric_name = 'mean_absolute_error'
+metric_index = lstm_model.metrics_names.index(metric_name)
+val_mae = [v[metric_index] for v in val_performance.values()]
+test_mae = [v[metric_index] for v in performance.values()]
+
+plt.ylabel('mean_absolute_error [T (degC), normalized]')
+plt.bar(x - 0.17, val_mae, width, label='Validation')
+plt.bar(x + 0.17, test_mae, width, label='Test')
+plt.xticks(ticks=x, labels=performance.keys(),
+           rotation=45)
+_ = plt.legend()
+```
+
+![png](img/8f2ea45ce97f58e2339af71a7d349532.png)
+
+```py
+for name, value in performance.items():
+  print(f'{name:12s}: {value[1]:0.4f}')
+```
+
+```py
+Baseline    : 0.0852
+Linear      : 0.0694
+Dense       : 0.0566
+Multi step dense: 0.0667
+Conv        : 0.0572
+LSTM        : 0.0528
+
+```
+
+### Multi-output models
+
+The models so far all predicted a single output feature, `T (degC)`, for a single time step.
+
+All of these models can be converted to predict multiple features just by changing the number of units in the output layer and adjusting the training windows to include all features in the `labels`.
+
+```py
+single_step_window = WindowGenerator(
+    # `WindowGenerator` returns all features as labels if you
+    # don't set the `label_columns` argument.
+    input_width=1, label_width=1, shift=1)
+
+wide_window = WindowGenerator(
+    input_width=24, label_width=24, shift=1)
+
+for example_inputs, example_labels in wide_window.train.take(1):
+  print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
+  print(f'Labels shape (batch, time, features): {example_labels.shape}')
+```
+
+```py
+Inputs shape (batch, time, features): (32, 24, 19)
+Labels shape (batch, time, features): (32, 24, 19)
+
+```
+
+Note above that the `features` axis of the labels now has the same depth as the inputs, instead of 1.
+
+#### Baseline
+
+The same baseline model can be used here, but this time repeating all features instead of selecting a specific `label_index`.
+
+```py
+baseline = Baseline()
+baseline.compile(loss=tf.losses.MeanSquaredError(),
+                 metrics=[tf.metrics.MeanAbsoluteError()])
+```
+
+```py
+val_performance = {}
+performance = {}
+val_performance['Baseline'] = baseline.evaluate(wide_window.val)
+performance['Baseline'] = baseline.evaluate(wide_window.test, verbose=0)
+```
+
+```py
+438/438 [==============================] - 1s 2ms/step - loss: 0.0886 - mean_absolute_error: 0.1589
+
+```
+
+#### Dense
+
+```py
+dense = tf.keras.Sequential([
+    tf.keras.layers.Dense(units=64, activation='relu'),
+    tf.keras.layers.Dense(units=64, activation='relu'),
+    tf.keras.layers.Dense(units=num_features)
+])
+```
+
+```py
+history = compile_and_fit(dense, single_step_window)
+
+IPython.display.clear_output()
+val_performance['Dense'] = dense.evaluate(single_step_window.val)
+performance['Dense'] = dense.evaluate(single_step_window.test, verbose=0)
+```
+
+```py
+439/439 [==============================] - 1s 3ms/step - loss: 0.0706 - mean_absolute_error: 0.1362
+
+```
+
+#### RNN
+
+```py
+%%time
+wide_window = WindowGenerator(
+    input_width=24, label_width=24, shift=1)
+
+lstm_model = tf.keras.models.Sequential([
+    # Shape [batch, time, features] => [batch, time, lstm_units]
+    tf.keras.layers.LSTM(32, return_sequences=True),
+    # Shape => [batch, time, features]
+    tf.keras.layers.Dense(units=num_features)
+])
+
+history = compile_and_fit(lstm_model, wide_window)
+
+IPython.display.clear_output()
+val_performance['LSTM'] = lstm_model.evaluate(wide_window.val)
+performance['LSTM'] = lstm_model.evaluate(wide_window.test, verbose=0)
+
+print()
+```
+
+```py
+438/438 [==============================] - 1s 3ms/step - loss: 0.0613 - mean_absolute_error: 0.1192
+
+CPU times: user 6min 18s, sys: 1min 36s, total: 7min 55s
+Wall time: 2min 53s
+
+```
+
+#### Advanced: Residual connections
+
+The `Baseline` model from earlier took advantage of the fact that the sequence doesn't change drastically from time step to time step. Every model trained in this tutorial so far was randomly initialized, and then had to learn that the output is a small change from the previous time step.
+
+While you can get around this issue with careful initialization, it's simpler to build this into the model structure.
+
+It's common in time series analysis to build models that, instead of predicting the next value, predict how the value will change in the next time step. Similarly, "residual networks" or "ResNets" in deep learning refer to architectures where each layer adds to the model's accumulating result.
+
+That is how you take advantage of the knowledge that the change should be small.
+
+![A model with a residual connection](img/918c568bb9e84acfad1ad27dbca52256.png)
+
+Essentially, this initializes the model to match the `Baseline`. For this task it helps models converge faster, with slightly better performance.
+
+This approach can be used in conjunction with any model discussed in this tutorial.
+
+Here it is being applied to the LSTM model. Note the use of [`tf.initializers.zeros`](https://tensorflow.google.cn/api_docs/python/tf/keras/initializers/Zeros) to ensure that the initial predicted changes are small and don't overpower the residual connection. There are no symmetry-breaking concerns for the gradients here, since the `zeros` are only used on the last layer.
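+
+In equation form (our notation), the wrapper below computes
+
+$$\hat{x}_{t+1} = x_t + \Delta(x_{\le t})$$
+
+where $\Delta$ is the wrapped model, and the zero-initialized output layer makes $\Delta \approx 0$ at the start of training.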
+ +```py +class ResidualWrapper(tf.keras.Model): + def __init__(self, model): + super().__init__() + self.model = model + + def call(self, inputs, *args, **kwargs): + delta = self.model(inputs, *args, **kwargs) + + # The prediction for each timestep is the input + # from the previous time step plus the delta + # calculated by the model. + return inputs + delta +``` + +```py +%%time +residual_lstm = ResidualWrapper( + tf.keras.Sequential([ + tf.keras.layers.LSTM(32, return_sequences=True), + tf.keras.layers.Dense( + num_features, + # The predicted deltas should start small + # So initialize the output layer with zeros + kernel_initializer=tf.initializers.zeros) +])) + +history = compile_and_fit(residual_lstm, wide_window) + +IPython.display.clear_output() +val_performance['Residual LSTM'] = residual_lstm.evaluate(wide_window.val) +performance['Residual LSTM'] = residual_lstm.evaluate(wide_window.test, verbose=0) +print() +``` + +```py +438/438 [==============================] - 1s 3ms/step - loss: 0.0619 - mean_absolute_error: 0.1181 + +CPU times: user 2min 2s, sys: 31.2 s, total: 2min 33s +Wall time: 56.9 s + +``` + +#### Performance + +Here is the overall performance for these multi-output models. + +```py +x = np.arange(len(performance)) +width = 0.3 + +metric_name = 'mean_absolute_error' +metric_index = lstm_model.metrics_names.index('mean_absolute_error') +val_mae = [v[metric_index] for v in val_performance.values()] +test_mae = [v[metric_index] for v in performance.values()] + +plt.bar(x - 0.17, val_mae, width, label='Validation') +plt.bar(x + 0.17, test_mae, width, label='Test') +plt.xticks(ticks=x, labels=performance.keys(), + rotation=45) +plt.ylabel('MAE (average over all outputs)') +_ = plt.legend() +``` + +![png](img/707c82ef753d85482b462054a3e19161.png) + +```py +for name, value in performance.items(): + print(f'{name:15s}: {value[1]:0.4f}') +``` + +```py +Baseline : 0.1638 +Dense : 0.1367 +LSTM : 0.1208 +Residual LSTM : 0.1197 + +``` + +The above performances are averaged across all model outputs. + +## Multi-step models + +Both the single-output and multiple-output models in the previous sections made **single time step predictions**, 1h into the future. + +This section looks at how to expand these models to make **multiple time step predictions**. + +In a multi-step prediction, the model needs to learn to predict a range of future values. Thus, unlike a single step model, where only a single future point is predicted, a multi-step model predicts a sequence of the future values. + +There are two rough approaches to this: + +1. Single shot predictions where the entire time series is predicted at once. +2. Autoregressive predictions where the model only makes single step predictions and its output is fed back as its input. + +In this section all the models will predict **all the features across all output time steps**. + +For the multi-step model, the training data again consists of hourly samples. However, here, the models will learn to predict 24h of the future, given 24h of the past. 
+
+Here is a `Window` object that generates these slices from the dataset:
+
+```py
+OUT_STEPS = 24
+multi_window = WindowGenerator(input_width=24,
+                               label_width=OUT_STEPS,
+                               shift=OUT_STEPS)
+
+multi_window.plot()
+multi_window
+```
+
+```py
+Total window size: 48
+Input indices: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
+Label indices: [24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47]
+Label column name(s): None
+
+```
+
+![png](img/4e0ba8d4a487b7517b7dbff7e19c4d52.png)
+
+### Baselines
+
+A simple baseline for this task is to repeat the last input time step for the required number of output timesteps:
+
+![Repeat the last input, for each output step](img/ca5c6f8a5c7444a19483d53990e42c33.png)
+
+```py
+class MultiStepLastBaseline(tf.keras.Model):
+  def call(self, inputs):
+    return tf.tile(inputs[:, -1:, :], [1, OUT_STEPS, 1])
+
+last_baseline = MultiStepLastBaseline()
+last_baseline.compile(loss=tf.losses.MeanSquaredError(),
+                      metrics=[tf.metrics.MeanAbsoluteError()])
+
+multi_val_performance = {}
+multi_performance = {}
+
+multi_val_performance['Last'] = last_baseline.evaluate(multi_window.val)
+multi_performance['Last'] = last_baseline.evaluate(multi_window.test, verbose=0)
+multi_window.plot(last_baseline)
+```
+
+```py
+437/437 [==============================] - 1s 2ms/step - loss: 0.6285 - mean_absolute_error: 0.5007
+
+```
+
+![png](img/a71722493bd8850dfcd7c1ec0e69918f.png)
+
+Since this task is to predict 24h given 24h, another simple approach is to repeat the previous day, assuming tomorrow will be similar:
+
+![Repeat the previous day](img/5edc6f55df9224577119e687a2c97282.png)
+
+```py
+class RepeatBaseline(tf.keras.Model):
+  def call(self, inputs):
+    return inputs
+
+repeat_baseline = RepeatBaseline()
+repeat_baseline.compile(loss=tf.losses.MeanSquaredError(),
+                        metrics=[tf.metrics.MeanAbsoluteError()])
+
+multi_val_performance['Repeat'] = repeat_baseline.evaluate(multi_window.val)
+multi_performance['Repeat'] = repeat_baseline.evaluate(multi_window.test, verbose=0)
+multi_window.plot(repeat_baseline)
+```
+
+```py
+437/437 [==============================] - 1s 2ms/step - loss: 0.4270 - mean_absolute_error: 0.3959
+
+```
+
+![png](img/ae14079092bef088b0ecf83c8f429ef2.png)
+
+### Single-shot models
+
+One high-level approach to this problem is to use a "single-shot" model, where the model makes the entire sequence prediction in a single step.
+
+This can be implemented efficiently as a [`layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense) with `OUT_STEPS*features` output units. The model just needs to reshape that output to the required `(OUT_STEPS, features)`.
+
+#### Linear
+
+A simple linear model based on the last input time step does better than either baseline, but is underpowered. The model needs to predict `OUT_STEPS` time steps from a single input time step with a linear projection. It can only capture a low-dimensional slice of the behavior, likely based mainly on the time of day and time of year.
+
+![Predict all timesteps from the last time-step](img/74469ffcda8aa53d4c29cfcfda5831ad.png)
+
+```py
+multi_linear_model = tf.keras.Sequential([
+    # Take the last time-step.
+ # Shape [batch, time, features] => [batch, 1, features] + tf.keras.layers.Lambda(lambda x: x[:, -1:, :]), + # Shape => [batch, 1, out_steps*features] + tf.keras.layers.Dense(OUT_STEPS*num_features, + kernel_initializer=tf.initializers.zeros), + # Shape => [batch, out_steps, features] + tf.keras.layers.Reshape([OUT_STEPS, num_features]) +]) + +history = compile_and_fit(multi_linear_model, multi_window) + +IPython.display.clear_output() +multi_val_performance['Linear'] = multi_linear_model.evaluate(multi_window.val) +multi_performance['Linear'] = multi_linear_model.evaluate(multi_window.test, verbose=0) +multi_window.plot(multi_linear_model) +``` + +```py +437/437 [==============================] - 1s 2ms/step - loss: 0.2556 - mean_absolute_error: 0.3050 + +``` + +![png](img/c9a00de59da2a696666612d0bd08c7a7.png) + +#### Dense + +Adding a [`layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense) between the input and output gives the linear model more power, but is still only based on a single input timestep. + +```py +multi_dense_model = tf.keras.Sequential([ + # Take the last time step. + # Shape [batch, time, features] => [batch, 1, features] + tf.keras.layers.Lambda(lambda x: x[:, -1:, :]), + # Shape => [batch, 1, dense_units] + tf.keras.layers.Dense(512, activation='relu'), + # Shape => [batch, out_steps*features] + tf.keras.layers.Dense(OUT_STEPS*num_features, + kernel_initializer=tf.initializers.zeros), + # Shape => [batch, out_steps, features] + tf.keras.layers.Reshape([OUT_STEPS, num_features]) +]) + +history = compile_and_fit(multi_dense_model, multi_window) + +IPython.display.clear_output() +multi_val_performance['Dense'] = multi_dense_model.evaluate(multi_window.val) +multi_performance['Dense'] = multi_dense_model.evaluate(multi_window.test, verbose=0) +multi_window.plot(multi_dense_model) +``` + +```py +437/437 [==============================] - 1s 3ms/step - loss: 0.2192 - mean_absolute_error: 0.2807 + +``` + +![png](img/a3f9320fc0125d442f2b7412c2492197.png) + +#### CNN + +A convolutional model makes predictions based on a fixed-width history, which may lead to better performance than the dense model since it can see how things are changing over time: + +![A convolutional model sees how things change over time](img/6b1df74992f4b9253cc3e928d73cd3a8.png) + +```py +CONV_WIDTH = 3 +multi_conv_model = tf.keras.Sequential([ + # Shape [batch, time, features] => [batch, CONV_WIDTH, features] + tf.keras.layers.Lambda(lambda x: x[:, -CONV_WIDTH:, :]), + # Shape => [batch, 1, conv_units] + tf.keras.layers.Conv1D(256, activation='relu', kernel_size=(CONV_WIDTH)), + # Shape => [batch, 1, out_steps*features] + tf.keras.layers.Dense(OUT_STEPS*num_features, + kernel_initializer=tf.initializers.zeros), + # Shape => [batch, out_steps, features] + tf.keras.layers.Reshape([OUT_STEPS, num_features]) +]) + +history = compile_and_fit(multi_conv_model, multi_window) + +IPython.display.clear_output() + +multi_val_performance['Conv'] = multi_conv_model.evaluate(multi_window.val) +multi_performance['Conv'] = multi_conv_model.evaluate(multi_window.test, verbose=0) +multi_window.plot(multi_conv_model) +``` + +```py +437/437 [==============================] - 1s 3ms/step - loss: 0.2142 - mean_absolute_error: 0.2805 + +``` + +![png](img/4d094983d2f7b41e395fe05b64ee1a36.png) + +#### RNN + +A recurrent model can learn to use a long history of inputs, if it's relevant to the predictions the model is making. 
Here the model will accumulate internal state for 24h before making a single prediction for the next 24h.
+
+In this single-shot format, the LSTM only needs to produce an output at the last time step, so set `return_sequences=False`.
+
+![The lstm accumulates state over the input window, and makes a single prediction for the next 24h](img/de0d6e38c509169ec5a4edafdcb08e55.png)
+
+```py
+multi_lstm_model = tf.keras.Sequential([
+    # Shape [batch, time, features] => [batch, lstm_units]
+    # Adding more `lstm_units` just overfits more quickly.
+    tf.keras.layers.LSTM(32, return_sequences=False),
+    # Shape => [batch, out_steps*features]
+    tf.keras.layers.Dense(OUT_STEPS*num_features,
+                          kernel_initializer=tf.initializers.zeros),
+    # Shape => [batch, out_steps, features]
+    tf.keras.layers.Reshape([OUT_STEPS, num_features])
+])
+
+history = compile_and_fit(multi_lstm_model, multi_window)
+
+IPython.display.clear_output()
+
+multi_val_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.val)
+multi_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.test, verbose=0)
+multi_window.plot(multi_lstm_model)
+```
+
+```py
+437/437 [==============================] - 1s 3ms/step - loss: 0.2146 - mean_absolute_error: 0.2829
+
+```
+
+![png](img/9534e12498035f0762e0c85eee6f02ca.png)
+
+### Advanced: Autoregressive model
+
+The above models all predict the entire output sequence in a single step.
+
+In some cases it may be helpful for the model to decompose this prediction into individual time steps. Then each model's output can be fed back into itself at each step, and predictions can be made conditioned on the previous one, as in the classic [Generating Sequences With Recurrent Neural Networks](https://arxiv.org/abs/1308.0850).
+
+One clear advantage of this style of model is that it can be set up to produce output with a varying length.
+
+You could take any of the single-step multi-output models trained in the first half of this tutorial and run them in an autoregressive feedback loop, but here you'll focus on building a model that's been explicitly trained to do that.
+
+![Feedback a model's output to its input](img/55e97ec60fa87503352f7debd65515af.png)
+
+#### RNN
+
+This tutorial only builds an autoregressive RNN model, but this pattern could be applied to any model designed to output a single timestep.
+
+The model will have the same basic form as the single-step `LSTM` models: an `LSTM` followed by a [`layers.Dense`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Dense) that converts the `LSTM` outputs to model predictions.
+
+A [`layers.LSTM`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/LSTM) is a [`layers.LSTMCell`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/LSTMCell) wrapped in the higher-level [`layers.RNN`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/RNN) that manages the state and sequence results for you (see [Keras RNNs](https://tensorflow.google.cn/guide/keras/rnn) for details).
+
+In this case the model has to manually manage the inputs for each step, so it uses [`layers.LSTMCell`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/LSTMCell) directly for the lower-level, single-time-step interface.
+
+```py
+class FeedBack(tf.keras.Model):
+  def __init__(self, units, out_steps):
+    super().__init__()
+    self.out_steps = out_steps
+    self.units = units
+    self.lstm_cell = tf.keras.layers.LSTMCell(units)
+    # Also wrap the LSTMCell in an RNN to simplify the `warmup` method.
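+    # `return_state=True` makes the RNN return the final cell state(s)
+    # along with its output, which `warmup` unpacks below.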
+    self.lstm_rnn = tf.keras.layers.RNN(self.lstm_cell, return_state=True)
+    self.dense = tf.keras.layers.Dense(num_features)
+```
+
+```py
+feedback_model = FeedBack(units=32, out_steps=OUT_STEPS)
+```
+
+The first method this model needs is a `warmup` method to initialize its internal state based on the inputs. Once trained, this state will capture the relevant parts of the input history. This is equivalent to the single-step `LSTM` model from earlier:
+
+```py
+def warmup(self, inputs):
+  # inputs.shape => (batch, time, features)
+  # x.shape => (batch, lstm_units)
+  x, *state = self.lstm_rnn(inputs)
+
+  # predictions.shape => (batch, features)
+  prediction = self.dense(x)
+  return prediction, state
+
+FeedBack.warmup = warmup
+```
+
+This method returns a single time-step prediction and the internal state of the LSTM:
+
+```py
+prediction, state = feedback_model.warmup(multi_window.example[0])
+prediction.shape
+```
+
+```py
+TensorShape([32, 19])
+
+```
+
+With the `RNN`'s state and an initial prediction, you can now continue iterating the model, feeding the prediction at each step back in as the input.
+
+The simplest approach to collecting the output predictions is to use a python list and [`tf.stack`](https://tensorflow.google.cn/api_docs/python/tf/stack) after the loop.
+
+**Note:** Stacking a python list like this only works with eager execution, using [`Model.compile(..., run_eagerly=True)`](https://tensorflow.google.cn/api_docs/python/tf/keras/Model#compile) for training, or with a fixed-length output. For a dynamic output length you would need to use a [`tf.TensorArray`](https://tensorflow.google.cn/api_docs/python/tf/TensorArray) instead of a python list, and [`tf.range`](https://tensorflow.google.cn/api_docs/python/tf/range) instead of the python `range`.
+
+```py
+def call(self, inputs, training=None):
+  # Use a python list to capture the unrolled outputs (see the note above).
+  predictions = []
+  # Initialize the lstm state.
+  prediction, state = self.warmup(inputs)
+
+  # Insert the first prediction.
+  predictions.append(prediction)
+
+  # Run the rest of the prediction steps.
+  for n in range(1, self.out_steps):
+    # Use the last prediction as input.
+    x = prediction
+    # Execute one lstm step.
+    x, state = self.lstm_cell(x, states=state,
+                              training=training)
+    # Convert the lstm output to a prediction.
+    prediction = self.dense(x)
+    # Add the prediction to the output.
+    predictions.append(prediction)
+
+  # predictions.shape => (time, batch, features)
+  predictions = tf.stack(predictions)
+  # predictions.shape => (batch, time, features)
+  predictions = tf.transpose(predictions, [1, 0, 2])
+  return predictions
+
+FeedBack.call = call
+```
+
+Test run this model on the example inputs:
+
+```py
+print('Output shape (batch, time, features): ', feedback_model(multi_window.example[0]).shape)
+```
+
+```py
+Output shape (batch, time, features):  (32, 24, 19)
+
+```
+
+Now train the model:
+
+```py
+history = compile_and_fit(feedback_model, multi_window)
+
+IPython.display.clear_output()
+
+multi_val_performance['AR LSTM'] = feedback_model.evaluate(multi_window.val)
+multi_performance['AR LSTM'] = feedback_model.evaluate(multi_window.test, verbose=0)
+multi_window.plot(feedback_model)
+```
+
+```py
+437/437 [==============================] - 3s 8ms/step - loss: 0.2352 - mean_absolute_error: 0.3116
+
+```
+
+![png](img/28877f41199c01fa4ec5041f5b1fac9f.png)
+
+### Performance
+
+There are clearly diminishing returns as a function of model complexity on this problem.
+
+```py
+x = np.arange(len(multi_performance))
+width = 0.3
+
+metric_name = 'mean_absolute_error'
+metric_index = lstm_model.metrics_names.index(metric_name)
+val_mae = [v[metric_index] for v in multi_val_performance.values()]
+test_mae = [v[metric_index] for v in multi_performance.values()]
+
+plt.bar(x - 0.17, val_mae, width, label='Validation')
+plt.bar(x + 0.17, test_mae, width, label='Test')
+plt.xticks(ticks=x, labels=multi_performance.keys(),
+           rotation=45)
+plt.ylabel('MAE (average over all times and outputs)')
+_ = plt.legend()
+```
+
+![png](img/fa351313bf1ddb033fe47d07a518db6c.png)
+
+The metrics for the multi-output models in the first half of this tutorial show the performance averaged across all output features. These performances are similar, but here they are also averaged across output timesteps.
+
+```py
+for name, value in multi_performance.items():
+  print(f'{name:8s}: {value[1]:0.4f}')
+```
+
+```py
+Last    : 0.5157
+Repeat  : 0.3774
+Linear  : 0.2980
+Dense   : 0.2754
+Conv    : 0.2724
+LSTM    : 0.2770
+AR LSTM : 0.3026
+
+```
+
+The gains achieved going from a dense model to convolutional and recurrent models are only a few percent (if any), and the autoregressive model performed clearly worse. So these more complex approaches may not be worthwhile on **this** problem, but there was no way to know without trying, and these models could be helpful for **your** problem.
+
+## Next steps
+
+This tutorial was a quick introduction to time series forecasting using TensorFlow.
+
+* For further understanding, see:
+    * Chapter 15 of [Hands-on Machine Learning with Scikit-Learn, Keras, and TensorFlow](https://www.oreilly.com/library/view/hands-on-machine-learning/9781492032632/), 2nd Edition.
+    * Chapter 6 of [Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python).
+    * Lesson 8 of [Udacity's intro to TensorFlow for deep learning](https://www.udacity.com/course/intro-to-tensorflow-for-deep-learning--ud187), and the [exercise notebooks](https://github.com/tensorflow/examples/tree/master/courses/udacity_intro_to_tensorflow_for_deep_learning).
+* Also remember that you can implement any [classical time series model](https://otexts.com/fpp2/index.html) in TensorFlow; this tutorial just focuses on TensorFlow's built-in functionality.
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/059.md b/Tensorflow/TensorFlow2.0/059.md
new file mode 100644
index 00000000..c10a0c07
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/059.md
@@ -0,0 +1 @@
+# Generative
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/060.md b/Tensorflow/TensorFlow2.0/060.md
new file mode 100644
index 00000000..9b48e8eb
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/060.md
@@ -0,0 +1,689 @@
+# Neural style transfer
+
+> Original: [https://tensorflow.google.cn/tutorials/generative/style_transfer](https://tensorflow.google.cn/tutorials/generative/style_transfer)
+
+**Note:** Our TensorFlow community has translated these documents. Because community translations are best-effort, there is no guarantee that they are accurate and reflect the latest [official English documentation](https://tensorflow.google.cn/?hl=en). If you have suggestions to improve this translation, please submit a pull request to the [tensorflow/docs](https://github.com/tensorflow/docs) GitHub repository. To volunteer to write or review translations, join the [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn).
+
+This tutorial uses deep learning to compose one image in the style of another image (ever wish you could paint like Picasso or Van Gogh?). This is known as *neural style transfer*, and the technique is outlined in [A Neural Algorithm of Artistic Style](https://arxiv.org/abs/1508.06576) (Gatys et al.).
+
+**Note:** This tutorial demonstrates the original style-transfer algorithm, which optimizes the image content to a particular style. Modern approaches train a model to generate the stylized image directly (similar to [cyclegan](/tutorials/generative/cyclegan)); that approach is much faster (up to 1000x). A pretrained [Arbitrary Image Stylization module](https://colab.sandbox.google.com/github/tensorflow/hub/blob/master/examples/colab/tf2_arbitrary_image_stylization.ipynb) is available in [TensorFlow Hub](https://tensorflow.google.cn/hub) and [TensorFlow Lite](https://tensorflow.google.cn/lite/models/style_transfer/overview).
+
+Neural style transfer is an optimization technique used to take two images, a *content* image and a *style reference* image (such as an artwork by a famous painter), and blend them together so that the output image looks like the content image, but "painted" in the style of the style reference image.
+
+This is implemented by optimizing the output image to match the content statistics of the content image and the style statistics of the style reference image. These statistics are extracted from the images using a convolutional network.
+
+For example, let's take this photo of a dog and Wassily Kandinsky's Composition 7:
+
+![](img/8d456c03cff000c86147a07dbbcb6f32.png)
+
+[Yellow Labrador Looking](https://commons.wikimedia.org/wiki/File:YellowLabradorLooking_new.jpg), from Wikimedia Commons
+
+![](img/35253af9a3f5a4e0035787fd80b11ca3.png)
+
+Now what would it look like if Kandinsky decided to paint this dog exclusively in that style? Something like this?
+
+![](img/40793e753f5cc525c8f3c9cd20d1085c.png)
+
+## Setup
+
+### Import and configure modules
+
+```py
+import tensorflow as tf
+```
+
+```py
+import IPython.display as display
+
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+mpl.rcParams['figure.figsize'] = (12,12)
+mpl.rcParams['axes.grid'] = False
+
+import numpy as np
+import PIL.Image
+import time
+import functools
+```
+
+```py
+def tensor_to_image(tensor):
+  tensor = tensor*255
+  tensor = np.array(tensor, dtype=np.uint8)
+  if np.ndim(tensor)>3:
+    assert tensor.shape[0] == 1
+    tensor = tensor[0]
+  return PIL.Image.fromarray(tensor)
+```
+
+Download the images and choose a style image and a content image:
+
+```py
+content_path = tf.keras.utils.get_file('YellowLabradorLooking_new.jpg', 'https://storage.googleapis.com/download.tensorflow.org/example_images/YellowLabradorLooking_new.jpg')
+
+# https://commons.wikimedia.org/wiki/File:Vassily_Kandinsky,_1913_-_Composition_7.jpg
+style_path = tf.keras.utils.get_file('kandinsky5.jpg','https://storage.googleapis.com/download.tensorflow.org/example_images/Vassily_Kandinsky%2C_1913_-_Composition_7.jpg')
+```
+
+```py
+Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/YellowLabradorLooking_new.jpg
+90112/83281 [================================] - 0s 0us/step
+Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/Vassily_Kandinsky%2C_1913_-_Composition_7.jpg
+196608/195196 [==============================] - 0s 0us/step
+
+```
+
+## Visualize the input
+
+Define a function to load an image and limit its maximum dimension to 512 pixels.
+
+```py
+def load_img(path_to_img):
+  max_dim = 512
+  img = tf.io.read_file(path_to_img)
+  img = tf.image.decode_image(img, channels=3)
+  img = tf.image.convert_image_dtype(img, tf.float32)
+
+  shape = tf.cast(tf.shape(img)[:-1], tf.float32)
+  long_dim = max(shape)
+  scale = max_dim / long_dim
+
+  new_shape = tf.cast(shape * scale, tf.int32)
+
+  img = tf.image.resize(img, new_shape)
+  img = img[tf.newaxis, :]
+  return img
+```
+
+Create a simple function to display an image:
+
+```py
+def imshow(image, title=None):
+  if len(image.shape) > 3:
+    image = tf.squeeze(image, axis=0)
+
+  plt.imshow(image)
+  if title:
+    plt.title(title)
+```
+
+```py
+content_image = load_img(content_path)
+style_image = load_img(style_path)
+
+plt.subplot(1, 2, 1)
+imshow(content_image, 'Content Image')
+
+plt.subplot(1, 2, 2)
+imshow(style_image, 'Style Image')
+```
+
+![png](img/d85fdaff014f0211e5ef646977087e50.png)
+
+## Fast style transfer using TF-Hub
+
+This tutorial demonstrates the original style-transfer algorithm, which optimizes the image content to a particular style. Before getting into the details, let's see how the [TensorFlow Hub](https://tensorflow.google.cn/hub) module does fast style transfer:
+
+```py
+import tensorflow_hub as hub
+hub_module = hub.load('https://hub.tensorflow.google.cn/google/magenta/arbitrary-image-stylization-v1-256/1')
+stylized_image = hub_module(tf.constant(content_image), tf.constant(style_image))[0]
+tensor_to_image(stylized_image)
+```
+
+![png](img/833d9eeff633ce77dec2eb85f74e8bbb.png)
+
+## Define content and style representations
+
+Use the intermediate layers of the model to get the *content* and *style* representations of the image. Starting from the network's input layer, the first few layer activations represent low-level features like edges and textures. As you step through the network, the final few layers represent higher-level features, such as object parts like *wheels* or *eyes*. In this tutorial we use the VGG19 architecture, a pretrained image classification network. These intermediate layers are necessary to define the representation of content and style from the images. For an input image, try to match the corresponding style and content target representations at these intermediate layers.
+
+Load a [VGG19](https://keras.io/applications/#vgg19) and test-run it on our image to make sure it works correctly:
+
+```py
+x = tf.keras.applications.vgg19.preprocess_input(content_image*255)
+x = tf.image.resize(x, (224, 224))
+vgg = tf.keras.applications.VGG19(include_top=True, weights='imagenet')
+prediction_probabilities = vgg(x)
+prediction_probabilities.shape
+```
+
+```py
+TensorShape([1, 1000])
+
+```
+
+```py
+predicted_top_5 = tf.keras.applications.vgg19.decode_predictions(prediction_probabilities.numpy())[0]
+[(class_name, prob) for (number, class_name, prob) in predicted_top_5]
+```
+
+```py
+Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
+40960/35363 [==================================] - 0s 0us/step
+
+[('Labrador_retriever', 0.493171),
+ ('golden_retriever', 0.23665288),
+ ('kuvasz', 0.036357544),
+ ('Chesapeake_Bay_retriever', 0.024182763),
+ ('Greater_Swiss_Mountain_dog', 0.0186461)]
+
+```
+
+Now load a `VGG19` without the classification head, and list the layer names:
+
+```py
+vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
+
+print()
+for layer in vgg.layers:
+  print(layer.name)
+```
+
+```py
+Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
+80142336/80134624 [==============================] - 1s 0us/step
+
+input_2
+block1_conv1
+block1_conv2
+block1_pool
+block2_conv1
+block2_conv2
+block2_pool
+block3_conv1
+block3_conv2
+block3_conv3
+block3_conv4
+block3_pool
+block4_conv1
+block4_conv2
+block4_conv3
+block4_conv4
+block4_pool
+block5_conv1
+block5_conv2
+block5_conv3
+block5_conv4
+block5_pool
+
+```
+
+Choose intermediate layers from the network to represent the style and content of the image:
+
+```py
+# Content layer where we will pull our feature maps
+content_layers = ['block5_conv2']
+
+# Style layers of interest
+style_layers = ['block1_conv1',
+                'block2_conv1',
+                'block3_conv1',
+                'block4_conv1',
+                'block5_conv1']
+
+num_content_layers = len(content_layers)
+num_style_layers = len(style_layers)
+```
+
+#### Intermediate layers for style and content
+
+So why do these intermediate outputs within our pretrained image classification network allow us to define representations of style and content?
+
+At a high level, in order for a network to perform image classification (which this network has been trained to do), it must understand the image. This requires taking the raw image as input pixels and building an internal representation that converts the raw image pixels into a complex understanding of the features present within the image.
+
+This is also a reason why convolutional neural networks are able to generalize well: they are able to capture the invariances and defining features within classes (e.g. cats vs. dogs) that are agnostic to background noise and other nuisances. Thus, somewhere between where the raw image is fed into the model and the classification label is output, the model serves as a complex feature extractor. By accessing intermediate layers of the model, you're able to describe the content and style of input images.
+
+## Build the model
+
+The networks in [`tf.keras.applications`](https://tensorflow.google.cn/api_docs/python/tf/keras/applications) make it very convenient to extract the intermediate layer values using the Keras functional API.
+
+To define a model using the functional API, specify the inputs and outputs:
+
+`model = Model(inputs, outputs)`
+
+The following function builds a VGG19 model that returns a list of intermediate layer outputs:
+
+```py
+def vgg_layers(layer_names):
+  """ Creates a vgg model that returns a list of intermediate output values."""
+  # Load our model. Load a VGG pretrained on imagenet data.
+  vgg = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
+  vgg.trainable = False
+
+  outputs = [vgg.get_layer(name).output for name in layer_names]
+
+  model = tf.keras.Model([vgg.input], outputs)
+  return model
+```
+
+And to create the model:
+
+```py
+style_extractor = vgg_layers(style_layers)
+style_outputs = style_extractor(style_image*255)
+
+# Look at the statistics of each layer's output
+for name, output in zip(style_layers, style_outputs):
+  print(name)
+  print("  shape: ", output.numpy().shape)
+  print("  min: ", output.numpy().min())
+  print("  max: ", output.numpy().max())
+  print("  mean: ", output.numpy().mean())
+  print()
+```
+
+```py
+block1_conv1
+  shape:  (1, 336, 512, 64)
+  min:  0.0
+  max:  835.5256
+  mean:  33.97525
+
+block2_conv1
+  shape:  (1, 168, 256, 128)
+  min:  0.0
+  max:  4625.8857
+  mean:  199.82687
+
+block3_conv1
+  shape:  (1, 84, 128, 256)
+  min:  0.0
+  max:  8789.239
+  mean:  230.78099
+
+block4_conv1
+  shape:  (1, 42, 64, 512)
+  min:  0.0
+  max:  21566.135
+  mean:  791.24005
+
+block5_conv1
+  shape:  (1, 21, 32, 512)
+  min:  0.0
+  max:  3189.2542
+  mean:  59.179478
+
+```
+
+## Calculate style
+
+The content of an image is represented by the values of the intermediate feature maps.
+
+It turns out the style of an image can be described by the means and correlations across the different feature maps. Calculate a Gram matrix that includes this information by taking the outer product of the feature vector with itself at each location, and averaging that outer product over all locations. For a particular layer, the Gram matrix is calculated as:
+
+$$G^l_{cd} = \frac{\sum_{ij} F^l_{ijc}(x)F^l_{ijd}(x)}{IJ}$$
+
+This can be implemented concisely using the [`tf.linalg.einsum`](https://tensorflow.google.cn/api_docs/python/tf/einsum) function:
+
+```py
+def gram_matrix(input_tensor):
+  result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
+  input_shape = tf.shape(input_tensor)
+  num_locations = tf.cast(input_shape[1]*input_shape[2], tf.float32)
+  return result/(num_locations)
+```
+
+## Extract style and content
+
+Build a model that returns the style and content tensors.
+
+```py
+class StyleContentModel(tf.keras.models.Model):
+  def __init__(self, style_layers, content_layers):
+    super(StyleContentModel, self).__init__()
+    self.vgg = vgg_layers(style_layers + content_layers)
+    self.style_layers = style_layers
+    self.content_layers = content_layers
+    self.num_style_layers = len(style_layers)
+    self.vgg.trainable = False
+
+  def call(self, inputs):
+    "Expects float input in [0,1]"
+    inputs = inputs*255.0
+    preprocessed_input = tf.keras.applications.vgg19.preprocess_input(inputs)
+    outputs = self.vgg(preprocessed_input)
+    style_outputs, content_outputs = (outputs[:self.num_style_layers],
+                                      outputs[self.num_style_layers:])
+
+    style_outputs = [gram_matrix(style_output)
+                     for style_output in style_outputs]
+
+    content_dict = {content_name:value
+                    for content_name, value
+                    in zip(self.content_layers, content_outputs)}
+
+    style_dict = {style_name:value
+                  for style_name, value
+                  in zip(self.style_layers, style_outputs)}
+
+    return {'content':content_dict, 'style':style_dict}
+```
+
+When called on an image, this model returns the Gram matrices (styles) of the `style_layers` and the content of the `content_layers`:
+
+```py
+extractor = StyleContentModel(style_layers, content_layers)
+
+results = extractor(tf.constant(content_image))
+
+style_results = results['style']
+
+print('Styles:')
+for name, output in sorted(results['style'].items()):
+  print("  ", name)
+  print("    shape: ", output.numpy().shape)
+  print("    min: ", output.numpy().min())
+  print("    max: ", output.numpy().max())
+  print("    mean: ", output.numpy().mean())
+  print()
+
+print("Contents:")
+for name, output in sorted(results['content'].items()):
+  print("  ", name)
+  print("    shape: ", output.numpy().shape)
+  print("    min: ", output.numpy().min())
+  print("    max: ", output.numpy().max())
+  print("    mean: ", output.numpy().mean())
+```
+
+```py
+Styles:
+   block1_conv1
+    shape:  (1, 64, 64)
+    min:  0.0055228462
+    max:  28014.562
+    mean:  263.79025
+
+   block2_conv1
+    shape:  (1, 128, 128)
+    min:  0.0
+    max:  61479.49
+    mean:  9100.949
+
+   block3_conv1
+    shape:  (1, 256, 256)
+    min:  0.0
+    max:  545623.44
+    mean:  7660.976
+
+   block4_conv1
+    shape:  (1, 512, 512)
+    min:  0.0
+    max:  4320502.0
+    mean:  134288.84
+
+   block5_conv1
+    shape:  (1, 512, 512)
+    min:  0.0
+    max:  110005.34
+    mean:  1487.0381
+
+Contents:
+   block5_conv2
+    shape:  (1, 26, 32, 512)
+    min:  0.0
+    max:  2410.8796
+    mean:  13.764149
+
+```
+
+## Run gradient descent
+
+With this style and content extractor, you can now implement the style-transfer algorithm. Do this by calculating the mean square error for your image's outputs relative to each target, then take the weighted sum of these losses.
+
+Set your style and content target values:
+
+```py
+style_targets = extractor(style_image)['style']
+content_targets = extractor(content_image)['content']
+```
+
+Define a [`tf.Variable`](https://tensorflow.google.cn/api_docs/python/tf/Variable) to contain the image to optimize. To make this quick, initialize it with the content image (the [`tf.Variable`](https://tensorflow.google.cn/api_docs/python/tf/Variable) must be the same shape as the content image):
+
+```py
+image = tf.Variable(content_image)
+```
+
+Since this is a float image, define a function to keep the pixel values between 0 and 1:
+
+```py
+def clip_0_1(image):
+  return tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)
+```
+
+Create an optimizer. The paper recommends LBFGS, but `Adam` works fine as well:
+
+```py
+opt = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)
+```
+
+To optimize this, use a weighted combination of the two losses to get the total loss:
+
+```py
+style_weight=1e-2
+content_weight=1e4
+```
+
+```py
+def style_content_loss(outputs):
+    style_outputs = outputs['style']
+    content_outputs = outputs['content']
+    style_loss = tf.add_n([tf.reduce_mean((style_outputs[name]-style_targets[name])**2)
+                           for name in style_outputs.keys()])
+    style_loss *= style_weight / num_style_layers
+
+    content_loss = tf.add_n([tf.reduce_mean((content_outputs[name]-content_targets[name])**2)
+                             for name in content_outputs.keys()])
+    content_loss *= content_weight / num_content_layers
+    loss = style_loss + content_loss
+    return loss
+```
+
+Use [`tf.GradientTape`](https://tensorflow.google.cn/api_docs/python/tf/GradientTape) to update the image.
+
+```py
+@tf.function()
+def train_step(image):
+  with tf.GradientTape() as tape:
+    outputs = extractor(image)
+    loss = style_content_loss(outputs)
+
+  grad = tape.gradient(loss, image)
+  opt.apply_gradients([(grad, image)])
+  image.assign(clip_0_1(image))
+```
+
+Now run a few steps to test:
+
+```py
+train_step(image)
+train_step(image)
+train_step(image)
+tensor_to_image(image)
+```
+
+![png](img/643455194a29bfd2dc25c8821cbbf3b4.png)
+
+Since it's working, perform a longer optimization:
+
+```py
+import time
+start = time.time()
+
+epochs = 10
+steps_per_epoch = 100
+
+step = 0
+for n in range(epochs):
+  for m in range(steps_per_epoch):
+    step += 1
+    train_step(image)
+    print(".", end='')
+  display.clear_output(wait=True)
+  display.display(tensor_to_image(image))
+  print("Train step: {}".format(step))
+
+end = time.time()
+print("Total time: {:.1f}".format(end-start))
+```
+
+![png](img/867e80eb383cce30a1f013a43e465d02.png)
+
+```py
+Train step: 1000
+Total time: 20.4
+
+```
+
+## Total variation loss
+
+One downside of this basic implementation is that it produces a lot of high-frequency artifacts. You can decrease these directly by adding an explicit regularization term on the high-frequency components of the image. In style transfer, this is often called the *total variation loss*:
+
+```py
+def high_pass_x_y(image):
+  x_var = image[:,:,1:,:] - image[:,:,:-1,:]
+  y_var = image[:,1:,:,:] - image[:,:-1,:,:]
+
+  return x_var, y_var
+```
+
+```py
+x_deltas, y_deltas = high_pass_x_y(content_image)
+
+plt.figure(figsize=(14,10))
+plt.subplot(2,2,1)
+imshow(clip_0_1(2*y_deltas+0.5), "Horizontal Deltas: Original")
+
+plt.subplot(2,2,2)
+imshow(clip_0_1(2*x_deltas+0.5), "Vertical Deltas: Original")
+
+x_deltas, y_deltas = high_pass_x_y(image)
+
+plt.subplot(2,2,3)
+imshow(clip_0_1(2*y_deltas+0.5), "Horizontal Deltas: Styled")
+
+plt.subplot(2,2,4)
+imshow(clip_0_1(2*x_deltas+0.5), "Vertical Deltas: Styled")
+```
+
+![png](img/e3d2caa770c7f600fb5cdc2a95ad0e0a.png)
+
+This shows how the high-frequency components have increased.
+
+Also, this high-frequency component is essentially an edge detector. You can get similar output from a Sobel edge detector, for example:
+
+```py
+plt.figure(figsize=(14,10))
+
+sobel = tf.image.sobel_edges(content_image)
+plt.subplot(1,2,1)
+imshow(clip_0_1(sobel[...,0]/4+0.5), "Horizontal Sobel-edges")
+plt.subplot(1,2,2)
+imshow(clip_0_1(sobel[...,1]/4+0.5), "Vertical Sobel-edges")
+```
+
+![png](img/03dad7eb5e1c97b1391c9925be7da416.png)
+
+The regularization loss associated with this is the sum of the absolute values of these deltas:
+
+```py
+def total_variation_loss(image):
+  x_deltas, y_deltas = high_pass_x_y(image)
+  return tf.reduce_sum(tf.abs(x_deltas)) + tf.reduce_sum(tf.abs(y_deltas))
+```
+
+```py
+total_variation_loss(image).numpy()
+```
+
+```py
+149342.6
+
+```
+
+That demonstrated what the total variation loss does. But there's no need to implement it yourself; TensorFlow includes a standard implementation:
+
+```py
+tf.image.total_variation(image).numpy()
+```
+
+```py
+array([149342.6], dtype=float32)
+
+```
+
+## Re-run the optimization
+
+Choose a weight for the `total_variation_loss`:
+
+```py
+total_variation_weight=30
+```
+
+Now include it in the `train_step` function:
+
+```py
+@tf.function()
+def train_step(image):
+  with tf.GradientTape() as tape:
+    outputs = extractor(image)
+    loss = style_content_loss(outputs)
+    loss += total_variation_weight*tf.image.total_variation(image)
+
+  grad = tape.gradient(loss, image)
+  opt.apply_gradients([(grad, image)])
+  image.assign(clip_0_1(image))
+```
+
+Reinitialize the optimization variable:
+
+```py
+image = tf.Variable(content_image)
+```
+
+And run the optimization:
+
+```py
+import time
+start = time.time()
+
+epochs = 10
+steps_per_epoch = 100
+
+step = 0
+for n in range(epochs):
+  for m in range(steps_per_epoch):
+    step += 1
+    train_step(image)
+    print(".", end='')
+  display.clear_output(wait=True)
+  display.display(tensor_to_image(image))
+  print("Train step: {}".format(step))
+
+end = time.time()
+print("Total time: {:.1f}".format(end-start))
+```
+
+![png](img/c67ce581d874e2d04e2761cc44b1d094.png)
+
+```py
+Train step: 1000
+Total time: 21.7
+
+```
+
+Finally, save the result:
+
+```py
+file_name = 'stylized-image.png'
+tensor_to_image(image).save(file_name)
+
+try:
+  from google.colab import files
+except ImportError:
+  pass
+else:
+  files.download(file_name)
+```
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/061.md b/Tensorflow/TensorFlow2.0/061.md
new file mode 100644
index 00000000..a649569e
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/061.md
@@ -0,0 +1,373 @@
+# DeepDream
+
+> Original: [https://tensorflow.google.cn/tutorials/generative/deepdream](https://tensorflow.google.cn/tutorials/generative/deepdream)
+
+This tutorial contains a minimal implementation of DeepDream, as described in this [blog post](https://ai.googleblog.com/2015/06/inceptionism-going-deeper-into-neural.html) by Alexander Mordvintsev.
+
+DeepDream is an experiment that visualizes the patterns learned by a neural network. Similar to when a child watches clouds and tries to interpret random shapes, DeepDream over-interprets and enhances the patterns it sees in an image.
+
+It does so by forwarding an image through the network, then calculating the gradient of the image with respect to the activations of a particular layer. The image is then modified to increase these activations, enhancing the patterns seen by the network, and resulting in a dream-like image. This process was dubbed "Inceptionism" (a reference to [InceptionNet](https://arxiv.org/pdf/1409.4842.pdf), and the [movie](https://en.wikipedia.org/wiki/Inception) Inception).
+
+Let's demonstrate how you can make a neural network "dream" and enhance the surreal patterns it sees in an image.
+
+![Dogception](img/ad462e5b3dc8d32430aaa7de7e4bf303.png)
+
+```py
+import tensorflow as tf
+```
+
+```py
+import numpy as np
+
+import matplotlib as mpl
+
+import IPython.display as display
+import PIL.Image
+
+from tensorflow.keras.preprocessing import image
+```
+
+## Choose an image to dream-ify
+
+For this tutorial, let's use an image of a [labrador](https://commons.wikimedia.org/wiki/File:YellowLabradorLooking_new.jpg).
+
+```py
+url = 'https://storage.googleapis.com/download.tensorflow.org/example_images/YellowLabradorLooking_new.jpg'
+```
+
+```py
+# Download an image and read it into a NumPy array.
+def download(url, max_dim=None):
+  name = url.split('/')[-1]
+  image_path = tf.keras.utils.get_file(name, origin=url)
+  img = PIL.Image.open(image_path)
+  if max_dim:
+    img.thumbnail((max_dim, max_dim))
+  return np.array(img)
+
+# Normalize an image
+def deprocess(img):
+  img = 255*(img + 1.0)/2.0
+  return tf.cast(img, tf.uint8)
+
+# Display an image
+def show(img):
+  display.display(PIL.Image.fromarray(np.array(img)))
+
+# Downsizing the image makes it easier to work with.
+original_img = download(url, max_dim=500)
+show(original_img)
+display.display(display.HTML('Image cc-by: Von.grzanka'))
+```
+
+```py
+Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/YellowLabradorLooking_new.jpg
+90112/83281 [================================] - 0s 0us/step
+
+```
+
+![png](img/61002e329110c6cb1db1a82acd8d232f.png)
+
+## Prepare the feature extraction model
+
+Download and prepare a pre-trained image classification model. You will use [InceptionV3](https://keras.io/applications/#inceptionv3), which is similar to the model originally used in DeepDream. Note that any [pre-trained model](https://keras.io/applications/#models-for-image-classification-with-weights-trained-on-imagenet) will work, although you will have to adjust the layer names below if you change this.
+
+```py
+base_model = tf.keras.applications.InceptionV3(include_top=False, weights='imagenet')
+```
+
+```py
+Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
+87916544/87910968 [==============================] - 2s 0us/step
+
+```
+
+The idea in DeepDream is to choose a layer (or layers) and maximize the "loss" in a way that the image increasingly "excites" the layers. The complexity of the features incorporated depends on the layers you choose, i.e., lower layers produce strokes or simple patterns, while deeper layers give sophisticated features in images, or even whole objects.
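+
+As a quick way to see which layers are available to experiment with, you can list the concatenation layers of `base_model` by name (a minimal sketch, not part of the original notebook; the exact set of names may differ for other pre-trained models or Keras versions):
+
+```py
+# List the 'mixed' concatenation layers that DeepDream typically maximizes.
+mixed_layers = [layer.name for layer in base_model.layers
+                if layer.name.startswith('mixed')]
+# Expected to include 'mixed0' through 'mixed10' (plus a couple of
+# internal concatenation layers, depending on the Keras version).
+print(mixed_layers)
+```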
+
+The InceptionV3 architecture is quite large (for a graph of the model architecture see TensorFlow's [research repo](https://github.com/tensorflow/models/tree/master/research/inception)). For DeepDream, the layers of interest are those where the convolutions are concatenated. There are 11 of these layers in InceptionV3, named 'mixed0' through 'mixed10'. Using different layers will result in different dream-like images. Deeper layers respond to higher-level features (such as eyes and faces), while earlier layers respond to simpler features (such as edges, shapes, and textures). Feel free to experiment with the layers selected below, but keep in mind that deeper layers (those with a higher index) will take longer to train on, since the gradient computation is deeper.
+
+```py
+# Maximize the activations of these layers
+names = ['mixed3', 'mixed5']
+layers = [base_model.get_layer(name).output for name in names]
+
+# Create the feature extraction model
+dream_model = tf.keras.Model(inputs=base_model.input, outputs=layers)
+```
+
+## Calculate loss
+
+The loss is the sum of the activations in the chosen layers. The loss is normalized at each layer so the contribution from larger layers does not outweigh smaller layers. Normally, loss is a quantity you wish to minimize via gradient descent. In DeepDream, you will maximize this loss via gradient ascent.
+
+```py
+def calc_loss(img, model):
+  # Pass forward the image through the model to retrieve the activations.
+  # Converts the image into a batch of size 1.
+  img_batch = tf.expand_dims(img, axis=0)
+  layer_activations = model(img_batch)
+  if len(layer_activations) == 1:
+    layer_activations = [layer_activations]
+
+  losses = []
+  for act in layer_activations:
+    loss = tf.math.reduce_mean(act)
+    losses.append(loss)
+
+  return tf.reduce_sum(losses)
+```
+
+## Gradient ascent
+
+Once you have calculated the loss for the chosen layers, all that is left is to calculate the gradients with respect to the image, and add them to the original image.
+
+Adding the gradients to the image enhances the patterns seen by the network. At each step, you will have created an image that increasingly excites the activations of certain layers in the network.
+
+The method that does this, below, is wrapped in a [`tf.function`](https://tensorflow.google.cn/api_docs/python/tf/function) for performance. It uses an `input_signature` to ensure that the function is not retraced for different image sizes or `steps`/`step_size` values. See the [Concrete functions guide](https://tensorflow.google.cn/guide/concrete_function) for details.
+
+```py
+class DeepDream(tf.Module):
+  def __init__(self, model):
+    self.model = model
+
+  @tf.function(
+      input_signature=(
+        tf.TensorSpec(shape=[None,None,3], dtype=tf.float32),
+        tf.TensorSpec(shape=[], dtype=tf.int32),
+        tf.TensorSpec(shape=[], dtype=tf.float32),)
+  )
+  def __call__(self, img, steps, step_size):
+      print("Tracing")
+      loss = tf.constant(0.0)
+      for n in tf.range(steps):
+        with tf.GradientTape() as tape:
+          # This needs gradients relative to `img`
+          # `GradientTape` only watches `tf.Variable`s by default
+          tape.watch(img)
+          loss = calc_loss(img, self.model)
+
+        # Calculate the gradient of the loss with respect to the pixels of the input image.
+        gradients = tape.gradient(loss, img)
+
+        # Normalize the gradients.
+        gradients /= tf.math.reduce_std(gradients) + 1e-8 
+
+        # In gradient ascent, the "loss" is maximized so that the input image increasingly "excites" the layers.
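+        # (Plain gradient descent would subtract these gradients; adding them instead ascends the loss surface.)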
+ # You can update the image by directly adding the gradients (because they're the same shape!) + img = img + gradients*step_size + img = tf.clip_by_value(img, -1, 1) + + return loss, img +``` + +```py +deepdream = DeepDream(dream_model) +``` + +## Main Loop + +```py +def run_deep_dream_simple(img, steps=100, step_size=0.01): + # Convert from uint8 to the range expected by the model. + img = tf.keras.applications.inception_v3.preprocess_input(img) + img = tf.convert_to_tensor(img) + step_size = tf.convert_to_tensor(step_size) + steps_remaining = steps + step = 0 + while steps_remaining: + if steps_remaining>100: + run_steps = tf.constant(100) + else: + run_steps = tf.constant(steps_remaining) + steps_remaining -= run_steps + step += run_steps + + loss, img = deepdream(img, run_steps, tf.constant(step_size)) + + display.clear_output(wait=True) + show(deprocess(img)) + print ("Step {}, loss {}".format(step, loss)) + + result = deprocess(img) + display.clear_output(wait=True) + show(result) + + return result +``` + +```py +dream_img = run_deep_dream_simple(img=original_img, + steps=100, step_size=0.01) +``` + +![png](img/e47b08aec7cc62d5268c6c6af8cf2b16.png) + +## Taking it up an octave + +Pretty good, but there are a few issues with this first attempt: + +1. The output is noisy (this could be addressed with a [`tf.image.total_variation`](https://tensorflow.google.cn/api_docs/python/tf/image/total_variation) loss). +2. The image is low resolution. +3. The patterns appear like they're all happening at the same granularity. + +One approach that addresses all these problems is applying gradient ascent at different scales. This will allow patterns generated at smaller scales to be incorporated into patterns at higher scales and filled in with additional detail. + +To do this you can perform the previous gradient ascent approach, then increase the size of the image (which is referred to as an octave), and repeat this process for multiple octaves. + +```py +import time +start = time.time() + +OCTAVE_SCALE = 1.30 + +img = tf.constant(np.array(original_img)) +base_shape = tf.shape(img)[:-1] +float_base_shape = tf.cast(base_shape, tf.float32) + +for n in range(-2, 3): + new_shape = tf.cast(float_base_shape*(OCTAVE_SCALE**n), tf.int32) + + img = tf.image.resize(img, new_shape).numpy() + + img = run_deep_dream_simple(img=img, steps=50, step_size=0.01) + +display.clear_output(wait=True) +img = tf.image.resize(img, base_shape) +img = tf.image.convert_image_dtype(img/255.0, dtype=tf.uint8) +show(img) + +end = time.time() +end-start +``` + +![png](img/a3d4072cdd299fedb28dda8fdab7e611.png) + +```py +5.535110235214233 + +``` + +## Optional: Scaling up with tiles + +One thing to consider is that as the image increases in size, so will the time and memory necessary to perform the gradient calculation. The above octave implementation will not work on very large images, or many octaves. + +To avoid this issue you can split the image into tiles and compute the gradient for each tile. + +Applying random shifts to the image before each tiled computation prevents tile seams from appearing. + +Start by implementing the random shift: + +```py +def random_roll(img, maxroll): + # Randomly shift the image to avoid tiled boundaries. 
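+  # `maxroll` bounds the shift, in pixels, along each of the two spatial axes.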
+ shift = tf.random.uniform(shape=[2], minval=-maxroll, maxval=maxroll, dtype=tf.int32) + img_rolled = tf.roll(img, shift=shift, axis=[0,1]) + return shift, img_rolled +``` + +```py +shift, img_rolled = random_roll(np.array(original_img), 512) +show(img_rolled) +``` + +![png](img/47c750cbb275e148fd8d76c4bf49d4a6.png) + +Here is a tiled equivalent of the `deepdream` function defined earlier: + +```py +class TiledGradients(tf.Module): + def __init__(self, model): + self.model = model + + @tf.function( + input_signature=( + tf.TensorSpec(shape=[None,None,3], dtype=tf.float32), + tf.TensorSpec(shape=[], dtype=tf.int32),) + ) + def __call__(self, img, tile_size=512): + shift, img_rolled = random_roll(img, tile_size) + + # Initialize the image gradients to zero. + gradients = tf.zeros_like(img_rolled) + + # Skip the last tile, unless there's only one tile. + xs = tf.range(0, img_rolled.shape[0], tile_size)[:-1] + if not tf.cast(len(xs), bool): + xs = tf.constant([0]) + ys = tf.range(0, img_rolled.shape[1], tile_size)[:-1] + if not tf.cast(len(ys), bool): + ys = tf.constant([0]) + + for x in xs: + for y in ys: + # Calculate the gradients for this tile. + with tf.GradientTape() as tape: + # This needs gradients relative to `img_rolled`. + # `GradientTape` only watches `tf.Variable`s by default. + tape.watch(img_rolled) + + # Extract a tile out of the image. + img_tile = img_rolled[x:x+tile_size, y:y+tile_size] + loss = calc_loss(img_tile, self.model) + + # Update the image gradients for this tile. + gradients = gradients + tape.gradient(loss, img_rolled) + + # Undo the random shift applied to the image and its gradients. + gradients = tf.roll(gradients, shift=-shift, axis=[0,1]) + + # Normalize the gradients. + gradients /= tf.math.reduce_std(gradients) + 1e-8 + + return gradients +``` + +```py +get_tiled_gradients = TiledGradients(dream_model) +``` + +Putting this together gives a scalable, octave-aware deepdream implementation: + +```py +def run_deep_dream_with_octaves(img, steps_per_octave=100, step_size=0.01, + octaves=range(-2,3), octave_scale=1.3): + base_shape = tf.shape(img) + img = tf.keras.preprocessing.image.img_to_array(img) + img = tf.keras.applications.inception_v3.preprocess_input(img) + + initial_shape = img.shape[:-1] + img = tf.image.resize(img, initial_shape) + for octave in octaves: + # Scale the image based on the octave + new_size = tf.cast(tf.convert_to_tensor(base_shape[:-1]), tf.float32)*(octave_scale**octave) + img = tf.image.resize(img, tf.cast(new_size, tf.int32)) + + for step in range(steps_per_octave): + gradients = get_tiled_gradients(img) + img = img + gradients*step_size + img = tf.clip_by_value(img, -1, 1) + + if step % 10 == 0: + display.clear_output(wait=True) + show(deprocess(img)) + print ("Octave {}, Step {}".format(octave, step)) + + result = deprocess(img) + return result +``` + +```py +img = run_deep_dream_with_octaves(img=original_img, step_size=0.01) + +display.clear_output(wait=True) +img = tf.image.resize(img, base_shape) +img = tf.image.convert_image_dtype(img/255.0, dtype=tf.uint8) +show(img) +``` + +![png](img/1c3bc0a947aefadc9c04f9c5f0bf1991.png) + +Much better! Play around with the number of octaves, octave scale, and activated layers to change how your DeepDream-ed image looks. + +Readers might also be interested in [TensorFlow Lucid](https://github.com/tensorflow/lucid) which expands on ideas introduced in this tutorial to visualize and interpret neural networks. 
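+
+For instance, to dream with a different set of layers, you can rebuild the feature-extraction model and re-run the final cell (a sketch, not part of the original notebook; it assumes `base_model`, `TiledGradients`, and `run_deep_dream_with_octaves` from above are still in scope):
+
+```py
+# Earlier layers emphasize simpler textures; later layers emphasize object-like patterns.
+names = ['mixed1', 'mixed7']
+layers = [base_model.get_layer(name).output for name in names]
+dream_model = tf.keras.Model(inputs=base_model.input, outputs=layers)
+
+# The tiled-gradient helper is bound to a model, so rebuild it as well.
+get_tiled_gradients = TiledGradients(dream_model)
+img = run_deep_dream_with_octaves(img=original_img, step_size=0.01)
+```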
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/062.md b/Tensorflow/TensorFlow2.0/062.md
new file mode 100644
index 00000000..83740f7d
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/062.md
@@ -0,0 +1,389 @@
+# Deep Convolutional Generative Adversarial Network
+
+> Original: [https://tensorflow.google.cn/tutorials/generative/dcgan](https://tensorflow.google.cn/tutorials/generative/dcgan)
+
+**Note:** The TensorFlow community translated these documents. Because community translations are best-effort, there is no guarantee that they are accurate or reflect the latest [official English documentation](https://tensorflow.google.cn/?hl=en). If you have suggestions for improving this translation, please submit a pull request to the [tensorflow/docs](https://github.com/tensorflow/docs) GitHub repository. To volunteer to write or review translations, join the [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn).
+
+This tutorial demonstrates how to generate images of handwritten digits using a [Deep Convolutional Generative Adversarial Network](https://arxiv.org/pdf/1511.06434.pdf) (DCGAN). The code is written using the [Keras Sequential API](https://tensorflow.google.cn/guide/keras) with a [`tf.GradientTape`](https://tensorflow.google.cn/api_docs/python/tf/GradientTape) training loop.
+
+## What are Generative Adversarial Networks?
+
+[Generative Adversarial Networks](https://arxiv.org/abs/1406.2661) (GANs) are one of the most interesting ideas in computer science today. Two models are trained simultaneously by an adversarial process. A *generator* ("the artist") learns to create images that look real, while a *discriminator* ("the art critic") learns to tell real images apart from fakes.
+
+![A diagram of a generator and discriminator](img/d6513785291f1616fa5a88b830c9a438.png)
+
+During training, the *generator* progressively becomes better at creating images that look real, while the *discriminator* becomes better at telling them apart. The process reaches equilibrium when the *discriminator* can no longer distinguish real images from fakes.
+
+![A second diagram of a generator and discriminator](img/a84da0fdd95c0b8365360f941f57e017.png)
+
+This notebook demonstrates this process on the MNIST dataset. The following animation shows a series of images produced by the *generator* as it was trained for 50 epochs (50 iterations over the full dataset). The images begin as random noise, and increasingly resemble handwritten digits over time.
+
+![sample output](img/2e071a2b770d50ed5ef40dabbe1fd1a7.png)
+
+To learn more about GANs, we recommend MIT's [Intro to Deep Learning](http://introtodeeplearning.com/) course.
+
+### Import TensorFlow and other libraries
+
+```py
+import tensorflow as tf
+```
+
+```py
+tf.__version__
+```
+
+```py
+'2.3.0'
+
+```
+
+```py
+# To generate GIFs
+pip install -q imageio
+
+```
+
+```py
+WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+
+```
+
+```py
+import glob
+import imageio
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import PIL
+from tensorflow.keras import layers
+import time
+
+from IPython import display
+```
+
+### Load and prepare the dataset
+
+You will use the MNIST dataset to train the generator and the discriminator. The generator will generate handwritten digits resembling the MNIST data.
+
+```py
+(train_images, train_labels), (_, _) = tf.keras.datasets.mnist.load_data()
+```
+
+```py
+train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype('float32')
+train_images = (train_images - 127.5) / 127.5  # Normalize the images to [-1, 1]
+```
+
+```py
+BUFFER_SIZE = 60000
+BATCH_SIZE = 256
+```
+
+```py
+# Batch and shuffle the data
+train_dataset = tf.data.Dataset.from_tensor_slices(train_images).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
+```
+
+## Create the models
+
+Both the generator and the discriminator are defined using the [Keras Sequential API](https://tensorflow.google.cn/guide/keras#sequential_model).
+
+### The Generator
+
+The generator uses [`tf.keras.layers.Conv2DTranspose`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Conv2DTranspose) (upsampling) layers to produce an image from a seed (random noise). Start with a `Dense` layer that takes this seed as input, then upsample several times until you reach the desired image size of 28x28x1. Notice the [`tf.keras.layers.LeakyReLU`](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/LeakyReLU) activation for each layer, except the output layer, which uses tanh.
+
+```py
+def make_generator_model():
+    model = tf.keras.Sequential()
+    model.add(layers.Dense(7*7*256, use_bias=False, input_shape=(100,)))
+    model.add(layers.BatchNormalization())
+    model.add(layers.LeakyReLU())
+
+    model.add(layers.Reshape((7, 7, 256)))
+    assert model.output_shape == (None, 7, 7, 256)  # Note: the batch size is not constrained
+
+    model.add(layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False))
+    assert model.output_shape == (None, 7, 7, 128)
+    model.add(layers.BatchNormalization())
+    model.add(layers.LeakyReLU())
+
+    model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False))
+    assert model.output_shape == (None, 14, 14, 64)
+    model.add(layers.BatchNormalization())
+    model.add(layers.LeakyReLU())
+
+    model.add(layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
+    assert model.output_shape == (None, 28, 28, 1)
+
+    return model
+```
+
+Use the (as yet untrained) generator to create an image.
+
+```py
+generator = make_generator_model()
+
+noise = tf.random.normal([1, 100])
+generated_image = generator(noise, training=False)
+
+plt.imshow(generated_image[0, :, :, 0], cmap='gray')
+```
+
+```py
+
+
+```
+
+![png](img/22f7bd226b742292050c368b980067f4.png)
+
+### The Discriminator
+
+The discriminator is a CNN-based image classifier.
+
+```py
+def make_discriminator_model():
+    model = tf.keras.Sequential()
+    model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same',
+                            input_shape=[28, 28, 1]))
+    model.add(layers.LeakyReLU())
+    model.add(layers.Dropout(0.3))
+
+    model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
+    model.add(layers.LeakyReLU())
+    model.add(layers.Dropout(0.3))
+
+    model.add(layers.Flatten())
+    model.add(layers.Dense(1))
+
+    return model
+```
+
+Use the (as yet untrained) discriminator to classify images as real or fake. The model will be trained to output positive values for real images and negative values for fake images.
+
+```py
+discriminator = make_discriminator_model()
+decision = discriminator(generated_image)
+print (decision)
+```
+
+```py
+tf.Tensor([[-0.00427552]], shape=(1, 1), dtype=float32)
+
+```
+
+## Define the loss and optimizers
+
+Define loss functions and optimizers for both models.
+
+```py
+# This method returns a helper function to compute the cross entropy loss
+cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
+```
+
+### Discriminator loss
+
+This method quantifies how well the discriminator is able to distinguish real images from fakes. It compares the discriminator's predictions on real images to an array of 1s, and the discriminator's predictions on fake (generated) images to an array of 0s.
+
+```py
+def discriminator_loss(real_output, fake_output):
+    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
+    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
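+    # The discriminator should output 1 for real images and 0 for fakes;
+    # the two penalties are summed below to give its total loss.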
+    total_loss = real_loss + fake_loss
+    return total_loss
+```
+
+### Generator loss
+
+The generator's loss quantifies how well it was able to trick the discriminator. Intuitively, if the generator is performing well, the discriminator will classify the fake images as real (or 1). Here, we compare the discriminator's decisions on the generated images to an array of 1s.
+
+```py
+def generator_loss(fake_output):
+    return cross_entropy(tf.ones_like(fake_output), fake_output)
+```
+
+The discriminator and the generator optimizers are different, since we need to train the two networks separately.
+
+```py
+generator_optimizer = tf.keras.optimizers.Adam(1e-4)
+discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)
+```
+
+### Save checkpoints
+
+This notebook also demonstrates how to save and restore models, which can be helpful in case a long-running training task is interrupted.
+
+```py
+checkpoint_dir = './training_checkpoints'
+checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
+checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
+                                 discriminator_optimizer=discriminator_optimizer,
+                                 generator=generator,
+                                 discriminator=discriminator)
+```
+
+## Define the training loop
+
+```py
+EPOCHS = 50
+noise_dim = 100
+num_examples_to_generate = 16
+
+# We will reuse this seed over time (so it's easier to visualize progress in the animated GIF)
+seed = tf.random.normal([num_examples_to_generate, noise_dim])
+```
+
+The training loop begins with the generator receiving a random seed as input. That seed is used to produce an image. The discriminator is then used to classify real images (drawn from the training set) and fake images (produced by the generator). The loss is calculated for each of these models, and the gradients are used to update the generator and the discriminator.
+
+```py
+# Notice the use of `tf.function`
+# This annotation causes the function to be "compiled"
+@tf.function
+def train_step(images):
+    noise = tf.random.normal([BATCH_SIZE, noise_dim])
+
+    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
+      generated_images = generator(noise, training=True)
+
+      real_output = discriminator(images, training=True)
+      fake_output = discriminator(generated_images, training=True)
+
+      gen_loss = generator_loss(fake_output)
+      disc_loss = discriminator_loss(real_output, fake_output)
+
+    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
+    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
+
+    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
+    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
+```
+
+```py
+def train(dataset, epochs):
+  for epoch in range(epochs):
+    start = time.time()
+
+    for image_batch in dataset:
+      train_step(image_batch)
+
+    # Produce images for the GIF as we go
+    display.clear_output(wait=True)
+    generate_and_save_images(generator,
+                             epoch + 1,
+                             seed)
+
+    # Save the model every 15 epochs
+    if (epoch + 1) % 15 == 0:
+      checkpoint.save(file_prefix = checkpoint_prefix)
+
+    print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))
+
+  # Generate after the final epoch
+  display.clear_output(wait=True)
+  generate_and_save_images(generator,
+                           epochs,
+                           seed)
+```
+
+**Generate and save images**
+
+```py
+def generate_and_save_images(model, epoch, test_input):
+  # Notice `training` is set to False.
+  # This is so all layers run in inference mode (batchnorm).
+  predictions = model(test_input, training=False)
+
+  fig = plt.figure(figsize=(4,4))
+
+  for i in range(predictions.shape[0]):
+      plt.subplot(4, 4, i+1)
+      plt.imshow(predictions[i, :, :, 0] * 127.5 + 127.5, cmap='gray')
+      plt.axis('off')
+
+  plt.savefig('image_at_epoch_{:04d}.png'.format(epoch))
+  plt.show()
+```
+
+## Train the model
+
+Call the `train()` method defined above to train the generator and discriminator simultaneously. Note, training GANs can be tricky. It's important that the generator and discriminator do not overpower each other (e.g., that they train at a similar rate).
+
+At the beginning of the training, the generated images look like random noise. As training progresses, the generated digits will look increasingly real. After about 50 epochs, they resemble MNIST digits. This may take about one minute per epoch with the default settings on Colab.
+
+```py
+%%time
+train(train_dataset, EPOCHS)
+```
+
+![png](img/f3c5a66b35a03bd6a2bf9c3a65a39dfb.png)
+
+```py
+CPU times: user 1min 52s, sys: 11.7 s, total: 2min 4s
+Wall time: 3min 22s
+
+```
+
+Restore the latest checkpoint.
+
+```py
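+# Reload the model and optimizer states most recently saved under checkpoint_dir.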
+checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
+```
+
+```py
+
+
+```
+
+## Create a GIF
+
+```py
+# Display a single image using the epoch number
+def display_image(epoch_no):
+  return PIL.Image.open('image_at_epoch_{:04d}.png'.format(epoch_no))
+```
+
+```py
+display_image(EPOCHS)
+```
+
+![png](img/c12f3797e75b6aa8bdc206f4b91344c1.png)
+
+Use `imageio` to create an animated gif from the images saved during training.
+
+```py
+anim_file = 'dcgan.gif'
+
+with imageio.get_writer(anim_file, mode='I') as writer:
+  filenames = glob.glob('image*.png')
+  filenames = sorted(filenames)
+  last = -1
+  for i,filename in enumerate(filenames):
+    frame = 2*(i**0.5)
+    if round(frame) > round(last):
+      last = frame
+    else:
+      continue
+    image = imageio.imread(filename)
+    writer.append_data(image)
+  image = imageio.imread(filename)
+  writer.append_data(image)
+
+import IPython
+if IPython.version_info > (6,2,0,''):
+  display.Image(filename=anim_file)
+```
+
+If you're working in Colab, you can download the animation with the code below:
+
+```py
+try:
+  from google.colab import files
+except ImportError:
+  pass
+else:
+  files.download(anim_file)
+```
+
+## Next steps
+
+This tutorial has shown the complete code necessary to write and train a GAN. As a next step, you might like to experiment with a different dataset, for example the Large-scale Celeb Faces Attributes (CelebA) dataset [available on Kaggle](https://www.kaggle.com/jessicali9530/celeba-dataset). To learn more about GANs, we recommend the [NIPS 2016 Tutorial: Generative Adversarial Networks](https://arxiv.org/abs/1701.00160).
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/063.md b/Tensorflow/TensorFlow2.0/063.md
new file mode 100644
index 00000000..57a5ef65
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/063.md
@@ -0,0 +1,703 @@
+# Pix2Pix
+
+> Original: [https://tensorflow.google.cn/tutorials/generative/pix2pix](https://tensorflow.google.cn/tutorials/generative/pix2pix)
+
+This notebook demonstrates image to image translation using conditional GANs, as described in [Image-to-Image Translation with Conditional Adversarial Networks](https://arxiv.org/abs/1611.07004). Using this technique we can colorize black and white photos, convert Google Maps to Google Earth, etc. Here, we convert building facades to real buildings.
+
+As an example, we will use the [CMP Facade Database](http://cmp.felk.cvut.cz/%7Etylecr1/facade/), helpfully provided by the [Center for Machine Perception](http://cmp.felk.cvut.cz/) at the [Czech Technical University in Prague](https://www.cvut.cz/). To keep our example short, we will use a preprocessed [copy](https://people.eecs.berkeley.edu/%7Etinghuiz/projects/pix2pix/datasets/) of this dataset, created by the authors of the [paper](https://arxiv.org/abs/1611.07004) above.
+
+Each epoch takes around 15 seconds on a single V100 GPU.
+
+Below is the output generated after training the model for 200 epochs.
+
+![sample output_1](img/e297781397cdc97e304b45625f7ae423.png) ![sample output_2](img/7f05b53be9225270c3955654d7d465de.png)
+
+## Import TensorFlow and other libraries
+
+```py
+import tensorflow as tf
+
+import os
+import time
+
+from matplotlib import pyplot as plt
+from IPython import display
+```
+
+```py
+pip install -q -U tensorboard
+
+```
+
+```py
+WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+
+```
+
+## Load the dataset
+
+You can download this dataset and similar datasets from [here](https://people.eecs.berkeley.edu/%7Etinghuiz/projects/pix2pix/datasets). As mentioned in the [paper](https://arxiv.org/abs/1611.07004), we apply random jittering and mirroring to the training dataset.
+
+* In random jittering, the image is resized to `286 x 286` and then randomly cropped to `256 x 256`.
+* In random mirroring, the image is randomly flipped horizontally, i.e., left to right.
+
+```py
+_URL = 'https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/facades.tar.gz'
+
+path_to_zip = tf.keras.utils.get_file('facades.tar.gz',
+                                      origin=_URL,
+                                      extract=True)
+
+PATH = os.path.join(os.path.dirname(path_to_zip), 'facades/')
+```
+
+```py
+Downloading data from https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/facades.tar.gz
+30171136/30168306 [==============================] - 2s 0us/step
+
+```
+
+```py
+BUFFER_SIZE = 400
+BATCH_SIZE = 1
+IMG_WIDTH = 256
+IMG_HEIGHT = 256
+```
+
+```py
+def load(image_file):
+  image = tf.io.read_file(image_file)
+  image = tf.image.decode_jpeg(image)
+
+  w = tf.shape(image)[1]
+
+  w = w // 2
+  real_image = image[:, :w, :]
+  input_image = image[:, w:, :]
+
+  input_image = tf.cast(input_image, tf.float32)
+  real_image = tf.cast(real_image, tf.float32)
+
+  return input_image, real_image
+```
+
+```py
+inp, re = load(PATH+'train/100.jpg')
+# Scale pixel values to [0, 1] so matplotlib can display the images
+plt.figure()
+plt.imshow(inp/255.0)
+plt.figure()
+plt.imshow(re/255.0)
+```
+
+```py
+
+
+```
+
+![png](img/52194b6e27c77c651d0f3c56066448f5.png)
+
+![png](img/ab876a0a7878b27ea0658f95d96f1ddb.png)
+
+```py
+def resize(input_image, real_image, height, width):
+  input_image = tf.image.resize(input_image, [height, width],
+                                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
+  real_image = tf.image.resize(real_image, [height, width],
+                               method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
+
+  return input_image, real_image
+```
+
+```py
+def random_crop(input_image, real_image):
+  stacked_image = tf.stack([input_image, real_image], axis=0)
+  cropped_image = tf.image.random_crop(
+      stacked_image, size=[2, IMG_HEIGHT, IMG_WIDTH, 3])
+
+  return cropped_image[0], cropped_image[1]
+```
+
+```py
+# normalizing the images to [-1, 1]
+
+def normalize(input_image, real_image):
+  input_image = (input_image / 127.5) - 1
+  real_image = (real_image / 127.5) - 1
+
+  return input_image, real_image
+```
+
+```py
+@tf.function()
+def random_jitter(input_image, real_image):
+  # resizing to 286 x 286 x 3
+  input_image, real_image = resize(input_image, real_image, 286, 286)
+
+  # randomly cropping to 256 x 256 x 3
+  input_image, real_image = random_crop(input_image, real_image)
+
+  if tf.random.uniform(()) > 0.5:
+    # random mirroring
+    input_image = tf.image.flip_left_right(input_image)
+    real_image = tf.image.flip_left_right(real_image)
+
+  return input_image, real_image
+```
+
+As you can see in the images below, they are going through random jittering. Random jittering, as described in the paper, is to:
+
+1. Resize an image to a bigger height and width
+2. Randomly crop to the target size
+3. 
Randomly flip the image horizontally + +```py +plt.figure(figsize=(6, 6)) +for i in range(4): + rj_inp, rj_re = random_jitter(inp, re) + plt.subplot(2, 2, i+1) + plt.imshow(rj_inp/255.0) + plt.axis('off') +plt.show() +``` + +![png](img/be737507a3c4409c7dc8aa33d2196e15.png) + +```py +def load_image_train(image_file): + input_image, real_image = load(image_file) + input_image, real_image = random_jitter(input_image, real_image) + input_image, real_image = normalize(input_image, real_image) + + return input_image, real_image +``` + +```py +def load_image_test(image_file): + input_image, real_image = load(image_file) + input_image, real_image = resize(input_image, real_image, + IMG_HEIGHT, IMG_WIDTH) + input_image, real_image = normalize(input_image, real_image) + + return input_image, real_image +``` + +## Input Pipeline + +```py +train_dataset = tf.data.Dataset.list_files(PATH+'train/*.jpg') +train_dataset = train_dataset.map(load_image_train, + num_parallel_calls=tf.data.experimental.AUTOTUNE) +train_dataset = train_dataset.shuffle(BUFFER_SIZE) +train_dataset = train_dataset.batch(BATCH_SIZE) +``` + +```py +test_dataset = tf.data.Dataset.list_files(PATH+'test/*.jpg') +test_dataset = test_dataset.map(load_image_test) +test_dataset = test_dataset.batch(BATCH_SIZE) +``` + +## Build the Generator + +* The architecture of generator is a modified U-Net. +* Each block in the encoder is (Conv -> Batchnorm -> Leaky ReLU) +* Each block in the decoder is (Transposed Conv -> Batchnorm -> Dropout(applied to the first 3 blocks) -> ReLU) +* There are skip connections between the encoder and decoder (as in U-Net). + +```py +OUTPUT_CHANNELS = 3 +``` + +```py +def downsample(filters, size, apply_batchnorm=True): + initializer = tf.random_normal_initializer(0., 0.02) + + result = tf.keras.Sequential() + result.add( + tf.keras.layers.Conv2D(filters, size, strides=2, padding='same', + kernel_initializer=initializer, use_bias=False)) + + if apply_batchnorm: + result.add(tf.keras.layers.BatchNormalization()) + + result.add(tf.keras.layers.LeakyReLU()) + + return result +``` + +```py +down_model = downsample(3, 4) +down_result = down_model(tf.expand_dims(inp, 0)) +print (down_result.shape) +``` + +```py +(1, 128, 128, 3) + +``` + +```py +def upsample(filters, size, apply_dropout=False): + initializer = tf.random_normal_initializer(0., 0.02) + + result = tf.keras.Sequential() + result.add( + tf.keras.layers.Conv2DTranspose(filters, size, strides=2, + padding='same', + kernel_initializer=initializer, + use_bias=False)) + + result.add(tf.keras.layers.BatchNormalization()) + + if apply_dropout: + result.add(tf.keras.layers.Dropout(0.5)) + + result.add(tf.keras.layers.ReLU()) + + return result +``` + +```py +up_model = upsample(3, 4) +up_result = up_model(down_result) +print (up_result.shape) +``` + +```py +(1, 256, 256, 3) + +``` + +```py +def Generator(): + inputs = tf.keras.layers.Input(shape=[256,256,3]) + + down_stack = [ + downsample(64, 4, apply_batchnorm=False), # (bs, 128, 128, 64) + downsample(128, 4), # (bs, 64, 64, 128) + downsample(256, 4), # (bs, 32, 32, 256) + downsample(512, 4), # (bs, 16, 16, 512) + downsample(512, 4), # (bs, 8, 8, 512) + downsample(512, 4), # (bs, 4, 4, 512) + downsample(512, 4), # (bs, 2, 2, 512) + downsample(512, 4), # (bs, 1, 1, 512) + ] + + up_stack = [ + upsample(512, 4, apply_dropout=True), # (bs, 2, 2, 1024) + upsample(512, 4, apply_dropout=True), # (bs, 4, 4, 1024) + upsample(512, 4, apply_dropout=True), # (bs, 8, 8, 1024) + upsample(512, 4), # (bs, 16, 16, 1024) + 
upsample(256, 4),  # (bs, 32, 32, 512)
+    upsample(128, 4),  # (bs, 64, 64, 256)
+    upsample(64, 4),  # (bs, 128, 128, 128)
+  ]
+
+  initializer = tf.random_normal_initializer(0., 0.02)
+  last = tf.keras.layers.Conv2DTranspose(OUTPUT_CHANNELS, 4,
+                                         strides=2,
+                                         padding='same',
+                                         kernel_initializer=initializer,
+                                         activation='tanh')  # (bs, 256, 256, 3)
+
+  x = inputs
+
+  # Downsampling through the model
+  skips = []
+  for down in down_stack:
+    x = down(x)
+    skips.append(x)
+
+  skips = reversed(skips[:-1])
+
+  # Upsampling and establishing the skip connections
+  for up, skip in zip(up_stack, skips):
+    x = up(x)
+    x = tf.keras.layers.Concatenate()([x, skip])
+
+  x = last(x)
+
+  return tf.keras.Model(inputs=inputs, outputs=x)
+```
+
+```py
+generator = Generator()
+tf.keras.utils.plot_model(generator, show_shapes=True, dpi=64)
+```
+
+![png](img/027fe3c7c1b2c8f4ba851311692e3d91.png)
+
+```py
+gen_output = generator(inp[tf.newaxis,...], training=False)
+plt.imshow(gen_output[0,...])
+```
+
+```py
+Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
+
+
+```
+
+![png](img/e4d27c794147e0649dec40c1e673fa3d.png)
+
+* **Generator loss**
+  * It is a sigmoid cross entropy loss of the generated images and an **array of ones**.
+  * The [paper](https://arxiv.org/abs/1611.07004) also includes an L1 loss, which is the MAE (mean absolute error) between the generated image and the target image.
+  * This allows the generated image to become structurally similar to the target image.
+  * The formula to calculate the total generator loss = gan_loss + LAMBDA * l1_loss, where LAMBDA = 100. This value was decided by the authors of the [paper](https://arxiv.org/abs/1611.07004).
+
+The training procedure for the generator is shown below:
+
+```py
+LAMBDA = 100
+```
+
+```py
+def generator_loss(disc_generated_output, gen_output, target):
+  gan_loss = loss_object(tf.ones_like(disc_generated_output), disc_generated_output)
+
+  # mean absolute error
+  l1_loss = tf.reduce_mean(tf.abs(target - gen_output))
+
+  total_gen_loss = gan_loss + (LAMBDA * l1_loss)
+
+  return total_gen_loss, gan_loss, l1_loss
+```
+
+![Generator Update Image](img/b7fd03ac59129ba2515cf59b292f3296.png)
+
+## Build the Discriminator
+
+* The Discriminator is a PatchGAN.
+* Each block in the discriminator is (Conv -> BatchNorm -> Leaky ReLU)
+* The shape of the output after the last layer is (batch_size, 30, 30, 1)
+* Each 30x30 patch of the output classifies a 70x70 portion of the input image (such an architecture is called a PatchGAN).
+* The discriminator receives 2 inputs.
+  * Input image and the target image, which it should classify as real.
+  * Input image and the generated image (output of generator), which it should classify as fake.
+ * We concatenate these 2 inputs together in the code (`tf.concat([inp, tar], axis=-1)`) + +```py +def Discriminator(): + initializer = tf.random_normal_initializer(0., 0.02) + + inp = tf.keras.layers.Input(shape=[256, 256, 3], name='input_image') + tar = tf.keras.layers.Input(shape=[256, 256, 3], name='target_image') + + x = tf.keras.layers.concatenate([inp, tar]) # (bs, 256, 256, channels*2) + + down1 = downsample(64, 4, False)(x) # (bs, 128, 128, 64) + down2 = downsample(128, 4)(down1) # (bs, 64, 64, 128) + down3 = downsample(256, 4)(down2) # (bs, 32, 32, 256) + + zero_pad1 = tf.keras.layers.ZeroPadding2D()(down3) # (bs, 34, 34, 256) + conv = tf.keras.layers.Conv2D(512, 4, strides=1, + kernel_initializer=initializer, + use_bias=False)(zero_pad1) # (bs, 31, 31, 512) + + batchnorm1 = tf.keras.layers.BatchNormalization()(conv) + + leaky_relu = tf.keras.layers.LeakyReLU()(batchnorm1) + + zero_pad2 = tf.keras.layers.ZeroPadding2D()(leaky_relu) # (bs, 33, 33, 512) + + last = tf.keras.layers.Conv2D(1, 4, strides=1, + kernel_initializer=initializer)(zero_pad2) # (bs, 30, 30, 1) + + return tf.keras.Model(inputs=[inp, tar], outputs=last) +``` + +```py +discriminator = Discriminator() +tf.keras.utils.plot_model(discriminator, show_shapes=True, dpi=64) +``` + +![png](img/0425284f7bd595a686480abe82721a04.png) + +```py +disc_out = discriminator([inp[tf.newaxis,...], gen_output], training=False) +plt.imshow(disc_out[0,...,-1], vmin=-20, vmax=20, cmap='RdBu_r') +plt.colorbar() +``` + +```py + + +``` + +![png](img/644c999529792fb810f213e660e582b8.png) + +**Discriminator loss** + +* The discriminator loss function takes 2 inputs; **real images, generated images** +* real_loss is a sigmoid cross entropy loss of the **real images** and an **array of ones(since these are the real images)** +* generated_loss is a sigmoid cross entropy loss of the **generated images** and an **array of zeros(since these are the fake images)** +* Then the total_loss is the sum of real_loss and the generated_loss + +```py +loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True) +``` + +```py +def discriminator_loss(disc_real_output, disc_generated_output): + real_loss = loss_object(tf.ones_like(disc_real_output), disc_real_output) + + generated_loss = loss_object(tf.zeros_like(disc_generated_output), disc_generated_output) + + total_disc_loss = real_loss + generated_loss + + return total_disc_loss +``` + +The training procedure for the discriminator is shown below. + +To learn more about the architecture and the hyperparameters you can refer the [paper](https://arxiv.org/abs/1611.07004). + +![Discriminator Update Image](img/a49dab0e9e9ab0a58b2928fb2760dab6.png) + +## Define the Optimizers and Checkpoint-saver + +```py +generator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5) +discriminator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5) +``` + +```py +checkpoint_dir = './training_checkpoints' +checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") +checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer, + discriminator_optimizer=discriminator_optimizer, + generator=generator, + discriminator=discriminator) +``` + +## Generate Images + +Write a function to plot some images during training. + +* We pass images from the test dataset to the generator. +* The generator will then translate the input image into the output. 
+* Last step is to plot the predictions and **voila!**
+
+**Note:** The `training=True` is intentional here since we want the batch statistics while running the model on the test dataset. If we use `training=False`, we will get the accumulated statistics learned from the training dataset (which we don't want).
+
+```py
+def generate_images(model, test_input, tar):
+  prediction = model(test_input, training=True)
+  plt.figure(figsize=(15,15))
+
+  display_list = [test_input[0], tar[0], prediction[0]]
+  title = ['Input Image', 'Ground Truth', 'Predicted Image']
+
+  for i in range(3):
+    plt.subplot(1, 3, i+1)
+    plt.title(title[i])
+    # getting the pixel values between [0, 1] to plot it.
+    plt.imshow(display_list[i] * 0.5 + 0.5)
+    plt.axis('off')
+  plt.show()
+```
+
+```py
+for example_input, example_target in test_dataset.take(1):
+  generate_images(generator, example_input, example_target)
+```
+
+![png](img/a2d79e6f20ade2372271c76afeaca800.png)
+
+## Training
+
+* For each example input generate an output.
+* The discriminator receives the input_image and the generated image as the first input. The second input is the input_image and the target_image.
+* Next, we calculate the generator and the discriminator loss.
+* Then, we calculate the gradients of the loss with respect to both the generator and the discriminator variables (inputs) and apply those to the optimizer.
+* Then log the losses to TensorBoard.
+
+```py
+EPOCHS = 150
+```
+
+```py
+import datetime
+log_dir="logs/"
+
+summary_writer = tf.summary.create_file_writer(
+  log_dir + "fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
+```
+
+```py
+@tf.function
+def train_step(input_image, target, epoch):
+  with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
+    gen_output = generator(input_image, training=True)
+
+    disc_real_output = discriminator([input_image, target], training=True)
+    disc_generated_output = discriminator([input_image, gen_output], training=True)
+
+    gen_total_loss, gen_gan_loss, gen_l1_loss = generator_loss(disc_generated_output, gen_output, target)
+    disc_loss = discriminator_loss(disc_real_output, disc_generated_output)
+
+  generator_gradients = gen_tape.gradient(gen_total_loss,
+                                          generator.trainable_variables)
+  discriminator_gradients = disc_tape.gradient(disc_loss,
+                                               discriminator.trainable_variables)
+
+  generator_optimizer.apply_gradients(zip(generator_gradients,
+                                          generator.trainable_variables))
+  discriminator_optimizer.apply_gradients(zip(discriminator_gradients,
+                                              discriminator.trainable_variables))
+
+  with summary_writer.as_default():
+    tf.summary.scalar('gen_total_loss', gen_total_loss, step=epoch)
+    tf.summary.scalar('gen_gan_loss', gen_gan_loss, step=epoch)
+    tf.summary.scalar('gen_l1_loss', gen_l1_loss, step=epoch)
+    tf.summary.scalar('disc_loss', disc_loss, step=epoch)
+```
+
+The actual training loop:
+
+* Iterates over the number of epochs.
+* On each epoch it clears the display, and runs `generate_images` to show its progress.
+* On each epoch it iterates over the training dataset, printing a '.' for each example.
+* It saves a checkpoint every 20 epochs.
+
+```py
+def fit(train_ds, epochs, test_ds):
+  for epoch in range(epochs):
+    start = time.time()
+
+    display.clear_output(wait=True)
+
+    for example_input, example_target in test_ds.take(1):
+      generate_images(generator, example_input, example_target)
+    print("Epoch: ", epoch)
+
+    # Train
+    for n, (input_image, target) in train_ds.enumerate():
+      print('.', end='')
+      if (n+1) % 100 == 0:
+        print()
+      train_step(input_image, target, epoch)
+    print()
+
+    # saving (checkpoint) the model every 20 epochs
+    if (epoch + 1) % 20 == 0:
+      checkpoint.save(file_prefix = checkpoint_prefix)
+
+    print ('Time taken for epoch {} is {} sec\n'.format(epoch + 1,
+                                                        time.time()-start))
+  checkpoint.save(file_prefix = checkpoint_prefix)
+```
+
+This training loop saves logs you can easily view in TensorBoard to monitor the training progress. Working locally you would launch a separate tensorboard process. In a notebook, if you want to monitor with TensorBoard it's easiest to launch the viewer before starting the training.
+
+To launch the viewer paste the following into a code-cell:
+
+```py
+%load_ext tensorboard
+%tensorboard --logdir {log_dir}
+```
+
+Now run the training loop:
+
+```py
+fit(train_dataset, EPOCHS, test_dataset)
+```
+
+![png](img/4c8ef6a2c8f0548a9f5bb182b8d3de01.png)
+
+```py
+Epoch: 149
+....................................................................................................
+....................................................................................................
+....................................................................................................
+....................................................................................................
+
+Time taken for epoch 150 is 16.14578342437744 sec
+
+```
+
+If you want to share the TensorBoard results *publicly* you can upload the logs to [TensorBoard.dev](https://tensorboard.dev/) by copying the following into a code-cell.
+
+**Note:** This requires a Google account.
+
+```py
+tensorboard dev upload --logdir {log_dir}
+
+```
+
+**Caution:** This command does not terminate. It's designed to continuously upload the results of long-running experiments. Once your data is uploaded you need to stop it using the "interrupt execution" option in your notebook tool.
+
+You can view the [results of a previous run](https://tensorboard.dev/experiment/lZ0C6FONROaUMfjYkVyJqw) of this notebook on [TensorBoard.dev](https://tensorboard.dev/).
+
+TensorBoard.dev is a managed experience for hosting, tracking, and sharing ML experiments with everyone. It can also be embedded inline in a notebook using an `<iframe>`.
+
+Interpreting the logs from a GAN is more subtle than a simple classification or regression model. Things to look for:
+
+* Check that neither model has "won". If either the `gen_gan_loss` or the `disc_loss` gets very low, it's an indicator that this model is dominating the other, and you are not successfully training the combined model.
+* The value `log(2) = 0.69` is a good reference point for these losses, as it indicates a perplexity of 2: the discriminator is, on average, equally uncertain about the two options.
+* For the `disc_loss` a value below `0.69` means the discriminator is doing better than random, on the combined set of real+generated images.
+* For the `gen_gan_loss` a value below `0.69` means the generator is doing better than random at fooling the discriminator.
+* As training progresses the `gen_l1_loss` should go down.
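+
+A quick sanity check of the `log(2)` reference point from the list above (a standalone sketch, not part of the original notebook): a discriminator that assigns probability 0.5 to every image has a binary cross-entropy of exactly `-log(0.5) = log(2)` per example.
+
+```py
+import numpy as np
+
+# Cross-entropy of a maximally uncertain discriminator:
+# -log(0.5) = log(2) ~= 0.6931, the 0.69 quoted above.
+print(-np.log(0.5))
+```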
+
+## Restore the latest checkpoint and test
+
+```py
+ls {checkpoint_dir}
+
+```
+
+```py
+checkpoint                  ckpt-5.data-00000-of-00001
+ckpt-1.data-00000-of-00001  ckpt-5.index
+ckpt-1.index                ckpt-6.data-00000-of-00001
+ckpt-2.data-00000-of-00001  ckpt-6.index
+ckpt-2.index                ckpt-7.data-00000-of-00001
+ckpt-3.data-00000-of-00001  ckpt-7.index
+ckpt-3.index                ckpt-8.data-00000-of-00001
+ckpt-4.data-00000-of-00001  ckpt-8.index
+ckpt-4.index
+
+```
+
+```py
+# restoring the latest checkpoint in checkpoint_dir
+checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
+```
+
+```py
+
+
+```
+
+## Generate using test dataset
+
+```py
+# Run the trained model on a few examples from the test dataset
+for inp, tar in test_dataset.take(5):
+  generate_images(generator, inp, tar)
+```
+
+![png](img/21b3b7303748422d35a6212f940d399c.png)
+
+![png](img/711ebb2cc10e3bb88f77a6eb89fac014.png)
+
+![png](img/7138c243e1e2c00466be2191f6395597.png)
+
+![png](img/a83182d7f6b11d76dd2d428db01ade58.png)
+
+![png](img/5f0049e4eda5b1689106731ac4d622f6.png)
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/064.md b/Tensorflow/TensorFlow2.0/064.md
new file mode 100644
index 00000000..a0472e86
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/064.md
@@ -0,0 +1,499 @@
+# CycleGAN
+
+> Original: [https://tensorflow.google.cn/tutorials/generative/cyclegan](https://tensorflow.google.cn/tutorials/generative/cyclegan)
+
+**Note:** The TensorFlow community translated these documents. Because community translations are best-effort, there is no guarantee that they are accurate or reflect the latest [official English documentation](https://tensorflow.google.cn/?hl=en). If you have suggestions for improving this translation, please submit a pull request to the [tensorflow/docs](https://github.com/tensorflow/docs) GitHub repository. To volunteer to write or review translations, join the [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn).
+
+This notebook demonstrates unpaired image-to-image translation using conditional GANs, as described in [Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial Networks](https://arxiv.org/abs/1703.10593), also known as CycleGAN. The paper proposes a method that can capture the characteristics of one image domain and figure out how these characteristics could be translated into another image domain, all without any paired training examples.
+
+This notebook assumes you are familiar with Pix2Pix, which you can learn about in the [Pix2Pix tutorial](https://tensorflow.google.cn/tutorials/generative/pix2pix). The code for CycleGAN is similar; the main difference is an additional loss function, and the use of unpaired training data.
+
+CycleGAN uses a cycle consistency loss to enable training without paired data. In other words, it can translate from one domain to another without a one-to-one mapping between the source and target domains.
+
+This opens up the possibility to do a lot of interesting tasks like photo enhancement, image colorization, style transfer, etc. All you need is the source and the target dataset (which is simply a directory of images).
+
+![Output Image 1](img/921588a88d035dfd280c98f420033345.png) ![Output Image 2](img/f89cb56c5d3c77f56118a42ca7fb3936.png)
+
+## Set up the input pipeline
+
+Install the [tensorflow_examples](https://github.com/tensorflow/examples) package, which enables importing the generator and the discriminator.
+
+```py
+pip install -q git+https://github.com/tensorflow/examples.git
+
+```
+
+```py
+WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
+You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.
+
+```
+
+```py
+import tensorflow as tf
+```
+
+```py
+import tensorflow_datasets as tfds
+from tensorflow_examples.models.pix2pix import pix2pix
+
+import os
+import time
+import matplotlib.pyplot as plt
+from IPython.display import clear_output
+
+tfds.disable_progress_bar()
+AUTOTUNE = tf.data.experimental.AUTOTUNE
+```
+
+## Input pipeline
+
+This tutorial trains a model to translate images of horses into images of zebras. You can find this dataset and similar ones [here](https://tensorflow.google.cn/datasets/datasets#cycle_gan).
+
+As mentioned in the [paper](https://arxiv.org/abs/1703.10593), apply random jittering and mirroring to the training dataset. These are image augmentation techniques that help avoid overfitting.
+
+This is similar to what was done in [pix2pix](https://tensorflow.google.cn/tutorials/generative/pix2pix#load_the_dataset).
+
+* In random jittering, the image is resized to `286 x 286` and then randomly cropped to `256 x 256`.
+* In random mirroring, the image is randomly flipped horizontally, i.e., left to right.
+
+```py
+dataset, metadata = tfds.load('cycle_gan/horse2zebra',
+                              with_info=True, as_supervised=True)
+
+train_horses, train_zebras = dataset['trainA'], dataset['trainB']
+test_horses, test_zebras = dataset['testA'], dataset['testB']
+```
+
+```py
+Downloading and preparing dataset cycle_gan/horse2zebra/2.0.0 (download: 111.45 MiB, generated: Unknown size, total: 111.45 MiB) to /home/kbuilder/tensorflow_datasets/cycle_gan/horse2zebra/2.0.0...
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/cycle_gan/horse2zebra/2.0.0.incompleteNSW88L/cycle_gan-trainA.tfrecord
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/cycle_gan/horse2zebra/2.0.0.incompleteNSW88L/cycle_gan-trainB.tfrecord
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/cycle_gan/horse2zebra/2.0.0.incompleteNSW88L/cycle_gan-testA.tfrecord
+Shuffling and writing examples to /home/kbuilder/tensorflow_datasets/cycle_gan/horse2zebra/2.0.0.incompleteNSW88L/cycle_gan-testB.tfrecord
+Dataset cycle_gan downloaded and prepared to /home/kbuilder/tensorflow_datasets/cycle_gan/horse2zebra/2.0.0. Subsequent calls will reuse this data.
+
+```
+
+```py
+BUFFER_SIZE = 1000
+BATCH_SIZE = 1
+IMG_WIDTH = 256
+IMG_HEIGHT = 256
+```
+
+```py
+def random_crop(image):
+  cropped_image = tf.image.random_crop(
+      image, size=[IMG_HEIGHT, IMG_WIDTH, 3])
+
+  return cropped_image
+```
+
+```py
+# Normalize the images to the range [-1, 1].
+def normalize(image):
+  image = tf.cast(image, tf.float32)
+  image = (image / 127.5) - 1
+  return image
+```
+
+```py
+def random_jitter(image):
+  # Resize to 286 x 286 x 3
+  image = tf.image.resize(image, [286, 286],
+                          method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
+
+  # Randomly crop to 256 x 256 x 3
+  image = random_crop(image)
+
+  # Random mirroring
+  image = tf.image.random_flip_left_right(image)
+
+  return image
+```
+
+```py
+def preprocess_image_train(image, label):
+  image = random_jitter(image)
+  image = normalize(image)
+  return image
+```
+
+```py
+def preprocess_image_test(image, label):
+  image = normalize(image)
+  return image
+```
+
+```py
+train_horses = train_horses.map(
+    preprocess_image_train, num_parallel_calls=AUTOTUNE).cache().shuffle(
+    BUFFER_SIZE).batch(1)
+
+train_zebras = train_zebras.map(
+    preprocess_image_train, num_parallel_calls=AUTOTUNE).cache().shuffle(
+    BUFFER_SIZE).batch(1)
+
+test_horses = test_horses.map(
+    preprocess_image_test, num_parallel_calls=AUTOTUNE).cache().shuffle(
+    BUFFER_SIZE).batch(1)
+
+test_zebras = test_zebras.map(
+    preprocess_image_test, num_parallel_calls=AUTOTUNE).cache().shuffle(
+    BUFFER_SIZE).batch(1)
+```
+
+```py
+sample_horse = next(iter(train_horses))
+sample_zebra = next(iter(train_zebras))
+```
+
+```py
+plt.subplot(121)
+plt.title('Horse')
+plt.imshow(sample_horse[0] * 0.5 + 0.5)
+
+plt.subplot(122)
+plt.title('Horse with random jitter')
+plt.imshow(random_jitter(sample_horse[0]) * 0.5 + 0.5)
+```
+
+```py
+
+
+```
+
+![png](img/6b843e3001e6a49928fc35d8af4c843d.png)
+
+```py
+plt.subplot(121)
+plt.title('Zebra')
+plt.imshow(sample_zebra[0] * 0.5 + 0.5)
+
+plt.subplot(122)
+plt.title('Zebra with random jitter')
+plt.imshow(random_jitter(sample_zebra[0]) * 0.5 + 0.5)
+```
+
+```py
+
+
+```
+
+![png](img/34e85547487e77a52b9e494a05fdc8f8.png)
+
+## Import and reuse the Pix2Pix models
+
+Import the generator and the discriminator from [Pix2Pix](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/pix2pix/pix2pix.py) via the installed [tensorflow_examples](https://github.com/tensorflow/examples) package.
+
+The model architecture used in this tutorial is very similar to the one used in [pix2pix](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/pix2pix/pix2pix.py). Some of the differences are:
+
+* CycleGAN uses [instance normalization](https://arxiv.org/abs/1607.08022) instead of [batch normalization](https://arxiv.org/abs/1502.03167).
+* The [CycleGAN paper](https://arxiv.org/abs/1703.10593) uses a modified `resnet`-based generator. For simplicity, this tutorial uses a modified `unet` generator.
+
+Two generators (G and F) and two discriminators (X and Y) are trained here.
+
+* Generator `G` learns to transform image `X` into image `Y`. $(G: X -> Y)$
+* Generator `F` learns to transform image `Y` into image `X`. $(F: Y -> X)$
+* Discriminator `D_X` learns to differentiate between image `X` and the generated image `X` (`F(Y)`).
+* Discriminator `D_Y` learns to differentiate between image `Y` and the generated image `Y` (`G(X)`).
+
+![Cyclegan model](img/141e262e42c195dfe1174f7824ff4c3c.png)
+
+```py
+OUTPUT_CHANNELS = 3
+
+generator_g = pix2pix.unet_generator(OUTPUT_CHANNELS, norm_type='instancenorm')
+generator_f = pix2pix.unet_generator(OUTPUT_CHANNELS, norm_type='instancenorm')
+
+discriminator_x = pix2pix.discriminator(norm_type='instancenorm', target=False)
+discriminator_y = pix2pix.discriminator(norm_type='instancenorm', target=False)
+```
+
+```py
+to_zebra = generator_g(sample_horse)
+to_horse = generator_f(sample_zebra)
+plt.figure(figsize=(8, 8))
+contrast = 8
+
+imgs = [sample_horse, to_zebra, sample_zebra, to_horse]
+title = ['Horse', 'To Zebra', 'Zebra', 'To Horse']
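+# Even indices hold real samples; odd indices hold the (still untrained)
+# generator outputs, plotted with extra contrast so faint patterns are visible.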
+
+for i in range(len(imgs)):
+  plt.subplot(2, 2, i+1)
+  plt.title(title[i])
+  if i % 2 == 0:
+    plt.imshow(imgs[i][0] * 0.5 + 0.5)
+  else:
+    plt.imshow(imgs[i][0] * 0.5 * contrast + 0.5)
+plt.show()
+```
+
+```py
+WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
+WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
+
+```
+
+![png](img/e2143b6a00159c480e078bcbc7c8c72b.png)
+
+```py
+plt.figure(figsize=(8, 8))
+
+plt.subplot(121)
+plt.title('Is a real zebra?')
+plt.imshow(discriminator_y(sample_zebra)[0, ..., -1], cmap='RdBu_r')
+
+plt.subplot(122)
+plt.title('Is a real horse?')
+plt.imshow(discriminator_x(sample_horse)[0, ..., -1], cmap='RdBu_r')
+
+plt.show()
+```
+
+![png](img/6637dace2ef4faea4a327361aec7c4ae.png)
+
+## Loss functions
+
+In CycleGAN there is no paired data to train on, so there is no guarantee that an input `x` and target `y` pair is meaningful during training. Thus, in order to enforce that the network learns the correct mapping, the authors propose the cycle consistency loss.
+
+The discriminator loss and the generator loss are similar to the ones used in [pix2pix](https://google.tensorflow.cn/tutorials/generative/pix2pix#define_the_loss_functions_and_the_optimizer).
+
+```py
+LAMBDA = 10
+```
+
+```py
+loss_obj = tf.keras.losses.BinaryCrossentropy(from_logits=True)
+```
+
+```py
+def discriminator_loss(real, generated):
+  real_loss = loss_obj(tf.ones_like(real), real)
+
+  generated_loss = loss_obj(tf.zeros_like(generated), generated)
+
+  total_disc_loss = real_loss + generated_loss
+
+  return total_disc_loss * 0.5
+```
+
+```py
+def generator_loss(generated):
+  return loss_obj(tf.ones_like(generated), generated)
+```
+
+Cycle consistency means the result should be close to the original input. For example, if one translates a sentence from English to French and then translates it back from French to English, the resulting sentence should be the same as the original.
+
+In the cycle consistency loss,
+
+* Image $X$ is passed via generator $G$, which yields the generated image $\hat{Y}$.
+* The generated image $\hat{Y}$ is passed via generator $F$, which yields the cycled image $\hat{X}$.
+* The mean absolute error is calculated between $X$ and $\hat{X}$.
+
+$$forward\ cycle\ consistency\ loss: X -> G(X) -> F(G(X)) \sim \hat{X}$$
+
+$$backward\ cycle\ consistency\ loss: Y -> F(Y) -> G(F(Y)) \sim \hat{Y}$$
+
+![Cycle loss](img/4aa12ddc0a8f44acc45b9ed9dc9055bf.png)
+
+```py
+def calc_cycle_loss(real_image, cycled_image):
+  loss1 = tf.reduce_mean(tf.abs(real_image - cycled_image))
+
+  return LAMBDA * loss1
+```
+
+As shown above, generator $G$ is responsible for translating image $X$ into image $Y$. The identity loss says that if you feed image $Y$ to generator $G$, it should yield the real image $Y$ or something close to it.
+
+$$Identity\ loss = |G(Y) - Y| + |F(X) - X|$$
+
+```py
+def identity_loss(real_image, same_image):
+  loss = tf.reduce_mean(tf.abs(real_image - same_image))
+  return LAMBDA * 0.5 * loss
+```
+
+Initialize the optimizers for all the generators and discriminators.
+
+```py
+generator_g_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
+generator_f_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
+
+discriminator_x_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
+discriminator_y_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
+```
+
+## Checkpoints
+
+```py
+checkpoint_path = "./checkpoints/train"
+
+ckpt = tf.train.Checkpoint(generator_g=generator_g,
+                           generator_f=generator_f,
+                           discriminator_x=discriminator_x,
+                           discriminator_y=discriminator_y,
+                           generator_g_optimizer=generator_g_optimizer,
+                           generator_f_optimizer=generator_f_optimizer,
+                           discriminator_x_optimizer=discriminator_x_optimizer,
+                           discriminator_y_optimizer=discriminator_y_optimizer)
+
+ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)
+
+# If a checkpoint exists, restore the latest one.
+if ckpt_manager.latest_checkpoint:
+  ckpt.restore(ckpt_manager.latest_checkpoint)
+  print ('Latest checkpoint restored!!')
+```
+
+## Training
+
+Note: This example model is trained for fewer epochs (40) than the paper (200) to keep the training time reasonable for this tutorial. The predictions may be less accurate.
+
+```py
+EPOCHS = 40
+```
+
+```py
+def generate_images(model, test_input):
+  prediction = model(test_input)
+
+  plt.figure(figsize=(12, 12))
+
+  display_list = [test_input[0], prediction[0]]
+  title = ['Input Image', 'Predicted Image']
+
+  for i in range(2):
+    plt.subplot(1, 2, i+1)
+    plt.title(title[i])
+    # 获取范围在 [0, 1] 之间的像素值以绘制它。
+    plt.imshow(display_list[i] * 0.5 + 0.5)
+    plt.axis('off')
+  plt.show()
+```
+
+尽管训练循环看起来很复杂,其实只包含四个基本步骤:
+
+* 获取预测。
+* 计算损失值。
+* 使用反向传播计算梯度。
+* 将梯度应用于优化器。
+
+```py
+@tf.function
+def train_step(real_x, real_y):
+  # persistent 设置为 True,因为 GradientTape 被多次应用于计算梯度。
+  with tf.GradientTape(persistent=True) as tape:
+    # 生成器 G 转换 X -> Y。
+    # 生成器 F 转换 Y -> X。
+
+    fake_y = generator_g(real_x, training=True)
+    cycled_x = generator_f(fake_y, training=True)
+
+    fake_x = generator_f(real_y, training=True)
+    cycled_y = generator_g(fake_x, training=True)
+
+    # same_x 和 same_y 用于恒等损失。
+    same_x = generator_f(real_x, training=True)
+    same_y = generator_g(real_y, training=True)
+
+    disc_real_x = discriminator_x(real_x, training=True)
+    disc_real_y = discriminator_y(real_y, training=True)
+
+    disc_fake_x = discriminator_x(fake_x, training=True)
+    disc_fake_y = discriminator_y(fake_y, training=True)
+
+    # 计算损失。
+    gen_g_loss = generator_loss(disc_fake_y)
+    gen_f_loss = generator_loss(disc_fake_x)
+
+    total_cycle_loss = calc_cycle_loss(real_x, cycled_x) + calc_cycle_loss(real_y, cycled_y)
+
+    # 总生成器损失 = 对抗性损失 + 循环损失。
+    total_gen_g_loss = gen_g_loss + total_cycle_loss + identity_loss(real_y, same_y)
+    total_gen_f_loss = gen_f_loss + total_cycle_loss + identity_loss(real_x, same_x)
+
+    disc_x_loss = discriminator_loss(disc_real_x, disc_fake_x)
+    disc_y_loss = discriminator_loss(disc_real_y, disc_fake_y)
+
+  # 计算生成器和判别器的梯度。
+  generator_g_gradients = tape.gradient(total_gen_g_loss,
+                                        generator_g.trainable_variables)
+  generator_f_gradients = tape.gradient(total_gen_f_loss,
+                                        generator_f.trainable_variables)
+
+  discriminator_x_gradients = tape.gradient(disc_x_loss,
+                                            discriminator_x.trainable_variables)
+  discriminator_y_gradients = tape.gradient(disc_y_loss,
+                                            discriminator_y.trainable_variables)
+
+  # 将梯度应用于优化器。
+  generator_g_optimizer.apply_gradients(zip(generator_g_gradients,
+                                            generator_g.trainable_variables))
+
+  generator_f_optimizer.apply_gradients(zip(generator_f_gradients,
+                                            generator_f.trainable_variables))
+
+  discriminator_x_optimizer.apply_gradients(zip(discriminator_x_gradients,
+                                                discriminator_x.trainable_variables))
+
+  discriminator_y_optimizer.apply_gradients(zip(discriminator_y_gradients,
+                                                discriminator_y.trainable_variables))
+```
+
+```py
+for epoch in range(EPOCHS):
+  start = time.time()
+
+  n = 0
+  for image_x, image_y in tf.data.Dataset.zip((train_horses, train_zebras)):
+    train_step(image_x, image_y)
+    if n % 10 == 0:
+      print ('.', end='')
+    n+=1
+
+  clear_output(wait=True)
+  # 使用一致的图像(sample_horse),以便模型的进度清晰可见。
+  generate_images(generator_g, sample_horse)
+
+  if (epoch + 1) % 5 == 0:
+    ckpt_save_path = ckpt_manager.save()
+    print ('Saving checkpoint for epoch {} at {}'.format(epoch+1,
+                                                         ckpt_save_path))
+
+  print ('Time taken for epoch {} is {} sec\n'.format(epoch + 1,
+                                                      time.time()-start))
+```
+
+![png](img/c2a117375845a6a7d1c87b2c84de54e8.png)
+
+```py
+Saving checkpoint for epoch 40 at ./checkpoints/train/ckpt-8
+Time taken for epoch 40 is 175.41231870651245 sec
+
+```
+
+## 使用测试数据集进行生成
+
+```py
+# 在测试数据集上运行训练的模型。
+for inp in test_horses.take(5):
+  generate_images(generator_g, inp)
+```
+
+![png](img/d68f92600680dfc45d965045e843ec4d.png)
+
+![png](img/0ba1e7316ba7e228576bbcd85280c309.png)
+
+![png](img/33043d022bdb4912f00756593d5b4a7c.png)
+
+![png](img/032dc17ad0509afd4505858b1f0c7d19.png)
+
+![png](img/d653a0d6330958d36f31b35e1410ff6d.png)
+
+## 下一步
+
+本教程展示了如何基于 [Pix2Pix](https://tensorflow.google.cn/tutorials/generative/pix2pix) 教程中实现的生成器和判别器来实现 CycleGAN。下一步,您可以尝试使用 [TensorFlow 数据集](https://tensorflow.google.cn/datasets/datasets#cycle_gan)中提供的其他数据集。
+
+您也可以训练更多的 epoch 以改进结果,或者实现[论文](https://arxiv.org/abs/1703.10593)中所使用的修改版 ResNet 生成器,来代替这里使用的 U-Net 生成器。
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/065.md b/Tensorflow/TensorFlow2.0/065.md
new file mode 100644
index 00000000..0fd7c72c
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/065.md
@@ -0,0 +1,186 @@
+# Adversarial example using FGSM
+
+> 原文:[https://tensorflow.google.cn/tutorials/generative/adversarial_fgsm](https://tensorflow.google.cn/tutorials/generative/adversarial_fgsm)
+
+
+
+This tutorial creates an *adversarial example* using the Fast Gradient Sign Method (FGSM) attack as described in [Explaining and Harnessing Adversarial Examples](https://arxiv.org/abs/1412.6572) by Goodfellow *et al*. This was one of the first and most popular attacks to fool a neural network.
+
+## What is an adversarial example?
+
+Adversarial examples are specialised inputs created with the purpose of confusing a neural network, resulting in the misclassification of a given input. These notorious inputs are indistinguishable to the human eye, but cause the network to fail to identify the contents of the image. There are several types of such attacks; here, the focus is on the fast gradient sign method attack, a *white box* attack whose goal is to ensure misclassification. A white box attack is one where the attacker has complete access to the model being attacked. One of the most famous examples of an adversarial image, shown below, is taken from the aforementioned paper.
+
+![Adversarial Example](img/ac69959225a206f2b2c5ed2e33218511.png)
+
+Here, starting with the image of a panda, the attacker adds small perturbations (distortions) to the original image, which results in the model labelling this image as a gibbon, with high confidence. The process of adding these perturbations is explained below.
+
+## Fast gradient sign method
+
+The fast gradient sign method works by using the gradients of the neural network to create an adversarial example. For an input image, the method uses the gradients of the loss with respect to the input image to create a new image that maximises the loss. This new image is called the adversarial image. This can be summarised using the following expression:
+
+$$adv\_x = x + \epsilon*\text{sign}(\nabla_xJ(\theta, x, y))$$
+
+where
+
+* adv_x : Adversarial image.
+* x : Original input image.
+* y : Original input label.
+* $\epsilon$ : Multiplier to ensure the perturbations are small.
+* $\theta$ : Model parameters.
+* $J$ : Loss.
+
+An intriguing property here is the fact that the gradients are taken with respect to the input image. This is done because the objective is to create an image that maximises the loss. A method to accomplish this is to find how much each pixel in the image contributes to the loss value, and add a perturbation accordingly. This works pretty fast because it is easy to find how each input pixel contributes to the loss by using the chain rule and finding the required gradients. Hence, the gradients are taken with respect to the image.
In addition, the model is no longer being trained (thus the gradient is not taken with respect to the trainable variables, i.e., the model parameters), so the model parameters remain constant. The only goal is to fool an already trained model.
+
+So let's try and fool a pretrained model. In this tutorial, the model is the [MobileNetV2](https://tensorflow.google.cn/versions/r2.0/api_docs/python/tf/keras/applications/MobileNetV2) model, pretrained on [ImageNet](http://www.image-net.org/).
+
+```py
+import tensorflow as tf
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+
+mpl.rcParams['figure.figsize'] = (8, 8)
+mpl.rcParams['axes.grid'] = False
+```
+
+Let's load the pretrained MobileNetV2 model and the ImageNet class names.
+
+```py
+pretrained_model = tf.keras.applications.MobileNetV2(include_top=True,
+                                                     weights='imagenet')
+pretrained_model.trainable = False
+
+# ImageNet labels
+decode_predictions = tf.keras.applications.mobilenet_v2.decode_predictions
+```
+
+```py
+Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224.h5
+14540800/14536120 [==============================] - 0s 0us/step
+
+```
+
+```py
+# Helper function to preprocess the image so that it can be fed into MobileNetV2
+def preprocess(image):
+  image = tf.cast(image, tf.float32)
+  image = tf.image.resize(image, (224, 224))
+  image = tf.keras.applications.mobilenet_v2.preprocess_input(image)
+  image = image[None, ...]
+  return image
+
+# Helper function to extract labels from the probability vector
+def get_imagenet_label(probs):
+  return decode_predictions(probs, top=1)[0][0]
+```
+
+## Original image
+
+Let's use a sample image of a [Labrador Retriever](https://commons.wikimedia.org/wiki/File:YellowLabradorLooking_new.jpg) by Mirko [CC-BY-SA 3.0](https://creativecommons.org/licenses/by-sa/3.0/) from Wikimedia Commons and create adversarial examples from it. The first step is to preprocess it so that it can be fed as an input to the MobileNetV2 model.
+
+```py
+image_path = tf.keras.utils.get_file('YellowLabradorLooking_new.jpg', 'https://storage.googleapis.com/download.tensorflow.org/example_images/YellowLabradorLooking_new.jpg')
+image_raw = tf.io.read_file(image_path)
+image = tf.image.decode_image(image_raw)
+
+image = preprocess(image)
+image_probs = pretrained_model.predict(image)
+```
+
+```py
+Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/YellowLabradorLooking_new.jpg
+90112/83281 [================================] - 0s 0us/step
+
+```
+
+Let's have a look at the image.
+
+```py
+plt.figure()
+plt.imshow(image[0]*0.5+0.5) # To change [-1, 1] to [0,1]
+_, image_class, class_confidence = get_imagenet_label(image_probs)
+plt.title('{} : {:.2f}% Confidence'.format(image_class, class_confidence*100))
+plt.show()
+```
+
+```py
+Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
+40960/35363 [==================================] - 0s 0us/step
+
+```
+
+![png](img/1c498df577bb9dd0638c25332e7b68a1.png)
+
+## Create the adversarial image
+
+### Implementing fast gradient sign method
+
+The first step is to create perturbations which will be used to distort the original image, resulting in an adversarial image. As mentioned, for this task, the gradients are taken with respect to the image.
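+
+As a tiny illustration of the `tf.sign` operation at the heart of FGSM (hypothetical gradient values, not tied to the model):
+
+```py
+# tf.sign keeps only the direction of each gradient component and
+# discards its magnitude: negative -> -1, zero -> 0, positive -> +1.
+grads = tf.constant([-0.3, 0.0, 0.7])
+print(tf.sign(grads).numpy())  # [-1.  0.  1.]
+```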
+
+```py
+loss_object = tf.keras.losses.CategoricalCrossentropy()
+
+def create_adversarial_pattern(input_image, input_label):
+  with tf.GradientTape() as tape:
+    tape.watch(input_image)
+    prediction = pretrained_model(input_image)
+    loss = loss_object(input_label, prediction)
+
+  # Get the gradients of the loss w.r.t. the input image.
+  gradient = tape.gradient(loss, input_image)
+  # Get the sign of the gradients to create the perturbation
+  signed_grad = tf.sign(gradient)
+  return signed_grad
+```
+
+The resulting perturbations can also be visualised.
+
+```py
+# Get the input label of the image.
+labrador_retriever_index = 208
+label = tf.one_hot(labrador_retriever_index, image_probs.shape[-1])
+label = tf.reshape(label, (1, image_probs.shape[-1]))
+
+perturbations = create_adversarial_pattern(image, label)
+plt.imshow(perturbations[0]*0.5+0.5); # To change [-1, 1] to [0,1]
+```
+
+![png](img/e3ffe6a29488821b01dd98cba6690e5f.png)
+
+Let's try this out for different values of epsilon and observe the resultant image. You'll notice that as the value of epsilon is increased, it becomes easier to fool the network. However, this comes as a trade-off: the perturbations become more identifiable.
+
+```py
+def display_images(image, description):
+  _, label, confidence = get_imagenet_label(pretrained_model.predict(image))
+  plt.figure()
+  plt.imshow(image[0]*0.5+0.5)
+  plt.title('{} \n {} : {:.2f}% Confidence'.format(description,
+                                                   label, confidence*100))
+  plt.show()
+```
+
+```py
+epsilons = [0, 0.01, 0.1, 0.15]
+descriptions = [('Epsilon = {:0.3f}'.format(eps) if eps else 'Input')
+                for eps in epsilons]
+
+for i, eps in enumerate(epsilons):
+  adv_x = image + eps*perturbations
+  adv_x = tf.clip_by_value(adv_x, -1, 1)
+  display_images(adv_x, descriptions[i])
+```
+
+![png](img/8aa1d48ada55b367535dbe964ad2cd79.png)
+
+![png](img/4bebff99ef427fe52c09346e6f6b1971.png)
+
+![png](img/7fb60d07e3fa3bd88b02197b1f12223f.png)
+
+![png](img/66503afc507478f400022c625de3c878.png)
+
+## Next steps
+
+Now that you know about adversarial attacks, try this out on different datasets and different architectures. You may also create and train your own model, and then attempt to fool it using the same method. You can also try and see how the confidence in predictions varies as you change epsilon.
+
+Though powerful, the attack shown in this tutorial was just the start of research into adversarial attacks, and there have been multiple papers creating more powerful attacks since then. In addition to adversarial attacks, research has also led to the creation of defenses, which aim at creating robust machine learning models. You may review this [survey paper](https://arxiv.org/abs/1810.00069) for a comprehensive list of adversarial attacks and defenses.
+
+For many more implementations of adversarial attacks and defenses, you may want to see the adversarial example library [CleverHans](https://github.com/tensorflow/cleverhans).
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/066.md b/Tensorflow/TensorFlow2.0/066.md
new file mode 100644
index 00000000..0aa4c4bf
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/066.md
@@ -0,0 +1,632 @@
+# Intro to Autoencoders
+
+> 原文:[https://tensorflow.google.cn/tutorials/generative/autoencoder](https://tensorflow.google.cn/tutorials/generative/autoencoder)
+
+This tutorial introduces autoencoders with three examples: the basics, image denoising, and anomaly detection.
+
+An autoencoder is a special type of neural network that is trained to copy its input to its output. For example, given an image of a handwritten digit, an autoencoder first encodes the image into a lower dimensional latent representation, then decodes the latent representation back to an image. An autoencoder learns to compress the data while minimizing the reconstruction error.
+
+To learn more about autoencoders, please consider reading chapter 14 from [Deep Learning](https://www.deeplearningbook.org/) by Ian Goodfellow, Yoshua Bengio, and Aaron Courville.
+
+## Import TensorFlow and other libraries
+
+```py
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+
+from sklearn.metrics import accuracy_score, precision_score, recall_score
+from sklearn.model_selection import train_test_split
+from tensorflow.keras import layers, losses
+from tensorflow.keras.datasets import fashion_mnist
+from tensorflow.keras.models import Model
+```
+
+## Load the dataset
+
+To start, you will train the basic autoencoder using the Fashion MNIST dataset. Each image in this dataset is 28x28 pixels.
+
+```py
+(x_train, _), (x_test, _) = fashion_mnist.load_data()
+
+x_train = x_train.astype('float32') / 255.
+x_test = x_test.astype('float32') / 255.
+
+print (x_train.shape)
+print (x_test.shape)
+```
+
+```py
+Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
+32768/29515 [=================================] - 0s 0us/step
+Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
+26427392/26421880 [==============================] - 0s 0us/step
+Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
+8192/5148 [===============================================] - 0s 0us/step
+Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
+4423680/4422102 [==============================] - 0s 0us/step
+(60000, 28, 28)
+(10000, 28, 28)
+
+```
+
+## First example: Basic autoencoder
+
+![Basic autoencoder results](img/ee409d59cd41f3de0f02655abfc4d0c0.png)
+
+Define an autoencoder with two Dense layers: an `encoder`, which compresses the images into a 64 dimensional latent vector, and a `decoder`, which reconstructs the original image from the latent space.
+
+To define your model, use the [Keras Model Subclassing API](https://tensorflow.google.cn/guide/keras/custom_layers_and_models).
+
+```py
+latent_dim = 64
+
+class Autoencoder(Model):
+  def __init__(self, latent_dim):
+    super(Autoencoder, self).__init__()
+    self.latent_dim = latent_dim
+    self.encoder = tf.keras.Sequential([
+      layers.Flatten(),
+      layers.Dense(latent_dim, activation='relu'),
+    ])
+    self.decoder = tf.keras.Sequential([
+      layers.Dense(784, activation='sigmoid'),
+      layers.Reshape((28, 28))
+    ])
+
+  def call(self, x):
+    encoded = self.encoder(x)
+    decoded = self.decoder(encoded)
+    return decoded
+
+autoencoder = Autoencoder(latent_dim)
+```
+
+```py
+autoencoder.compile(optimizer='adam', loss=losses.MeanSquaredError())
+```
+
+Train the model using `x_train` as both the input and the target. The `encoder` will learn to compress the dataset from 784 dimensions to the latent space, and the `decoder` will learn to reconstruct the original images.
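+
+As a quick sanity check before training, you can confirm that a forward pass returns a tensor with the same spatial shape as the input (a minimal sketch, not part of the original tutorial):
+
+```py
+# The model is built lazily on its first call; a single test image in
+# should yield a single 28x28 reconstruction out.
+reconstruction = autoencoder(x_test[:1])  # input shape (1, 28, 28)
+print(reconstruction.shape)               # expected: (1, 28, 28)
+```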
+ +```py +autoencoder.fit(x_train, x_train, + epochs=10, + shuffle=True, + validation_data=(x_test, x_test)) +``` + +```py +Epoch 1/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.0239 - val_loss: 0.0132 +Epoch 2/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.0116 - val_loss: 0.0105 +Epoch 3/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.0100 - val_loss: 0.0097 +Epoch 4/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.0095 - val_loss: 0.0094 +Epoch 5/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.0092 - val_loss: 0.0092 +Epoch 6/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.0090 - val_loss: 0.0091 +Epoch 7/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.0090 - val_loss: 0.0090 +Epoch 8/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.0089 - val_loss: 0.0090 +Epoch 9/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.0088 - val_loss: 0.0090 +Epoch 10/10 +1875/1875 [==============================] - 3s 2ms/step - loss: 0.0088 - val_loss: 0.0088 + + + +``` + +Now that the model is trained, let's test it by encoding and decoding images from the test set. + +```py +encoded_imgs = autoencoder.encoder(x_test).numpy() +decoded_imgs = autoencoder.decoder(encoded_imgs).numpy() +``` + +```py +n = 10 +plt.figure(figsize=(20, 4)) +for i in range(n): + # display original + ax = plt.subplot(2, n, i + 1) + plt.imshow(x_test[i]) + plt.title("original") + plt.gray() + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + + # display reconstruction + ax = plt.subplot(2, n, i + 1 + n) + plt.imshow(decoded_imgs[i]) + plt.title("reconstructed") + plt.gray() + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) +plt.show() +``` + +![png](img/c239b9ad6cf9b5f72e3d6d37fd17b9d1.png) + +## Second example: Image denoising + +![Image denoising results](img/9461d6f88eb7d390eea25f1f034101b5.png) + +An autoencoder can also be trained to remove noise from images. In the following section, you will create a noisy version of the Fashion MNIST dataset by applying random noise to each image. You will then train an autoencoder using the noisy image as input, and the original image as the target. + +Let's reimport the dataset to omit the modifications made earlier. + +```py +(x_train, _), (x_test, _) = fashion_mnist.load_data() +``` + +```py +x_train = x_train.astype('float32') / 255. +x_test = x_test.astype('float32') / 255. + +x_train = x_train[..., tf.newaxis] +x_test = x_test[..., tf.newaxis] + +print(x_train.shape) +``` + +```py +(60000, 28, 28, 1) + +``` + +Adding random noise to the images + +```py +noise_factor = 0.2 +x_train_noisy = x_train + noise_factor * tf.random.normal(shape=x_train.shape) +x_test_noisy = x_test + noise_factor * tf.random.normal(shape=x_test.shape) + +x_train_noisy = tf.clip_by_value(x_train_noisy, clip_value_min=0., clip_value_max=1.) +x_test_noisy = tf.clip_by_value(x_test_noisy, clip_value_min=0., clip_value_max=1.) +``` + +Plot the noisy images. 
+ +```py +n = 10 +plt.figure(figsize=(20, 2)) +for i in range(n): + ax = plt.subplot(1, n, i + 1) + plt.title("original + noise") + plt.imshow(tf.squeeze(x_test_noisy[i])) + plt.gray() +plt.show() +``` + +![png](img/6c3e8444c64a773d92f67fd4f07992b7.png) + +### Define a convolutional autoencoder + +In this example, you will train a convolutional autoencoder using [Conv2D](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Conv2D) layers in the `encoder`, and [Conv2DTranspose](https://tensorflow.google.cn/api_docs/python/tf/keras/layers/Conv2DTranspose) layers in the `decoder`. + +```py +class Denoise(Model): + def __init__(self): + super(Denoise, self).__init__() + self.encoder = tf.keras.Sequential([ + layers.Input(shape=(28, 28, 1)), + layers.Conv2D(16, (3,3), activation='relu', padding='same', strides=2), + layers.Conv2D(8, (3,3), activation='relu', padding='same', strides=2)]) + + self.decoder = tf.keras.Sequential([ + layers.Conv2DTranspose(8, kernel_size=3, strides=2, activation='relu', padding='same'), + layers.Conv2DTranspose(16, kernel_size=3, strides=2, activation='relu', padding='same'), + layers.Conv2D(1, kernel_size=(3,3), activation='sigmoid', padding='same')]) + + def call(self, x): + encoded = self.encoder(x) + decoded = self.decoder(encoded) + return decoded + +autoencoder = Denoise() +``` + +```py +autoencoder.compile(optimizer='adam', loss=losses.MeanSquaredError()) +``` + +```py +autoencoder.fit(x_train_noisy, x_train, + epochs=10, + shuffle=True, + validation_data=(x_test_noisy, x_test)) +``` + +```py +Epoch 1/10 +1875/1875 [==============================] - 26s 14ms/step - loss: 0.0182 - val_loss: 0.0112 +Epoch 2/10 +1875/1875 [==============================] - 25s 14ms/step - loss: 0.0100 - val_loss: 0.0093 +Epoch 3/10 +1875/1875 [==============================] - 25s 13ms/step - loss: 0.0089 - val_loss: 0.0087 +Epoch 4/10 +1875/1875 [==============================] - 26s 14ms/step - loss: 0.0085 - val_loss: 0.0084 +Epoch 5/10 +1875/1875 [==============================] - 25s 14ms/step - loss: 0.0083 - val_loss: 0.0083 +Epoch 6/10 +1875/1875 [==============================] - 26s 14ms/step - loss: 0.0082 - val_loss: 0.0082 +Epoch 7/10 +1875/1875 [==============================] - 25s 13ms/step - loss: 0.0081 - val_loss: 0.0081 +Epoch 8/10 +1875/1875 [==============================] - 25s 13ms/step - loss: 0.0081 - val_loss: 0.0080 +Epoch 9/10 +1875/1875 [==============================] - 25s 13ms/step - loss: 0.0080 - val_loss: 0.0080 +Epoch 10/10 +1875/1875 [==============================] - 25s 14ms/step - loss: 0.0079 - val_loss: 0.0080 + + + +``` + +Let's take a look at a summary of the encoder. Notice how the images are downsampled from 28x28 to 7x7. + +```py +autoencoder.encoder.summary() +``` + +```py +Model: "sequential_2" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d (Conv2D) (None, 14, 14, 16) 160 +_________________________________________________________________ +conv2d_1 (Conv2D) (None, 7, 7, 8) 1160 +================================================================= +Total params: 1,320 +Trainable params: 1,320 +Non-trainable params: 0 +_________________________________________________________________ + +``` + +The decoder upsamples the images back from 7x7 to 28x28. 
+ +```py +autoencoder.decoder.summary() +``` + +```py +Model: "sequential_3" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +conv2d_transpose (Conv2DTran (None, 14, 14, 8) 584 +_________________________________________________________________ +conv2d_transpose_1 (Conv2DTr (None, 28, 28, 16) 1168 +_________________________________________________________________ +conv2d_2 (Conv2D) (None, 28, 28, 1) 145 +================================================================= +Total params: 1,897 +Trainable params: 1,897 +Non-trainable params: 0 +_________________________________________________________________ + +``` + +Plotting both the noisy images and the denoised images produced by the autoencoder. + +```py +encoded_imgs = autoencoder.encoder(x_test).numpy() +decoded_imgs = autoencoder.decoder(encoded_imgs).numpy() +``` + +```py +n = 10 +plt.figure(figsize=(20, 4)) +for i in range(n): + + # display original + noise + ax = plt.subplot(2, n, i + 1) + plt.title("original + noise") + plt.imshow(tf.squeeze(x_test_noisy[i])) + plt.gray() + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + + # display reconstruction + bx = plt.subplot(2, n, i + n + 1) + plt.title("reconstructed") + plt.imshow(tf.squeeze(decoded_imgs[i])) + plt.gray() + bx.get_xaxis().set_visible(False) + bx.get_yaxis().set_visible(False) +plt.show() +``` + +![png](img/d6d4178e447bc9f8c984345c73202b01.png) + +## Third example: Anomaly detection + +## Overview + +In this example, you will train an autoencoder to detect anomalies on the [ECG5000 dataset](http://www.timeseriesclassification.com/description.php?Dataset=ECG5000). This dataset contains 5,000 [Electrocardiograms](https://en.wikipedia.org/wiki/Electrocardiography), each with 140 data points. You will use a simplified version of the dataset, where each example has been labeled either `0` (corresponding to an abnormal rhythm), or `1` (corresponding to a normal rhythm). You are interested in identifying the abnormal rhythms. + +**Note:** This is a labeled dataset, so you could phrase this as a supervised learning problem. The goal of this example is to illustrate anomaly detection concepts you can apply to larger datasets, where you do not have labels available (for example, if you had many thousands of normal rhythms, and only a small number of abnormal rhythms). + +How will you detect anomalies using an autoencoder? Recall that an autoencoder is trained to minimize reconstruction error. You will train an autoencoder on the normal rhythms only, then use it to reconstruct all the data. Our hypothesis is that the abnormal rhythms will have higher reconstruction error. You will then classify a rhythm as an anomaly if the reconstruction error surpasses a fixed threshold. + +### Load ECG data + +The dataset you will use is based on one from [timeseriesclassification.com](http://www.timeseriesclassification.com/description.php?Dataset=ECG5000). 
+
+```py
+# Download the dataset
+dataframe = pd.read_csv('http://storage.googleapis.com/download.tensorflow.org/data/ecg.csv', header=None)
+raw_data = dataframe.values
+dataframe.head()
+```
+
+
+
+```py
+# The last element contains the labels
+labels = raw_data[:, -1]
+
+# The other data points are the electrocardiogram data
+data = raw_data[:, 0:-1]
+
+train_data, test_data, train_labels, test_labels = train_test_split(
+    data, labels, test_size=0.2, random_state=21
+)
+```
+
+Normalize the data to `[0,1]`.
+
+```py
+min_val = tf.reduce_min(train_data)
+max_val = tf.reduce_max(train_data)
+
+train_data = (train_data - min_val) / (max_val - min_val)
+test_data = (test_data - min_val) / (max_val - min_val)
+
+train_data = tf.cast(train_data, tf.float32)
+test_data = tf.cast(test_data, tf.float32)
+```
+
+You will train the autoencoder using only the normal rhythms, which are labeled in this dataset as `1`. Separate the normal rhythms from the abnormal rhythms.
+
+```py
+train_labels = train_labels.astype(bool)
+test_labels = test_labels.astype(bool)
+
+normal_train_data = train_data[train_labels]
+normal_test_data = test_data[test_labels]
+
+anomalous_train_data = train_data[~train_labels]
+anomalous_test_data = test_data[~test_labels]
+```
+
+Plot a normal ECG.
+
+```py
+plt.grid()
+plt.plot(np.arange(140), normal_train_data[0])
+plt.title("A Normal ECG")
+plt.show()
+```
+
+![png](img/aef2c569f7fec52ed4d6e656dddb8da4.png)
+
+Plot an anomalous ECG.
+
+```py
+plt.grid()
+plt.plot(np.arange(140), anomalous_train_data[0])
+plt.title("An Anomalous ECG")
+plt.show()
+```
+
+![png](img/7e31e526f055ddde2fd0d3a4e5d60aef.png)
+
+### Build the model
+
+```py
+class AnomalyDetector(Model):
+  def __init__(self):
+    super(AnomalyDetector, self).__init__()
+    self.encoder = tf.keras.Sequential([
+      layers.Dense(32, activation="relu"),
+      layers.Dense(16, activation="relu"),
+      layers.Dense(8, activation="relu")])
+
+    self.decoder = tf.keras.Sequential([
+      layers.Dense(16, activation="relu"),
+      layers.Dense(32, activation="relu"),
+      layers.Dense(140, activation="sigmoid")])
+
+  def call(self, x):
+    encoded = self.encoder(x)
+    decoded = self.decoder(encoded)
+    return decoded
+
+autoencoder = AnomalyDetector()
+```
+
+```py
+autoencoder.compile(optimizer='adam', loss='mae')
+```
+
+Notice that the autoencoder is trained using only the normal ECGs, but is evaluated using the full test set.
+ +```py +history = autoencoder.fit(normal_train_data, normal_train_data, + epochs=20, + batch_size=512, + validation_data=(test_data, test_data), + shuffle=True) +``` + +```py +Epoch 1/20 +5/5 [==============================] - 0s 20ms/step - loss: 0.0582 - val_loss: 0.0534 +Epoch 2/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0564 - val_loss: 0.0519 +Epoch 3/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0540 - val_loss: 0.0508 +Epoch 4/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0514 - val_loss: 0.0491 +Epoch 5/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0482 - val_loss: 0.0467 +Epoch 6/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0448 - val_loss: 0.0449 +Epoch 7/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0414 - val_loss: 0.0429 +Epoch 8/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0380 - val_loss: 0.0413 +Epoch 9/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0345 - val_loss: 0.0400 +Epoch 10/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0316 - val_loss: 0.0390 +Epoch 11/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0293 - val_loss: 0.0382 +Epoch 12/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0276 - val_loss: 0.0379 +Epoch 13/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0262 - val_loss: 0.0370 +Epoch 14/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0251 - val_loss: 0.0366 +Epoch 15/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0244 - val_loss: 0.0359 +Epoch 16/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0237 - val_loss: 0.0355 +Epoch 17/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0231 - val_loss: 0.0352 +Epoch 18/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0225 - val_loss: 0.0345 +Epoch 19/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0219 - val_loss: 0.0343 +Epoch 20/20 +5/5 [==============================] - 0s 5ms/step - loss: 0.0214 - val_loss: 0.0341 + +``` + +```py +plt.plot(history.history["loss"], label="Training Loss") +plt.plot(history.history["val_loss"], label="Validation Loss") +plt.legend() +``` + +```py + + +``` + +![png](img/062d680b7bfc538f75dbd6e3d7562502.png) + +You will soon classify an ECG as anomalous if the reconstruction error is greater than one standard deviation from the normal training examples. First, let's plot a normal ECG from the training set, the reconstruction after it's encoded and decoded by the autoencoder, and the reconstruction error. + +```py +encoded_imgs = autoencoder.encoder(normal_test_data).numpy() +decoded_imgs = autoencoder.decoder(encoded_imgs).numpy() + +plt.plot(normal_test_data[0],'b') +plt.plot(decoded_imgs[0],'r') +plt.fill_between(np.arange(140), decoded_imgs[0], normal_test_data[0], color='lightcoral' ) +plt.legend(labels=["Input", "Reconstruction", "Error"]) +plt.show() +``` + +![png](img/8f8b815630d4213a923f492eacc9d2d0.png) + +Create a similar plot, this time for an anomalous test example. 
+
+```py
+encoded_imgs = autoencoder.encoder(anomalous_test_data).numpy()
+decoded_imgs = autoencoder.decoder(encoded_imgs).numpy()
+
+plt.plot(anomalous_test_data[0], 'b')
+plt.plot(decoded_imgs[0], 'r')
+plt.fill_between(np.arange(140), decoded_imgs[0], anomalous_test_data[0], color='lightcoral')
+plt.legend(labels=["Input", "Reconstruction", "Error"])
+plt.show()
+```
+
+![png](img/65e3cc57565dea4503cb5f3f7dca3035.png)
+
+### Detect anomalies
+
+Detect anomalies by calculating whether the reconstruction loss is greater than a fixed threshold. In this tutorial, you will calculate the mean absolute error for normal examples from the training set, then classify future examples as anomalous if the reconstruction error is higher than one standard deviation from the training set.
+
+Plot the reconstruction error on normal ECGs from the training set.
+
+```py
+reconstructions = autoencoder.predict(normal_train_data)
+train_loss = tf.keras.losses.mae(reconstructions, normal_train_data)
+
+plt.hist(train_loss, bins=50)
+plt.xlabel("Train loss")
+plt.ylabel("No of examples")
+plt.show()
+```
+
+![png](img/17b66fa7e9565fdeabc4fe4752bad60d.png)
+
+Choose a threshold value that is one standard deviation above the mean.
+
+```py
+threshold = np.mean(train_loss) + np.std(train_loss)
+print("Threshold: ", threshold)
+```
+
+```py
+Threshold:  0.033377893
+
+```
+
+**Note:** There are other strategies you could use to select a threshold value above which test examples should be classified as anomalous; the correct approach will depend on your dataset. You can learn more with the links at the end of this tutorial.
+
+If you examine the reconstruction error for the anomalous examples in the test set, you'll notice most have greater reconstruction error than the threshold. By varying the threshold, you can adjust the [precision](https://developers.google.cn/machine-learning/glossary#precision) and [recall](https://developers.google.cn/machine-learning/glossary#recall) of your classifier.
+
+```py
+reconstructions = autoencoder.predict(anomalous_test_data)
+test_loss = tf.keras.losses.mae(reconstructions, anomalous_test_data)
+
+plt.hist(test_loss, bins=50)
+plt.xlabel("Test loss")
+plt.ylabel("No of examples")
+plt.show()
+```
+
+![png](img/f9843723cb76f7e84a4d3e7435c3a2c0.png)
+
+Classify an ECG as an anomaly if the reconstruction error is greater than the threshold.
+
+```py
+def predict(model, data, threshold):
+  reconstructions = model(data)
+  loss = tf.keras.losses.mae(reconstructions, data)
+  # An example is classified as normal (True) when its reconstruction
+  # error stays below the threshold.
+  return tf.math.less(loss, threshold)
+
+def print_stats(predictions, labels):
+  print("Accuracy = {}".format(accuracy_score(labels, predictions)))
+  print("Precision = {}".format(precision_score(labels, predictions)))
+  print("Recall = {}".format(recall_score(labels, predictions)))
+```
+
+```py
+preds = predict(autoencoder, test_data, threshold)
+print_stats(preds, test_labels)
+```
+
+```py
+Accuracy = 0.944
+Precision = 0.9921875
+Recall = 0.9071428571428571
+
+```
+
+## Next steps
+
+To learn more about anomaly detection with autoencoders, check out this excellent [interactive example](https://anomagram.fastforwardlabs.com/#/) built with TensorFlow.js by Victor Dibia. For a real-world use case, you can learn how [Airbus Detects Anomalies in ISS Telemetry Data](https://blog.tensorflow.org/2020/04/how-airbus-detects-anomalies-iss-telemetry-data-tfx.html) using TensorFlow. To learn more about the basics, consider reading this [blog post](https://blog.keras.io/building-autoencoders-in-keras.html) by François Chollet.
For more details, check out chapter 14 from [Deep Learning](https://www.deeplearningbook.org/) by Ian Goodfellow, Yoshua Bengio, and Aaron Courville. \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/067.md b/Tensorflow/TensorFlow2.0/067.md new file mode 100644 index 00000000..34dd3103 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/067.md @@ -0,0 +1,329 @@ +# 卷积变分自编码器 + +> 原文:[https://tensorflow.google.cn/tutorials/generative/cvae](https://tensorflow.google.cn/tutorials/generative/cvae) + + + +**Note:** 我们的 TensorFlow 社区翻译了这些文档。因为社区翻译是尽力而为, 所以无法保证它们是最准确的,并且反映了最新的 [官方英文文档](https://tensorflow.google.cn/?hl=en)。如果您有改进此翻译的建议, 请提交 pull request 到 [tensorflow/docs](https://github.com/tensorflow/docs) GitHub 仓库。要志愿地撰写或者审核译文,请加入 [docs-zh-cn@tensorflow.org Google Group](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs-zh-cn)。 + +![训练过程中输出的演变](img/82444fa7539ed0a798d9a1de5aaf147b.png) + +本笔记演示了如何通过训练变分自编码器([1](https://arxiv.org/abs/1312.6114), [2](https://arxiv.org/abs/1401.4082))来生成手写数字图片。 + +```py +# 用于生成 gif +pip install -q imageio + +``` + +```py +WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available. +You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command. + +``` + +## 导入 Tensorflow 与其他库 + +```py +import tensorflow as tf + +import os +import time +import numpy as np +import glob +import matplotlib.pyplot as plt +import PIL +import imageio + +from IPython import display +``` + +## 加载 MNIST 数据集 + +每个 MNIST 图片最初都是包含 784 个整数的向量,每个整数取值都在 0-255 之间,表示像素的强度。我们在模型中使用伯努利分布对每个像素进行建模,并对数据集进行静态二值化。 + +```py +(train_images, _), (test_images, _) = tf.keras.datasets.mnist.load_data() +``` + +```py +train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype('float32') +test_images = test_images.reshape(test_images.shape[0], 28, 28, 1).astype('float32') + +# 标准化图片到区间 [0., 1.] 内 +train_images /= 255. +test_images /= 255. + +# 二值化 +train_images[train_images >= .5] = 1. +train_images[train_images < .5] = 0. +test_images[test_images >= .5] = 1. +test_images[test_images < .5] = 0. 
+```
+
+```py
+TRAIN_BUF = 60000
+BATCH_SIZE = 100
+
+TEST_BUF = 10000
+```
+
+## 使用 *tf.data* 来将数据分批和打乱
+
+```py
+train_dataset = tf.data.Dataset.from_tensor_slices(train_images).shuffle(TRAIN_BUF).batch(BATCH_SIZE)
+test_dataset = tf.data.Dataset.from_tensor_slices(test_images).shuffle(TEST_BUF).batch(BATCH_SIZE)
+```
+
+## 通过 *tf.keras.Sequential* 连接生成网络与推理网络
+
+在我们的 VAE 示例中,我们将两个小型的 ConvNet 用于生成和推断网络。由于这些神经网络较小,我们使用 [`tf.keras.Sequential`](https://tensorflow.google.cn/api_docs/python/tf/keras/Sequential) 来简化代码。在下面的描述中,令 $x$ 和 $z$ 分别表示观测值和潜在变量。
+
+### 生成网络
+
+这里定义了生成模型,该模型将潜在编码作为输入,并输出用于观测条件分布的参数,即 $p(x|z)$。另外,我们对潜在变量使用单位高斯先验 $p(z)$。
+
+### 推理网络
+
+这里定义了近似后验分布 $q(z|x)$,该后验分布以观测值作为输入,并输出用于潜在表示的条件分布的一组参数。在本示例中,我们仅将此分布建模为对角高斯模型。在这种情况下,推断网络将输出因式分解的高斯均值和对数方差参数(为了数值稳定性,使用对数方差而不是直接使用方差)。
+
+### 重参数化技巧
+
+在优化过程中,我们可以从 $q(z|x)$ 中采样,方法是首先从单位高斯采样,然后乘以标准差并加上均值。这样可以确保梯度能够通过样本传递到推理网络参数。
+
+### 网络架构
+
+对于推理网络,我们使用两个卷积层,后接一个全连接层。在生成网络中,我们使用一个全连接层,后接三个卷积转置层(在某些情况下也称为反卷积层)来镜像此体系结构。请注意,在训练 VAE 时避免使用批归一化(batch normalization)是一种常见的做法,因为使用小批量处理会导致额外的随机性,从而加剧随机抽样的不稳定性。
+
+```py
+class CVAE(tf.keras.Model):
+  def __init__(self, latent_dim):
+    super(CVAE, self).__init__()
+    self.latent_dim = latent_dim
+    self.inference_net = tf.keras.Sequential(
+      [
+          tf.keras.layers.InputLayer(input_shape=(28, 28, 1)),
+          tf.keras.layers.Conv2D(
+              filters=32, kernel_size=3, strides=(2, 2), activation='relu'),
+          tf.keras.layers.Conv2D(
+              filters=64, kernel_size=3, strides=(2, 2), activation='relu'),
+          tf.keras.layers.Flatten(),
+          # No activation
+          tf.keras.layers.Dense(latent_dim + latent_dim),
+      ]
+    )
+
+    self.generative_net = tf.keras.Sequential(
+        [
+          tf.keras.layers.InputLayer(input_shape=(latent_dim,)),
+          tf.keras.layers.Dense(units=7*7*32, activation=tf.nn.relu),
+          tf.keras.layers.Reshape(target_shape=(7, 7, 32)),
+          tf.keras.layers.Conv2DTranspose(
+              filters=64,
+              kernel_size=3,
+              strides=(2, 2),
+              padding="SAME",
+              activation='relu'),
+          tf.keras.layers.Conv2DTranspose(
+              filters=32,
+              kernel_size=3,
+              strides=(2, 2),
+              padding="SAME",
+              activation='relu'),
+          # No activation
+          tf.keras.layers.Conv2DTranspose(
+              filters=1, kernel_size=3, strides=(1, 1), padding="SAME"),
+        ]
+    )
+
+  @tf.function
+  def sample(self, eps=None):
+    if eps is None:
+      eps = tf.random.normal(shape=(100, self.latent_dim))
+    return self.decode(eps, apply_sigmoid=True)
+
+  def encode(self, x):
+    mean, logvar = tf.split(self.inference_net(x), num_or_size_splits=2, axis=1)
+    return mean, logvar
+
+  def reparameterize(self, mean, logvar):
+    eps = tf.random.normal(shape=mean.shape)
+    return eps * tf.exp(logvar * .5) + mean
+
+  def decode(self, z, apply_sigmoid=False):
+    logits = self.generative_net(z)
+    if apply_sigmoid:
+      probs = tf.sigmoid(logits)
+      return probs
+
+    return logits
+```
+
+## 定义损失函数和优化器
+
+VAE 通过最大化边际对数似然的证据下界(ELBO)进行训练:
+
+$$\log p(x) \ge \text{ELBO} = \mathbb{E}_{q(z|x)}\left[\log \frac{p(x, z)}{q(z|x)}\right].$$
+
+实际上,我们优化了此期望的单样本蒙特卡罗估计:
+
+$$\log p(x| z) + \log p(z) - \log q(z|x),$$
+
+其中 $z$ 从 $q(z|x)$ 中采样。
+
+**注意**:我们也可以解析地计算 KL 项,但为简单起见,这里我们将所有三个项都合并到蒙特卡罗估计器中。
+
+```py
+optimizer = tf.keras.optimizers.Adam(1e-4)
+
+def log_normal_pdf(sample, mean, logvar, raxis=1):
+  log2pi = tf.math.log(2. * np.pi)
+  return tf.reduce_sum(
+      -.5 * ((sample - mean) ** 2.
* tf.exp(-logvar) + logvar + log2pi), + axis=raxis) + +@tf.function +def compute_loss(model, x): + mean, logvar = model.encode(x) + z = model.reparameterize(mean, logvar) + x_logit = model.decode(z) + + cross_ent = tf.nn.sigmoid_cross_entropy_with_logits(logits=x_logit, labels=x) + logpx_z = -tf.reduce_sum(cross_ent, axis=[1, 2, 3]) + logpz = log_normal_pdf(z, 0., 0.) + logqz_x = log_normal_pdf(z, mean, logvar) + return -tf.reduce_mean(logpx_z + logpz - logqz_x) + +@tf.function +def compute_apply_gradients(model, x, optimizer): + with tf.GradientTape() as tape: + loss = compute_loss(model, x) + gradients = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) +``` + +## 训练 + +* 我们从迭代数据集开始 +* 在每次迭代期间,我们将图像传递给编码器,以获得近似后验 $q(z|x)$ 的一组均值和对数方差参数 +* 然后,我们应用 *重参数化技巧* 从 $q(z|x)$ 中采样 +* 最后,我们将重新参数化的样本传递给解码器,以获取生成分布 $p(x|z)$ 的 logit +* **注意:**由于我们使用的是由 keras 加载的数据集,其中训练集中有 6 万个数据点,测试集中有 1 万个数据点,因此我们在测试集上的最终 ELBO 略高于对 Larochelle 版 MNIST 使用动态二值化的文献中的报告结果。 + +## 生成图片 + +* 进行训练后,可以生成一些图片了 +* 我们首先从单位高斯先验分布 $p(z)$ 中采样一组潜在向量 +* 随后生成器将潜在样本 $z$ 转换为观测值的 logit,得到分布 $p(x|z)$ +* 这里我们画出伯努利分布的概率 + +```py +epochs = 100 +latent_dim = 50 +num_examples_to_generate = 16 + +# 保持随机向量恒定以进行生成(预测),以便更易于看到改进。 +random_vector_for_generation = tf.random.normal( + shape=[num_examples_to_generate, latent_dim]) +model = CVAE(latent_dim) +``` + +```py +def generate_and_save_images(model, epoch, test_input): + predictions = model.sample(test_input) + fig = plt.figure(figsize=(4,4)) + + for i in range(predictions.shape[0]): + plt.subplot(4, 4, i+1) + plt.imshow(predictions[i, :, :, 0], cmap='gray') + plt.axis('off') + + # tight_layout 最小化两个子图之间的重叠 + plt.savefig('image_at_epoch_{:04d}.png'.format(epoch)) + plt.show() +``` + +```py +generate_and_save_images(model, 0, random_vector_for_generation) + +for epoch in range(1, epochs + 1): + start_time = time.time() + for train_x in train_dataset: + compute_apply_gradients(model, train_x, optimizer) + end_time = time.time() + + if epoch % 1 == 0: + loss = tf.keras.metrics.Mean() + for test_x in test_dataset: + loss(compute_loss(model, test_x)) + elbo = -loss.result() + display.clear_output(wait=False) + print('Epoch: {}, Test set ELBO: {}, ' + 'time elapse for current epoch {}'.format(epoch, + elbo, + end_time - start_time)) + generate_and_save_images( + model, epoch, random_vector_for_generation) +``` + +```py +Epoch: 100, Test set ELBO: -77.80061340332031, time elapse for current epoch 1.6898043155670166 + +``` + +![png](img/25c5372b82b31daf5535e4f1571434a9.png) + +### 使用 epoch 编号显示图片 + +```py +def display_image(epoch_no): + return PIL.Image.open('image_at_epoch_{:04d}.png'.format(epoch_no)) +``` + +```py +plt.imshow(display_image(epochs)) +plt.axis('off')# 显示图片 +``` + +```py +(-0.5, 287.5, 287.5, -0.5) + +``` + +![png](img/74d6d6302722b19888cd2b8a076a9899.png) + +### 生成所有保存图片的 GIF + +```py +anim_file = 'cvae.gif' + +with imageio.get_writer(anim_file, mode='I') as writer: + filenames = glob.glob('image*.png') + filenames = sorted(filenames) + last = -1 + for i,filename in enumerate(filenames): + frame = 2*(i**0.5) + if round(frame) > round(last): + last = frame + else: + continue + image = imageio.imread(filename) + writer.append_data(image) + image = imageio.imread(filename) + writer.append_data(image) + +import IPython +if IPython.version_info >= (6,2,0,''): + display.Image(filename=anim_file) +``` + +如果您正使用 Colab,您可以使用以下代码下载动画。 + +```py +try: + from google.colab import files +except ImportError: + pass +else: + 
files.download(anim_file) +``` \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/068.md b/Tensorflow/TensorFlow2.0/068.md new file mode 100644 index 00000000..0d022968 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/068.md @@ -0,0 +1 @@ +# 可解释性 \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/069.md b/Tensorflow/TensorFlow2.0/069.md new file mode 100644 index 00000000..29538d6a --- /dev/null +++ b/Tensorflow/TensorFlow2.0/069.md @@ -0,0 +1,600 @@ +# Integrated gradients + +> 原文:[https://tensorflow.google.cn/tutorials/interpretability/integrated_gradients](https://tensorflow.google.cn/tutorials/interpretability/integrated_gradients) + + + +This tutorial demonstrates how to implement **Integrated Gradients (IG)**, an [Explainable AI](https://en.wikipedia.org/wiki/Explainable_artificial_intelligence) technique introduced in the paper [Axiomatic Attribution for Deep Networks](https://arxiv.org/abs/1703.01365). IG aims to explain the relationship between a model's predictions in terms of its features. It has many use cases including understanding feature importances, identifying data skew, and debugging model performance. + +IG has become a popular interpretability technique due to its broad applicability to any differentiable model (e.g. images, text, structured data), ease of implementation, theoretical justifications, and computational efficiency relative to alternative approaches that allows it to scale to large networks and feature spaces such as images. + +In this tutorial, you will walk through an implementation of IG step-by-step to understand the pixel feature importances of an image classifier. As an example, consider this [image](https://commons.wikimedia.org/wiki/File:San_Francisco_fireboat_showing_off.jpg) of a fireboat spraying jets of water. You would classify this image as a fireboat and might highlight the pixels making up the boat and water cannons as being important to your decision. Your model will also classify this image as a fireboat later on in this tutorial; however, does it highlight the same pixels as important when explaining its decision? + +In the images below titled "IG Attribution Mask" and "Original + IG Mask Overlay" you can see that your model instead highlights (in purple) the pixels comprising the boat's water cannons and jets of water as being more important than the boat itself to its decision. How will your model generalize to new fireboats? What about fireboats without water jets? Read on to learn more about how IG works and how to apply IG to your models to better understand the relationship between their predictions and underlying features. + +![Output Image 1](img/8350c367e4679800cd155cf00a343b47.png) + +## Setup + +```py +import matplotlib.pylab as plt +import numpy as np +import tensorflow as tf +import tensorflow_hub as hub +``` + +### Download a pretrained image classifier from TF-Hub + +IG can be applied to any differentiable model. In the spirit of the original paper, you will use a pre-trained version of the same model, Inception V1, which you will download from [TensorFlow Hub](https://hub.tensorflow.google.cn/google/imagenet/inception_v1/classification/4). 
+ +```py +model = tf.keras.Sequential([ + hub.KerasLayer( + name='inception_v1', + handle='https://hub.tensorflow.google.cn/google/imagenet/inception_v1/classification/4', + trainable=False), +]) +model.build([None, 224, 224, 3]) +model.summary() +``` + +```py +Model: "sequential" +_________________________________________________________________ +Layer (type) Output Shape Param # +================================================================= +inception_v1 (KerasLayer) (None, 1001) 6633209 +================================================================= +Total params: 6,633,209 +Trainable params: 0 +Non-trainable params: 6,633,209 +_________________________________________________________________ + +``` + +From the module page, you need to keep in mind the following about Inception V1: + +**Inputs**: The expected input shape for the model is `(None, 224, 224, 3)`. This is a dense 4D tensor of dtype float32 and shape `(batch_size, height, width, RGB channels)` whose elements are RGB color values of pixels normalized to the range [0, 1]. The first element is `None` to indicate that the model can take any integer batch size. + +**Outputs**: A [`tf.Tensor`](https://tensorflow.google.cn/api_docs/python/tf/Tensor) of logits in the shape of `(batch_size, 1001)`. Each row represents the model's predicted score for each of 1,001 classes from ImageNet. For the model's top predicted class index you can use `tf.argmax(predictions, axis=-1)`. Furthermore, you can also convert the model's logit output to predicted probabilities across all classes using `tf.nn.softmax(predictions, axis=-1)` to quantify the model's uncertainty as well as explore similar predicted classes for debugging. + +```py +def load_imagenet_labels(file_path): + labels_file = tf.keras.utils.get_file('ImageNetLabels.txt', file_path) + with open(labels_file) as reader: + f = reader.read() + labels = f.splitlines() + return np.array(labels) +``` + +```py +imagenet_labels = load_imagenet_labels('https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt') +``` + +### Load and preprocess images with [`tf.image`](https://tensorflow.google.cn/api_docs/python/tf/image) + +You will illustrate IG using two images from [Wikimedia Commons](https://commons.wikimedia.org/wiki/Main_Page): a [Fireboat](https://commons.wikimedia.org/wiki/File:San_Francisco_fireboat_showing_off.jpg), and a [Giant Panda](https://commons.wikimedia.org/wiki/File:Giant_Panda_2.JPG). 
+
+```py
+def read_image(file_name):
+  image = tf.io.read_file(file_name)
+  image = tf.image.decode_jpeg(image, channels=3)
+  image = tf.image.convert_image_dtype(image, tf.float32)
+  image = tf.image.resize_with_pad(image, target_height=224, target_width=224)
+  return image
+```
+
+```py
+img_url = {
+    'Fireboat': 'http://storage.googleapis.com/download.tensorflow.org/example_images/San_Francisco_fireboat_showing_off.jpg',
+    'Giant Panda': 'http://storage.googleapis.com/download.tensorflow.org/example_images/Giant_Panda_2.jpeg',
+}
+
+img_paths = {name: tf.keras.utils.get_file(name, url) for (name, url) in img_url.items()}
+img_name_tensors = {name: read_image(img_path) for (name, img_path) in img_paths.items()}
+```
+
+```py
+Downloading data from http://storage.googleapis.com/download.tensorflow.org/example_images/San_Francisco_fireboat_showing_off.jpg
+3956736/3954129 [==============================] - 0s 0us/step
+Downloading data from http://storage.googleapis.com/download.tensorflow.org/example_images/Giant_Panda_2.jpeg
+811008/802859 [==============================] - 0s 0us/step
+
+```
+
+```py
+plt.figure(figsize=(8, 8))
+for n, (name, img_tensors) in enumerate(img_name_tensors.items()):
+  ax = plt.subplot(1, 2, n+1)
+  ax.imshow(img_tensors)
+  ax.set_title(name)
+  ax.axis('off')
+plt.tight_layout()
+```
+
+![png](img/e68189c9da69b7848e9033d29a0dc574.png)
+
+### Classify images
+
+Let's start by classifying these images and displaying the top 3 most confident predictions. The following is a utility function to retrieve the top k predicted labels and probabilities.
+
+```py
+def top_k_predictions(img, k=3):
+  image_batch = tf.expand_dims(img, 0)
+  predictions = model(image_batch)
+  probs = tf.nn.softmax(predictions, axis=-1)
+  top_probs, top_idxs = tf.math.top_k(input=probs, k=k)
+  top_labels = imagenet_labels[tuple(top_idxs)]
+  return top_labels, top_probs[0]
+```
+
+```py
+for (name, img_tensor) in img_name_tensors.items():
+  plt.imshow(img_tensor)
+  plt.title(name, fontweight='bold')
+  plt.axis('off')
+  plt.show()
+
+  pred_label, pred_prob = top_k_predictions(img_tensor)
+  for label, prob in zip(pred_label, pred_prob):
+    print(f'{label}: {prob:0.1%}')
+```
+
+![png](img/518bc2d08038969576066eb381910cc1.png)
+
+```py
+fireboat: 32.6%
+pier: 12.7%
+suspension bridge: 5.7%
+
+```
+
+![png](img/fecda9bde6f4c7551c164dc066491cb5.png)
+
+```py
+giant panda: 89.4%
+teddy: 0.3%
+gibbon: 0.3%
+
+```
+
+## Calculate Integrated Gradients
+
+Your model, Inception V1, is a learned function that describes a mapping between your input feature space, image pixel values, and an output space defined by ImageNet class probability values between 0 and 1. Early interpretability methods for neural networks assigned feature importance scores using gradients, which tell you which pixels have the steepest local slope relative to your model's prediction at a given point along your model's prediction function. However, gradients only describe *local* changes in your model's prediction function with respect to pixel values and do not fully describe your entire model prediction function. As your model fully "learns" the relationship between the range of an individual pixel and the correct ImageNet class, the gradient for this pixel will *saturate*, meaning it becomes increasingly small and can even go to zero.
Consider the simple model function below: + +```py +def f(x): + """A simplified model function.""" + return tf.where(x < 0.8, x, 0.8) + +def interpolated_path(x): + """A straight line path.""" + return tf.zeros_like(x) + +x = tf.linspace(start=0.0, stop=1.0, num=6) +y = f(x) +``` + +```py +fig = plt.figure(figsize=(12, 5)) +ax0 = fig.add_subplot(121) +ax0.plot(x, f(x), marker='o') +ax0.set_title('Gradients saturate over F(x)', fontweight='bold') +ax0.text(0.2, 0.5, 'Gradients > 0 = \n x is important') +ax0.text(0.7, 0.85, 'Gradients = 0 \n x not important') +ax0.set_yticks(tf.range(0, 1.5, 0.5)) +ax0.set_xticks(tf.range(0, 1.5, 0.5)) +ax0.set_ylabel('F(x) - model true class predicted probability') +ax0.set_xlabel('x - (pixel value)') + +ax1 = fig.add_subplot(122) +ax1.plot(x, f(x), marker='o') +ax1.plot(x, interpolated_path(x), marker='>') +ax1.set_title('IG intuition', fontweight='bold') +ax1.text(0.25, 0.1, 'Accumulate gradients along path') +ax1.set_ylabel('F(x) - model true class predicted probability') +ax1.set_xlabel('x - (pixel value)') +ax1.set_yticks(tf.range(0, 1.5, 0.5)) +ax1.set_xticks(tf.range(0, 1.5, 0.5)) +ax1.annotate('Baseline', xy=(0.0, 0.0), xytext=(0.0, 0.2), + arrowprops=dict(facecolor='black', shrink=0.1)) +ax1.annotate('Input', xy=(1.0, 0.0), xytext=(0.95, 0.2), + arrowprops=dict(facecolor='black', shrink=0.1)) +plt.show(); +``` + +![png](img/6d8da708f09878fc993e75adb40fd2a1.png) + +* **left**: Your model's gradients for pixel `x` are positive between 0.0 and 0.8 but go to 0.0 between 0.8 and 1.0\. Pixel `x` clearly has a significant impact on pushing your model toward 80% predicted probability on the true class. *Does it make sense that pixel `x`'s importance is small or discontinuous?* + +* **right**: The intuition behind IG is to accumulate pixel `x`'s local gradients and attribute its importance as a score for how much it adds or subtracts to your model's overall output class probability. You can break down and compute IG in 3 parts: + + 1. interpolate small steps along a straight line in the feature space between 0 (a baseline or starting point) and 1 (input pixel's value) + 2. compute gradients at each step between your model's predictions with respect to each step + 3. approximate the integral between your baseline and input by accumulating (cumulative average) these local gradients. + +To reinforce this intuition, you will walk through these 3 parts by applying IG to the example "Fireboat" image below. + +### Establish a baseline + +A baseline is an input image used as a starting point for calculating feature importance. Intuitively, you can think of the baseline's explanatory role as representing the impact of the absence of each pixel on the "Fireboat" prediction to contrast with its impact of each pixel on the "Fireboat" prediction when present in the input image. As a result, the choice of the baseline plays a central role in interpreting and visualizing pixel feature importances. For additional discussion of baseline selection, see the resources in the "Next steps" section at the bottom of this tutorial. Here, you will use a black image whose pixel values are all zero. + +Other choices you could experiment with include an all white image, or a random image, which you can create with `tf.random.uniform(shape=(224,224,3), minval=0.0, maxval=1.0)`. 
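+
+For reference, the alternative baselines mentioned above could be constructed like this (a sketch for experimentation; the tutorial itself proceeds with the black baseline defined next):
+
+```py
+# Three candidate baselines on the model's [0, 1] input scale.
+black_baseline = tf.zeros(shape=(224, 224, 3))
+white_baseline = tf.ones(shape=(224, 224, 3))
+random_baseline = tf.random.uniform(shape=(224, 224, 3), minval=0.0, maxval=1.0)
+```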
```py
baseline = tf.zeros(shape=(224,224,3))
```

```py
plt.imshow(baseline)
plt.title("Baseline")
plt.axis('off')
plt.show()
```

![png](img/3e1bc64db4c260d2327ca5a9defae306.png)

### Unpack formulas into code

The formula for Integrated Gradients is as follows:

$IntegratedGradients_{i}(x) ::= (x_{i} - x'_{i})\times\int_{\alpha=0}^1\frac{\partial F(x'+\alpha \times (x - x'))}{\partial x_i}{d\alpha}$

where:

$i$ = feature
$x$ = input
$x'$ = baseline
$\alpha$ = interpolation constant to perturb features by

In practice, computing a definite integral is not always numerically possible and can be computationally costly, so you compute the following numerical approximation:

$IntegratedGrads^{approx}_{i}(x)::=(x_{i}-x'_{i})\times\sum_{k=1}^{m}\frac{\partial F(x' + \frac{k}{m}\times(x - x'))}{\partial x_{i} } \times \frac{1}{m}$

where:

$i$ = feature (individual pixel)
$x$ = input (image tensor)
$x'$ = baseline (image tensor)
$k$ = scaled feature perturbation constant
$m$ = number of steps in the Riemann sum approximation of the integral
$(x_{i}-x'_{i})$ = a term for the difference from the baseline. This is necessary to scale the integrated gradients and keep them in terms of the original image. The path from the baseline image to the input is in pixel space. Since with IG you are integrating along a straight line (a linear transformation), this ends up being roughly equivalent, with enough steps, to the integral of the derivative of the interpolated image function with respect to $\alpha$. The integral sums each pixel's gradient times the change in the pixel along the path. It's simpler to implement this integration as uniform steps from one image to the other, substituting $x := (x' + \alpha(x-x'))$. The change of variables gives $dx = (x-x')d\alpha$. The $(x-x')$ term is constant and is factored out of the integral.

### Interpolate images

$IntegratedGrads^{approx}_{i}(x)::=(x_{i}-x'_{i})\times\sum_{k=1}^{m}\frac{\partial F(\overbrace{x' + \frac{k}{m}\times(x - x')}^\text{interpolate m images at k intervals})}{\partial x_{i} } \times \frac{1}{m}$

First, you will generate a [linear interpolation](https://en.wikipedia.org/wiki/Linear_interpolation) between the baseline and the original image. You can think of interpolated images as small steps in the feature space between your baseline and input, represented by $\alpha$ in the original equation.

```py
m_steps=50
alphas = tf.linspace(start=0.0, stop=1.0, num=m_steps+1) # Generate m_steps intervals for integral_approximation() below.
```

```py
def interpolate_images(baseline,
                       image,
                       alphas):
  alphas_x = alphas[:, tf.newaxis, tf.newaxis, tf.newaxis]
  baseline_x = tf.expand_dims(baseline, axis=0)
  input_x = tf.expand_dims(image, axis=0)
  delta = input_x - baseline_x
  images = baseline_x + alphas_x * delta
  return images
```

Let's use the above function to generate interpolated images along a linear path at alpha intervals between a black baseline image and the example "Fireboat" image.

```py
interpolated_images = interpolate_images(
    baseline=baseline,
    image=img_name_tensors['Fireboat'],
    alphas=alphas)
```

Let's visualize the interpolated images. Note: another way of thinking about the $\alpha$ constant is that it steadily increases each interpolated image's intensity.
```py
fig = plt.figure(figsize=(20, 20))

i = 0
for alpha, image in zip(alphas[0::10], interpolated_images[0::10]):
  i += 1
  plt.subplot(1, len(alphas[0::10]), i)
  plt.title(f'alpha: {alpha:.1f}')
  plt.imshow(image)
  plt.axis('off')

plt.tight_layout();
```

![png](img/e2e6d59bb8ebd47a957558d11e836ec1.png)

### Compute gradients

Now let's take a look at how to calculate gradients in order to measure the relationship between changes to a feature and changes in the model's predictions. In the case of images, the gradient tells us which pixels have the strongest effect on the model's predicted class probabilities.

$IntegratedGrads^{approx}_{i}(x)::=(x_{i}-x'_{i})\times\sum_{k=1}^{m}\frac{\overbrace{\partial F(\text{interpolated images})}^\text{compute gradients} }{\partial x_{i} } \times \frac{1}{m}$

where:
$F()$ = your model's prediction function
$\frac{\partial{F} }{\partial{x_i} }$ = gradient (vector of partial derivatives $\partial$) of your model $F$'s prediction function relative to each feature $x_i$

TensorFlow makes computing gradients easy for you with a [`tf.GradientTape`](https://tensorflow.google.cn/api_docs/python/tf/GradientTape).

```py
def compute_gradients(images, target_class_idx):
  with tf.GradientTape() as tape:
    tape.watch(images)
    logits = model(images)
    probs = tf.nn.softmax(logits, axis=-1)[:, target_class_idx]
  return tape.gradient(probs, images)
```

Let's compute the gradients for each image along the interpolation path with respect to the correct output. Recall that your model returns a `(1, 1001)` shaped `Tensor` with logits that you convert to predicted probabilities for each class. You need to pass the correct ImageNet target class index to the `compute_gradients` function for your image.

```py
path_gradients = compute_gradients(
    images=interpolated_images,
    target_class_idx=555)
```

Note the output shape of `(n_interpolated_images, img_height, img_width, RGB)`, which gives us the gradient for every pixel of every image along the interpolation path. You can think of these gradients as measuring the change in your model's predictions for each small step in the feature space.

```py
print(path_gradients.shape)
```

```py
(51, 224, 224, 3)

```

**Visualizing gradient saturation**

Recall that the gradients you just calculated above describe *local* changes to your model's predicted probability of "Fireboat" and can *saturate*.

These concepts are visualized using the gradients you calculated above in the 2 plots below.

```py
pred = model(interpolated_images)
pred_proba = tf.nn.softmax(pred, axis=-1)[:, 555]

plt.figure(figsize=(10, 4))
ax1 = plt.subplot(1, 2, 1)
ax1.plot(alphas, pred_proba)
ax1.set_title('Target class predicted probability over alpha')
ax1.set_ylabel('model p(target class)')
ax1.set_xlabel('alpha')
ax1.set_ylim([0, 1])

ax2 = plt.subplot(1, 2, 2)
# Average across interpolation steps
average_grads = tf.reduce_mean(path_gradients, axis=[1, 2, 3])
# Normalize gradients to a 0 to 1 scale: (x - min(x)) / (max(x) - min(x)).
average_grads_norm = (average_grads - tf.math.reduce_min(average_grads)) / (tf.math.reduce_max(average_grads) - tf.reduce_min(average_grads))
ax2.plot(alphas, average_grads_norm)
ax2.set_title('Average pixel gradients (normalized) over alpha')
ax2.set_ylabel('Average pixel gradients')
ax2.set_xlabel('alpha')
ax2.set_ylim([0, 1]);
```

```py
(0.0, 1.0)

```

![png](img/0b0835e78f54f2c464c9df77cfe6a93b.png)

* **left**: This plot shows how your model's confidence in the "Fireboat" class varies across alphas. Notice how the gradients, or slope of the line, largely flatten or saturate between 0.6 and 1.0 before settling at the final "Fireboat" predicted probability of about 40%.

* **right**: The right plot shows the average gradient magnitudes over alpha more directly. Note how the values sharply approach and even briefly dip below zero. In fact, your model "learns" the most from gradients at lower values of alpha, before saturating. Intuitively, you can think of it this way: your model has already learned the pixels needed to make the correct prediction, e.g. the water cannons, sending those pixels' gradients to zero, but it is still quite uncertain and focused on spurious bridge or water-jet pixels as the alpha values approach the original input image.

To make sure these important water cannon pixels are reflected as important to the "Fireboat" prediction, you will continue on below to learn how to accumulate these gradients to accurately approximate how each pixel impacts your "Fireboat" predicted probability.

## Accumulate gradients (integral approximation)

There are many different ways you can compute the numerical approximation of an integral for IG, with different tradeoffs in accuracy and convergence across varying functions. A popular class of methods is called [Riemann sums](https://en.wikipedia.org/wiki/Riemann_sum). Here, you will use the Trapezoidal rule (you can find additional code to explore different approximation methods at the end of this tutorial).

$IntegratedGrads^{approx}_{i}(x)::=(x_{i}-x'_{i})\times \overbrace{\sum_{k=1}^{m} }^\text{Sum m local gradients} \text{gradients(interpolated images)} \times \overbrace{\frac{1}{m} }^\text{Divide by m steps}$

From the equation, you can see you are summing over `m` gradients and dividing by `m` steps. You can implement the two operations together for part 3 as an *average of the local gradients of `m` interpolated predictions and input images*.

```py
def integral_approximation(gradients):
  # riemann_trapezoidal
  grads = (gradients[:-1] + gradients[1:]) / tf.constant(2.0)
  integrated_gradients = tf.math.reduce_mean(grads, axis=0)
  return integrated_gradients
```

The `integral_approximation` function takes the gradients of the predicted probability of the target class with respect to the interpolated images between the baseline and the original image.

```py
ig = integral_approximation(
    gradients=path_gradients)
```

You can confirm that averaging across the gradients of `m` interpolated images returns an integrated gradients tensor with the same shape as the original "Fireboat" image.

```py
print(ig.shape)
```

```py
(224, 224, 3)

```

### Putting it all together

Now you will combine the 3 previous general parts together into an `IntegratedGradients` function and utilize a [@tf.function](https://tensorflow.google.cn/guide/function) decorator to compile it into a high-performance callable TensorFlow graph.
This is implemented as 5 smaller steps below:

$IntegratedGrads^{approx}_{i}(x)::=\overbrace{(x_{i}-x'_{i})}^\text{5.}\times \overbrace{\sum_{k=1}^{m} }^\text{4.} \frac{\partial \overbrace{F(\overbrace{x' + \overbrace{\frac{k}{m} }^\text{1.}\times(x - x'))}^\text{2.} }^\text{3.} }{\partial x_{i} } \times \overbrace{\frac{1}{m} }^\text{4.}$

1. Generate alphas $\alpha$

2. Generate interpolated images = $(x' + \frac{k}{m}\times(x - x'))$

3. Compute gradients of the model $F$'s output predictions with respect to the input features = $\frac{\partial F(\text{interpolated path inputs})}{\partial x_{i} }$

4. Integral approximation through averaging gradients = $\sum_{k=1}^m \text{gradients} \times \frac{1}{m}$

5. Scale integrated gradients with respect to the original image = $(x_{i}-x'_{i}) \times \text{integrated gradients}$. This step is necessary to make sure the attribution values accumulated across multiple interpolated images are in the same units as, and faithfully represent, the pixel importances on the original image.

```py
@tf.function
def integrated_gradients(baseline,
                         image,
                         target_class_idx,
                         m_steps=50,
                         batch_size=32):
  # 1. Generate alphas.
  alphas = tf.linspace(start=0.0, stop=1.0, num=m_steps+1)

  # Initialize TensorArray outside loop to collect gradients.
  gradient_batches = tf.TensorArray(tf.float32, size=m_steps+1)

  # Iterate alphas range and batch computation for speed, memory efficiency, and scaling to larger m_steps.
  for alpha in tf.range(0, len(alphas), batch_size):
    from_ = alpha
    to = tf.minimum(from_ + batch_size, len(alphas))
    alpha_batch = alphas[from_:to]

    # 2. Generate interpolated inputs between baseline and input.
    interpolated_path_input_batch = interpolate_images(baseline=baseline,
                                                       image=image,
                                                       alphas=alpha_batch)

    # 3. Compute gradients between model outputs and interpolated inputs.
    gradient_batch = compute_gradients(images=interpolated_path_input_batch,
                                       target_class_idx=target_class_idx)

    # Write batch indices and gradients to extend TensorArray.
    gradient_batches = gradient_batches.scatter(tf.range(from_, to), gradient_batch)

  # Stack path gradients together row-wise into single tensor.
  total_gradients = gradient_batches.stack()

  # 4. Integral approximation through averaging gradients.
  avg_gradients = integral_approximation(gradients=total_gradients)

  # 5. Scale integrated gradients with respect to input.
  integrated_gradients = (image - baseline) * avg_gradients

  return integrated_gradients
```

```py
ig_attributions = integrated_gradients(baseline=baseline,
                                       image=img_name_tensors['Fireboat'],
                                       target_class_idx=555,
                                       m_steps=240)
```

Again, you can check that the IG feature attributions have the same shape as the input "Fireboat" image.

```py
print(ig_attributions.shape)
```

```py
(224, 224, 3)

```

The paper suggests that the number of steps range between 20 and 300 depending upon the example (although in practice this can be higher, in the 1,000s, to accurately approximate the integral). You can find additional code to check for the appropriate number of steps in the "Next steps" resources at the end of this tutorial.

### Visualize attributions

You are ready to visualize attributions and overlay them on the original image. The code below sums the absolute values of the integrated gradients across the color channels to produce an attribution mask. This plotting method captures the relative impact of pixels on the model's predictions.
```py
def plot_img_attributions(baseline,
                          image,
                          target_class_idx,
                          m_steps=50,
                          cmap=None,
                          overlay_alpha=0.4):

  attributions = integrated_gradients(baseline=baseline,
                                      image=image,
                                      target_class_idx=target_class_idx,
                                      m_steps=m_steps)

  # Sum the attributions across color channels for visualization.
  # The attribution mask is a grayscale image with height and width
  # equal to the original image.
  attribution_mask = tf.reduce_sum(tf.math.abs(attributions), axis=-1)

  fig, axs = plt.subplots(nrows=2, ncols=2, squeeze=False, figsize=(8, 8))

  axs[0, 0].set_title('Baseline image')
  axs[0, 0].imshow(baseline)
  axs[0, 0].axis('off')

  axs[0, 1].set_title('Original image')
  axs[0, 1].imshow(image)
  axs[0, 1].axis('off')

  axs[1, 0].set_title('Attribution mask')
  axs[1, 0].imshow(attribution_mask, cmap=cmap)
  axs[1, 0].axis('off')

  axs[1, 1].set_title('Overlay')
  axs[1, 1].imshow(attribution_mask, cmap=cmap)
  axs[1, 1].imshow(image, alpha=overlay_alpha)
  axs[1, 1].axis('off')

  plt.tight_layout()
  return fig
```

Looking at the attributions on the "Fireboat" image, you can see the model identifies the water cannons and spouts as contributing to its correct prediction.

```py
_ = plot_img_attributions(image=img_name_tensors['Fireboat'],
                          baseline=baseline,
                          target_class_idx=555,
                          m_steps=240,
                          cmap=plt.cm.inferno,
                          overlay_alpha=0.4)
```

![png](img/29af5825a7303165115c9cfbc59ae606.png)

On the "Giant Panda" image, the attributions highlight the texture, nose, and fur of the panda's face.

```py
_ = plot_img_attributions(image=img_name_tensors['Giant Panda'],
                          baseline=baseline,
                          target_class_idx=389,
                          m_steps=55,
                          cmap=plt.cm.viridis,
                          overlay_alpha=0.5)
```

![png](img/07f89687b786f68c1561b81ac448c45e.png)

## Uses and limitations

Use cases

* Employing techniques like Integrated Gradients before deploying your model can help you develop intuition for how and why it works. Do the features highlighted by this technique match your intuition? If not, that may be indicative of a bug in your model or dataset, or of overfitting.

Limitations

* Integrated Gradients provides feature importances on individual examples; however, it does not provide global feature importances across an entire dataset.

* Integrated Gradients provides individual feature importances, but it does not explain feature interactions and combinations.

## Next steps

This tutorial presented a basic implementation of Integrated Gradients. As a next step, you can use this notebook to try this technique with different models and images yourself.

For interested readers, there is a lengthier version of this tutorial (which includes code for different baselines, for computing integral approximations, and for determining a sufficient number of steps) which you can find [here](https://github.com/GoogleCloudPlatform/training-data-analyst/tree/master/blogs/integrated_gradients).

To deepen your understanding, check out the paper [Axiomatic Attribution for Deep Networks](https://arxiv.org/abs/1703.01365) and its [Github repository](https://github.com/ankurtaly/Integrated-Gradients), which contains an implementation in a previous version of TensorFlow. You can also explore feature attribution, and the impact of different baselines, on [distill.pub](https://distill.pub/2020/attribution-baselines/).
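One lightweight check worth trying before moving on: the paper's completeness axiom says the attributions should sum approximately to the difference between the model's output at the input and at the baseline. A sketch, reusing the `ig_attributions` tensor computed for the "Fireboat" image above:

```py
# Completeness check (sketch): sum of attributions ~= F(input) - F(baseline)
# for the target class, with the approximation improving as m_steps grows.
input_prob = tf.nn.softmax(model(tf.expand_dims(img_name_tensors['Fireboat'], 0)), axis=-1)[0, 555]
baseline_prob = tf.nn.softmax(model(tf.expand_dims(baseline, 0)), axis=-1)[0, 555]
delta = float(input_prob - baseline_prob)
attribution_sum = float(tf.reduce_sum(ig_attributions))
print(f'F(input) - F(baseline): {delta:.4f}')
print(f'Sum of IG attributions: {attribution_sum:.4f}')  # should be close
```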
Interested in incorporating IG into your production machine learning workflows for feature importances, model error analysis, and data skew monitoring? Check out Google Cloud's [Explainable AI](https://cloud.google.com/explainable-ai) product, which supports IG attributions. The Google AI PAIR research group has also open-sourced the [What-if tool](https://pair-code.github.io/what-if-tool/index.html#about), which can be used for model debugging, including visualizing IG feature attributions.
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/070.md b/Tensorflow/TensorFlow2.0/070.md
new file mode 100644
index 00000000..f9d86906
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/070.md
@@ -0,0 +1 @@
+# 强化学习
\ No newline at end of file
diff --git a/Tensorflow/TensorFlow2.0/071.md b/Tensorflow/TensorFlow2.0/071.md
new file mode 100644
index 00000000..4af3cf7a
--- /dev/null
+++ b/Tensorflow/TensorFlow2.0/071.md
@@ -0,0 +1,476 @@

# Playing CartPole with the Actor-Critic Method

> 原文:[https://tensorflow.google.cn/tutorials/reinforcement_learning/actor_critic](https://tensorflow.google.cn/tutorials/reinforcement_learning/actor_critic)

This tutorial demonstrates how to implement the [Actor-Critic](https://papers.nips.cc/paper/1786-actor-critic-algorithms.pdf) method using TensorFlow to train an agent on the [OpenAI Gym](https://gym.openai.com/) CartPole-v0 environment. The reader is assumed to have some familiarity with [policy gradient methods](https://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation.pdf) of reinforcement learning.

**Actor-Critic methods**

Actor-Critic methods are [temporal difference (TD) learning](https://en.wikipedia.org/wiki/Temporal_difference_learning) methods that represent the policy function independently of the value function.

A policy function (or policy) returns a probability distribution over actions that the agent can take based on the given state. A value function determines the expected return for an agent starting at a given state and acting according to a particular policy forever after.

In the Actor-Critic method, the policy is referred to as the *actor* that proposes a set of possible actions given a state, and the estimated value function is referred to as the *critic*, which evaluates actions taken by the *actor* based on the given policy.

In this tutorial, both the *Actor* and the *Critic* will be represented using one neural network with two outputs.

**CartPole-v0**

In the [CartPole-v0 environment](https://gym.openai.com/envs/CartPole-v0), a pole is attached to a cart moving along a frictionless track. The pole starts upright and the goal of the agent is to prevent it from falling over by applying a force of -1 or +1 to the cart. A reward of +1 is given for every time step the pole remains upright. An episode ends when (1) the pole is more than 15 degrees from vertical, or (2) the cart moves more than 2.4 units from the center.
![Trained actor-critic model in the CartPole-v0 environment](/tutorials/reinforcement_learning/images/cartpole-v0.gif)
The problem is considered "solved" when the average total reward for the episode reaches 195 over 100 consecutive trials.

## Setup

Import necessary packages and configure global settings.

```py
pip install -q gym

```

```py
WARNING: You are using pip version 20.2.2; however, version 20.2.3 is available.
You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.

```

```py
# Install additional packages for visualization
sudo apt-get install -y xvfb python-opengl > /dev/null 2>&1
pip install -q pyvirtualdisplay > /dev/null 2>&1
pip install -q git+https://github.com/tensorflow/docs > /dev/null 2>&1

```

```py
import collections
import gym
import numpy as np
import tensorflow as tf
import tqdm

from matplotlib import pyplot as plt
from tensorflow.keras import layers
from typing import Any, List, Sequence, Tuple

# Create the environment
env = gym.make("CartPole-v0")

# Set seed for experiment reproducibility
seed = 42
env.seed(seed)
tf.random.set_seed(seed)
np.random.seed(seed)

# Small epsilon value for stabilizing division operations
eps = np.finfo(np.float32).eps.item()
```

## Model

The *Actor* and *Critic* will be modeled using one neural network that generates the action probabilities and the critic value, respectively. We use model subclassing to define the model.

During the forward pass, the model will take in the state as the input and will output both action probabilities and the critic value $V$, which models the state-dependent [value function](https://spinningup.openai.com/en/latest/spinningup/rl_intro.html#value-functions). The goal is to train a model that chooses actions based on a policy $\pi$ that maximizes the expected [return](https://spinningup.openai.com/en/latest/spinningup/rl_intro.html#reward-and-return).

For CartPole-v0, there are four values representing the state: cart position, cart velocity, pole angle, and pole velocity. The agent can take two actions, pushing the cart left (0) or right (1).

For more background, refer to the classic cartpole learning control paper by [Barto et al. (1983)](http://www.derongliu.org/adp/adp-cdrom/Barto1983.pdf).

```py
class ActorCritic(tf.keras.Model):
  """Combined actor-critic network."""

  def __init__(
      self,
      num_actions: int,
      num_hidden_units: int):
    """Initialize."""
    super().__init__()

    self.common = layers.Dense(num_hidden_units, activation="relu")
    self.actor = layers.Dense(num_actions)
    self.critic = layers.Dense(1)

  def call(self, inputs: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
    x = self.common(inputs)
    return self.actor(x), self.critic(x)
```

```py
num_actions = env.action_space.n  # 2
num_hidden_units = 128

model = ActorCritic(num_actions, num_hidden_units)
```

## Training

To train the agent, you will follow these steps:

1. Run the agent on the environment to collect training data per episode.
2. Compute the expected return at each time step.
3. Compute the loss for the combined actor-critic model.
4. Compute gradients and update network parameters.
5. Repeat 1-4 until either the success criterion or the max number of episodes is reached.

### 1. Collecting training data

As in supervised learning, in order to train the actor-critic model, we need training data. However, in order to collect such data, the model has to be "run" in the environment.

We collect training data for each episode.
Then at each time step, the model's forward pass will be run on the environment's state in order to generate action probabilities and the critic value based on the current policy parameterized by the model's weights.

The next action will be sampled from the action probabilities generated by the model, and then applied to the environment, causing the next state and reward to be generated.

This process is implemented in the `run_episode` function, which uses TensorFlow operations so that it can later be compiled into a TensorFlow graph for faster training. Note that [`tf.TensorArray`](https://tensorflow.google.cn/api_docs/python/tf/TensorArray)s are used to support Tensor iteration over variable-length arrays.

```py
# Wrap OpenAI Gym's `env.step` call as an operation in a TensorFlow function.
# This would allow it to be included in a callable TensorFlow graph.

def env_step(action: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
  """Returns state, reward and done flag given an action."""

  state, reward, done, _ = env.step(action)
  return (state.astype(np.float32),
          np.array(reward, np.int32),
          np.array(done, np.int32))

def tf_env_step(action: tf.Tensor) -> List[tf.Tensor]:
  return tf.numpy_function(env_step, [action],
                           [tf.float32, tf.int32, tf.int32])
```

```py
def run_episode(
    initial_state: tf.Tensor,
    model: tf.keras.Model,
    max_steps: int) -> List[tf.Tensor]:
  """Runs a single episode to collect training data."""

  action_probs = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
  values = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
  rewards = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)

  initial_state_shape = initial_state.shape
  state = initial_state

  for t in tf.range(max_steps):
    # Convert state into a batched tensor (batch size = 1)
    state = tf.expand_dims(state, 0)

    # Run the model to get action probabilities and the critic value
    action_logits_t, value = model(state)

    # Sample next action from the action probability distribution
    action = tf.random.categorical(action_logits_t, 1)[0, 0]
    action_probs_t = tf.nn.softmax(action_logits_t)

    # Store critic values
    values = values.write(t, tf.squeeze(value))

    # Store the probability of the action chosen (its log is taken later,
    # in the loss computation)
    action_probs = action_probs.write(t, action_probs_t[0, action])

    # Apply action to the environment to get next state and reward
    state, reward, done = tf_env_step(action)
    state.set_shape(initial_state_shape)

    # Store reward
    rewards = rewards.write(t, reward)

    if tf.cast(done, tf.bool):
      break

  action_probs = action_probs.stack()
  values = values.stack()
  rewards = rewards.stack()

  return action_probs, values, rewards
```

### 2. Computing expected returns

We convert the sequence of rewards collected during one episode, $\{r_{t}\}_{t=1}^{T}$, into a sequence of expected returns $\{G_{t}\}_{t=1}^{T}$, in which the sum of rewards is taken from the current timestep $t$ to $T$ and each reward is multiplied by an exponentially decaying discount factor $\gamma$:

$$G_{t} = \sum^{T}_{t'=t} \gamma^{t'-t}r_{t'}$$

Since $\gamma\in(0,1)$, rewards further out from the current timestep are given less weight.

Intuitively, the expected return simply implies that rewards now are better than rewards later. In a mathematical sense, it ensures that the sum of the rewards converges.

To stabilize training, we also standardize the resulting sequence of returns (i.e. scale it to have zero mean and unit standard deviation).

```py
def get_expected_return(
    rewards: tf.Tensor,
    gamma: float,
    standardize: bool = True) -> tf.Tensor:
  """Compute expected returns per timestep."""

  n = tf.shape(rewards)[0]
  returns = tf.TensorArray(dtype=tf.float32, size=n)

  # Start from the end of `rewards` and accumulate reward sums
  # into the `returns` array
  rewards = tf.cast(rewards[::-1], dtype=tf.float32)
  discounted_sum = tf.constant(0.0)
  discounted_sum_shape = discounted_sum.shape
  for i in tf.range(n):
    reward = rewards[i]
    discounted_sum = reward + gamma * discounted_sum
    discounted_sum.set_shape(discounted_sum_shape)
    returns = returns.write(i, discounted_sum)
  returns = returns.stack()[::-1]

  if standardize:
    returns = ((returns - tf.math.reduce_mean(returns)) /
               (tf.math.reduce_std(returns) + eps))

  return returns
```

### 3. The actor-critic loss

Since we are using a hybrid actor-critic model, we will use a loss function that combines the actor and critic losses for training, as shown below:

$$L = L_{actor} + L_{critic}$$

#### Actor loss

We formulate the actor loss based on [policy gradients with the critic as a state-dependent baseline](https://www.youtube.com/watch?v=EKqxumCuAAY&t=62m23s) and compute single-sample (per-episode) estimates.

$$L_{actor} = -\sum^{T}_{t=1} \log\pi_{\theta}(a_{t} | s_{t})[G(s_{t}, a_{t}) - V^{\pi}_{\theta}(s_{t})]$$

where:

* $T$: the number of timesteps per episode, which can vary per episode
* $s_{t}$: the state at timestep $t$
* $a_{t}$: the chosen action at timestep $t$ given state $s$
* $\pi_{\theta}$: the policy (actor) parameterized by $\theta$
* $V^{\pi}_{\theta}$: the value function (critic), also parameterized by $\theta$
* $G = G_{t}$: the expected return for a given state, action pair at timestep $t$

The sum carries a negative sign because we want to maximize the probabilities of actions yielding higher rewards by minimizing the combined loss.

##### Advantage

The $G - V$ term in the $L_{actor}$ formulation is called the [advantage](https://spinningup.openai.com/en/latest/spinningup/rl_intro.html#advantage-functions), which indicates how much better an action is, given a particular state, than a random action selected according to the policy $\pi$ for that state.

While it's possible to exclude a baseline, doing so may result in high variance during training. And the nice thing about choosing the critic $V$ as a baseline is that it is trained to be as close as possible to $G$, leading to lower variance.

In addition, without the critic, the algorithm would try to increase the probabilities of actions taken in a particular state based on the expected return alone, which may not make much of a difference if the relative probabilities between actions remain the same.

For instance, suppose two actions for a given state would yield the same expected return. Without the critic, the algorithm would try to raise the probability of both actions based on the objective $J$. With the critic, it may turn out that there's no advantage ($G - V = 0$), so no benefit is gained from increasing the actions' probabilities, and the algorithm would set the gradients to zero.
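To make that concrete, here is a toy check with made-up numbers (a sketch, independent of the tutorial's training code): when returns and critic values coincide, the advantage is zero and the actor loss contributes no gradient signal.

```py
# Toy illustration (made-up numbers): equal returns and values => zero advantage.
action_probs = tf.constant([0.6, 0.4])
returns = tf.constant([1.0, 1.0])
values = tf.constant([1.0, 1.0])

advantage = returns - values  # [0., 0.]
actor_loss = -tf.math.reduce_sum(tf.math.log(action_probs) * advantage)
print(actor_loss.numpy())  # 0.0 -- no push on either action's probability
```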
#### Critic loss

Training $V$ to be as close as possible to $G$ can be set up as a regression problem with the following loss function:

$$L_{critic} = L_{\delta}(G, V^{\pi}_{\theta})$$

where $L_{\delta}$ is the [Huber loss](https://en.wikipedia.org/wiki/Huber_loss), which is less sensitive to outliers in data than squared-error loss.

```py
huber_loss = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.SUM)

def compute_loss(
    action_probs: tf.Tensor,
    values: tf.Tensor,
    returns: tf.Tensor) -> tf.Tensor:
  """Computes the combined actor-critic loss."""

  advantage = returns - values

  action_log_probs = tf.math.log(action_probs)
  actor_loss = -tf.math.reduce_sum(action_log_probs * advantage)

  critic_loss = huber_loss(values, returns)

  return actor_loss + critic_loss
```

### 4. Defining the training step to update parameters

We combine all of the steps above into a training step that is run every episode. All steps leading up to the loss function are executed within the [`tf.GradientTape`](https://tensorflow.google.cn/api_docs/python/tf/GradientTape) context to enable automatic differentiation.

We use the Adam optimizer to apply the gradients to the model parameters.

We also compute the sum of the undiscounted rewards, `episode_reward`, in this step; it will be used later on to evaluate whether we have met the success criterion.

We apply the [`tf.function`](https://tensorflow.google.cn/api_docs/python/tf/function) decorator to the `train_step` function so that it can be compiled into a callable TensorFlow graph, which can lead to a 10x speedup in training.

```py
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

@tf.function
def train_step(
    initial_state: tf.Tensor,
    model: tf.keras.Model,
    optimizer: tf.keras.optimizers.Optimizer,
    gamma: float,
    max_steps_per_episode: int) -> tf.Tensor:
  """Runs a model training step."""

  with tf.GradientTape() as tape:

    # Run the model for one episode to collect training data
    action_probs, values, rewards = run_episode(
        initial_state, model, max_steps_per_episode)

    # Calculate expected returns
    returns = get_expected_return(rewards, gamma)

    # Convert training data to appropriate TF tensor shapes
    action_probs, values, returns = [
        tf.expand_dims(x, 1) for x in [action_probs, values, returns]]

    # Calculate the loss values to update our network
    loss = compute_loss(action_probs, values, returns)

  # Compute the gradients from the loss
  grads = tape.gradient(loss, model.trainable_variables)

  # Apply the gradients to the model's parameters
  optimizer.apply_gradients(zip(grads, model.trainable_variables))

  episode_reward = tf.math.reduce_sum(rewards)

  return episode_reward
```

### 5. Run the training loop

We execute training by running the training step until either the success criterion or the maximum number of episodes is reached.

We keep a running estimate of the average episode reward; the loop below maintains it as an exponential moving average, weighting each new episode reward at 1%, as a lightweight stand-in for averaging the rewards of the last 100 episodes. Once this running reward crosses the success threshold, training stops.

Depending on your runtime, training can finish in less than a minute.
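As a toy illustration of that running-reward update (a sketch with made-up numbers, separate from the actual training loop below), note how each new episode reward shifts the estimate by only 1%, so the estimate rises slowly toward sustained performance:

```py
# Toy EMA illustration (made-up numbers).
running_reward = 0.0
for episode_reward in [20, 50, 120, 200]:
  running_reward = episode_reward * 0.01 + running_reward * 0.99
  print(f'{running_reward:.2f}')
```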
```py
%%time

max_episodes = 10000
max_steps_per_episode = 1000

# CartPole-v0 is considered solved if the average reward is >= 195 over 100
# consecutive trials
reward_threshold = 195
running_reward = 0

# Discount factor for future rewards
gamma = 0.99

with tqdm.trange(max_episodes) as t:
  for i in t:
    initial_state = tf.constant(env.reset(), dtype=tf.float32)
    episode_reward = int(train_step(
        initial_state, model, optimizer, gamma, max_steps_per_episode))

    running_reward = episode_reward*0.01 + running_reward*.99

    t.set_description(f'Episode {i}')
    t.set_postfix(
        episode_reward=episode_reward, running_reward=running_reward)

    # Show the average episode reward every 10 episodes
    if i % 10 == 0:
      pass # print(f'Episode {i}: average reward: {avg_reward}')

    if running_reward > reward_threshold:
      break

print(f'\nSolved at episode {i}: average reward: {running_reward:.2f}!')
```

```py
Episode 1524: 15%|█▌ | 1524/10000 [08:16<46:00, 3.07it/s, episode_reward=200, running_reward=195]

Solved at episode 1524: average reward: 195.03!
CPU times: user 20min 43s, sys: 4min 52s, total: 25min 35s
Wall time: 8min 16s

```

## Visualization

After training, it would be good to visualize how the model performs in the environment. You can run the cells below to generate a GIF animation of one episode run of the model. Note that additional packages need to be installed for OpenAI Gym to render the environment's images correctly in Colab.

```py
# Render an episode and save as a GIF file

from IPython import display as ipythondisplay
from PIL import Image
from pyvirtualdisplay import Display

display = Display(visible=0, size=(400, 300))
display.start()

def render_episode(env: gym.Env, model: tf.keras.Model, max_steps: int):
  screen = env.render(mode='rgb_array')
  im = Image.fromarray(screen)

  images = [im]

  state = tf.constant(env.reset(), dtype=tf.float32)
  for i in range(1, max_steps + 1):
    state = tf.expand_dims(state, 0)
    action_probs, _ = model(state)
    action = np.argmax(np.squeeze(action_probs))

    state, _, done, _ = env.step(action)
    state = tf.constant(state, dtype=tf.float32)

    # Render screen every 10 steps
    if i % 10 == 0:
      screen = env.render(mode='rgb_array')
      images.append(Image.fromarray(screen))

    if done:
      break

  return images

# Save GIF image
images = render_episode(env, model, max_steps_per_episode)
image_file = 'cartpole-v0.gif'
# loop=0: loop forever, duration=1: play each frame for 1ms
images[0].save(
    image_file, save_all=True, append_images=images[1:], loop=0, duration=1)
```

```py
import tensorflow_docs.vis.embed as embed
embed.embed_file(image_file)
```

![gif](img/536f812a8cb3bafa44a738899b173733.png)

## Next steps

This tutorial demonstrated how to implement the actor-critic method using TensorFlow.

As a next step, you could try training a model on a different environment in OpenAI Gym.

For additional information regarding actor-critic methods and the CartPole-v0 problem, you may refer to the following resources:

* [Actor Critic Method](https://hal.inria.fr/hal-00840470/document)
* [Actor Critic Lecture (CAL)](https://www.youtube.com/watch?v=EKqxumCuAAY&list=PLkFD6_40KJIwhWJpGazJ9VSj9CFMkb79A&index=7&t=0s)
* [Cartpole learning control problem [Barto, et al.
1983]](http://www.derongliu.org/adp/adp-cdrom/Barto1983.pdf) + +For more reinforcement learning examples in TensorFlow, you can check the following resources: + +* [Reinforcement learning code examples (keras.io)](https://keras.io/examples/rl/) +* [TF-Agents reinforcement learning library](https://tensorflow.google.cn/agents) \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/README.md b/Tensorflow/TensorFlow2.0/README.md new file mode 100644 index 00000000..71563c1d --- /dev/null +++ b/Tensorflow/TensorFlow2.0/README.md @@ -0,0 +1,3 @@ +# TensorFlow 2.4 官方教程 + +来源:[https://tensorflow.google.cn/tutorials?authuser=0](https://tensorflow.google.cn/tutorials?authuser=0) \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/SUMMARY.md b/Tensorflow/TensorFlow2.0/SUMMARY.md new file mode 100644 index 00000000..722ae95b --- /dev/null +++ b/Tensorflow/TensorFlow2.0/SUMMARY.md @@ -0,0 +1,71 @@ ++ [TensorFlow 2.4 官方教程](README.md) ++ [初学者的 TensorFlow 2.0 教程](002.md) ++ [针对专业人员的 TensorFlow 2.0 入门](003.md) ++ [初级](004.md) + + [Keras 机器学习基础知识](005.md) + + [基本分类:对服装图像进行分类](006.md) + + [电影评论文本分类](007.md) + + [使用 Keras 和 Tensorflow Hub 对电影评论进行文本分类](008.md) + + [Basic regression: Predict fuel efficiency](009.md) + + [Overfit and underfit](010.md) + + [保存和恢复模型](011.md) + + [Introduction to the Keras Tuner](012.md) + + [加载和预处理数据](013.md) + + [用 tf.data 加载图片](014.md) + + [使用 tf.data 加载文本数据](015.md) + + [用 tf.data 加载 CSV 数据](016.md) + + [使用 tf.data 加载 NumPy 数据](017.md) + + [使用 tf.data 加载 pandas dataframes](018.md) + + [Unicode 字符串](019.md) + + [TF.Text](020.md) + + [TFRecord 和 tf.Example](021.md) + + [Estimator](022.md) + + [预创建的 Estimators](023.md) + + [Build a linear model with Estimators](024.md) + + [在 Tensorflow 中训练提升树(Boosted Trees)模型](025.md) + + [梯度提升树(Gradient Boosted Trees):模型理解](026.md) + + [通过 Keras 模型创建 Estimator](027.md) ++ [高级](028.md) + + [自定义](029.md) + + [Customization basics: tensors and operations](030.md) + + [Custom layers](031.md) + + [自定义训练: 演示](032.md) + + [分布式训练](033.md) + + [Keras 的分布式训练](034.md) + + [使用 tf.distribute.Strategy 进行自定义训练](035.md) + + [利用 Keras 来训练多工作器(worker)](036.md) + + [利用 Estimator 进行多工作器训练](037.md) + + [使用分布策略保存和加载模型](038.md) + + [Distributed Input](039.md) + + [图像](040.md) + + [卷积神经网络(Convolutional Neural Network, CNN)](041.md) + + [Image classification](042.md) + + [Transfer learning and fine-tuning](043.md) + + [Transfer learning with TensorFlow Hub](044.md) + + [Data augmentation](045.md) + + [图像分割](046.md) + + [文本](047.md) + + [单词嵌入向量](048.md) + + [使用 RNN 进行文本分类](049.md) + + [循环神经网络(RNN)文本生成](050.md) + + [基于注意力的神经机器翻译](051.md) + + [Image captioning with visual attention](052.md) + + [理解语言的 Transformer 模型](053.md) + + [Fine-tuning a BERT model](054.md) + + [结构化数据](055.md) + + [对结构化数据进行分类](056.md) + + [Classification on imbalanced data](057.md) + + [Time series forecasting](058.md) + + [生成](059.md) + + [神经风格迁移](060.md) + + [DeepDream](061.md) + + [深度卷积生成对抗网络](062.md) + + [Pix2Pix](063.md) + + [CycleGAN](064.md) + + [Adversarial example using FGSM](065.md) + + [Intro to Autoencoders](066.md) + + [卷积变分自编码器](067.md) + + [可解释性](068.md) + + [Integrated gradients](069.md) + + [强化学习](070.md) + + [Playing CartPole with the Actor-Critic Method](071.md) diff --git a/Tensorflow/TensorFlow2.0/img/00e8007835b36bd4dd63de1a16bd2f94.png b/Tensorflow/TensorFlow2.0/img/00e8007835b36bd4dd63de1a16bd2f94.png new file mode 100644 index 00000000..e4bb8d00 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/00e8007835b36bd4dd63de1a16bd2f94.png differ diff 
--git a/Tensorflow/TensorFlow2.0/img/00ec3c3882214936d486fb8107b457b2.png b/Tensorflow/TensorFlow2.0/img/00ec3c3882214936d486fb8107b457b2.png new file mode 100644 index 00000000..582120e6 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/00ec3c3882214936d486fb8107b457b2.png differ diff --git a/Tensorflow/TensorFlow2.0/img/00fbc45eaf75c4132a6ea862403f4be4.png b/Tensorflow/TensorFlow2.0/img/00fbc45eaf75c4132a6ea862403f4be4.png new file mode 100644 index 00000000..23297cf3 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/00fbc45eaf75c4132a6ea862403f4be4.png differ diff --git a/Tensorflow/TensorFlow2.0/img/019d6a002d56ca3eff0330fbb68495d2.png b/Tensorflow/TensorFlow2.0/img/019d6a002d56ca3eff0330fbb68495d2.png new file mode 100644 index 00000000..b93f7d81 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/019d6a002d56ca3eff0330fbb68495d2.png differ diff --git a/Tensorflow/TensorFlow2.0/img/01e618f7715193d849381e8d78c78c09.png b/Tensorflow/TensorFlow2.0/img/01e618f7715193d849381e8d78c78c09.png new file mode 100644 index 00000000..e4da706a Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/01e618f7715193d849381e8d78c78c09.png differ diff --git a/Tensorflow/TensorFlow2.0/img/027fe3c7c1b2c8f4ba851311692e3d91.png b/Tensorflow/TensorFlow2.0/img/027fe3c7c1b2c8f4ba851311692e3d91.png new file mode 100644 index 00000000..8a94ce42 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/027fe3c7c1b2c8f4ba851311692e3d91.png differ diff --git a/Tensorflow/TensorFlow2.0/img/02b2fc97a46c88c22ee2d11e8c28bf0d.png b/Tensorflow/TensorFlow2.0/img/02b2fc97a46c88c22ee2d11e8c28bf0d.png new file mode 100644 index 00000000..7c863402 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/02b2fc97a46c88c22ee2d11e8c28bf0d.png differ diff --git a/Tensorflow/TensorFlow2.0/img/032dc17ad0509afd4505858b1f0c7d19.png b/Tensorflow/TensorFlow2.0/img/032dc17ad0509afd4505858b1f0c7d19.png new file mode 100644 index 00000000..6cf29d57 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/032dc17ad0509afd4505858b1f0c7d19.png differ diff --git a/Tensorflow/TensorFlow2.0/img/03dad7eb5e1c97b1391c9925be7da416.png b/Tensorflow/TensorFlow2.0/img/03dad7eb5e1c97b1391c9925be7da416.png new file mode 100644 index 00000000..ac95c751 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/03dad7eb5e1c97b1391c9925be7da416.png differ diff --git a/Tensorflow/TensorFlow2.0/img/040af2c2187faf5ae826579d657c0aa0.png b/Tensorflow/TensorFlow2.0/img/040af2c2187faf5ae826579d657c0aa0.png new file mode 100644 index 00000000..3d6a29db Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/040af2c2187faf5ae826579d657c0aa0.png differ diff --git a/Tensorflow/TensorFlow2.0/img/0425284f7bd595a686480abe82721a04.png b/Tensorflow/TensorFlow2.0/img/0425284f7bd595a686480abe82721a04.png new file mode 100644 index 00000000..59a1facf Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/0425284f7bd595a686480abe82721a04.png differ diff --git a/Tensorflow/TensorFlow2.0/img/05dfb322f577170f0e9218f00221d363.png b/Tensorflow/TensorFlow2.0/img/05dfb322f577170f0e9218f00221d363.png new file mode 100644 index 00000000..6a8097d9 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/05dfb322f577170f0e9218f00221d363.png differ diff --git a/Tensorflow/TensorFlow2.0/img/062d680b7bfc538f75dbd6e3d7562502.png b/Tensorflow/TensorFlow2.0/img/062d680b7bfc538f75dbd6e3d7562502.png new file mode 100644 index 00000000..721061db Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/062d680b7bfc538f75dbd6e3d7562502.png differ diff --git 
a/Tensorflow/TensorFlow2.0/img/073182c1df7eec341936447672fb0376.png b/Tensorflow/TensorFlow2.0/img/073182c1df7eec341936447672fb0376.png new file mode 100644 index 00000000..df1bf1bf Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/073182c1df7eec341936447672fb0376.png differ diff --git a/Tensorflow/TensorFlow2.0/img/07f89687b786f68c1561b81ac448c45e.png b/Tensorflow/TensorFlow2.0/img/07f89687b786f68c1561b81ac448c45e.png new file mode 100644 index 00000000..de252075 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/07f89687b786f68c1561b81ac448c45e.png differ diff --git a/Tensorflow/TensorFlow2.0/img/07fde30d678eaceba2bf9695ee89c403.png b/Tensorflow/TensorFlow2.0/img/07fde30d678eaceba2bf9695ee89c403.png new file mode 100644 index 00000000..1e0226ac Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/07fde30d678eaceba2bf9695ee89c403.png differ diff --git a/Tensorflow/TensorFlow2.0/img/086f42f4106103986aa93b67010f2cf9.png b/Tensorflow/TensorFlow2.0/img/086f42f4106103986aa93b67010f2cf9.png new file mode 100644 index 00000000..395fd7a8 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/086f42f4106103986aa93b67010f2cf9.png differ diff --git a/Tensorflow/TensorFlow2.0/img/0b0835e78f54f2c464c9df77cfe6a93b.png b/Tensorflow/TensorFlow2.0/img/0b0835e78f54f2c464c9df77cfe6a93b.png new file mode 100644 index 00000000..10e2e15f Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/0b0835e78f54f2c464c9df77cfe6a93b.png differ diff --git a/Tensorflow/TensorFlow2.0/img/0ba1e7316ba7e228576bbcd85280c309.png b/Tensorflow/TensorFlow2.0/img/0ba1e7316ba7e228576bbcd85280c309.png new file mode 100644 index 00000000..7028311c Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/0ba1e7316ba7e228576bbcd85280c309.png differ diff --git a/Tensorflow/TensorFlow2.0/img/0c7474d216a51a2b258a81a689920596.png b/Tensorflow/TensorFlow2.0/img/0c7474d216a51a2b258a81a689920596.png new file mode 100644 index 00000000..cc3552ac Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/0c7474d216a51a2b258a81a689920596.png differ diff --git a/Tensorflow/TensorFlow2.0/img/0dda76c01237658213cec93698233a22.png b/Tensorflow/TensorFlow2.0/img/0dda76c01237658213cec93698233a22.png new file mode 100644 index 00000000..00398651 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/0dda76c01237658213cec93698233a22.png differ diff --git a/Tensorflow/TensorFlow2.0/img/0eda1b45396cd1c02a76bd76397b9a76.png b/Tensorflow/TensorFlow2.0/img/0eda1b45396cd1c02a76bd76397b9a76.png new file mode 100644 index 00000000..792d740e Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/0eda1b45396cd1c02a76bd76397b9a76.png differ diff --git a/Tensorflow/TensorFlow2.0/img/0f98889f249aed7e8f8f5e90e5432e08.png b/Tensorflow/TensorFlow2.0/img/0f98889f249aed7e8f8f5e90e5432e08.png new file mode 100644 index 00000000..f8ad2704 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/0f98889f249aed7e8f8f5e90e5432e08.png differ diff --git a/Tensorflow/TensorFlow2.0/img/0fc5058e71e5828192048ef6a6b9a595.png b/Tensorflow/TensorFlow2.0/img/0fc5058e71e5828192048ef6a6b9a595.png new file mode 100644 index 00000000..4bd2ff98 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/0fc5058e71e5828192048ef6a6b9a595.png differ diff --git a/Tensorflow/TensorFlow2.0/img/10d71bce93ec45ba7076ef15a37bcb28.png b/Tensorflow/TensorFlow2.0/img/10d71bce93ec45ba7076ef15a37bcb28.png new file mode 100644 index 00000000..8a1a7233 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/10d71bce93ec45ba7076ef15a37bcb28.png differ diff --git 
a/Tensorflow/TensorFlow2.0/img/11c5fe9ef9f8ed2389fe40e5fa1ccbb7.png b/Tensorflow/TensorFlow2.0/img/11c5fe9ef9f8ed2389fe40e5fa1ccbb7.png new file mode 100644 index 00000000..d2866117 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/11c5fe9ef9f8ed2389fe40e5fa1ccbb7.png differ diff --git a/Tensorflow/TensorFlow2.0/img/12bbad2792cbf9031cf0f5c0e54b36a3.png b/Tensorflow/TensorFlow2.0/img/12bbad2792cbf9031cf0f5c0e54b36a3.png new file mode 100644 index 00000000..740e26e9 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/12bbad2792cbf9031cf0f5c0e54b36a3.png differ diff --git a/Tensorflow/TensorFlow2.0/img/13a6ef1c7f66c4208c56677c5ddd6506.png b/Tensorflow/TensorFlow2.0/img/13a6ef1c7f66c4208c56677c5ddd6506.png new file mode 100644 index 00000000..64cc0fcd Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/13a6ef1c7f66c4208c56677c5ddd6506.png differ diff --git a/Tensorflow/TensorFlow2.0/img/13fa130027f8343fe8d952fec8dd0555.png b/Tensorflow/TensorFlow2.0/img/13fa130027f8343fe8d952fec8dd0555.png new file mode 100644 index 00000000..e40ca1cc Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/13fa130027f8343fe8d952fec8dd0555.png differ diff --git a/Tensorflow/TensorFlow2.0/img/141e262e42c195dfe1174f7824ff4c3c.png b/Tensorflow/TensorFlow2.0/img/141e262e42c195dfe1174f7824ff4c3c.png new file mode 100644 index 00000000..a6c72a35 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/141e262e42c195dfe1174f7824ff4c3c.png differ diff --git a/Tensorflow/TensorFlow2.0/img/147de2a992900105434d95a527a869c8.png b/Tensorflow/TensorFlow2.0/img/147de2a992900105434d95a527a869c8.png new file mode 100644 index 00000000..727947db Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/147de2a992900105434d95a527a869c8.png differ diff --git a/Tensorflow/TensorFlow2.0/img/14fce8d9f2fd98077c5bf9a8db1f25ec.png b/Tensorflow/TensorFlow2.0/img/14fce8d9f2fd98077c5bf9a8db1f25ec.png new file mode 100644 index 00000000..847948de Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/14fce8d9f2fd98077c5bf9a8db1f25ec.png differ diff --git a/Tensorflow/TensorFlow2.0/img/16ea92d12fa8170f3e79e4c56f9affd1.png b/Tensorflow/TensorFlow2.0/img/16ea92d12fa8170f3e79e4c56f9affd1.png new file mode 100644 index 00000000..f4ddae50 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/16ea92d12fa8170f3e79e4c56f9affd1.png differ diff --git a/Tensorflow/TensorFlow2.0/img/17877a5940e1f7245c707d3ecf9783e3.png b/Tensorflow/TensorFlow2.0/img/17877a5940e1f7245c707d3ecf9783e3.png new file mode 100644 index 00000000..bf07422e Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/17877a5940e1f7245c707d3ecf9783e3.png differ diff --git a/Tensorflow/TensorFlow2.0/img/17b66fa7e9565fdeabc4fe4752bad60d.png b/Tensorflow/TensorFlow2.0/img/17b66fa7e9565fdeabc4fe4752bad60d.png new file mode 100644 index 00000000..e6bd59b7 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/17b66fa7e9565fdeabc4fe4752bad60d.png differ diff --git a/Tensorflow/TensorFlow2.0/img/187f414e1afde064024f6898871831da.png b/Tensorflow/TensorFlow2.0/img/187f414e1afde064024f6898871831da.png new file mode 100644 index 00000000..366b3fc3 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/187f414e1afde064024f6898871831da.png differ diff --git a/Tensorflow/TensorFlow2.0/img/193b876c3df893a8ae22aad38d3b264e.png b/Tensorflow/TensorFlow2.0/img/193b876c3df893a8ae22aad38d3b264e.png new file mode 100644 index 00000000..e21313ee Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/193b876c3df893a8ae22aad38d3b264e.png differ diff --git 
a/Tensorflow/TensorFlow2.0/img/1b0da93649f50a8108ee8ddd8b08bc04.png b/Tensorflow/TensorFlow2.0/img/1b0da93649f50a8108ee8ddd8b08bc04.png
new file mode 100644
index 00000000..c1e047dc
Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/1b0da93649f50a8108ee8ddd8b08bc04.png differ
[251 further binary PNG additions under Tensorflow/TensorFlow2.0/img/ collapsed for readability: hash-named files from 1c115680e9c0080caf5ac79ab3f9ba0d.png through b84cdc6273e2ce65876a86561af080a5.png, each a new file of the same form as above (new file mode 100644, index 00000000..<blob>), which git can report only as "Binary files /dev/null and b/<path> differ".]
diff --git
a/Tensorflow/TensorFlow2.0/img/b9c02a2b74c4b9c99f2707fddae68a8d.png b/Tensorflow/TensorFlow2.0/img/b9c02a2b74c4b9c99f2707fddae68a8d.png new file mode 100644 index 00000000..313a5ed6 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/b9c02a2b74c4b9c99f2707fddae68a8d.png differ diff --git a/Tensorflow/TensorFlow2.0/img/b9f520a19b0bd493834819ddfc1ef26b.png b/Tensorflow/TensorFlow2.0/img/b9f520a19b0bd493834819ddfc1ef26b.png new file mode 100644 index 00000000..9a91caf7 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/b9f520a19b0bd493834819ddfc1ef26b.png differ diff --git a/Tensorflow/TensorFlow2.0/img/bb63d10882d3aa9a631d3cf50ff7f21e.png b/Tensorflow/TensorFlow2.0/img/bb63d10882d3aa9a631d3cf50ff7f21e.png new file mode 100644 index 00000000..657a40e0 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/bb63d10882d3aa9a631d3cf50ff7f21e.png differ diff --git a/Tensorflow/TensorFlow2.0/img/be737507a3c4409c7dc8aa33d2196e15.png b/Tensorflow/TensorFlow2.0/img/be737507a3c4409c7dc8aa33d2196e15.png new file mode 100644 index 00000000..658846be Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/be737507a3c4409c7dc8aa33d2196e15.png differ diff --git a/Tensorflow/TensorFlow2.0/img/bf058b152584cc8e8c3987a57eb7331f.png b/Tensorflow/TensorFlow2.0/img/bf058b152584cc8e8c3987a57eb7331f.png new file mode 100644 index 00000000..0e8f19b6 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/bf058b152584cc8e8c3987a57eb7331f.png differ diff --git a/Tensorflow/TensorFlow2.0/img/bf18f34f1076405e0bca19939de2bd91.png b/Tensorflow/TensorFlow2.0/img/bf18f34f1076405e0bca19939de2bd91.png new file mode 100644 index 00000000..fd38897c Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/bf18f34f1076405e0bca19939de2bd91.png differ diff --git a/Tensorflow/TensorFlow2.0/img/bfdf8a09e5b289018514265caa15c398.png b/Tensorflow/TensorFlow2.0/img/bfdf8a09e5b289018514265caa15c398.png new file mode 100644 index 00000000..f953c1e8 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/bfdf8a09e5b289018514265caa15c398.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c05647567564d6207cec5962f562a1d7.png b/Tensorflow/TensorFlow2.0/img/c05647567564d6207cec5962f562a1d7.png new file mode 100644 index 00000000..47fad364 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c05647567564d6207cec5962f562a1d7.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c12f3797e75b6aa8bdc206f4b91344c1.png b/Tensorflow/TensorFlow2.0/img/c12f3797e75b6aa8bdc206f4b91344c1.png new file mode 100644 index 00000000..b12b517d Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c12f3797e75b6aa8bdc206f4b91344c1.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c152c7181c46470c9085620544ab9be7.png b/Tensorflow/TensorFlow2.0/img/c152c7181c46470c9085620544ab9be7.png new file mode 100644 index 00000000..65ece1f6 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c152c7181c46470c9085620544ab9be7.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c22fa60f9660a04e79c3549bc99392df.png b/Tensorflow/TensorFlow2.0/img/c22fa60f9660a04e79c3549bc99392df.png new file mode 100644 index 00000000..ed898b47 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c22fa60f9660a04e79c3549bc99392df.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c239b9ad6cf9b5f72e3d6d37fd17b9d1.png b/Tensorflow/TensorFlow2.0/img/c239b9ad6cf9b5f72e3d6d37fd17b9d1.png new file mode 100644 index 00000000..740b8b45 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c239b9ad6cf9b5f72e3d6d37fd17b9d1.png differ diff --git 
a/Tensorflow/TensorFlow2.0/img/c2a117375845a6a7d1c87b2c84de54e8.png b/Tensorflow/TensorFlow2.0/img/c2a117375845a6a7d1c87b2c84de54e8.png new file mode 100644 index 00000000..ce27fca9 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c2a117375845a6a7d1c87b2c84de54e8.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c40faed8e3efd524ec22fb4842e81867.png b/Tensorflow/TensorFlow2.0/img/c40faed8e3efd524ec22fb4842e81867.png new file mode 100644 index 00000000..1108f9be Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c40faed8e3efd524ec22fb4842e81867.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c542bc6784512a8abdc2e3a85a1e1905.png b/Tensorflow/TensorFlow2.0/img/c542bc6784512a8abdc2e3a85a1e1905.png new file mode 100644 index 00000000..165da78c Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c542bc6784512a8abdc2e3a85a1e1905.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c55318af9effb720969a76a37cfc3a42.png b/Tensorflow/TensorFlow2.0/img/c55318af9effb720969a76a37cfc3a42.png new file mode 100644 index 00000000..a42bf678 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c55318af9effb720969a76a37cfc3a42.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c5f05439bb7e2eb354fda7f89beadeb3.png b/Tensorflow/TensorFlow2.0/img/c5f05439bb7e2eb354fda7f89beadeb3.png new file mode 100644 index 00000000..b65655d7 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c5f05439bb7e2eb354fda7f89beadeb3.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c67ce581d874e2d04e2761cc44b1d094.png b/Tensorflow/TensorFlow2.0/img/c67ce581d874e2d04e2761cc44b1d094.png new file mode 100644 index 00000000..2b1f6a20 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c67ce581d874e2d04e2761cc44b1d094.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c705ca2a522a41c9585fcf3ac5b4f667.png b/Tensorflow/TensorFlow2.0/img/c705ca2a522a41c9585fcf3ac5b4f667.png new file mode 100644 index 00000000..dc0dc52e Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c705ca2a522a41c9585fcf3ac5b4f667.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c7a03d480eb2e9be1f4499761937f9ee.png b/Tensorflow/TensorFlow2.0/img/c7a03d480eb2e9be1f4499761937f9ee.png new file mode 100644 index 00000000..e02a5daf Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c7a03d480eb2e9be1f4499761937f9ee.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c7e184638977ee322a898f6148f543c3.png b/Tensorflow/TensorFlow2.0/img/c7e184638977ee322a898f6148f543c3.png new file mode 100644 index 00000000..f7e28e36 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c7e184638977ee322a898f6148f543c3.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c91d625a0312bd25acf8dab10ecb51ed.png b/Tensorflow/TensorFlow2.0/img/c91d625a0312bd25acf8dab10ecb51ed.png new file mode 100644 index 00000000..1dec8e35 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c91d625a0312bd25acf8dab10ecb51ed.png differ diff --git a/Tensorflow/TensorFlow2.0/img/c9a00de59da2a696666612d0bd08c7a7.png b/Tensorflow/TensorFlow2.0/img/c9a00de59da2a696666612d0bd08c7a7.png new file mode 100644 index 00000000..e03b8aa7 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/c9a00de59da2a696666612d0bd08c7a7.png differ diff --git a/Tensorflow/TensorFlow2.0/img/ca5c6f8a5c7444a19483d53990e42c33.png b/Tensorflow/TensorFlow2.0/img/ca5c6f8a5c7444a19483d53990e42c33.png new file mode 100644 index 00000000..ce8f9f37 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/ca5c6f8a5c7444a19483d53990e42c33.png differ diff --git 
a/Tensorflow/TensorFlow2.0/img/cb18ad8212a0648018238babc8fe2325.png b/Tensorflow/TensorFlow2.0/img/cb18ad8212a0648018238babc8fe2325.png new file mode 100644 index 00000000..5a73a574 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/cb18ad8212a0648018238babc8fe2325.png differ diff --git a/Tensorflow/TensorFlow2.0/img/cd8127c26455c518a827f0ce6a07b1e0.png b/Tensorflow/TensorFlow2.0/img/cd8127c26455c518a827f0ce6a07b1e0.png new file mode 100644 index 00000000..8cdd16b9 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/cd8127c26455c518a827f0ce6a07b1e0.png differ diff --git a/Tensorflow/TensorFlow2.0/img/cfa82b128c103151f142dae7b5ddecda.png b/Tensorflow/TensorFlow2.0/img/cfa82b128c103151f142dae7b5ddecda.png new file mode 100644 index 00000000..08e34e65 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/cfa82b128c103151f142dae7b5ddecda.png differ diff --git a/Tensorflow/TensorFlow2.0/img/d33aeddd8358af50caa929e3528cf6e5.png b/Tensorflow/TensorFlow2.0/img/d33aeddd8358af50caa929e3528cf6e5.png new file mode 100644 index 00000000..6bbf04e2 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/d33aeddd8358af50caa929e3528cf6e5.png differ diff --git a/Tensorflow/TensorFlow2.0/img/d48861f9a16908e3c3980990638ecb40.png b/Tensorflow/TensorFlow2.0/img/d48861f9a16908e3c3980990638ecb40.png new file mode 100644 index 00000000..b025278d Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/d48861f9a16908e3c3980990638ecb40.png differ diff --git a/Tensorflow/TensorFlow2.0/img/d5d8ce7c60645b3e01a6e088280470ba.png b/Tensorflow/TensorFlow2.0/img/d5d8ce7c60645b3e01a6e088280470ba.png new file mode 100644 index 00000000..5a62c81f Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/d5d8ce7c60645b3e01a6e088280470ba.png differ diff --git a/Tensorflow/TensorFlow2.0/img/d6513785291f1616fa5a88b830c9a438.png b/Tensorflow/TensorFlow2.0/img/d6513785291f1616fa5a88b830c9a438.png new file mode 100644 index 00000000..82b44f39 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/d6513785291f1616fa5a88b830c9a438.png differ diff --git a/Tensorflow/TensorFlow2.0/img/d653a0d6330958d36f31b35e1410ff6d.png b/Tensorflow/TensorFlow2.0/img/d653a0d6330958d36f31b35e1410ff6d.png new file mode 100644 index 00000000..be55cc8f Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/d653a0d6330958d36f31b35e1410ff6d.png differ diff --git a/Tensorflow/TensorFlow2.0/img/d68f92600680dfc45d965045e843ec4d.png b/Tensorflow/TensorFlow2.0/img/d68f92600680dfc45d965045e843ec4d.png new file mode 100644 index 00000000..aa190631 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/d68f92600680dfc45d965045e843ec4d.png differ diff --git a/Tensorflow/TensorFlow2.0/img/d6c8610603858ddd864cc7f024f16e40.png b/Tensorflow/TensorFlow2.0/img/d6c8610603858ddd864cc7f024f16e40.png new file mode 100644 index 00000000..13accab2 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/d6c8610603858ddd864cc7f024f16e40.png differ diff --git a/Tensorflow/TensorFlow2.0/img/d6d4178e447bc9f8c984345c73202b01.png b/Tensorflow/TensorFlow2.0/img/d6d4178e447bc9f8c984345c73202b01.png new file mode 100644 index 00000000..f6aa17ff Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/d6d4178e447bc9f8c984345c73202b01.png differ diff --git a/Tensorflow/TensorFlow2.0/img/d85fdaff014f0211e5ef646977087e50.png b/Tensorflow/TensorFlow2.0/img/d85fdaff014f0211e5ef646977087e50.png new file mode 100644 index 00000000..ee37ede5 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/d85fdaff014f0211e5ef646977087e50.png differ diff --git 
a/Tensorflow/TensorFlow2.0/img/d99736f992ec3e1883b57ef705221367.png b/Tensorflow/TensorFlow2.0/img/d99736f992ec3e1883b57ef705221367.png new file mode 100644 index 00000000..7ad866d8 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/d99736f992ec3e1883b57ef705221367.png differ diff --git a/Tensorflow/TensorFlow2.0/img/dbd4a3a9bd5a14a61bcaf558a2231993.png b/Tensorflow/TensorFlow2.0/img/dbd4a3a9bd5a14a61bcaf558a2231993.png new file mode 100644 index 00000000..9f130204 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/dbd4a3a9bd5a14a61bcaf558a2231993.png differ diff --git a/Tensorflow/TensorFlow2.0/img/dcd2e24d351259809e8bd2dfe61f3f59.png b/Tensorflow/TensorFlow2.0/img/dcd2e24d351259809e8bd2dfe61f3f59.png new file mode 100644 index 00000000..5c5eec51 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/dcd2e24d351259809e8bd2dfe61f3f59.png differ diff --git a/Tensorflow/TensorFlow2.0/img/dcf39b63e54ff302e7e37e39b90facc7.png b/Tensorflow/TensorFlow2.0/img/dcf39b63e54ff302e7e37e39b90facc7.png new file mode 100644 index 00000000..161ad9fd Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/dcf39b63e54ff302e7e37e39b90facc7.png differ diff --git a/Tensorflow/TensorFlow2.0/img/dd1b792428257ee1ffcb4e02d4e81c11.png b/Tensorflow/TensorFlow2.0/img/dd1b792428257ee1ffcb4e02d4e81c11.png new file mode 100644 index 00000000..5c91f967 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/dd1b792428257ee1ffcb4e02d4e81c11.png differ diff --git a/Tensorflow/TensorFlow2.0/img/dd4897a112d5aa5b56d54b9d8f8ac97d.png b/Tensorflow/TensorFlow2.0/img/dd4897a112d5aa5b56d54b9d8f8ac97d.png new file mode 100644 index 00000000..171c9b3e Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/dd4897a112d5aa5b56d54b9d8f8ac97d.png differ diff --git a/Tensorflow/TensorFlow2.0/img/dda6acab76c9a017bbe16c3bebb8e54c.png b/Tensorflow/TensorFlow2.0/img/dda6acab76c9a017bbe16c3bebb8e54c.png new file mode 100644 index 00000000..8bd2ff4d Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/dda6acab76c9a017bbe16c3bebb8e54c.png differ diff --git a/Tensorflow/TensorFlow2.0/img/de0d6e38c509169ec5a4edafdcb08e55.png b/Tensorflow/TensorFlow2.0/img/de0d6e38c509169ec5a4edafdcb08e55.png new file mode 100644 index 00000000..cbfa59c0 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/de0d6e38c509169ec5a4edafdcb08e55.png differ diff --git a/Tensorflow/TensorFlow2.0/img/df0f8cf3d780ab200f3e674a67324435.png b/Tensorflow/TensorFlow2.0/img/df0f8cf3d780ab200f3e674a67324435.png new file mode 100644 index 00000000..c72b376c Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/df0f8cf3d780ab200f3e674a67324435.png differ diff --git a/Tensorflow/TensorFlow2.0/img/df68f144581e032c370ca708f4983bdb.png b/Tensorflow/TensorFlow2.0/img/df68f144581e032c370ca708f4983bdb.png new file mode 100644 index 00000000..cf9a1b0d Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/df68f144581e032c370ca708f4983bdb.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e06760b4112e8fd989cdb1f7a948bc17.png b/Tensorflow/TensorFlow2.0/img/e06760b4112e8fd989cdb1f7a948bc17.png new file mode 100644 index 00000000..db3e02e8 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e06760b4112e8fd989cdb1f7a948bc17.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e078fc18f5dbcddacb9cc4e95629a3bf.png b/Tensorflow/TensorFlow2.0/img/e078fc18f5dbcddacb9cc4e95629a3bf.png new file mode 100644 index 00000000..67bd2d17 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e078fc18f5dbcddacb9cc4e95629a3bf.png differ diff --git 
a/Tensorflow/TensorFlow2.0/img/e0ae5b8f55425e83ab36e4a3a097b4e8.png b/Tensorflow/TensorFlow2.0/img/e0ae5b8f55425e83ab36e4a3a097b4e8.png new file mode 100644 index 00000000..c581d2fb Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e0ae5b8f55425e83ab36e4a3a097b4e8.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e0df7b7876498420dcf8663d9c91a023.png b/Tensorflow/TensorFlow2.0/img/e0df7b7876498420dcf8663d9c91a023.png new file mode 100644 index 00000000..93075b18 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e0df7b7876498420dcf8663d9c91a023.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e2143b6a00159c480e078bcbc7c8c72b.png b/Tensorflow/TensorFlow2.0/img/e2143b6a00159c480e078bcbc7c8c72b.png new file mode 100644 index 00000000..2776aad5 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e2143b6a00159c480e078bcbc7c8c72b.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e297781397cdc97e304b45625f7ae423.png b/Tensorflow/TensorFlow2.0/img/e297781397cdc97e304b45625f7ae423.png new file mode 100644 index 00000000..a702ec30 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e297781397cdc97e304b45625f7ae423.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e2e6d59bb8ebd47a957558d11e836ec1.png b/Tensorflow/TensorFlow2.0/img/e2e6d59bb8ebd47a957558d11e836ec1.png new file mode 100644 index 00000000..7f948808 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e2e6d59bb8ebd47a957558d11e836ec1.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e2f893946253a84a1410e69624e915bc.png b/Tensorflow/TensorFlow2.0/img/e2f893946253a84a1410e69624e915bc.png new file mode 100644 index 00000000..0934615a Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e2f893946253a84a1410e69624e915bc.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e3d2caa770c7f600fb5cdc2a95ad0e0a.png b/Tensorflow/TensorFlow2.0/img/e3d2caa770c7f600fb5cdc2a95ad0e0a.png new file mode 100644 index 00000000..bbe26f08 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e3d2caa770c7f600fb5cdc2a95ad0e0a.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e3e3424830f874b566c07a0e86696a13.png b/Tensorflow/TensorFlow2.0/img/e3e3424830f874b566c07a0e86696a13.png new file mode 100644 index 00000000..8188cc33 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e3e3424830f874b566c07a0e86696a13.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e3ef015d6cba8b2d535ce30b60549453.png b/Tensorflow/TensorFlow2.0/img/e3ef015d6cba8b2d535ce30b60549453.png new file mode 100644 index 00000000..223d0e52 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e3ef015d6cba8b2d535ce30b60549453.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e3ffe6a29488821b01dd98cba6690e5f.png b/Tensorflow/TensorFlow2.0/img/e3ffe6a29488821b01dd98cba6690e5f.png new file mode 100644 index 00000000..6e263b50 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e3ffe6a29488821b01dd98cba6690e5f.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e46db7cde2b53be53d302c4b00d582a5.png b/Tensorflow/TensorFlow2.0/img/e46db7cde2b53be53d302c4b00d582a5.png new file mode 100644 index 00000000..2ef0f536 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e46db7cde2b53be53d302c4b00d582a5.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e47b08aec7cc62d5268c6c6af8cf2b16.png b/Tensorflow/TensorFlow2.0/img/e47b08aec7cc62d5268c6c6af8cf2b16.png new file mode 100644 index 00000000..7a0b4d56 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e47b08aec7cc62d5268c6c6af8cf2b16.png differ diff --git 
a/Tensorflow/TensorFlow2.0/img/e4d27c794147e0649dec40c1e673fa3d.png b/Tensorflow/TensorFlow2.0/img/e4d27c794147e0649dec40c1e673fa3d.png new file mode 100644 index 00000000..fc02b115 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e4d27c794147e0649dec40c1e673fa3d.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e61ee4650b5d251079d57baa98bff19e.png b/Tensorflow/TensorFlow2.0/img/e61ee4650b5d251079d57baa98bff19e.png new file mode 100644 index 00000000..4bb11e4d Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e61ee4650b5d251079d57baa98bff19e.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e68189c9da69b7848e9033d29a0dc574.png b/Tensorflow/TensorFlow2.0/img/e68189c9da69b7848e9033d29a0dc574.png new file mode 100644 index 00000000..a676c13a Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e68189c9da69b7848e9033d29a0dc574.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e8229311b22645eacfe9d45893aa40bc.png b/Tensorflow/TensorFlow2.0/img/e8229311b22645eacfe9d45893aa40bc.png new file mode 100644 index 00000000..889f033d Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e8229311b22645eacfe9d45893aa40bc.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e8d23da7a633c8eaa5878bca988b63f3.png b/Tensorflow/TensorFlow2.0/img/e8d23da7a633c8eaa5878bca988b63f3.png new file mode 100644 index 00000000..9dea5f1f Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e8d23da7a633c8eaa5878bca988b63f3.png differ diff --git a/Tensorflow/TensorFlow2.0/img/e954331a93f7da6b3ebeb6d2c90586f4.png b/Tensorflow/TensorFlow2.0/img/e954331a93f7da6b3ebeb6d2c90586f4.png new file mode 100644 index 00000000..404d97b8 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/e954331a93f7da6b3ebeb6d2c90586f4.png differ diff --git a/Tensorflow/TensorFlow2.0/img/ea3d81df30a4c5fa3b8c108a2be71e73.png b/Tensorflow/TensorFlow2.0/img/ea3d81df30a4c5fa3b8c108a2be71e73.png new file mode 100644 index 00000000..71337fd6 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/ea3d81df30a4c5fa3b8c108a2be71e73.png differ diff --git a/Tensorflow/TensorFlow2.0/img/ec46666ea7ae5ed4692fbbd51a12f72c.png b/Tensorflow/TensorFlow2.0/img/ec46666ea7ae5ed4692fbbd51a12f72c.png new file mode 100644 index 00000000..0083d384 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/ec46666ea7ae5ed4692fbbd51a12f72c.png differ diff --git a/Tensorflow/TensorFlow2.0/img/ed58bf9aecc879b9fef41103a0178752.png b/Tensorflow/TensorFlow2.0/img/ed58bf9aecc879b9fef41103a0178752.png new file mode 100644 index 00000000..04010ab7 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/ed58bf9aecc879b9fef41103a0178752.png differ diff --git a/Tensorflow/TensorFlow2.0/img/edb8cf06303c60cf812dce4865e8d331.png b/Tensorflow/TensorFlow2.0/img/edb8cf06303c60cf812dce4865e8d331.png new file mode 100644 index 00000000..8a9a3335 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/edb8cf06303c60cf812dce4865e8d331.png differ diff --git a/Tensorflow/TensorFlow2.0/img/ee3ae6c62d5acf6adfea6458312bcb02.png b/Tensorflow/TensorFlow2.0/img/ee3ae6c62d5acf6adfea6458312bcb02.png new file mode 100644 index 00000000..a29bdc04 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/ee3ae6c62d5acf6adfea6458312bcb02.png differ diff --git a/Tensorflow/TensorFlow2.0/img/ee409d59cd41f3de0f02655abfc4d0c0.png b/Tensorflow/TensorFlow2.0/img/ee409d59cd41f3de0f02655abfc4d0c0.png new file mode 100644 index 00000000..1ef1d423 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/ee409d59cd41f3de0f02655abfc4d0c0.png differ diff --git 
a/Tensorflow/TensorFlow2.0/img/eeb0649161960b7c64a96a75e088eab6.png b/Tensorflow/TensorFlow2.0/img/eeb0649161960b7c64a96a75e088eab6.png new file mode 100644 index 00000000..9fadc3b7 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/eeb0649161960b7c64a96a75e088eab6.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f021b204e92d0e77d8439a03a43bb21e.png b/Tensorflow/TensorFlow2.0/img/f021b204e92d0e77d8439a03a43bb21e.png new file mode 100644 index 00000000..d9bf7664 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f021b204e92d0e77d8439a03a43bb21e.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f1fa73dd5b685549afd6264592919903.png b/Tensorflow/TensorFlow2.0/img/f1fa73dd5b685549afd6264592919903.png new file mode 100644 index 00000000..c8701cb8 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f1fa73dd5b685549afd6264592919903.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f2f53e7a4522a77ce6e821a299a77c76.png b/Tensorflow/TensorFlow2.0/img/f2f53e7a4522a77ce6e821a299a77c76.png new file mode 100644 index 00000000..cb5544f9 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f2f53e7a4522a77ce6e821a299a77c76.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f33f21e1842dad5f569cc75d0c43b4c1.png b/Tensorflow/TensorFlow2.0/img/f33f21e1842dad5f569cc75d0c43b4c1.png new file mode 100644 index 00000000..b548eeca Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f33f21e1842dad5f569cc75d0c43b4c1.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f3c5a66b35a03bd6a2bf9c3a65a39dfb.png b/Tensorflow/TensorFlow2.0/img/f3c5a66b35a03bd6a2bf9c3a65a39dfb.png new file mode 100644 index 00000000..c06ef4e1 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f3c5a66b35a03bd6a2bf9c3a65a39dfb.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f40a6da0d8471d4b9b979d456cb09d0d.png b/Tensorflow/TensorFlow2.0/img/f40a6da0d8471d4b9b979d456cb09d0d.png new file mode 100644 index 00000000..e6d97c45 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f40a6da0d8471d4b9b979d456cb09d0d.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f5468d2d190850a0c5adf6682ae9a2b3.png b/Tensorflow/TensorFlow2.0/img/f5468d2d190850a0c5adf6682ae9a2b3.png new file mode 100644 index 00000000..aa258ba4 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f5468d2d190850a0c5adf6682ae9a2b3.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f769d692ddcca3810cad6e32307d9b3a.png b/Tensorflow/TensorFlow2.0/img/f769d692ddcca3810cad6e32307d9b3a.png new file mode 100644 index 00000000..e75f1408 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f769d692ddcca3810cad6e32307d9b3a.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f7ad29c64650bcb7c917f1bd3c34a678.png b/Tensorflow/TensorFlow2.0/img/f7ad29c64650bcb7c917f1bd3c34a678.png new file mode 100644 index 00000000..039334cd Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f7ad29c64650bcb7c917f1bd3c34a678.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f80edb85b33ee8d70b05983a35aca22f.png b/Tensorflow/TensorFlow2.0/img/f80edb85b33ee8d70b05983a35aca22f.png new file mode 100644 index 00000000..ac522c7a Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f80edb85b33ee8d70b05983a35aca22f.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f865018e54d4c67ed60313c72d71e99c.png b/Tensorflow/TensorFlow2.0/img/f865018e54d4c67ed60313c72d71e99c.png new file mode 100644 index 00000000..7a2c68a7 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f865018e54d4c67ed60313c72d71e99c.png differ diff --git 
a/Tensorflow/TensorFlow2.0/img/f8876684e2b6e5576c9f4dc1029bb237.png b/Tensorflow/TensorFlow2.0/img/f8876684e2b6e5576c9f4dc1029bb237.png new file mode 100644 index 00000000..afcb4945 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f8876684e2b6e5576c9f4dc1029bb237.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f89cb56c5d3c77f56118a42ca7fb3936.png b/Tensorflow/TensorFlow2.0/img/f89cb56c5d3c77f56118a42ca7fb3936.png new file mode 100644 index 00000000..342e1751 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f89cb56c5d3c77f56118a42ca7fb3936.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f8f7d813408efbfa9b764dfcfe54d2ad.png b/Tensorflow/TensorFlow2.0/img/f8f7d813408efbfa9b764dfcfe54d2ad.png new file mode 100644 index 00000000..1799122d Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f8f7d813408efbfa9b764dfcfe54d2ad.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f9843723cb76f7e84a4d3e7435c3a2c0.png b/Tensorflow/TensorFlow2.0/img/f9843723cb76f7e84a4d3e7435c3a2c0.png new file mode 100644 index 00000000..e55c2f69 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f9843723cb76f7e84a4d3e7435c3a2c0.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f9857876c8d165f58db57e4f8cb7bbc4.png b/Tensorflow/TensorFlow2.0/img/f9857876c8d165f58db57e4f8cb7bbc4.png new file mode 100644 index 00000000..4e4806a9 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f9857876c8d165f58db57e4f8cb7bbc4.png differ diff --git a/Tensorflow/TensorFlow2.0/img/f9f505f9e0bb94757eb576cd0aa1c1f3.png b/Tensorflow/TensorFlow2.0/img/f9f505f9e0bb94757eb576cd0aa1c1f3.png new file mode 100644 index 00000000..6b56b66f Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/f9f505f9e0bb94757eb576cd0aa1c1f3.png differ diff --git a/Tensorflow/TensorFlow2.0/img/fa2d5bf8aeffd46cc7559d7104f99ba2.png b/Tensorflow/TensorFlow2.0/img/fa2d5bf8aeffd46cc7559d7104f99ba2.png new file mode 100644 index 00000000..7e666c5e Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/fa2d5bf8aeffd46cc7559d7104f99ba2.png differ diff --git a/Tensorflow/TensorFlow2.0/img/fa351313bf1ddb033fe47d07a518db6c.png b/Tensorflow/TensorFlow2.0/img/fa351313bf1ddb033fe47d07a518db6c.png new file mode 100644 index 00000000..565aae81 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/fa351313bf1ddb033fe47d07a518db6c.png differ diff --git a/Tensorflow/TensorFlow2.0/img/fb0bdd5ec0ad3a81aa686b46a6fa16d7.png b/Tensorflow/TensorFlow2.0/img/fb0bdd5ec0ad3a81aa686b46a6fa16d7.png new file mode 100644 index 00000000..491c1f6e Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/fb0bdd5ec0ad3a81aa686b46a6fa16d7.png differ diff --git a/Tensorflow/TensorFlow2.0/img/fb25ef686fd2905523f707dff3e20b36.png b/Tensorflow/TensorFlow2.0/img/fb25ef686fd2905523f707dff3e20b36.png new file mode 100644 index 00000000..2bc30421 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/fb25ef686fd2905523f707dff3e20b36.png differ diff --git a/Tensorflow/TensorFlow2.0/img/fc1492a9c4124dcf0d9fb207c0a323d0.png b/Tensorflow/TensorFlow2.0/img/fc1492a9c4124dcf0d9fb207c0a323d0.png new file mode 100644 index 00000000..fdf754fa Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/fc1492a9c4124dcf0d9fb207c0a323d0.png differ diff --git a/Tensorflow/TensorFlow2.0/img/fe72873df8e5156872c578827579ba34.png b/Tensorflow/TensorFlow2.0/img/fe72873df8e5156872c578827579ba34.png new file mode 100644 index 00000000..71ea3138 Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/fe72873df8e5156872c578827579ba34.png differ diff --git 
a/Tensorflow/TensorFlow2.0/img/fea91dec17000f60233620455539d42b.png b/Tensorflow/TensorFlow2.0/img/fea91dec17000f60233620455539d42b.png
new file mode 100644
index 00000000..c7e6557f
Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/fea91dec17000f60233620455539d42b.png differ
diff --git a/Tensorflow/TensorFlow2.0/img/fecda9bde6f4c7551c164dc066491cb5.png b/Tensorflow/TensorFlow2.0/img/fecda9bde6f4c7551c164dc066491cb5.png
new file mode 100644
index 00000000..5cf06baa
Binary files /dev/null and b/Tensorflow/TensorFlow2.0/img/fecda9bde6f4c7551c164dc066491cb5.png differ
diff --git a/pytorch/01.md b/pytorch/01.md
new file mode 100644
index 00000000..a50456ff
--- /dev/null
+++ b/pytorch/01.md
@@ -0,0 +1 @@
+# Introduction to PyTorch
\ No newline at end of file
diff --git a/pytorch/02.md b/pytorch/02.md
new file mode 100644
index 00000000..eca57a08
--- /dev/null
+++ b/pytorch/02.md
@@ -0,0 +1,36 @@
+# 1.1 – Why PyTorch?
+
+## Why use PyTorch
+
+[PyTorch](http://pytorch.org/) is the Python descendant of [Torch](http://torch.ch/). [Torch](http://torch.ch/) is a neural network library written in [Lua](https://www.lua.org/); Torch itself works well, but Lua is not particularly popular, so the development team ported Torch to the far more popular Python. And indeed, PyTorch drew a strong response the moment it was released. Why?
+
+Simple: just look at who is using PyTorch.
+
+![](img/20e2ebdf112e4aa3202e951e072c2dc2.png)
+
+As you can see, well-known companies such as Facebook and Twitter are using it, which shows that PyTorch really is practical and worth promoting.
+
+Moreover, if you know [Numpy](http://www.numpy.org/): PyTorch describes itself as the module that can replace numpy in the neural network domain.
+
+## What neural networks do
+
+A neural network learning to fit a curve (regression):
+
+![](img/c8011979fa20046cbfa36e46cf508689.png)
+
+A neural network learning to separate data (classification):
+
+![](img/94268b7d9687d039d872da203453c97b.png)
+
+## PyTorch and Tensorflow
+
+According to PyTorch's own introduction, its biggest advantage is that the networks it builds are dynamic. Compared with the static Tensorflow, it handles certain problems more effectively, such as RNN outputs whose sequence length varies. In my view each framework has its own strengths and weaknesses, so we should stay neutral. Both are backed by large companies; Tensorflow says it has invested heavily in distributed training, so I will assume Tensorflow is ahead of PyTorch on that point, but Tensorflow's static computation graph leaves it somewhat on the back foot with RNNs (although it has worked around this by other means). When you use PyTorch, you will come away with a better understanding of these dynamic RNNs.
+
+Also, Tensorflow is highly industrialised, and its low-level code is... not something you can read. PyTorch is a little better: if you dig into the API, you can at least understand a bit more of what PyTorch is doing under the hood than you ever will with Tensorflow.
+
+Finally, my advice:
+
+* If you are a student, pick either one to learn, or lean slightly towards PyTorch, because the code should be easier to understand as you write it. Once you have mastered one framework, switching to Tensorflow or any other library is straightforward.
+* If you are already employed, follow your company: use whatever it uses, and stay in step with your team.
+
+Source: [莫烦](https://morvanzhou.github.io/)
\ No newline at end of file
diff --git a/pytorch/03.md b/pytorch/03.md
new file mode 100644
index 00000000..326541d1
--- /dev/null
+++ b/pytorch/03.md
@@ -0,0 +1,24 @@
+# 1.2 – Installing PyTorch
+
+## Supported systems
+
+For now PyTorch only supports MacOS and Linux; Windows is not supported yet! (Poor Windows users... abandoned again.) But, just like Tensorflow, they may well add support suddenly one day under strong demand from Windows users.
+
+## Installation
+
+PyTorch is easy to install; [its own website](http://pytorch.org/) offers a convenient selector (after site redesigns it may differ a little from the image below):
+
+![](img/b708f231f544bd7bcefa1d55c82653dd.png)
+
+So pick the installation method that matches your situation. Taking myself as an example: I am on MacOS, I want to install with pip, my Python is version 3.5, and I have no GPU acceleration, so I select accordingly above:
+
+Then, following the instructions shown there, all I need to do is type these commands into my Terminal:
+
+```py
+$ pip install http://download.pytorch.org/whl/torch-0.1.11.post5-cp35-cp35m-macosx_10_7_x86_64.whl
+$ pip install torchvision
+```
+
+Note that I installed torch version 0.1.11; check their website for newer releases. Installing PyTorch installs two modules: torch and torchvision. torch is the main module, used to build neural networks; torchvision is a companion module that ships datasets and some pre-trained networks ready to use directly, such as [VGG, AlexNet, ResNet](http://pytorch.org/docs/torchvision/models.html).
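+
+As a minimal sanity check after installing (this snippet is a sketch of mine, not part of the original tutorial):
+
+```py
+import torch
+import torchvision  # the companion module installed above
+
+print(torch.rand(2, 3))  # a random 2x3 tensor; if this prints, the install works
+```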
+
+Source: [莫烦](https://morvanzhou.github.io/)
\ No newline at end of file
diff --git a/pytorch/04.md b/pytorch/04.md
new file mode 100644
index 00000000..bdded7df
--- /dev/null
+++ b/pytorch/04.md
@@ -0,0 +1 @@
+# PyTorch neural network basics
\ No newline at end of file
diff --git a/pytorch/05.md b/pytorch/05.md
new file mode 100644
index 00000000..c14f3676
--- /dev/null
+++ b/pytorch/05.md
@@ -0,0 +1,74 @@
+# 2.1 – Torch vs Numpy
+
+## Numpy or Torch?
+
+Torch calls itself the Numpy of the neural network world, because it can place the tensors it creates on a GPU to accelerate computation (provided you have a suitable GPU), just as Numpy puts arrays on the CPU for fast computation. So for neural networks, data in Torch's tensor form is of course the best choice, just like the tensors in Tensorflow.
+
+Of course, we still can't put Numpy down, because we are so used to numpy's interface. Fortunately torch recognises this affection and is built to interoperate well with numpy. For example, you can freely convert between numpy arrays and torch tensors like this:
+
+```py
+import torch
+import numpy as np
+
+np_data = np.arange(6).reshape((2, 3))
+torch_data = torch.from_numpy(np_data)
+tensor2array = torch_data.numpy()
+print(
+    '\nnumpy array:', np_data,          # [[0 1 2], [3 4 5]]
+    '\ntorch tensor:', torch_data,      # 0 1 2 \n 3 4 5    [torch.LongTensor of size 2x3]
+    '\ntensor to array:', tensor2array, # [[0 1 2], [3 4 5]]
+)
+```
+
+## Math operations in Torch
+
+Tensor operations in torch closely mirror those on numpy arrays, so let's look at them side by side. If you want to learn about more useful operators in torch, [the API is the place to go](http://pytorch.org/docs/torch.html#math-operations).
+
+```py
+# abs: absolute value
+data = [-1, -2, 1, 2]
+tensor = torch.FloatTensor(data)  # convert to a 32-bit float tensor
+print(
+    '\nabs',
+    '\nnumpy: ', np.abs(data),      # [1 2 1 2]
+    '\ntorch: ', torch.abs(tensor)  # [1 2 1 2]
+)
+
+# sin: trigonometric sine
+print(
+    '\nsin',
+    '\nnumpy: ', np.sin(data),      # [-0.84147098 -0.90929743  0.84147098  0.90929743]
+    '\ntorch: ', torch.sin(tensor)  # [-0.8415 -0.9093  0.8415  0.9093]
+)
+
+# mean: average
+print(
+    '\nmean',
+    '\nnumpy: ', np.mean(data),      # 0.0
+    '\ntorch: ', torch.mean(tensor)  # 0.0
+)
+```
+
+Beyond simple element-wise computation, matrix operations are the most important part of neural networks, so let's demonstrate matrix multiplication. Note that the example includes one approach that works in numpy but does not work in torch.
+
+```py
+# matrix multiplication
+data = [[1,2], [3,4]]
+tensor = torch.FloatTensor(data)  # convert to a 32-bit float tensor
+# correct method
+print(
+    '\nmatrix multiplication (matmul)',
+    '\nnumpy: ', np.matmul(data, data),    # [[7, 10], [15, 22]]
+    '\ntorch: ', torch.mm(tensor, tensor)  # [[7, 10], [15, 22]]
+)
+
+# !!!!  the following is the WRONG way  !!!!
+data = np.array(data)
+print(
+    '\nmatrix multiplication (dot)',
+    '\nnumpy: ', data.dot(data),     # [[7, 10], [15, 22]] works in numpy
+    '\ntorch: ', tensor.dot(tensor)  # torch flattens this into [1,2,3,4].dot([1,2,3,4]) = 30.0
+)
+```
+
+Source: [莫烦](https://morvanzhou.github.io/)
\ No newline at end of file
diff --git a/pytorch/06.md b/pytorch/06.md
new file mode 100644
index 00000000..5bc71a1d
--- /dev/null
+++ b/pytorch/06.md
@@ -0,0 +1,90 @@
+# 2.2 – Variable
+
+## What is a Variable
+
+A Variable in Torch is a place that holds a value which keeps changing, like a basket for eggs whose count keeps fluctuating. And who are the eggs inside? Torch's Tensors, naturally.
+**If you compute with a Variable, what you get back is also a Variable of the same kind.**
+
+Let's define a Variable:
+
+```py
+import torch
+from torch.autograd import Variable  # the Variable module in torch
+
+# first, make the eggs
+tensor = torch.FloatTensor([[1,2],[3,4]])
+# put the eggs into the basket; requires_grad decides whether this node takes part in
+# error backpropagation, i.e. whether gradients should be computed for it
+variable = Variable(tensor, requires_grad=True)
+
+print(tensor)
+"""
+ 1  2
+ 3  4
+[torch.FloatTensor of size 2x2]
+"""
+
+print(variable)
+"""
+Variable containing:
+ 1  2
+ 3  4
+[torch.FloatTensor of size 2x2]
+"""
+```
+
+## Computing with Variables; gradients
+
+Let's compare computing with a tensor and computing with a variable.
+
+```py
+t_out = torch.mean(tensor*tensor)      # x^2
+v_out = torch.mean(variable*variable)  # x^2
+print(t_out)
+print(v_out)    # 7.5
+```
+
+So far there is nothing to tell them apart. **But always remember: while a Variable computes, it is quietly building up a huge system behind the scenes, step by step, called a computational graph. What is this graph for? It connects all the computation steps (nodes), so that when the error is finally backpropagated, the update magnitudes (gradients) inside all the variables are computed in one go. A plain tensor has no such ability.**
+
+v_out = torch.mean(variable*variable) adds one computation step to the graph, and it plays its part when the error is backpropagated. Here is an example:
+
+```py
+v_out.backward()    # simulate backpropagation of the error from v_out
+
+# it's fine if the next two lines are unclear; just know that a Variable is part of the
+# computational graph and can be used to propagate errors.
+# v_out = 1/4 * sum(variable*variable)  is the v_out computation step in the graph
+# so the gradient w.r.t. v_out is d(v_out)/d(variable) = 1/4*2*variable = variable/2
+
+print(variable.grad)    # gradient of the original Variable
+'''
+ 0.5000  1.0000
+ 1.5000  2.0000
+'''
+```
+
+#### Getting the data out of a Variable
+
+Calling print(variable) directly only outputs the data in Variable form, which in many situations cannot be used elsewhere (for instance when you want to plot with plt), so we convert it into tensor form.
+
+```py
+print(variable)     # Variable form
+"""
+Variable containing:
+ 1  2
+ 3  4
+[torch.FloatTensor of size 2x2]
+"""
+
+print(variable.data)    # tensor form
+"""
+ 1  2
+ 3  4
+[torch.FloatTensor of size 2x2]
+"""
+
+print(variable.data.numpy())    # numpy form
+"""
+[[ 1.  2.]
+ [ 3.  4.]]
+"""
+```
+
+Source: [莫烦](https://morvanzhou.github.io/)
\ No newline at end of file
diff --git a/pytorch/07.md b/pytorch/07.md
new file mode 100644
index 00000000..31b0c1bc
--- /dev/null
+++ b/pytorch/07.md
@@ -0,0 +1,65 @@
+# 2.3 – Activation functions
+
+## What is an activation function
+
+Activation in one sentence: it is the step that lets a neural network describe nonlinear problems, and it makes the network more powerful. If that is still not entirely clear, I have made a short animation (below) that explains the role of activation functions in plain terms. Guaranteed to make sense.
+
+## Activation functions in Torch
+
+Torch has many activation functions, but the ones we use day to day are just these few: relu, sigmoid, tanh, softplus. Let's see what each of them looks like.
+
+```py
+import torch
+import torch.nn.functional as F  # the activation functions live here
+from torch.autograd import Variable
+
+# some fake data to visualise the curves
+x = torch.linspace(-5, 5, 200)  # x data (tensor), shape=(200,)
+x = Variable(x)
+```
+
+Next we generate the data for the different activation functions:
+
+```py
+x_np = x.data.numpy()  # convert to a numpy array for plotting
+
+# several commonly used activation functions
+y_relu = F.relu(x).data.numpy()
+y_sigmoid = F.sigmoid(x).data.numpy()
+y_tanh = F.tanh(x).data.numpy()
+y_softplus = F.softplus(x).data.numpy()
+# y_softmax = F.softmax(x)  softmax is special: it can't be plotted directly;
+# it is about probabilities and is used for classification
+```
+
+Then we plot them; the plotting code is below as well:
+
+![](img/f1108a1b6941305fa7a39e488c023fe9.png)
+
+```py
+import matplotlib.pyplot as plt  # python's plotting module; I have a tutorial (https://morvanzhou.github.io/tutorials/data-manipulation/plt/)
+
+plt.figure(1, figsize=(8, 6))
+plt.subplot(221)
+plt.plot(x_np, y_relu, c='red', label='relu')
+plt.ylim((-1, 5))
+plt.legend(loc='best')
+
+plt.subplot(222)
+plt.plot(x_np, y_sigmoid, c='red', label='sigmoid')
+plt.ylim((-0.2, 1.2))
+plt.legend(loc='best')
+
+plt.subplot(223)
+plt.plot(x_np, y_tanh, c='red', label='tanh')
+plt.ylim((-1.2, 1.2))
+plt.legend(loc='best')
+
+plt.subplot(224)
+plt.plot(x_np, y_softplus, c='red', label='softplus')
+plt.ylim((-0.2, 6))
+plt.legend(loc='best')
+
+plt.show()
+```
+
+Source: [莫烦](https://morvanzhou.github.io/)
\ No newline at end of file
diff --git a/pytorch/08.md b/pytorch/08.md
new file mode 100644
index 00000000..bf7ddb5b
--- /dev/null
+++ b/pytorch/08.md
@@ -0,0 +1 @@
+# Building your first neural network
\ No newline at end of file
diff --git a/pytorch/09.md b/pytorch/09.md
new file mode 100644
index 00000000..67158afa
--- /dev/null
+++ b/pytorch/09.md
@@ -0,0 +1,108 @@
+# 3.1 – Fitting a relationship (Regression)
+
+This time we will watch how a neural network, in a simple form, represents a set of data with a single curve; in other words, how it finds the relationship within the data and then uses a neural network model to build a curve that represents that relationship.
+
+![](img/1f0b990d5a8d78692d3730d855fe44ea.png)
+
+## Creating the dataset
+
+We create some fake data to simulate a realistic situation, for example a quadratic function y = a * x^2 + b, and add a little noise to the y data to make it look more real.
+
+```py
+import torch
+from torch.autograd import Variable
+import matplotlib.pyplot as plt
+
+x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape=(100, 1)
+y = x.pow(2) + 0.2*torch.rand(x.size())                 # noisy y data (tensor), shape=(100, 1)
+
+# wrap these data tensors in Variables
+x, y = torch.autograd.Variable(x), Variable(y)
+
+# plot
+plt.scatter(x.data.numpy(), y.data.numpy())
+plt.show()
+```
+
+## Building the network
+
+To build a neural network we can use torch's framework directly: first define all the layer attributes (in __init__()), then wire up the layer-to-layer connections (in forward(x)). When wiring them up we use an activation function; if you are still unsure what activation functions are for, there is an excellent animated tutorial (below).
+
+```py
+import torch
+import torch.nn.functional as F  # the activation functions live here
+
+class Net(torch.nn.Module):  # inherit from torch's Module
+    def __init__(self, n_feature, n_hidden, n_output):
+        super(Net, self).__init__()  # inherit the __init__ functionality
+        # define the form of each layer
+        self.hidden = torch.nn.Linear(n_feature, n_hidden)  # linear output of the hidden layer
+        self.predict = torch.nn.Linear(n_hidden, n_output)  # linear output of the output layer
+
+    def forward(self, x):  # this is also Module's forward function
+        # forward-propagate the input; the network computes the output
+        x = F.relu(self.hidden(x))  # activation function on the hidden layer's linear value
+        x = self.predict(x)         # output value
+        return x
+
+net = Net(n_feature=1, n_hidden=10, n_output=1)
+
+print(net)  # the structure of net
+"""
+Net (
+  (hidden): Linear (1 -> 10)
+  (predict): Linear (10 -> 1)
+)
+"""
+```
+
+## Training the network
+
+The training steps are simple, as follows:
+
+```py
+# the optimizer is the training tool
+optimizer = torch.optim.SGD(net.parameters(), lr=0.5)  # pass in all of net's parameters and the learning rate
+loss_func = torch.nn.MSELoss()  # error between prediction and target (mean squared error)
+
+for t in range(100):
+    prediction = net(x)     # feed the training data x to net and get the prediction
+
+    loss = loss_func(prediction, y)     # compute the error between the two
+
+    optimizer.zero_grad()   # clear the leftover update values from the previous step
+    loss.backward()         # backpropagate the error and compute the parameter updates
+    optimizer.step()        # apply the updates to net's parameters
+```
+
+## Visualising the training process
+
+To visualise the whole training process and better understand how training works, we do the following:
+
+```py
+import matplotlib.pyplot as plt
+
+plt.ion()   # interactive plotting on
+plt.show()
+
+for t in range(100):
+
+    ...
+    loss.backward()
+    optimizer.step()
+
+    # continuing from above
+    if t % 5 == 0:
+        # plot and show learning process
+        plt.cla()
+        plt.scatter(x.data.numpy(), y.data.numpy())
+        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
+        plt.text(0.5, 0, 'Loss=%.4f' % loss.data[0], fontdict={'size': 20, 'color': 'red'})
+        plt.pause(0.1)
+```
+
+![](img/13e0473ef73a9de2569a81c62e30d054.png)
+
+And that is what each step in my [github code](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/301_regression.py) means.
+
+Source: [莫烦](https://morvanzhou.github.io/)
\ No newline at end of file
diff --git a/pytorch/10.md b/pytorch/10.md
new file mode 100644
index 00000000..4c1976b9
--- /dev/null
+++ b/pytorch/10.md
@@ -0,0 +1,126 @@
+# 3.2 – Distinguishing types (Classification)
+
+This time we again take the simplest route to see how a neural network classifies things.
+
+![](img/5a415b795ebbb116db6d4a2394d93b27.png)
+
+## Creating the dataset
+
+We create some fake data to simulate a realistic situation: two clusters of normally distributed data whose means differ.
+
+```py
+import torch
+from torch.autograd import Variable
+import matplotlib.pyplot as plt
+
+# fake data
+n_data = torch.ones(100, 2)       # base shape of the data
+x0 = torch.normal(2*n_data, 1)    # class 0 x data (tensor), shape=(100, 2)
+y0 = torch.zeros(100)             # class 0 y data (tensor), shape=(100,)
+x1 = torch.normal(-2*n_data, 1)   # class 1 x data (tensor), shape=(100, 2)
+y1 = torch.ones(100)              # class 1 y data (tensor), shape=(100,)
+
+# note: the x, y data must be in exactly the form below (torch.cat concatenates the data)
+x = torch.cat((x0, x1), 0).type(torch.FloatTensor)  # FloatTensor = 32-bit floating
+y = torch.cat((y0, y1), ).type(torch.LongTensor)    # LongTensor = 64-bit integer
+
+# torch can only train on Variables, so convert them into Variables
+x, y = Variable(x), Variable(y)
+
+# plot
+plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=y.data.numpy(), s=100, lw=0, cmap='RdYlGn')
+plt.show()
+```
+
+## Building the network
+
+To build a neural network we can use torch's framework directly: first define all the layer attributes (in __init__()), then wire up the layer-to-layer connections (in forward(x)). This is essentially the same as the network we built for regression. When wiring up the layers, we use an activation function.
+
+```py
+import torch
+import torch.nn.functional as F  # the activation functions live here
+
+class Net(torch.nn.Module):  # inherit from torch's Module
+    def __init__(self, n_feature, n_hidden, n_output):
+        super(Net, self).__init__()  # inherit the __init__ functionality
+        self.hidden = torch.nn.Linear(n_feature, n_hidden)  # linear output of the hidden layer
+        self.out = torch.nn.Linear(n_hidden, n_output)      # linear output of the output layer
+
+    def forward(self, x):
+        # forward-propagate the input; the network computes the output
+        x = F.relu(self.hidden(x))  # activation function on the hidden layer's linear value
+        x = self.out(x)             # raw output; this is not the prediction yet, which takes one more step to compute
+        return x
+
+net = Net(n_feature=2, n_hidden=10, n_output=2)  # one output per class
+
+print(net)  # the structure of net
+"""
+Net (
+  (hidden): Linear (2 -> 10)
+  (out): Linear (10 -> 2)
+)
+"""
+```
+
+## Training the network
+
+The training steps are simple, as follows:
+
+```py
+# the optimizer is the training tool
+optimizer = torch.optim.SGD(net.parameters(), lr=0.02)  # pass in all of net's parameters and the learning rate
+# when computing the error, note that the targets are NOT one-hot but a 1D Tensor of shape (batch,)
+# while the predictions are a 2D tensor of shape (batch, n_classes)
+loss_func = torch.nn.CrossEntropyLoss()

+for t in range(100):
+    out = net(x)     # feed the training data x to net and get the raw output
+
+    loss = loss_func(out, y)     # compute the error between the two
+
+    optimizer.zero_grad()   # clear the leftover update values from the previous step
+    loss.backward()         # backpropagate the error and compute the parameter updates
+    optimizer.step()        # apply the updates to net's parameters
+```
+
+## Visualising the training process
+
+To visualise the whole training process and better understand how training works, we do the following:
+
+```py
+import matplotlib.pyplot as plt
+
+plt.ion()   # interactive plotting on
+plt.show()
+
+for t in range(100):
+
+    ...
+    loss.backward()
+    optimizer.step()
+
+    # continuing from above
+    if t % 2 == 0:
+        plt.cla()
+        # the prediction is the class with the highest probability after the softmax activation
+        prediction = torch.max(F.softmax(out), 1)[1]
+        pred_y = prediction.data.numpy().squeeze()
+        target_y = y.data.numpy()
+        plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=pred_y, s=100, lw=0, cmap='RdYlGn')
+        accuracy = sum(pred_y == target_y)/200  # how many predictions match the targets
+        plt.text(1.5, -4, 'Accuracy=%.2f' % accuracy, fontdict={'size': 20, 'color': 'red'})
+        plt.pause(0.1)
+
+plt.ioff()  # interactive plotting off
+plt.show()
+```
+
+![](img/bce7313d5ac6f2600b62a4962a6daf3a.png)
+
+And that is what each step in my [github code](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/302_classification.py) means.
+
+Source: [莫烦](https://morvanzhou.github.io/)
\ No newline at end of file
diff --git a/pytorch/11.md b/pytorch/11.md
new file mode 100644
index 00000000..ffbc7c3a
--- /dev/null
+++ b/pytorch/11.md
@@ -0,0 +1,58 @@
+# 3.3 – Building a regression network quickly
+
+Torch offers many convenient shortcuts: if the same network can be built faster, why not? Let's see how to build the same regression network in a simpler way.
+
+## Quick build
+
+First, recall the steps we used before when writing a network. We use net1 to denote the network built this way.
+
+```py
+class Net(torch.nn.Module):
+    def __init__(self, n_feature, n_hidden, n_output):
+        super(Net, self).__init__()
+        self.hidden = torch.nn.Linear(n_feature, n_hidden)
+        self.predict = torch.nn.Linear(n_hidden, n_output)
+
+    def forward(self, x):
+        x = F.relu(self.hidden(x))
+        x = self.predict(x)
+        return x
+
+net1 = Net(1, 10, 1)  # this is the net1 we built with this approach
+```
+
+We subclassed one of torch's network structures with a class and then customised it. But there is an even faster trick that sums up everything above in a single statement!
+
+```py
+net2 = torch.nn.Sequential(
+    torch.nn.Linear(1, 10),
+    torch.nn.ReLU(),
+    torch.nn.Linear(10, 1)
+)
+```
+
+Now compare the two structures:
+
+```py
+print(net1)
+"""
+Net (
+  (hidden): Linear (1 -> 10)
+  (predict): Linear (10 -> 1)
+)
+"""
+print(net2)
+"""
+Sequential (
+  (0): Linear (1 -> 10)
+  (1): ReLU ()
+  (2): Linear (10 -> 1)
+)
+"""
+```
+
+You will notice that net2 displays some extra content. Why? Because it incorporates the activation function as a layer of its own, whereas in net1 the activation function is only invoked inside forward(). This also points to the advantage of net1 over net2: you can personalise your own forward pass however you need, for example for an RNN. But if you don't need anything fancy, the net2 form probably suits you better.
+
+And that is what each step in my [github code](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/303_build_nn_quickly.py) means.
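+
+Incidentally, if you want named layers in the quick form as well, Sequential also accepts an OrderedDict (a small sketch of mine, reusing net2's layer sizes; the names 'hidden'/'predict' are purely illustrative):
+
+```py
+from collections import OrderedDict
+import torch
+
+net3 = torch.nn.Sequential(OrderedDict([
+    ('hidden', torch.nn.Linear(1, 10)),   # named like net1's attributes
+    ('activation', torch.nn.ReLU()),
+    ('predict', torch.nn.Linear(10, 1)),
+]))
+print(net3)  # the printout now shows names instead of 0, 1, 2
+```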
+
+Source: [莫烦](https://morvanzhou.github.io/)
\ No newline at end of file
diff --git a/pytorch/12.md b/pytorch/12.md
new file mode 100644
index 00000000..3faff92a
--- /dev/null
+++ b/pytorch/12.md
@@ -0,0 +1,82 @@
+# 3.4 – Saving and restoring a model
+
+Once a model is trained we naturally want to save it, so that next time we can simply load it and use it straight away. That is the topic of this section. We use the regression network as the example for saving and loading.
+
+## Saving
+
+We quickly create the data and build the network:
+
+```py
+torch.manual_seed(1)    # reproducible
+
+# fake data
+x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)  # x data (tensor), shape=(100, 1)
+y = x.pow(2) + 0.2*torch.rand(x.size())                 # noisy y data (tensor), shape=(100, 1)
+x, y = Variable(x, requires_grad=False), Variable(y, requires_grad=False)
+
+def save():
+    # build the network
+    net1 = torch.nn.Sequential(
+        torch.nn.Linear(1, 10),
+        torch.nn.ReLU(),
+        torch.nn.Linear(10, 1)
+    )
+    optimizer = torch.optim.SGD(net1.parameters(), lr=0.5)
+    loss_func = torch.nn.MSELoss()
+
+    # train
+    for t in range(100):
+        prediction = net1(x)
+        loss = loss_func(prediction, y)
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+```
+
+Then we have two ways to save it:
+
+```py
+torch.save(net1, 'net.pkl')                      # save the entire network
+torch.save(net1.state_dict(), 'net_params.pkl')  # save only the parameters (faster, uses less memory)
+```
+
+## Loading the network
+
+This approach loads the entire network; it can be slow when the network is large.
+
+```py
+def restore_net():
+    # restore entire net1 to net2
+    net2 = torch.load('net.pkl')
+    prediction = net2(x)
+```
+
+## Loading only the parameters
+
+This approach loads all the parameters and then copies them into your newly built network.
+
+```py
+def restore_params():
+    # build a fresh net3
+    net3 = torch.nn.Sequential(
+        torch.nn.Linear(1, 10),
+        torch.nn.ReLU(),
+        torch.nn.Linear(10, 1)
+    )
+
+    # copy the saved parameters into net3
+    net3.load_state_dict(torch.load('net_params.pkl'))
+    prediction = net3(x)
+```
+
+## Showing the results
+
+Call the functions built above, then plot.
+
+![](img/68f39521fc6853acdf26440e7d5a2861.png)
+
+This shows that the three networks are exactly identical.
+
+And that is what each step in my [github code](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/304_save_reload.py) means.
+
+Source: [莫烦](https://morvanzhou.github.io/)
\ No newline at end of file
diff --git a/pytorch/13.md b/pytorch/13.md
new file mode 100644
index 00000000..5be9e301
--- /dev/null
+++ b/pytorch/13.md
@@ -0,0 +1,72 @@
+# 3.5 – Loading data (DataLoader)
+
+DataLoader is the tool torch provides for wrapping your data. You first convert your own data (numpy arrays or anything else) into Tensors and then place them inside this wrapper. What is the benefit of using a DataLoader? It iterates over your data efficiently for you. For example:
+
+```py
+import torch
+import torch.utils.data as Data
+torch.manual_seed(1)    # reproducible
+
+BATCH_SIZE = 5      # number of samples per training batch
+
+x = torch.linspace(1, 10, 10)   # x data (torch tensor)
+y = torch.linspace(10, 1, 10)   # y data (torch tensor)
+
+# first convert into a Dataset that torch recognises
+torch_dataset = Data.TensorDataset(data_tensor=x, target_tensor=y)
+
+# put the dataset into a DataLoader
+loader = Data.DataLoader(
+    dataset=torch_dataset,      # torch TensorDataset format
+    batch_size=BATCH_SIZE,      # mini batch size
+    shuffle=True,               # whether to shuffle the data (shuffling is better)
+    num_workers=2,              # load the data with multiple subprocesses
+)
+
+for epoch in range(3):   # train over the WHOLE dataset 3 times
+    for step, (batch_x, batch_y) in enumerate(loader):  # at each step the loader yields a small batch to learn from
+        # imagine this is where your training happens...
+
+        # print some of the data
+        print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
+              batch_x.numpy(), '| batch y: ', batch_y.numpy())
+
+"""
+Epoch:  0 | Step:  0 | batch x:  [ 6.  7.  2.  3.  1.] | batch y:  [ 5.  4.  9.  8.  10.]
+Epoch:  0 | Step:  1 | batch x:  [ 9.  10.  4.  8.  5.] | batch y:  [ 2.  1.  7.  3.  6.]
+Epoch:  1 | Step:  0 | batch x:  [ 3.  4.  2.  9.  10.] | batch y:  [ 8.  7.  9.  2.  1.]
+Epoch:  1 | Step:  1 | batch x:  [ 1.  7.  8.  5.  6.] | batch y:  [ 10.  4.  3.  6.  5.]
+Epoch:  2 | Step:  0 | batch x:  [ 3.  9.  2.  6.  7.] | batch y:  [ 8.  2.  9.  5.  4.]
+Epoch:  2 | Step:  1 | batch x:  [ 10.  4.  8.  1.  5.] | batch y:  [ 1.  7.  3.  10.  6.]
+"""
+```
+
+As you can see, each step yields 5 samples to learn from, and in every epoch the data are shuffled before being yielded.
+
+That is not even the truly convenient part. If we change BATCH_SIZE = 8, we know that step=0 will yield 8 samples; but at step=1 the dataset has fewer than 8 samples left. What happens then?
+
+```py
+BATCH_SIZE = 8      # number of samples per training batch
+
+...
+
+for ...:
+    for ...:
+        ...
+        print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
+              batch_x.numpy(), '| batch y: ', batch_y.numpy())
+"""
+Epoch:  0 | Step:  0 | batch x:  [ 6.  7.  2.  3.  1.  9.  10.  4.] | batch y:  [ 5.  4.  9.  8.  10.  2.  1.  7.]
+Epoch:  0 | Step:  1 | batch x:  [ 8.  5.] | batch y:  [ 3.  6.]
+Epoch:  1 | Step:  0 | batch x:  [ 3.  4.  2.  9.  10.  1.  7.  8.] | batch y:  [ 8.  7.  9.  2.  1.  10.  4.  3.]
+Epoch:  1 | Step:  1 | batch x:  [ 5.  6.] | batch y:  [ 6.  5.]
+Epoch:  2 | Step:  0 | batch x:  [ 3.  9.  2.  6.  7.  10.  4.  8.] | batch y:  [ 8.  2.  9.  5.  4.  1.  7.  3.]
+Epoch:  2 | Step:  1 | batch x:  [ 1.  5.] | batch y:  [ 10.  6.]
+"""
+```
+
+In that case, step=1 simply returns whatever data remain in that epoch.
+
+And that is what each step in my [github code](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/305_batch_train.py) means.
+
+Source: [莫烦](https://morvanzhou.github.io/)
\ No newline at end of file
diff --git a/pytorch/14.md b/pytorch/14.md
new file mode 100644
index 00000000..3c2fca9d
--- /dev/null
+++ b/pytorch/14.md
@@ -0,0 +1,109 @@
+# 3.6 – Optimizers
+
+This section mainly puts several optimizers into practice with Torch. The specific advantages of each optimizer are not covered here, so if you want a quick overview, the animation link above is a good place to go.
+
+The figure below is this section's comparison of the optimizers' effects:
+
+![](img/7a8716c377832b032ee24276b7ddcc31.png)
+
+## Fake data
+
+To compare the optimizers we need some data, so today we again make up some fake data. This batch of data looks like this:
+
+![](img/2fc34594dcb247d4a3414467eed4a109.png)
+
+```py
+import torch
+import torch.utils.data as Data
+import torch.nn.functional as F
+from torch.autograd import Variable
+import matplotlib.pyplot as plt
+
+torch.manual_seed(1)    # reproducible
+
+LR = 0.01
+BATCH_SIZE = 32
+EPOCH = 12
+
+# fake dataset
+x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)
+y = x.pow(2) + 0.1*torch.normal(torch.zeros(*x.size()))
+
+# plot dataset
+plt.scatter(x.numpy(), y.numpy())
+plt.show()
+
+# use the data loader introduced in the previous section
+torch_dataset = Data.TensorDataset(data_tensor=x, target_tensor=y)
+loader = Data.DataLoader(dataset=torch_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2,)
+```
+
+## One network per optimizer
+
+To compare the optimizers, we create a separate neural network for each of them, but all the networks come from the same Net class.
+
+```py
+# the default network form
+class Net(torch.nn.Module):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.hidden = torch.nn.Linear(1, 20)   # hidden layer
+        self.predict = torch.nn.Linear(20, 1)  # output layer
+
+    def forward(self, x):
+        x = F.relu(self.hidden(x))      # activation function for hidden layer
+        x = self.predict(x)             # linear output
+        return x
+
+# create one net for each optimizer
+net_SGD = Net()
+net_Momentum = Net()
+net_RMSprop = Net()
+net_Adam = Net()
+nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]
+```
+
+## The optimizers
+
+Next we create the different optimizers to train the different networks, plus a loss_func to compute the error. We use several common optimizers: SGD, Momentum, RMSprop, Adam.
+
+```py
+# different optimizers
+opt_SGD = torch.optim.SGD(net_SGD.parameters(), lr=LR)
+opt_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
+opt_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
+opt_Adam = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
+optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]
+
+loss_func = torch.nn.MSELoss()
+losses_his = [[], [], [], []]   # record the loss of each network during training
+```
+
+## Training / plotting
+
+Next we train, then plot the loss.
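+
+The training loop follows below. Once it has run, the loss curves in the figure after it can be rendered with a small sketch like this (my addition; losses_his and plt are as defined above, and the labels list is mine, ordered to match nets/optimizers):
+
+```py
+# run this after the training loop below has finished
+labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
+for i, l_his in enumerate(losses_his):
+    plt.plot(l_his, label=labels[i])  # one loss curve per optimizer
+plt.legend(loc='best')
+plt.xlabel('Steps')
+plt.ylabel('Loss')
+plt.show()
+```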
+
+![](img/7a8716c377832b032ee24276b7ddcc31.png)
+
+SGD 是最普通的优化器, 也可以说没有加速效果, 而 Momentum 是 SGD 的改良版, 它加入了动量原则. 后面的 RMSprop 又是 Momentum 的升级版, 而 Adam 又是 RMSprop 的升级版. 不过从这个结果中我们看到, Adam 的效果似乎比 RMSprop 要差一点. 所以说并不是越先进的优化器, 结果就越佳. 我们在自己的试验中可以尝试不同的优化器, 找到那个最适合你数据/网络的优化器.
+
+所以这也就是在我 [github 代码](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/306_optimizer.py) 中的每一步的意义啦.
+
+文章来源:[莫烦](https://morvanzhou.github.io/) \ No newline at end of file diff --git a/pytorch/15.md b/pytorch/15.md new file mode 100644 index 00000000..3e29533e --- /dev/null +++ b/pytorch/15.md @@ -0,0 +1 @@ +# 高级神经网络结构 \ No newline at end of file diff --git a/pytorch/16.md b/pytorch/16.md new file mode 100644 index 00000000..7595a7b6 --- /dev/null +++ b/pytorch/16.md @@ -0,0 +1,159 @@ +# 4.1 – CNN 卷积神经网络
+
+卷积神经网络目前被广泛地用在图片识别上, 已经有层出不穷的应用. 如果你对卷积神经网络还没有特别了解, 我制作的 卷积神经网络 动画简介 (如下) 能让你花几分钟就了解什么是卷积神经网络. 接着我们就一步一步做一个分析手写数字的 CNN 吧.
+
+下面是一个 CNN 最后一层的学习过程, 我们先可视化看看:
+
+![](img/388ca39bf710c8f053f533ad10872cd7.png)
+
+## MNIST手写数据
+
+```py
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+import torch.utils.data as Data
+import torchvision      # 数据库模块
+import matplotlib.pyplot as plt
+
+torch.manual_seed(1)    # reproducible
+
+# Hyper Parameters
+EPOCH = 1               # 训练整批数据多少次, 为了节约时间, 我们只训练一次
+BATCH_SIZE = 50
+LR = 0.001              # 学习率
+DOWNLOAD_MNIST = True   # 如果你已经下载好了 mnist 数据就写上 False
+
+# Mnist 手写数字
+train_data = torchvision.datasets.MNIST(
+    root='./mnist/',    # 保存或者提取位置
+    train=True,         # this is training data
+    transform=torchvision.transforms.ToTensor(),    # 转换 PIL.Image or numpy.ndarray 成
+                                                    # torch.FloatTensor (C x H x W), 训练的时候 normalize 成 [0.0, 1.0] 区间
+    download=DOWNLOAD_MNIST,    # 没下载就下载, 下载了就不用再下了
+)
+```
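+顺手可以画一个训练样本出来看看长相, 下面是一个示意写法 (用上面已导入的 matplotlib; train_data.train_data 是老版本 torchvision 的属性名):
+
+```py
+print(train_data.train_data.size())     # (60000, 28, 28)
+print(train_data.train_labels.size())   # (60000)
+plt.imshow(train_data.train_data[0].numpy(), cmap='gray')   # 画第一张手写数字
+plt.title('%i' % train_data.train_labels[0])
+plt.show()
+```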
+
+![](img/25ed82d9ef8a8b1c9c60445c7c08c732.png)
+
+黑色的地方的值都是0, 白色的地方值大于0.
+
+同样, 除了训练数据, 我们还准备了一些测试数据, 测试看看它有没有训练好.
+
+```py
+test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
+
+# 批训练 50samples, 1 channel, 28x28 (50, 1, 28, 28)
+train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
+
+# 为了节约时间, 我们测试时只测试前2000个
+test_x = Variable(torch.unsqueeze(test_data.test_data, dim=1), volatile=True).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
+test_y = test_data.test_labels[:2000]
+```
+
+## CNN模型
+
+和以前一样, 我们用一个 class 来建立 CNN 模型. 这个 CNN 整体流程是 卷积( Conv2d ) -> 激励函数( ReLU ) -> 池化, 向下采样 ( MaxPooling ) -> 再来一遍 -> 展平多维的卷积生成的特征图 -> 接入全连接层 ( Linear ) -> 输出
+
+```py
+class CNN(nn.Module):
+    def __init__(self):
+        super(CNN, self).__init__()
+        self.conv1 = nn.Sequential(  # input shape (1, 28, 28)
+            nn.Conv2d(
+                in_channels=1,      # input height
+                out_channels=16,    # n_filters
+                kernel_size=5,      # filter size
+                stride=1,           # filter movement/step
+                padding=2,          # 如果想要 conv2d 出来的图片长宽没有变化, padding=(kernel_size-1)/2 当 stride=1
+            ),                      # output shape (16, 28, 28)
+            nn.ReLU(),                      # activation
+            nn.MaxPool2d(kernel_size=2),    # 在 2x2 空间里向下采样, output shape (16, 14, 14)
+        )
+        self.conv2 = nn.Sequential(         # input shape (16, 14, 14)
+            nn.Conv2d(16, 32, 5, 1, 2),     # output shape (32, 14, 14)
+            nn.ReLU(),                      # activation
+            nn.MaxPool2d(2),                # output shape (32, 7, 7)
+        )
+        self.out = nn.Linear(32 * 7 * 7, 10)    # fully connected layer, output 10 classes
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = x.view(x.size(0), -1)   # 展平多维的卷积图成 (batch_size, 32 * 7 * 7)
+        output = self.out(x)
+        return output
+
+cnn = CNN()
+print(cnn)  # net architecture
+"""
+CNN (
+  (conv1): Sequential (
+    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
+    (1): ReLU ()
+    (2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
+  )
+  (conv2): Sequential (
+    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
+    (1): ReLU ()
+    (2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
+  )
+  (out): Linear (1568 -> 10)
+)
+"""
+```
+
+## 训练
+
+下面我们开始训练, 将 x, y 都用 Variable 包起来, 然后放入 cnn 中计算 output, 最后再计算误差. 下面代码省略了计算精确度 accuracy 的部分, 如果想细看 accuracy 代码的同学, 请去往我的 github 看全部代码.
+
+```py
+optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)   # optimize all cnn parameters
+loss_func = nn.CrossEntropyLoss()   # the target label is not one-hotted
+
+# training and testing
+for epoch in range(EPOCH):
+    for step, (x, y) in enumerate(train_loader):   # 分配 batch data, normalize x when iterate train_loader
+        b_x = Variable(x)   # batch x
+        b_y = Variable(y)   # batch y
+
+        output = cnn(b_x)               # cnn output
+        loss = loss_func(output, b_y)   # cross entropy loss
+        optimizer.zero_grad()           # clear gradients for this training step
+        loss.backward()                 # backpropagation, compute gradients
+        optimizer.step()                # apply gradients
+
+"""
+...
+Epoch:  0 | train loss: 0.0306 | test accuracy: 0.97
+Epoch:  0 | train loss: 0.0147 | test accuracy: 0.98
+Epoch:  0 | train loss: 0.0427 | test accuracy: 0.98
+Epoch:  0 | train loss: 0.0078 | test accuracy: 0.98
+"""
+```
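+被省略的 accuracy 部分大致长下面这样, 这只是一个示意 (按上面的 test_x, test_y 来算, 不是原文的完整代码):
+
+```py
+        # 接在 optimizer.step() 之后, 每 50 步用测试集算一次精度 (示意)
+        if step % 50 == 0:
+            test_output = cnn(test_x)
+            pred_y = torch.max(test_output, 1)[1].data.squeeze()
+            accuracy = (pred_y == test_y).sum() / float(test_y.size(0))
+            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data[0],
+                  '| test accuracy: %.2f' % accuracy)
+```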
+
+最后我们再来取10个数据, 看看预测的值到底对不对:
+
+```py
+test_output = cnn(test_x[:10])
+pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
+print(pred_y, 'prediction number')
+print(test_y[:10].numpy(), 'real number')
+
+"""
+[7 2 1 0 4 1 4 9 5 9] prediction number
+[7 2 1 0 4 1 4 9 5 9] real number
+"""
+```
+
+## 可视化训练 (视频中没有)
+
+这是做完视频后突然想要补充的内容, 因为可视化可以帮助理解, 所以还是有必要提一下. 可视化的代码主要是用 matplotlib 和 sklearn 来完成的, 因为其中我们用到了 T-SNE 的降维手段, 将高维的 CNN 最后一层输出结果可视化, 也就是 CNN forward 代码中 x = x.view(x.size(0), -1) 的这一个结果.
+
+可视化的代码不是重点, 我们就直接展示可视化的结果吧.
+
+![](img/388ca39bf710c8f053f533ad10872cd7.png)
+
+所以这也就是在我 [github 代码](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/401_CNN.py) 中的每一步的意义啦.
+
+文章来源:[莫烦](https://morvanzhou.github.io/) \ No newline at end of file diff --git a/pytorch/17.md b/pytorch/17.md new file mode 100644 index 00000000..646caf7c --- /dev/null +++ b/pytorch/17.md @@ -0,0 +1,145 @@ +# 4.2 – RNN 循环神经网络 (分类 Classification)
+
+循环神经网络让神经网络有了记忆, 对于序列化的数据, 循环神经网络能达到更好的效果. 如果你对循环神经网络还没有特别了解, 请观看几分钟的短动画, RNN 动画简介 (如下) 和 LSTM 动画简介 (如下) 能让你生动理解 RNN. 接着我们就一步一步做一个分析手写数字的 RNN 吧.
+
+## RNN 简介
+
+## LSTM 简介
+
+## MNIST手写数据
+
+```py
+import torch
+from torch import nn
+from torch.autograd import Variable
+import torch.utils.data as Data
+import torchvision.datasets as dsets
+import torchvision.transforms as transforms
+import matplotlib.pyplot as plt
+
+torch.manual_seed(1)    # reproducible
+
+# Hyper Parameters
+EPOCH = 1               # 训练整批数据多少次, 为了节约时间, 我们只训练一次
+BATCH_SIZE = 64
+TIME_STEP = 28          # rnn 时间步数 / 图片高度
+INPUT_SIZE = 28         # rnn 每步输入值 / 图片每行像素
+LR = 0.01               # learning rate
+DOWNLOAD_MNIST = True   # 如果你已经下载好了 mnist 数据就写上 False
+
+# Mnist 手写数字
+train_data = dsets.MNIST(
+    root='./mnist/',    # 保存或者提取位置
+    train=True,         # this is training data
+    transform=transforms.ToTensor(),    # 转换 PIL.Image or numpy.ndarray 成
+                                        # torch.FloatTensor (C x H x W), 训练的时候 normalize 成 [0.0, 1.0] 区间
+    download=DOWNLOAD_MNIST,    # 没下载就下载, 下载了就不用再下了
+)
+```
+
+![](img/99c72d57612c137b62599837526f0e0e.png)
+
+黑色的地方的值都是0, 白色的地方值大于0.
+
+同样, 除了训练数据, 我们还准备了一些测试数据, 测试看看它有没有训练好.
+
+```py
+test_data = dsets.MNIST(root='./mnist/', train=False)
+
+# 批训练 64samples, 1 channel, 28x28 (64, 1, 28, 28)
+train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
+
+# 为了节约时间, 我们测试时只测试前2000个
+test_x = Variable(torch.unsqueeze(test_data.test_data, dim=1), volatile=True).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
+test_y = test_data.test_labels[:2000]
+```
+
+## RNN模型
+
+和以前一样, 我们用一个 class 来建立 RNN 模型. 这个 RNN 整体流程是
+
+1. (input0, state0) -> LSTM -> (output0, state1) ;
+2. (input1, state1) -> LSTM -> (output1, state2) ;
+3. …
+4. (inputN, stateN) -> LSTM -> (outputN, stateN+1) ;
+5. outputN -> Linear -> prediction . 通过LSTM分析每一时刻的值, 并且将这一时刻和前面时刻的理解合并在一起, 生成当前时刻对前面数据的理解或记忆. 传递这种理解给下一时刻分析.
+
+```py
+class RNN(nn.Module):
+    def __init__(self):
+        super(RNN, self).__init__()
+
+        self.rnn = nn.LSTM(     # LSTM 效果要比 nn.RNN() 好多了
+            input_size=28,      # 图片每行的数据像素点
+            hidden_size=64,     # rnn hidden unit
+            num_layers=1,       # 有几层 RNN layers
+            batch_first=True,   # input & output 会是以 batch size 为第一维度的特征集 e.g. (batch, time_step, input_size)
+        )
+
+        self.out = nn.Linear(64, 10)    # 输出层
+
+    def forward(self, x):
+        # x shape (batch, time_step, input_size)
+        # r_out shape (batch, time_step, output_size)
+        # h_n shape (n_layers, batch, hidden_size)   LSTM 有两个 hidden states, h_n 是分线, h_c 是主线
+        # h_c shape (n_layers, batch, hidden_size)
+        r_out, (h_n, h_c) = self.rnn(x, None)   # None 表示 hidden state 会用全0的 state
+
+        # 选取最后一个时间点的 r_out 输出
+        # 这里 r_out[:, -1, :] 的值也是 h_n 的值
+        out = self.out(r_out[:, -1, :])
+        return out
+
+rnn = RNN()
+print(rnn)
+"""
+RNN (
+  (rnn): LSTM(28, 64, batch_first=True)
+  (out): Linear (64 -> 10)
+)
+"""
+```
+
+## 训练
+
+我们将图片数据看成一个时间上的连续数据, 每一行的像素点都是这个时刻的输入, 读完整张图片就是从上而下读完了每行的像素点. 然后我们就可以拿 RNN 在最后一步的分析值来判断图片是哪一类了. 下面的代码省略了计算 accuracy 的部分, 你可以在我的 github 中看到全部代码.
+
+```py
+optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all parameters
+loss_func = nn.CrossEntropyLoss()   # the target label is not one-hotted
+
+# training and testing
+for epoch in range(EPOCH):
+    for step, (x, y) in enumerate(train_loader):   # gives batch data
+        b_x = Variable(x.view(-1, 28, 28))   # reshape x to (batch, time_step, input_size)
+        b_y = Variable(y)                    # batch y
+
+        output = rnn(b_x)               # rnn output
+        loss = loss_func(output, b_y)   # cross entropy loss
+        optimizer.zero_grad()           # clear gradients for this training step
+        loss.backward()                 # backpropagation, compute gradients
+        optimizer.step()                # apply gradients
+"""
+...
+Epoch:  0 | train loss: 0.0945 | test accuracy: 0.94
+Epoch:  0 | train loss: 0.0984 | test accuracy: 0.94
+Epoch:  0 | train loss: 0.0332 | test accuracy: 0.95
+Epoch:  0 | train loss: 0.1868 | test accuracy: 0.96
+"""
+```
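+同样补一个被省略的 accuracy 计算示意 (沿用上面的 test_x, test_y; 注意 RNN 的输入要先 view 成 (samples, time_step, input_size)):
+
+```py
+test_output = rnn(test_x.view(-1, 28, 28))      # (2000, 28, 28)
+pred_y = torch.max(test_output, 1)[1].data.squeeze()
+accuracy = (pred_y == test_y).sum() / float(test_y.size(0))
+print('test accuracy: %.2f' % accuracy)
+```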
+
+最后我们再来取10个数据, 看看预测的值到底对不对:
+
+```py
+test_output = rnn(test_x[:10].view(-1, 28, 28))
+pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
+print(pred_y, 'prediction number')
+print(test_y[:10], 'real number')
+"""
+[7 2 1 0 4 1 4 9 5 9] prediction number
+[7 2 1 0 4 1 4 9 5 9] real number
+"""
+```
+
+所以这也就是在我 [github 代码](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/402_RNN_classifier.py) 中的每一步的意义啦.
+
+文章来源:[莫烦](https://morvanzhou.github.io/) \ No newline at end of file diff --git a/pytorch/18.md b/pytorch/18.md new file mode 100644 index 00000000..80f86cd5 --- /dev/null +++ b/pytorch/18.md @@ -0,0 +1,117 @@ +# 4.3 – RNN 循环神经网络 (回归 Regression)
+
+循环神经网络让神经网络有了记忆, 对于序列化的数据, 循环神经网络能达到更好的效果. 如果你对循环神经网络还没有特别了解, 请观看几分钟的短动画, RNN 动画简介 (如下) 和 LSTM 动画简介 (如下) 能让你生动理解 RNN. 上次我们提到了用 RNN 的最后一个时间点输出来判断之前看到的图片属于哪一类, 这次我们来真的了, 用 RNN 来实时预测时间序列.
+
+![](img/f38868821469cadc36810cfd827511d1.png)
+
+## RNN 简介
+
+## LSTM 简介
+
+## 训练数据
+
+我们要用到的数据就是这样的一些数据, 我们想要用 sin 的曲线预测出 cos 的曲线.
+
+![](img/22309cd02ee52b3a65e1f0022e8b964e.png)
+
+```py
+import torch
+from torch import nn
+from torch.autograd import Variable
+import numpy as np
+import matplotlib.pyplot as plt
+
+torch.manual_seed(1)    # reproducible
+
+# Hyper Parameters
+TIME_STEP = 10      # rnn time step / image height
+INPUT_SIZE = 1      # rnn input size / image width
+LR = 0.02           # learning rate
+```
+
+## RNN模型
+
+这一次的 RNN, 我们对每一个 r_out 都得放到 Linear 中去计算出预测的 output, 所以我们能用一个 for loop 来循环计算. **这点是 Tensorflow 望尘莫及的!** 除了这点, 还有一些动态的过程都可以在这个教程中查看, 看看我们的 PyTorch 和 Tensorflow 到底哪家强.
+
+```py
+class RNN(nn.Module):
+    def __init__(self):
+        super(RNN, self).__init__()
+
+        self.rnn = nn.RNN(      # 这回一个普通的 RNN 就能胜任
+            input_size=1,
+            hidden_size=32,     # rnn hidden unit
+            num_layers=1,       # 有几层 RNN layers
+            batch_first=True,   # input & output 会是以 batch size 为第一维度的特征集 e.g. (batch, time_step, input_size)
+        )
+        self.out = nn.Linear(32, 1)
+
+    def forward(self, x, h_state):   # 因为 hidden state 是连续的, 所以我们要一直传递这一个 state
+        # x (batch, time_step, input_size)
+        # h_state (n_layers, batch, hidden_size)
+        # r_out (batch, time_step, output_size)
+        r_out, h_state = self.rnn(x, h_state)   # h_state 也要作为 RNN 的一个输入
+
+        outs = []   # 保存所有时间点的预测值
+        for time_step in range(r_out.size(1)):   # 对每一个时间点计算 output
+            outs.append(self.out(r_out[:, time_step, :]))
+        return torch.stack(outs, dim=1), h_state
+
+rnn = RNN()
+print(rnn)
+"""
+RNN (
+  (rnn): RNN(1, 32, batch_first=True)
+  (out): Linear (32 -> 1)
+)
+"""
+```
+
+其实熟悉 RNN 的朋友应该知道, forward 过程中对每个时间点求输出, 还有一招能让计算量比较小. 不过上面的内容主要是为了呈现 PyTorch 在动态构图上的优势, 所以我用了一个 for loop 来搭建那套输出系统. 下面介绍一个替换方式: 使用 reshape 的方式整批计算.
+
+```py
+def forward(self, x, h_state):
+    r_out, h_state = self.rnn(x, h_state)
+    r_out_reshaped = r_out.view(-1, 32)     # 压成 2D (batch*time_step, hidden_size), 32 即上面的 hidden_size
+    outs = self.out(r_out_reshaped)         # 整批过一次 Linear
+    outs = outs.view(-1, TIME_STEP, 1)      # 再变回 3D (batch, time_step, output_size)
+    return outs, h_state
+```
+
+## 训练
+
+下面的代码就能实现动图的效果啦~ 可以看出, 我们使用 x 作为输入的 sin 值, 然后 y 作为想要拟合的输出 cos 值. 因为这两条曲线是存在某种关系的, 所以我们就能用 sin 来预测 cos. rnn 会理解它们的关系, 并用里面的参数分析出来这个时刻 sin 曲线上的点如何对应上 cos 曲线上的点.
+
+![](img/f38868821469cadc36810cfd827511d1.png)
+
+```py
+optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all rnn parameters
+loss_func = nn.MSELoss()
+
+h_state = None   # 要使用初始 hidden state, 可以设成 None
+
+for step in range(60):
+    start, end = step * np.pi, (step + 1)*np.pi   # time steps
+    # sin 预测 cos
+    steps = np.linspace(start, end, 10, dtype=np.float32)
+    x_np = np.sin(steps)    # float32 for converting torch FloatTensor
+    y_np = np.cos(steps)
+
+    x = Variable(torch.from_numpy(x_np[np.newaxis, :, np.newaxis]))   # shape (batch, time_step, input_size)
+    y = Variable(torch.from_numpy(y_np[np.newaxis, :, np.newaxis]))
+
+    prediction, h_state = rnn(x, h_state)   # rnn 对于每个 step 的 prediction, 还有最后一个 step 的 h_state
+    # !! 下一步十分重要 !!
+    h_state = Variable(h_state.data)   # 要把 h_state 重新包装一下才能放入下一个 iteration, 不然会报错
+
+    loss = loss_func(prediction, y)   # MSE loss
+    optimizer.zero_grad()             # clear gradients for this training step
+    loss.backward()                   # backpropagation, compute gradients
+    optimizer.step()                  # apply gradients
+```
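+动图部分的画法原文没有贴出来, 下面是一个最小的示意 (假设用 matplotlib 的交互模式, 接在上面 for 循环的末尾):
+
+```py
+    # 接在 optimizer.step() 之后 (循环开始前先调用一次 plt.ion() 开启交互模式)
+    plt.plot(steps, y_np.flatten(), 'r-')                       # 目标 cos 曲线
+    plt.plot(steps, prediction.data.numpy().flatten(), 'b-')    # RNN 的预测曲线
+    plt.draw()
+    plt.pause(0.05)
+```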
+
+![](img/04a6040ad4f06a69a92f440ea17dde44.png)
+
+所以这也就是在我 [github 代码](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/403_RNN_regressor.py) 中的每一步的意义啦.
+
+文章来源:[莫烦](https://morvanzhou.github.io/) \ No newline at end of file diff --git a/pytorch/19.md b/pytorch/19.md new file mode 100644 index 00000000..8e147cd5 --- /dev/null +++ b/pytorch/19.md @@ -0,0 +1,138 @@ +# 4.4 – AutoEncoder (自编码/非监督学习)
+
+神经网络也能进行非监督学习, 只需要训练数据, 不需要标签数据. 自编码就是这样一种形式. 自编码能自动给数据分类, 也能用在半监督学习上, 用少量的有标签样本和大量的无标签样本学习.
+
+这次我们还用 MNIST 手写数字数据来压缩再解压图片.
+
+![](img/c429fb827df769a542339e200e2ea20c.png)
+
+然后用压缩的特征进行非监督分类.
+
+![](img/f790e22ee4be05f818e52467c2f13b37.png)
+
+## 训练数据
+
+自编码只用训练集就好了, 而且只需要 training data 的 image, 不需要 labels.
+
+```py
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+import torch.utils.data as Data
+import torchvision
+import matplotlib.pyplot as plt             # 出图要用
+from matplotlib import cm                   # 3D 图上色要用
+from mpl_toolkits.mplot3d import Axes3D     # 3D 图要用
+import numpy as np
+
+# 超参数
+EPOCH = 10
+BATCH_SIZE = 64
+LR = 0.005
+DOWNLOAD_MNIST = True   # 下过数据的话, 就可以设置成 False
+N_TEST_IMG = 5          # 到时候显示 5张图片看效果, 如上图一
+
+# Mnist digits dataset
+train_data = torchvision.datasets.MNIST(
+    root='./mnist/',
+    train=True,                                     # this is training data
+    transform=torchvision.transforms.ToTensor(),    # Converts a PIL.Image or numpy.ndarray to
+                                                    # torch.FloatTensor of shape (C x H x W) and normalize in the range [0.0, 1.0]
+    download=DOWNLOAD_MNIST,                        # download it if you don't have it
+)
+
+# 训练时要用的 DataLoader
+train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
+```
+
+![](img/ffeedc89cc695e61aa6e941c1c696a39.png)
+
+这就是一张我们要训练的手写数字 4.
+
+## AutoEncoder
+
+AutoEncoder 形式很简单, 分别是 encoder 和 decoder, 压缩和解压, 压缩后得到压缩的特征值, 再从压缩的特征值解压成原图片.
+
+```py
+class AutoEncoder(nn.Module):
+    def __init__(self):
+        super(AutoEncoder, self).__init__()
+
+        # 压缩
+        self.encoder = nn.Sequential(
+            nn.Linear(28*28, 128),
+            nn.Tanh(),
+            nn.Linear(128, 64),
+            nn.Tanh(),
+            nn.Linear(64, 12),
+            nn.Tanh(),
+            nn.Linear(12, 3),   # 压缩成3个特征, 进行 3D 图像可视化
+        )
+        # 解压
+        self.decoder = nn.Sequential(
+            nn.Linear(3, 12),
+            nn.Tanh(),
+            nn.Linear(12, 64),
+            nn.Tanh(),
+            nn.Linear(64, 128),
+            nn.Tanh(),
+            nn.Linear(128, 28*28),
+            nn.Sigmoid(),       # 激励函数让输出值在 (0, 1)
+        )
+
+    def forward(self, x):
+        encoded = self.encoder(x)
+        decoded = self.decoder(encoded)
+        return encoded, decoded
+
+autoencoder = AutoEncoder()
+```
+
+## 训练
+
+训练, 并可视化训练的过程. 我们可以有效地利用 encoder 和 decoder 来做很多事, 比如这里我们用 decoder 的输出和原图片做对比, 还能用 encoder 来看经过压缩后, 神经网络对原图片的理解. encoder 能将不同的图片数据大概地分离开来, 这就是一个无监督学习的过程.
+
+![](img/c429fb827df769a542339e200e2ea20c.png)
+
+```py
+optimizer = torch.optim.Adam(autoencoder.parameters(), lr=LR)
+loss_func = nn.MSELoss()
+
+for epoch in range(EPOCH):
+    for step, (x, y) in enumerate(train_loader):
+        b_x = Variable(x.view(-1, 28*28))   # batch x, shape (batch, 28*28)
+        b_y = Variable(x.view(-1, 28*28))   # batch y, shape (batch, 28*28), 学习目标就是原图自己
+        b_label = Variable(y)               # batch label, 训练用不上, 可视化时可以用
+
+        encoded, decoded = autoencoder(b_x)
+
+        loss = loss_func(decoded, b_y)   # mean square error
+        optimizer.zero_grad()            # clear gradients for this training step
+        loss.backward()                  # backpropagation, compute gradients
+        optimizer.step()                 # apply gradients
+```
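+上图一那种原图和解压图的对比大致可以这样画出来, 下面只是一个示意 (取前 N_TEST_IMG 张训练图, 上排原图, 下排 decoder 的还原图):
+
+```py
+view_data = Variable(train_data.train_data[:N_TEST_IMG].view(-1, 28*28).type(torch.FloatTensor)/255.)
+_, decoded_data = autoencoder(view_data)
+
+f, a = plt.subplots(2, N_TEST_IMG, figsize=(5, 2))
+for i in range(N_TEST_IMG):
+    a[0][i].imshow(np.reshape(view_data.data.numpy()[i], (28, 28)), cmap='gray')      # 原图
+    a[1][i].imshow(np.reshape(decoded_data.data.numpy()[i], (28, 28)), cmap='gray')   # 还原图
+plt.show()
+```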
+
+![](img/9e1df524980c8f42ab4353070c2a1b74.png)
+
+## 画3D图
+
+![](img/f790e22ee4be05f818e52467c2f13b37.png)
+
+3D 的可视化图挺有趣的, 还能挪动观看, 更加直观, 好理解.
+
+```py
+# 要观看的数据
+view_data = Variable(train_data.train_data[:200].view(-1, 28*28).type(torch.FloatTensor)/255.)
+encoded_data, _ = autoencoder(view_data)    # 提取压缩的特征值
+fig = plt.figure(2)
+ax = Axes3D(fig)    # 3D 图
+# x, y, z 的数据值
+X = encoded_data.data[:, 0].numpy()
+Y = encoded_data.data[:, 1].numpy()
+Z = encoded_data.data[:, 2].numpy()
+values = train_data.train_labels[:200].numpy()  # 标签值
+for x, y, z, s in zip(X, Y, Z, values):
+    c = cm.rainbow(int(255*s/9))    # 上色
+    ax.text(x, y, z, s, backgroundcolor=c)  # 标位子
+ax.set_xlim(X.min(), X.max())
+ax.set_ylim(Y.min(), Y.max())
+ax.set_zlim(Z.min(), Z.max())
+plt.show()
+```
+
+所以这也就是在我 [github 代码](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/404_autoencoder.py) 中的每一步的意义啦.
+
+文章来源:[莫烦](https://morvanzhou.github.io/) \ No newline at end of file diff --git a/pytorch/20.md b/pytorch/20.md new file mode 100644 index 00000000..0bf22ec8 --- /dev/null +++ b/pytorch/20.md @@ -0,0 +1,165 @@ +# 4.5 – DQN 强化学习 (Reinforcement Learning)
+
+Torch 是神经网络库, 那么也可以拿来做强化学习. 之前我用另一个强大的神经网络库 Tensorflow 制作了这一个 从浅入深强化学习教程, 你同样也可以用 PyTorch 来实现. 这次我们就举 DQN 的例子, 我对比了我的 Tensorflow DQN 的代码, 发现 PyTorch 写的要简单很多. 如果对 DQN 或者强化学习还没有太多概念, 强烈推荐我的这个DQN动画短片 (如下), 让你秒懂DQN. 还有强推这套花了我几个月来制作的[强化学习教程](https://morvanzhou.github.io/tutorials/machine-learning/reinforcement-learning/)!
+
+## 模块导入和参数设置
+
+这次除了 Torch 自家模块, 我们还要导入 Gym 环境库模块.
+
+```py
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+import torch.nn.functional as F
+import numpy as np
+import gym
+
+# 超参数
+BATCH_SIZE = 32
+LR = 0.01                   # learning rate
+EPSILON = 0.9               # 最优选择动作百分比
+GAMMA = 0.9                 # 奖励递减参数
+TARGET_REPLACE_ITER = 100   # Q 现实网络的更新频率
+MEMORY_CAPACITY = 2000      # 记忆库大小
+env = gym.make('CartPole-v0')   # 立杆子游戏
+env = env.unwrapped
+N_ACTIONS = env.action_space.n              # 杆子能做的动作
+N_STATES = env.observation_space.shape[0]   # 杆子能获取的环境信息数
+```
+
+## 神经网络
+
+这是 DQN 当中的神经网络模式, 我们将依据这个模式建立两个神经网络, 一个是现实网络 (Target Net), 一个是估计网络 (Eval Net).
+
+```py
+class Net(nn.Module):
+    def __init__(self, ):
+        super(Net, self).__init__()
+        self.fc1 = nn.Linear(N_STATES, 10)
+        self.fc1.weight.data.normal_(0, 0.1)    # initialization
+        self.out = nn.Linear(10, N_ACTIONS)
+        self.out.weight.data.normal_(0, 0.1)    # initialization
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = F.relu(x)
+        actions_value = self.out(x)
+        return actions_value
+```
+
+## DQN体系
+
+简化的 DQN 体系是这样, 我们有两个 net, 有选动作机制, 有存经历机制, 有学习机制.
+
+```py
+class DQN(object):
+    def __init__(self):
+        # 建立 target net 和 eval net 还有 memory
+        pass
+
+    def choose_action(self, x):
+        # 根据环境观测值选择动作的机制
+        return action
+
+    def store_transition(self, s, a, r, s_):
+        # 存储记忆
+        pass
+
+    def learn(self):
+        # target 网络更新
+        # 学习记忆库中的记忆
+        pass
+```
+
+接下来就是具体的啦, 看看在 DQN 中每个功能都是怎么做的.
+
+```py
+class DQN(object):
+    def __init__(self):
+        self.eval_net, self.target_net = Net(), Net()
+
+        self.learn_step_counter = 0     # 用于 target 更新计时
+        self.memory_counter = 0         # 记忆库记数
+        self.memory = np.zeros((MEMORY_CAPACITY, N_STATES * 2 + 2))    # 初始化记忆库
+        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)   # torch 的优化器
+        self.loss_func = nn.MSELoss()   # 误差公式
+
+    def choose_action(self, x):
+        x = Variable(torch.unsqueeze(torch.FloatTensor(x), 0))
+        # 这里只输入一个 sample
+        if np.random.uniform() < EPSILON:   # 选最优动作
+            actions_value = self.eval_net.forward(x)
+            action = torch.max(actions_value, 1)[1].data.numpy()[0, 0]   # return the argmax
+        else:   # 选随机动作
+            action = np.random.randint(0, N_ACTIONS)
+        return action
+
+    def store_transition(self, s, a, r, s_):
+        transition = np.hstack((s, [a, r], s_))
+        # 如果记忆库满了, 就覆盖老数据
+        index = self.memory_counter % MEMORY_CAPACITY
+        self.memory[index, :] = transition
+        self.memory_counter += 1
+
+    def learn(self):
+        # target net 参数更新
+        if self.learn_step_counter % TARGET_REPLACE_ITER == 0:
+            self.target_net.load_state_dict(self.eval_net.state_dict())
+        self.learn_step_counter += 1
+
+        # 抽取记忆库中的批数据
+        sample_index = np.random.choice(MEMORY_CAPACITY, BATCH_SIZE)
+        b_memory = self.memory[sample_index, :]
+        b_s = Variable(torch.FloatTensor(b_memory[:, :N_STATES]))
+        b_a = Variable(torch.LongTensor(b_memory[:, N_STATES:N_STATES+1].astype(int)))
+        b_r = Variable(torch.FloatTensor(b_memory[:, N_STATES+1:N_STATES+2]))
+        b_s_ = Variable(torch.FloatTensor(b_memory[:, -N_STATES:]))
+
+        # 针对做过的动作 b_a, 来选 q_eval 的值 (q_eval 原本有所有动作的值)
+        q_eval = self.eval_net(b_s).gather(1, b_a)      # shape (batch, 1)
+        q_next = self.target_net(b_s_).detach()         # q_next 不进行反向传递误差, 所以 detach
+        q_target = b_r + GAMMA * q_next.max(1)[0]       # shape (batch, 1)
+        loss = self.loss_func(q_eval, q_target)
+
+        # 计算, 更新 eval net
+        self.optimizer.zero_grad()
+        loss.backward()
+        self.optimizer.step()
+```
+
+## 训练
+
+按照 Q-learning 的形式进行 off-policy 的更新. 我们进行回合制更新, 一个回合完了, 进入下一回合, 一直到杆子能立起来很久.
+
+```py
+dqn = DQN()   # 定义 DQN 系统
+
+for i_episode in range(400):
+    s = env.reset()
+    while True:
+        env.render()    # 显示实验动画
+        a = dqn.choose_action(s)
+
+        # 选动作, 得到环境反馈
+        s_, r, done, info = env.step(a)
+
+        # 修改 reward, 使 DQN 快速学习
+        x, x_dot, theta, theta_dot = s_
+        r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
+        r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
+        r = r1 + r2
+
+        # 存记忆
+        dqn.store_transition(s, a, r, s_)
+
+        if dqn.memory_counter > MEMORY_CAPACITY:
+            dqn.learn()   # 记忆库满了就进行学习
+
+        if done:   # 如果回合结束, 进入下回合
+            break
+
+        s = s_
+```
+
+所以这也就是在我 [github 代码](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/405_DQN_Reinforcement_learning.py) 中的每一步的意义啦.
+
+文章来源:[莫烦](https://morvanzhou.github.io/) \ No newline at end of file diff --git a/pytorch/21.md b/pytorch/21.md new file mode 100644 index 00000000..c0635944 --- /dev/null +++ b/pytorch/21.md @@ -0,0 +1,112 @@ +# 4.6 – GAN (Generative Adversarial Nets 生成对抗网络)
+
+GAN 是一个近几年比较流行的生成网络形式. 对比起传统的生成模型, 它减少了模型限制和生成器限制, 具有更好的生成能力. 人们常用假钞鉴定者和假钞制造者来打比喻, 但是我不喜欢这个比喻, 觉得没有真实反映出 GAN 里面的机理.
+
+所以我的一句话介绍 GAN 就是: Generator 是新手画家, Discriminator 是新手鉴赏家, 你是高级鉴赏家. 你将著名画家的作品和新手画家的作品都给新手鉴赏家评定, 并告诉新手鉴赏家哪些是新手画家画的, 哪些是著名画家画的, 新手鉴赏家就慢慢学习怎么区分新手画家和著名画家的画. 但是新手画家和新手鉴赏家是好朋友, 新手鉴赏家会告诉新手画家要怎么样画得更像著名画家, 新手画家就能将自己突然来的灵感 (random noise) 画得更像著名画家. 我用一个短动画的形式来诠释了整个过程 (GAN 动画简介) (如下).
+
+下面是本节内容的效果, 绿线的变化是新手画家慢慢学习如何踏上画家之路的过程. 而能被认定为著名的画作在 upper bound 和 lower bound 之间.
+
+![](img/febe7e5dc5d5b9a5004d15c50d3228c1.png)
+
+## 超参数设置
+
+新手画家 (Generator) 在作画的时候需要有一些灵感 (random noise), 我们把这些灵感的个数定义为 N_IDEAS. 而一幅画需要有一些规格, 我们将这幅画的画笔数也定义一下, ART_COMPONENTS 就是一条一元二次曲线 (这幅画) 上的点的个数. 为了进行批训练, 我们将一整批画的点都规定一下 ( PAINT_POINTS ).
+
+```py
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+import numpy as np
+import matplotlib.pyplot as plt
+
+torch.manual_seed(1)    # reproducible
+np.random.seed(1)
+
+# 超参数
+BATCH_SIZE = 64
+LR_G = 0.0001           # learning rate for generator
+LR_D = 0.0001           # learning rate for discriminator
+N_IDEAS = 5             # think of this as number of ideas for generating an art work (Generator)
+ART_COMPONENTS = 15     # it could be total point G can draw in the canvas
+PAINT_POINTS = np.vstack([np.linspace(-1, 1, ART_COMPONENTS) for _ in range(BATCH_SIZE)])
+```
+
+## 著名画家的画
+
+我们需要有很多画是来自著名画家的 (real data), 将这些著名画家的画和新手画家的画都传给新手鉴赏家, 让鉴赏家来区分哪些是著名画家的画, 哪些是新手画家的画. 如何区分我们在后面呈现. 这里我们先生成一些著名画家的画 (一个 batch 的不同一元二次方程曲线).
+
+```py
+def artist_works():     # painting from the famous artist (real target)
+    a = np.random.uniform(1, 2, size=BATCH_SIZE)[:, np.newaxis]
+    paintings = a * np.power(PAINT_POINTS, 2) + (a-1)
+    paintings = torch.from_numpy(paintings).float()
+    return Variable(paintings)
+```
+
+下面就是会产生的曲线的一个上限和下限.
+
+![](img/a577eb2dc81a64cfc4f6d04ff9a25873.png)
+
+## 神经网络
+
+这里会创建两个神经网络, 分别是 Generator (新手画家) 和 Discriminator (新手鉴赏家). G 会拿着自己的一些灵感当做输入, 输出一元二次曲线上的点 (G 的画).
+
+D 会接收一幅画作 (一元二次曲线), 输出这幅画作到底是不是著名画家的画 (是著名画家的画的概率).
+
+```py
+G = nn.Sequential(                      # Generator
+    nn.Linear(N_IDEAS, 128),            # random ideas (could from normal distribution)
+    nn.ReLU(),
+    nn.Linear(128, ART_COMPONENTS),     # making a painting from these random ideas
+)
+
+D = nn.Sequential(                      # Discriminator
+    nn.Linear(ART_COMPONENTS, 128),     # receive art work either from the famous artist or a newbie like G
+    nn.ReLU(),
+    nn.Linear(128, 1),
+    nn.Sigmoid(),                       # tell the probability that the art work is made by artist
+)
+```
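+两个网络各自的 optimizer 在原文里没有贴出来, 按原 github 脚本的思路大致是这样 (一个示意, 假设都用 Adam):
+
+```py
+opt_D = torch.optim.Adam(D.parameters(), lr=LR_D)   # 训练 Discriminator
+opt_G = torch.optim.Adam(G.parameters(), lr=LR_G)   # 训练 Generator
+```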
+
+## 训练
+
+接着我们来同时训练 D 和 G. 训练之前, 我们来看看 G 作画的原理. G 首先会有些灵感, G_ideas 就会拿到这些随机灵感 (可以是正态分布的随机数), 然后 G 会根据这些灵感画画. 接着我们拿着著名画家的画和 G 的画, 让 D 来判定这两批画作是著名画家画的概率.
+
+```py
+for step in range(10000):
+    artist_paintings = artist_works()                       # real painting from artist
+    G_ideas = Variable(torch.randn(BATCH_SIZE, N_IDEAS))    # random ideas
+    G_paintings = G(G_ideas)                                # fake painting from G (random ideas)
+
+    prob_artist0 = D(artist_paintings)   # D try to increase this prob
+    prob_artist1 = D(G_paintings)        # D try to reduce this prob
+```
+
+然后计算有多少来自画家的画猜对了, 有多少来自 G 的画猜对了, 我们想最大化这些猜对的次数. 这也就是 log(D(x)) + log(1-D(G(z))) 在[论文](https://arxiv.org/abs/1406.2661)中的形式. 而因为 torch 中提升参数的形式是最小化误差, 那我们把最大化 score 转换成最小化 loss, 在两个 score 的和的前面加一个负号就好. 而 G 的提升就是要减小 D 猜出 G 生成数据的正确率, 也就是减小 torch.log(1. - prob_artist1) 这一项.
+
+```py
+    D_loss = - torch.mean(torch.log(prob_artist0) + torch.log(1. - prob_artist1))
+    G_loss = torch.mean(torch.log(1. - prob_artist1))
+```
+
+最后我们再根据 loss 提升神经网络就好了.
+
+```py
+    opt_D.zero_grad()
+    D_loss.backward(retain_variables=True)   # retain_variables 这个参数是为了再次使用计算图纸
+    opt_D.step()
+
+    opt_G.zero_grad()
+    G_loss.backward()
+    opt_G.step()
+```
+
+上面的全部代码内容在我的 [github](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/406_GAN.py).
+
+## 可视化训练过程
+
+可视化的代码很简单, 在这里就不一一叙说了, 大家直接看[代码](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/406_GAN.py) 吧. 本节的最上面就是这次的动图效果. 最后达到收敛时, 效果如下: G 能成功地根据自己的"灵感", 产生出一条很像 artist 画出的曲线, 而 D 再也没有能力猜出这到底是 G 的画作还是 artist 的画作, 它只能一半时间猜是 G 的, 一半时间猜是 artist 的.
+
+![](img/7eca2f8318f254b17ca0bc215ec4f5a0.png)
+
+文章来源:[莫烦](https://morvanzhou.github.io/) \ No newline at end of file diff --git a/pytorch/22.md b/pytorch/22.md new file mode 100644 index 00000000..2c0e24be --- /dev/null +++ b/pytorch/22.md @@ -0,0 +1 @@ +# 高阶内容 \ No newline at end of file diff --git a/pytorch/23.md b/pytorch/23.md new file mode 100644 index 00000000..270f2271 --- /dev/null +++ b/pytorch/23.md @@ -0,0 +1,61 @@ +# 5.1 – 为什么 Torch 是动态的
+
+听说过 Torch 的人都听说了 torch 是动态的, 那它的动态到底是什么呢? 我们用一个 RNN 的例子来展示一下动态计算到底长什么样.
+
+## 动态?静态?
+
+要对比静态和动态, 我们就得知道谁是静态的. 在流行的神经网络模块中, Tensorflow 就是最典型的静态计算模块. 下图是我在强化学习教程中用到的一种 Tensorflow 计算图. 也就是说, 大部分时候, 用 Tensorflow 是先搭建好这样一个计算系统, 一旦搭建好了, 就不能改动了 (也有例外, 比如 dynamic_rnn() , 但是总体来说它还是运用了一种静态思维), 所有的计算都会在这种图中流动. 当然很多情况这样就够了, 我们不需要改动什么结构, 不动结构当然可以提高效率. 但是一旦计算流程不是静态的, 计算图就要变动. 最典型的例子就是 RNN, 有时候 RNN 的 time step 不会一样, 或者在 training 和 testing 的时候, batch_size 和 time_step 也不一样, 这时, Tensorflow 就头疼了, Tensorflow 的人也头疼了. 哈哈, 如果用一个动态计算图的 Torch, 我们就好理解多了, 写起来也简单多了.
+
+![](img/1b292936f4a0c3be1d04e43a994fd48c.png)
+
+## 动态RNN
+
+我们拿 [这一节内容的 RNN](https://www.pytorchtutorial.com/4-3-rnn-for-regression/) 来解释动态计算图. 那节内容的[代码在这](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/11_RNN_regressor.py).
+
+```py
+..
+
+######################## 前面代码都一样, 下面开始不同 #########################
+
+################ 那节内容的代码结构 (静态 time step) ##########
+for step in range(60):
+    start, end = step * np.pi, (step + 1)*np.pi   # time steps 都是一样长的
+    # use sin predicts cos
+    steps = np.linspace(start, end, 10, dtype=np.float32)
+    ...
+
+################ 这节内容修改代码 (动态 time step) #########
+step = 0
+for i in range(60):
+    dynamic_steps = np.random.randint(1, 4)   # 随机 time step 长度
+    start, end = step * np.pi, (step + dynamic_steps) * np.pi   # different time steps length
+    step += dynamic_steps
+
+    # use sin predicts cos
+    steps = np.linspace(start, end, 10 * dynamic_steps, dtype=np.float32)
+
+####################### 这下面又一样了 ###########################
+    print(len(steps))   # print how many time step feed to RNN
+
+    x_np = np.sin(steps)   # float32 for converting torch FloatTensor
+    y_np = np.cos(steps)
+    ...
+
+"""
+输出的动态 time step 长度
+30
+30
+10
+30
+20
+30
+"""
+```
+
+有人会说了, Tensorflow 也有类似的功能呀, 比如说 dynamic_rnn(). 对的, 没错. 不过大家是否想过, 如果我在 Tensorflow 当中定义一个 input 的 placeholder, 这个 placeholder 将会有 (batch, time step, input size) 这几个维度. batch 好说, 随便什么大小都可以, 可是 time step 可是固定的呀, 这可不好改, 或者说改起来很麻烦. 而 PyTorch 中又可以变 batch 又可以变 time step, 这不是很方便吗. 这就体现了动态神经网络的好处.
+
+经过这样的折腾, torch 还能 handle 住, 已经很不容易啦. 所以当你想要处理这些动态计算图的时候, Torch 还是你首选的神经网络模块.
+
+所以这也就是在我 [github 代码](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/501_why_torch_dynamic_graph.py) 中的每一步的意义啦.
+
+文章来源:[莫烦](https://morvanzhou.github.io/) \ No newline at end of file diff --git a/pytorch/24.md b/pytorch/24.md new file mode 100644 index 00000000..9cad93c8 --- /dev/null +++ b/pytorch/24.md @@ -0,0 +1,66 @@ +# 5.2 – GPU 加速运算
+
+在 GPU 上训练可以大幅提升运算速度, 而且 Torch 也有一套很好的 GPU 运算体系. 但是要强调的是:
+
+* 你的电脑里有合适的 GPU 显卡(NVIDIA), 且支持 CUDA 模块. [请在NVIDIA官网查询](https://developer.nvidia.com/cuda-gpus)
+* 必须安装 GPU 版的 Torch, [点击这里查看如何安装](https://www.pytorchtutorial.com/1-2-install-pytorch/)
+
+## 用 GPU 训练 CNN
+
+这份 GPU 的代码是依据[之前这份CNN](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/401_CNN.py)的代码修改的. 大概修改的地方包括将数据的形式变成 GPU 能读的形式, 然后将 CNN 也变成 GPU 能读的形式. 做法就是在后面加上 .cuda() , 很简单.
+
+```py
+...
+
+test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
+
+# !!!!!!!! 修改 test data 形式 !!!!!!!!! #
+test_x = Variable(torch.unsqueeze(test_data.test_data, dim=1)).type(torch.FloatTensor)[:2000].cuda()/255.   # Tensor on GPU
+test_y = test_data.test_labels[:2000].cuda()
+```
+
+再来把我们的 CNN 参数也变成 GPU 兼容形式.
+
+```py
+class CNN(nn.Module):
+    ...
+
+cnn = CNN()
+
+# !!!!!!!! 转换 cnn 去 CUDA !!!!!!!!! #
+cnn.cuda()      # Moves all model parameters and buffers to the GPU.
+```
+
+然后就是在 train 的时候, 将每次的 training data 也都变成 GPU 形式 ( .cuda() ).
+
+```py
+for epoch in range(EPOCH):
+    for step, (x, y) in enumerate(train_loader):
+        # !!!!!!!! 这里有修改 !!!!!!!!! #
+        b_x = Variable(x).cuda()    # Tensor on GPU
+        b_y = Variable(y).cuda()    # Tensor on GPU
+
+        ...
+
+        if step % 50 == 0:
+            test_output = cnn(test_x)
+
+            # !!!!!!!! 这里有修改 !!!!!!!!! #
+            pred_y = torch.max(test_output, 1)[1].cuda().data.squeeze()  # 将操作放去 GPU
+
+            accuracy = torch.sum(pred_y == test_y) / test_y.size(0)
+            ...
+
+test_output = cnn(test_x[:10])
+
+# !!!!!!!! 这里有修改 !!!!!!!!! #
+pred_y = torch.max(test_output, 1)[1].cuda().data.squeeze()  # 将操作放去 GPU
+...
+print(test_y[:10], 'real number')
+```
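+一个小提醒 (示意写法): GPU 上的 tensor 不能直接转成 numpy, 如果要打印或者画图, 要先用 .cpu() 把结果搬回 CPU:
+
+```py
+pred_cpu = pred_y.cpu()     # GPU tensor 搬回 CPU
+print(pred_cpu.numpy())     # 再转 numpy 交给 matplotlib 等处理
+```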
+
+大功告成~
+
+所以这也就是在我 [github 代码](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/502_GPU.py) 中的每一步的意义啦.
+
+文章来源:[莫烦](https://morvanzhou.github.io/) \ No newline at end of file diff --git a/pytorch/25.md b/pytorch/25.md new file mode 100644 index 00000000..acd16d5c --- /dev/null +++ b/pytorch/25.md @@ -0,0 +1,121 @@ +# 5.3 – Dropout 防止过拟合
+
+过拟合让人头疼, 明明训练时误差已经降得足够低, 可是测试的时候误差突然飙升, 这很有可能就是出现了过拟合现象. 强烈推荐通过 (下面) 这个动画的形式短时间了解什么是过拟合, 以及怎么解决过拟合. 下面的动图就显示了我们成功缓解了过拟合现象.
+
+![](img/a545e4a49909bd7a80e042fd6d8267cb.png)
+
+## 做点数据
+
+自己做一些伪数据, 用来模拟真实情况. 数据少, 才能凸显过拟合问题, 所以我们就只做 20 个数据点.
+
+![](img/761c210ceb0fdd69c7e0f8bd85e39698.png)
+
+```py
+import torch
+from torch.autograd import Variable
+import matplotlib.pyplot as plt
+
+torch.manual_seed(1)    # reproducible
+
+N_SAMPLES = 20
+N_HIDDEN = 300
+
+# training data
+x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
+y = x + 0.3*torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))
+x, y = Variable(x, requires_grad=False), Variable(y, requires_grad=False)
+
+# test data
+test_x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
+test_y = test_x + 0.3*torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))
+test_x, test_y = Variable(test_x, requires_grad=False), Variable(test_y, requires_grad=False)
+
+# show data
+plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.5, label='train')
+plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.5, label='test')
+plt.legend(loc='upper left')
+plt.ylim((-2.5, 2.5))
+plt.show()
+```
+
+## 搭建神经网络
+
+我们在这里搭建两个神经网络, 一个没有 dropout, 一个有 dropout. 没有 dropout 的容易出现过拟合, 那我们就命名为 net_overfitting, 另一个就是 net_dropped. torch.nn.Dropout(0.5) 这里的 0.5 指的是随机有 50% 的神经元会被关闭/丢弃.
+
+```py
+net_overfitting = torch.nn.Sequential(
+    torch.nn.Linear(1, N_HIDDEN),
+    torch.nn.ReLU(),
+    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
+    torch.nn.ReLU(),
+    torch.nn.Linear(N_HIDDEN, 1),
+)
+
+net_dropped = torch.nn.Sequential(
+    torch.nn.Linear(1, N_HIDDEN),
+    torch.nn.Dropout(0.5),  # drop 50% of the neuron
+    torch.nn.ReLU(),
+    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
+    torch.nn.Dropout(0.5),  # drop 50% of the neuron
+    torch.nn.ReLU(),
+    torch.nn.Linear(N_HIDDEN, 1),
+)
+```
+
+## 训练
+
+训练的时候, 这两个神经网络分开训练, 训练的环境都一样.
+
+```py
+optimizer_ofit = torch.optim.Adam(net_overfitting.parameters(), lr=0.01)
+optimizer_drop = torch.optim.Adam(net_dropped.parameters(), lr=0.01)
+loss_func = torch.nn.MSELoss()
+
+for t in range(500):
+    pred_ofit = net_overfitting(x)
+    pred_drop = net_dropped(x)
+
+    loss_ofit = loss_func(pred_ofit, y)
+    loss_drop = loss_func(pred_drop, y)
+
+    optimizer_ofit.zero_grad()
+    optimizer_drop.zero_grad()
+    loss_ofit.backward()
+    loss_drop.backward()
+    optimizer_ofit.step()
+    optimizer_drop.step()
+```
+
+## 对比测试结果
+
+在这个 for 循环里, 我们加上画测试图的部分. 注意在测试时, 要将网络改成 eval() 形式, 特别是 net_dropped; net_overfitting 改不改其实无所谓. 画好图再改回 train() 模式.
+
+![](img/a545e4a49909bd7a80e042fd6d8267cb.png)
+
+```py
+...
+
+    optimizer_ofit.step()
+    optimizer_drop.step()
+
+    # 接着上面来
+    if t % 10 == 0:     # 每 10 步画一次图
+        # 将神经网络转换成测试形式, 画好图之后改回 训练形式
+        net_overfitting.eval()
+        net_dropped.eval()  # 因为 drop 网络在 train 的时候和 test 的时候参数不一样.
+
+        ...
+        test_pred_ofit = net_overfitting(test_x)
+        test_pred_drop = net_dropped(test_x)
+        ...
+
+        # 将两个网络改回 训练形式
+        net_overfitting.train()
+        net_dropped.train()
+```
+
+![](img/c2914d88b6f17b84982e162cf6930a88.png)
+
+所以这也就是在我 [github 代码](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/503_dropout.py) 中的每一步的意义啦.
+
+文章来源:[莫烦](https://morvanzhou.github.io/) \ No newline at end of file diff --git a/pytorch/26.md b/pytorch/26.md new file mode 100644 index 00000000..8bfcb766 --- /dev/null +++ b/pytorch/26.md @@ -0,0 +1,187 @@ +# 5.4 – Batch Normalization 批标准化
+
+批标准化通俗来说就是对每一层神经网络进行标准化 (normalize) 处理. 我们知道对输入数据进行标准化能让机器学习有效率地学习, 如果把每一层的输出都看成后一层接收的输入数据, 那我们何不 "批标准化" 所有的层呢? 具体而且清楚的解释请看我 (原作者) 制作的 什么是批标准化 动画简介 (推荐) (如下).
+
+那我们就看看下面的两个动图, 这就是在每层神经网络有无 batch normalization 的区别啦.
+
+![](img/6730e1145d2a40e8ced1fda4d453d9c6.png)
+
+![](img/cb2138c3f800c7ca4b5ae38076d09429.png)
+
+## 做点数据
+
+自己做一些伪数据, 用来模拟真实情况. 而且 Batch Normalization (之后都简称 BN) 还能有效地控制坏的参数初始化 (initialization). 比如说 ReLU 这种激励函数最怕所有的值都落在负数区间, 那我们就将所有的参数都水平移动一个 -0.2 ( B_INIT = -0.2 ), 来看看 BN 的实力.
+
+![](img/fedaa24e2fcad876c77a2038c2d8d14d.png)
+
+```py
+import torch
+from torch.autograd import Variable
+from torch import nn
+from torch.nn import init
+import torch.utils.data as Data
+import torch.nn.functional as F
+import matplotlib.pyplot as plt
+import numpy as np
+
+# 超参数
+N_SAMPLES = 2000
+BATCH_SIZE = 64
+EPOCH = 12
+LR = 0.03
+N_HIDDEN = 8
+ACTIVATION = F.tanh     # 你可以换 relu 试试
+B_INIT = -0.2           # 模拟不好的 参数初始化
+
+# training data
+x = np.linspace(-7, 10, N_SAMPLES)[:, np.newaxis]
+noise = np.random.normal(0, 2, x.shape)
+y = np.square(x) - 5 + noise
+
+# test data
+test_x = np.linspace(-7, 10, 200)[:, np.newaxis]
+noise = np.random.normal(0, 2, test_x.shape)
+test_y = np.square(test_x) - 5 + noise
+
+train_x, train_y = torch.from_numpy(x).float(), torch.from_numpy(y).float()
+test_x = Variable(torch.from_numpy(test_x).float(), volatile=True)  # volatile=True 不进行梯度计算
+test_y = Variable(torch.from_numpy(test_y).float(), volatile=True)
+
+train_dataset = Data.TensorDataset(data_tensor=train_x, target_tensor=train_y)
+train_loader = Data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2,)
+
+# show data
+plt.scatter(train_x.numpy(), train_y.numpy(), c='#FF9359', s=50, alpha=0.2, label='train')
+plt.legend(loc='upper left')
+plt.show()
+```
+
+## 搭建神经网络
+
+这里就教你如何构建带有 BN 的神经网络. BN 其实可以看做是一个 layer ( BN layer ), 我们就像平时加层一样加 BN layer 就好了. 注意, 我还对输入数据进行了一个 BN 处理, 因为如果你把输入数据看成是从前面一层来的输出数据, 我们同样也能对它进行 BN.
+
+```py
+class Net(nn.Module):
+    def __init__(self, batch_normalization=False):
+        super(Net, self).__init__()
+        self.do_bn = batch_normalization
+        self.fcs = []   # 太多层了, 我们用 for loop 建立
+        self.bns = []
+        self.bn_input = nn.BatchNorm1d(1, momentum=0.5)   # 给 input 的 BN
+
+        for i in range(N_HIDDEN):               # 建层
+            input_size = 1 if i == 0 else 10
+            fc = nn.Linear(input_size, 10)
+            setattr(self, 'fc%i' % i, fc)       # 注意! pytorch 一定要你将层信息变成 class 的属性! 我在这里花了2天时间发现了这个 bug
+            self._set_init(fc)                  # 参数初始化
+            self.fcs.append(fc)
+            if self.do_bn:
+                bn = nn.BatchNorm1d(10, momentum=0.5)
+                setattr(self, 'bn%i' % i, bn)   # 注意! pytorch 一定要你将层信息变成 class 的属性! 我在这里花了2天时间发现了这个 bug
+                self.bns.append(bn)
+
+        self.predict = nn.Linear(10, 1)         # output layer
+        self._set_init(self.predict)            # 参数初始化
+
+    def _set_init(self, layer):     # 参数初始化
+        init.normal(layer.weight, mean=0., std=.1)
+        init.constant(layer.bias, B_INIT)
+
+    def forward(self, x):
+        pre_activation = [x]
+        if self.do_bn: x = self.bn_input(x)     # 判断是否要加 BN
+        layer_input = [x]
+        for i in range(N_HIDDEN):
+            x = self.fcs[i](x)
+            pre_activation.append(x)            # 为之后出图
+            if self.do_bn: x = self.bns[i](x)   # 判断是否要加 BN
+            x = ACTIVATION(x)
+            layer_input.append(x)               # 为之后出图
+        out = self.predict(x)
+        return out, layer_input, pre_activation
+
+# 建立两个 net, 一个有 BN, 一个没有
+nets = [Net(batch_normalization=False), Net(batch_normalization=True)]
+```
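+顺带一提, 一个替代写法的示意 (假设用的是提供 nn.ModuleList 的较新 PyTorch 版本): 用 ModuleList 托管子层的话, 层会被自动注册成 Module 的属性, 就不用像上面那样手动 setattr 了.
+
+```py
+import torch.nn as nn
+
+class NetML(nn.Module):     # 只是示意, 不是原文代码
+    def __init__(self, n_hidden=8):
+        super(NetML, self).__init__()
+        self.fcs = nn.ModuleList(
+            [nn.Linear(1 if i == 0 else 10, 10) for i in range(n_hidden)])
+        self.bns = nn.ModuleList(
+            [nn.BatchNorm1d(10, momentum=0.5) for _ in range(n_hidden)])
+        self.predict = nn.Linear(10, 1)
+```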
+
+## 训练
+
+训练的时候, 这两个神经网络分开训练, 训练的环境都一样.
+
+```py
+opts = [torch.optim.Adam(net.parameters(), lr=LR) for net in nets]
+
+loss_func = torch.nn.MSELoss()
+
+losses = [[], []]   # 每个网络一个 list 来记录误差
+for epoch in range(EPOCH):
+    print('Epoch: ', epoch)
+    for step, (b_x, b_y) in enumerate(train_loader):
+        b_x, b_y = Variable(b_x), Variable(b_y)
+        for net, opt in zip(nets, opts):    # 训练两个网络
+            pred, _, _ = net(b_x)
+            loss = loss_func(pred, b_y)
+            opt.zero_grad()
+            loss.backward()
+            opt.step()    # 这也会训练 BN 里面的参数
+```
+
+## 画图
+
+这个教程有几张图要画, 首先我们画训练时的动态图. 我单独定义了一个画动图的功能 plot_histogram() , 因为不是重点, 所以代码的具体细节请看我的 [github](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/504_batch_normalization.py).
+
+![](img/cb2138c3f800c7ca4b5ae38076d09429.png)
+
+```py
+f, axs = plt.subplots(4, N_HIDDEN + 1, figsize=(10, 5))
+
+def plot_histogram(l_in, l_in_bn, pre_ac, pre_ac_bn):
+    ...
+
+for epoch in range(EPOCH):
+    layer_inputs, pre_acts = [], []
+    for net, l in zip(nets, losses):
+        # 一定要把 net 设置成 eval 模式, eval 下的 BN 参数会被固定
+        net.eval()
+        pred, layer_input, pre_act = net(test_x)
+        l.append(loss_func(pred, test_y).data[0])
+        layer_inputs.append(layer_input)
+        pre_acts.append(pre_act)
+        # 收集好信息后将 net 设置成 train 模式, 继续训练
+        net.train()
+    plot_histogram(*layer_inputs, *pre_acts)    # plot histogram
+
+    # 后面接着之前 for loop 中的代码来
+    for step, (b_x, b_y) in enumerate(train_loader):
+        ...
+```
+
+后面还有两张图, 一张是预测曲线, 一张是误差变化曲线, 具体代码不在这里呈现, 想知道如何画图的朋友, 请参考我的 [github](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/504_batch_normalization.py)
+
+## 对比结果
+
+首先来看看这次对比的两个激励函数是长什么样:
+
+![](img/7345448d48d8d6c6c1b03fda91334e3e.png)
+
+然后我们来对比使用不同激励函数的结果.
+
+![](img/4d69c0a49be83a66f774caf12e64c3a7.png)
+
+![](img/25959870b2b1e7d6fd61229cb20bed29.png)
+
+![](img/03f4667f9aae4918338a56b74865a389.png)
+
+上面是使用 relu 激励函数的结果, 我们可以看到, 没有使用 BN 的误差要高, 线条不能拟合数据, 原因是我们有一个 "Bad initialization", 初始 bias = -0.2 , 这一招让 relu 无法捕捉到在负数区间的输入值. 而有了 BN, 这就不成问题了.
+
+![](img/bbd3ae66e0235fac8745c37306e74a52.png)
+
+![](img/90e1ab65f286c889d94c9f6c57d596c9.png)
+
+![](img/c42f3cec435a83431d5a1737e99b8b8c.png)
+
+上面的结果是使用 tanh 作为激励函数的结果, 可以看出, 不好的初始化让输入数据在激活前分散得非常离散, 而有了 BN, 数据都被收拢了. 收拢的数据再放入激励函数就能很好地利用激励函数的非线性. 而且可以看出没有 BN 的数据让激活后的结果都分布在 tanh 的两端, 而这两端的梯度又非常小, 使得后面的误差都不能往前传, 导致神经网络死掉了.
+
+所以这也就是在我 [github 代码](https://github.com/MorvanZhou/PyTorch-Tutorial/blob/master/tutorial-contents/504_batch_normalization.py) 中的每一步的意义啦.
+ +文章来源:[莫烦](https://morvanzhou.github.io/) \ No newline at end of file diff --git a/pytorch/README.md b/pytorch/README.md new file mode 100644 index 00000000..b5ce3919 --- /dev/null +++ b/pytorch/README.md @@ -0,0 +1 @@ +# 莫烦 PyTorch 系列教程 diff --git a/pytorch/SUMMARY.md b/pytorch/SUMMARY.md new file mode 100644 index 00000000..2ee25f55 --- /dev/null +++ b/pytorch/SUMMARY.md @@ -0,0 +1,27 @@ ++ [莫烦 PyTorch 系列教程](README.md) ++ [PyTorch 简介](01.md) + + [1.1 – Why PyTorch?](02.md) + + [1.2 – 安装 PyTorch](03.md) ++ [PyTorch 神经网络基础](04.md) + + [2.1 – Torch vs Numpy](05.md) + + [2.2 – 变量 (Variable)](06.md) + + [2.3 – 激励函数 (Activation)](07.md) ++ [建造第一个神经网络](08.md) + + [3.1 – 关系拟合 (回归 Regression)](09.md) + + [3.2 – 区分类型 (分类 Classification)](10.md) + + [3.3 – 快速搭建回归神经网络](11.md) + + [3.4 – 保存和恢复模型](12.md) + + [3.5 – 数据读取 (Data Loader)](13.md) + + [3.6 – 优化器 (Optimizer)](14.md) ++ [高级神经网络结构](15.md) + + [4.1 – CNN 卷积神经网络](16.md) + + [4.2 – RNN 循环神经网络 (分类 Classification)](17.md) + + [4.3 – RNN 循环神经网络 (回归 Regression)](18.md) + + [4.4 – AutoEncoder (自编码/非监督学习)](19.md) + + [4.5 – DQN 强化学习 (Reinforcement Learning)](20.md) + + [4.6 – GAN (Generative Adversarial Nets 生成对抗网络)](21.md) ++ [高阶内容](22.md) + + [5.1 – 为什么 Torch 是动态的](23.md) + + [5.2 – GPU 加速运算](24.md) + + [5.3 – Dropout 防止过拟合](25.md) + + [5.4 – Batch Normalization 批标准化](26.md) \ No newline at end of file diff --git a/pytorch/img/03f4667f9aae4918338a56b74865a389.png b/pytorch/img/03f4667f9aae4918338a56b74865a389.png new file mode 100644 index 00000000..d729375e Binary files /dev/null and b/pytorch/img/03f4667f9aae4918338a56b74865a389.png differ diff --git a/pytorch/img/04a6040ad4f06a69a92f440ea17dde44.png b/pytorch/img/04a6040ad4f06a69a92f440ea17dde44.png new file mode 100644 index 00000000..5792acb4 Binary files /dev/null and b/pytorch/img/04a6040ad4f06a69a92f440ea17dde44.png differ diff --git a/pytorch/img/13e0473ef73a9de2569a81c62e30d054.png b/pytorch/img/13e0473ef73a9de2569a81c62e30d054.png new file mode 100644 index 00000000..35bab638 Binary files /dev/null and b/pytorch/img/13e0473ef73a9de2569a81c62e30d054.png differ diff --git a/pytorch/img/1b292936f4a0c3be1d04e43a994fd48c.png b/pytorch/img/1b292936f4a0c3be1d04e43a994fd48c.png new file mode 100644 index 00000000..afc1e3a4 Binary files /dev/null and b/pytorch/img/1b292936f4a0c3be1d04e43a994fd48c.png differ diff --git a/pytorch/img/1f0b990d5a8d78692d3730d855fe44ea.png b/pytorch/img/1f0b990d5a8d78692d3730d855fe44ea.png new file mode 100644 index 00000000..9d305fb9 Binary files /dev/null and b/pytorch/img/1f0b990d5a8d78692d3730d855fe44ea.png differ diff --git a/pytorch/img/20e2ebdf112e4aa3202e951e072c2dc2.png b/pytorch/img/20e2ebdf112e4aa3202e951e072c2dc2.png new file mode 100644 index 00000000..af1af4c0 Binary files /dev/null and b/pytorch/img/20e2ebdf112e4aa3202e951e072c2dc2.png differ diff --git a/pytorch/img/22309cd02ee52b3a65e1f0022e8b964e.png b/pytorch/img/22309cd02ee52b3a65e1f0022e8b964e.png new file mode 100644 index 00000000..ccbb2119 Binary files /dev/null and b/pytorch/img/22309cd02ee52b3a65e1f0022e8b964e.png differ diff --git a/pytorch/img/25959870b2b1e7d6fd61229cb20bed29.png b/pytorch/img/25959870b2b1e7d6fd61229cb20bed29.png new file mode 100644 index 00000000..657eb7ae Binary files /dev/null and b/pytorch/img/25959870b2b1e7d6fd61229cb20bed29.png differ diff --git a/pytorch/img/25ed82d9ef8a8b1c9c60445c7c08c732.png b/pytorch/img/25ed82d9ef8a8b1c9c60445c7c08c732.png new file mode 100644 index 00000000..d92ae449 Binary files /dev/null and b/pytorch/img/25ed82d9ef8a8b1c9c60445c7c08c732.png 
differ diff --git a/pytorch/img/2fc34594dcb247d4a3414467eed4a109.png b/pytorch/img/2fc34594dcb247d4a3414467eed4a109.png new file mode 100644 index 00000000..5b51f0b6 Binary files /dev/null and b/pytorch/img/2fc34594dcb247d4a3414467eed4a109.png differ diff --git a/pytorch/img/388ca39bf710c8f053f533ad10872cd7.png b/pytorch/img/388ca39bf710c8f053f533ad10872cd7.png new file mode 100644 index 00000000..363f334b Binary files /dev/null and b/pytorch/img/388ca39bf710c8f053f533ad10872cd7.png differ diff --git a/pytorch/img/4d69c0a49be83a66f774caf12e64c3a7.png b/pytorch/img/4d69c0a49be83a66f774caf12e64c3a7.png new file mode 100644 index 00000000..1e7de9aa Binary files /dev/null and b/pytorch/img/4d69c0a49be83a66f774caf12e64c3a7.png differ diff --git a/pytorch/img/5a415b795ebbb116db6d4a2394d93b27.png b/pytorch/img/5a415b795ebbb116db6d4a2394d93b27.png new file mode 100644 index 00000000..d405db74 Binary files /dev/null and b/pytorch/img/5a415b795ebbb116db6d4a2394d93b27.png differ diff --git a/pytorch/img/6730e1145d2a40e8ced1fda4d453d9c6.png b/pytorch/img/6730e1145d2a40e8ced1fda4d453d9c6.png new file mode 100644 index 00000000..43b03488 Binary files /dev/null and b/pytorch/img/6730e1145d2a40e8ced1fda4d453d9c6.png differ diff --git a/pytorch/img/68f39521fc6853acdf26440e7d5a2861.png b/pytorch/img/68f39521fc6853acdf26440e7d5a2861.png new file mode 100644 index 00000000..1c4e6a0f Binary files /dev/null and b/pytorch/img/68f39521fc6853acdf26440e7d5a2861.png differ diff --git a/pytorch/img/7345448d48d8d6c6c1b03fda91334e3e.png b/pytorch/img/7345448d48d8d6c6c1b03fda91334e3e.png new file mode 100644 index 00000000..ce2ce47e Binary files /dev/null and b/pytorch/img/7345448d48d8d6c6c1b03fda91334e3e.png differ diff --git a/pytorch/img/761c210ceb0fdd69c7e0f8bd85e39698.png b/pytorch/img/761c210ceb0fdd69c7e0f8bd85e39698.png new file mode 100644 index 00000000..14ccd41b Binary files /dev/null and b/pytorch/img/761c210ceb0fdd69c7e0f8bd85e39698.png differ diff --git a/pytorch/img/7a8716c377832b032ee24276b7ddcc31.png b/pytorch/img/7a8716c377832b032ee24276b7ddcc31.png new file mode 100644 index 00000000..925b6190 Binary files /dev/null and b/pytorch/img/7a8716c377832b032ee24276b7ddcc31.png differ diff --git a/pytorch/img/7eca2f8318f254b17ca0bc215ec4f5a0.png b/pytorch/img/7eca2f8318f254b17ca0bc215ec4f5a0.png new file mode 100644 index 00000000..ed65e285 Binary files /dev/null and b/pytorch/img/7eca2f8318f254b17ca0bc215ec4f5a0.png differ diff --git a/pytorch/img/90e1ab65f286c889d94c9f6c57d596c9.png b/pytorch/img/90e1ab65f286c889d94c9f6c57d596c9.png new file mode 100644 index 00000000..6d237097 Binary files /dev/null and b/pytorch/img/90e1ab65f286c889d94c9f6c57d596c9.png differ diff --git a/pytorch/img/94268b7d9687d039d872da203453c97b.png b/pytorch/img/94268b7d9687d039d872da203453c97b.png new file mode 100644 index 00000000..d405db74 Binary files /dev/null and b/pytorch/img/94268b7d9687d039d872da203453c97b.png differ diff --git a/pytorch/img/99c72d57612c137b62599837526f0e0e.png b/pytorch/img/99c72d57612c137b62599837526f0e0e.png new file mode 100644 index 00000000..d92ae449 Binary files /dev/null and b/pytorch/img/99c72d57612c137b62599837526f0e0e.png differ diff --git a/pytorch/img/9e1df524980c8f42ab4353070c2a1b74.png b/pytorch/img/9e1df524980c8f42ab4353070c2a1b74.png new file mode 100644 index 00000000..32b9b261 Binary files /dev/null and b/pytorch/img/9e1df524980c8f42ab4353070c2a1b74.png differ diff --git a/pytorch/img/a545e4a49909bd7a80e042fd6d8267cb.png b/pytorch/img/a545e4a49909bd7a80e042fd6d8267cb.png new file mode 
100644 index 00000000..c75ef432 Binary files /dev/null and b/pytorch/img/a545e4a49909bd7a80e042fd6d8267cb.png differ diff --git a/pytorch/img/a577eb2dc81a64cfc4f6d04ff9a25873.png b/pytorch/img/a577eb2dc81a64cfc4f6d04ff9a25873.png new file mode 100644 index 00000000..3a34918c Binary files /dev/null and b/pytorch/img/a577eb2dc81a64cfc4f6d04ff9a25873.png differ diff --git a/pytorch/img/b708f231f544bd7bcefa1d55c82653dd.png b/pytorch/img/b708f231f544bd7bcefa1d55c82653dd.png new file mode 100644 index 00000000..661d17af Binary files /dev/null and b/pytorch/img/b708f231f544bd7bcefa1d55c82653dd.png differ diff --git a/pytorch/img/bbd3ae66e0235fac8745c37306e74a52.png b/pytorch/img/bbd3ae66e0235fac8745c37306e74a52.png new file mode 100644 index 00000000..cf4e02d0 Binary files /dev/null and b/pytorch/img/bbd3ae66e0235fac8745c37306e74a52.png differ diff --git a/pytorch/img/bce7313d5ac6f2600b62a4962a6daf3a.png b/pytorch/img/bce7313d5ac6f2600b62a4962a6daf3a.png new file mode 100644 index 00000000..a9f9d1a1 Binary files /dev/null and b/pytorch/img/bce7313d5ac6f2600b62a4962a6daf3a.png differ diff --git a/pytorch/img/c2914d88b6f17b84982e162cf6930a88.png b/pytorch/img/c2914d88b6f17b84982e162cf6930a88.png new file mode 100644 index 00000000..a5f3e171 Binary files /dev/null and b/pytorch/img/c2914d88b6f17b84982e162cf6930a88.png differ diff --git a/pytorch/img/c429fb827df769a542339e200e2ea20c.png b/pytorch/img/c429fb827df769a542339e200e2ea20c.png new file mode 100644 index 00000000..b9f33818 Binary files /dev/null and b/pytorch/img/c429fb827df769a542339e200e2ea20c.png differ diff --git a/pytorch/img/c42f3cec435a83431d5a1737e99b8b8c.png b/pytorch/img/c42f3cec435a83431d5a1737e99b8b8c.png new file mode 100644 index 00000000..411c7987 Binary files /dev/null and b/pytorch/img/c42f3cec435a83431d5a1737e99b8b8c.png differ diff --git a/pytorch/img/c8011979fa20046cbfa36e46cf508689.png b/pytorch/img/c8011979fa20046cbfa36e46cf508689.png new file mode 100644 index 00000000..9d305fb9 Binary files /dev/null and b/pytorch/img/c8011979fa20046cbfa36e46cf508689.png differ diff --git a/pytorch/img/cb2138c3f800c7ca4b5ae38076d09429.png b/pytorch/img/cb2138c3f800c7ca4b5ae38076d09429.png new file mode 100644 index 00000000..d704b279 Binary files /dev/null and b/pytorch/img/cb2138c3f800c7ca4b5ae38076d09429.png differ diff --git a/pytorch/img/f1108a1b6941305fa7a39e488c023fe9.png b/pytorch/img/f1108a1b6941305fa7a39e488c023fe9.png new file mode 100644 index 00000000..9ebfce3e Binary files /dev/null and b/pytorch/img/f1108a1b6941305fa7a39e488c023fe9.png differ diff --git a/pytorch/img/f38868821469cadc36810cfd827511d1.png b/pytorch/img/f38868821469cadc36810cfd827511d1.png new file mode 100644 index 00000000..ae431fad Binary files /dev/null and b/pytorch/img/f38868821469cadc36810cfd827511d1.png differ diff --git a/pytorch/img/f790e22ee4be05f818e52467c2f13b37.png b/pytorch/img/f790e22ee4be05f818e52467c2f13b37.png new file mode 100644 index 00000000..c2934477 Binary files /dev/null and b/pytorch/img/f790e22ee4be05f818e52467c2f13b37.png differ diff --git a/pytorch/img/febe7e5dc5d5b9a5004d15c50d3228c1.png b/pytorch/img/febe7e5dc5d5b9a5004d15c50d3228c1.png new file mode 100644 index 00000000..595f5fe8 Binary files /dev/null and b/pytorch/img/febe7e5dc5d5b9a5004d15c50d3228c1.png differ diff --git a/pytorch/img/fedaa24e2fcad876c77a2038c2d8d14d.png b/pytorch/img/fedaa24e2fcad876c77a2038c2d8d14d.png new file mode 100644 index 00000000..7821901c Binary files /dev/null and b/pytorch/img/fedaa24e2fcad876c77a2038c2d8d14d.png differ diff --git 
a/pytorch/img/ffeedc89cc695e61aa6e941c1c696a39.png b/pytorch/img/ffeedc89cc695e61aa6e941c1c696a39.png new file mode 100644 index 00000000..3403989c Binary files /dev/null and b/pytorch/img/ffeedc89cc695e61aa6e941c1c696a39.png differ diff --git a/机器学习/AI学习路线.md b/机器学习/AI学习路线.md new file mode 100644 index 00000000..2cec8527 --- /dev/null +++ b/机器学习/AI学习路线.md @@ -0,0 +1,660 @@ +# 基础知识
+![图片](https://uploader.shimo.im/f/bxR0xWoah6gf2gMo.png!thumbnail)
+## 1.数学
+数学是学不完的,也没有几个人能像博士一样扎实地学好数学基础。入门人工智能领域,其实只需要掌握必要的基础知识就好。AI的数学基础最主要是高等数学、线性代数、概率论与数理统计三门课程,这三门课程是本科必修的。这里整理了一组简易的数学入门文章。
+数学基础: 高等数学
+[https://zhuanlan.zhihu.com/p/36311622](https://zhuanlan.zhihu.com/p/36311622)
+数学基础: 线性代数
+[https://zhuanlan.zhihu.com/p/36584206](https://zhuanlan.zhihu.com/p/36584206)
+数学基础: 概率论与数理统计
+[https://zhuanlan.zhihu.com/p/36584335](https://zhuanlan.zhihu.com/p/36584335)
+
+机器学习的数学基础资料下载:
+1) 机器学习的数学基础.docx
+中文版,对高等数学、线性代数、概率论与数理统计三门课的公式做了总结
+2) 斯坦福大学机器学习的数学基础.pdf
+原版英文材料,非常全面,建议英语好的同学直接学习这个材料
+下载链接: [https://pan.baidu.com/s/1LaUlrJzy98CG1Wma9FgBtg](https://pan.baidu.com/s/1LaUlrJzy98CG1Wma9FgBtg) 提取码: hktx
+
+推荐教材:
+相比国内浙大版和同济版的数学教材更通俗易懂,便于初学者更好地奠定数学基础
+下载链接: [https://blog.csdn.net/Datawhale/article/details/81744961](https://blog.csdn.net/Datawhale/article/details/81744961)
+
+## 2.统计学
+* 入门教材:
+
+深入浅出统计学
+* 进阶教材:
+
+商务与经济统计
+* 视频
+
+可汗学院统计学: [http://open.163.com/special/Khan/khstatistics.html](http://open.163.com/special/Khan/khstatistics.html)
+
+## 3.编程
+入门人工智能领域,推荐 Python 这门编程语言。
+1) Python 安装:
+关于 python 安装包,推荐下载 Anaconda。Anaconda 是一个用于科学计算的 Python 发行版,支持 Linux, Mac, Windows 系统,提供了包管理与环境管理的功能,可以很方便地解决多版本 Python 并存、切换以及各种第三方包安装的问题。
+下载地址: [https://www.anaconda.com/download/](https://www.anaconda.com/download/) 推荐选 Anaconda (python 3.7版本)
+
+IDE: 推荐使用 pycharm,社区版免费
+下载地址: [https://www.jetbrains.com/](https://www.jetbrains.com/)
+
+安装教程:
+Anaconda+Jupyter notebook+Pycharm:
+[https://zhuanlan.zhihu.com/p/59027692](https://zhuanlan.zhihu.com/p/59027692)
+Ubuntu18.04深度学习环境配置(CUDA9+CUDNN7.4+TensorFlow1.8):
+[https://zhuanlan.zhihu.com/p/50302396](https://zhuanlan.zhihu.com/p/50302396)
+
+2) python 入门的资料推荐
+a. 廖雪峰 python 学习笔记
+[https://blog.csdn.net/datawhale/article/category/7779959](https://blog.csdn.net/datawhale/article/category/7779959)
+
+b. python 入门笔记
+作者李金,这是 jupyter notebook 文件,把 python 的主要语法演示了一次,值得推荐。
+下载链接: [https://pan.baidu.com/s/1IPZI5rygbIh5R5OuTHajzA](https://pan.baidu.com/s/1IPZI5rygbIh5R5OuTHajzA) 提取码: 2bzh
+
+c. 南京大学 python 视频教程
+这个教程非常值得推荐,python 主要语法和常用的库基本都涵盖了。
+查看地址: [https://www.icourse163.org/course/0809NJU004-1001571005?from=study](https://www.icourse163.org/course/0809NJU004-1001571005?from=study)
+
+看完这三个资料,python 基本就入门了,可以使用 scikit-learn 等机器学习库来解决机器学习的问题了。
+
+3) 补充
+代码规范:
+[https://zhuanlan.zhihu.com/p/59763076](https://zhuanlan.zhihu.com/p/59763076)
+numpy练习题:
+[https://zhuanlan.zhihu.com/p/57872490](https://zhuanlan.zhihu.com/p/57872490)
+pandas练习题:
+[https://zhuanlan.zhihu.com/p/56644669](https://zhuanlan.zhihu.com/p/56644669)
+
+# 数据分析/挖掘
+![图片](https://uploader.shimo.im/f/G1qZ7iUeRrAhFK9u.png!thumbnail)
+## 1.数据分析的基础书籍:
+《利用python进行数据分析》
+这本书含有大量的实践案例,你将学会如何利用各种 Python 库(包括 NumPy、Pandas、Matplotlib 以及 IPython 等)高效地解决各式各样的数据分析问题。如果把代码都运行一次,基本上就能解决数据分析的大部分问题了。
+## 2.特征工程:
+[https://blog.csdn.net/Datawhale/article/details/83033869](https://blog.csdn.net/Datawhale/article/details/83033869)
+## 3.数据挖掘项目:
+[https://blog.csdn.net/datawhale/article/details/80847662](https://blog.csdn.net/datawhale/article/details/80847662)
+# 机器学习
+![图片](https://uploader.shimo.im/f/udrFwkqrEeA6mjVe.png!thumbnail)
+## 1.公开课 - 吴恩达《Machine Learning》
+这绝对是机器学习入门的首选课程,没有之一!即便你没有机器学习所需的扎实的概率论、线性代数等数学基础,也能轻松上手这门机器学习入门课,并体会到机器学习的无穷趣味。
+
+课程主页
+[https://www.coursera.org/learn/machine-learning](https://www.coursera.org/learn/machine-learning)
+
+课程完整思维导图:
+
+![图片](https://uploader.shimo.im/f/NZFYqVr4ghI09BnJ.png!thumbnail)
+
+下载链接: [https://pan.baidu.com/s/16065BpNAP7JEx_PpFHLSOw](https://pan.baidu.com/s/16065BpNAP7JEx_PpFHLSOw) 提取码: xcmi
+
+**中文视频**
+网易云课堂搬运了这门课,并由黄海广等人翻译了中文字幕。
+
+观看地址: [https://study.163.com/course/introduction.htm?courseId=1004570029&_trace_c_p_k2_=d107b2ac93794ae79c941899f93332a1](https://study.163.com/course/introduction.htm?courseId=1004570029&_trace_c_p_k2_=d107b2ac93794ae79c941899f93332a1)
+
+**中文笔记及作业代码**
+[https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes](https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes)
+
+
+## 2.公开课 - 吴恩达 CS229
+吴恩达在斯坦福教授的机器学习课程 CS229 与吴恩达在 Coursera 上的《Machine Learning》相似,但有更多的数学要求和公式推导,难度稍大一些。该课程对机器学习和统计模式识别进行了广泛的介绍。主题包括: 监督学习(生成/判别学习、参数/非参数学习、神经网络、支持向量机);无监督学习(聚类、降维、核方法);学习理论(偏差/方差权衡;VC理论;大间隔 large margin);强化学习和自适应控制。本课程还将讨论机器学习的最新应用,如机器人控制、数据挖掘、自主导航、生物信息学、语音识别以及文本和Web数据处理。
+### 课程主页: [http://cs229.stanford.edu/](http://cs229.stanford.edu/)
+### 中文视频
+[http://open.163.com/special/opencourse/machinelearning.html](http://open.163.com/special/opencourse/machinelearning.html)
+### 中文笔记
+[https://kivy-cn.github.io/Stanford-CS-229-CN/#/](https://kivy-cn.github.io/Stanford-CS-229-CN/#/)
+### 速查表
+这份给力资源的贡献者是一名斯坦福的毕业生 Shervine Amidi。作者针对 CS229 整理了一份超级详细的速查表
+[https://zhuanlan.zhihu.com/p/56534902](https://zhuanlan.zhihu.com/p/56534902)
+### 作业代码
+[https://github.com/Sierkinhane/CS229-ML-Implements](https://github.com/Sierkinhane/CS229-ML-Implements)
+## 3.公开课 - 林轩田《机器学习基石》
+### 课程介绍
+台湾大学林轩田老师的《机器学习基石》课程由浅入深、内容全面,基本涵盖了机器学习领域的很多方面,作为机器学习的入门和进阶资料都非常适合。而且林老师的教学风格也很幽默风趣,总让读者在轻松愉快的氛围中掌握知识。这门课比 Ng 的《Machine Learning》稍难一些,侧重于机器学习理论知识。
+### 中文视频
+[https://www.bilibili.com/video/av36731342](https://www.bilibili.com/video/av36731342)
+### 中文笔记
+[https://redstonewill.com/category/ai-notes/lin-ml-foundations/](https://redstonewill.com/category/ai-notes/lin-ml-foundations/)
+### 配套教材
+配套书籍为《Learning From Data》,在线书籍主页: [http://amlbook.com/](http://amlbook.com/)
+## 4.公开课 - 林轩田《机器学习技法》
+### 课程介绍
+《机器学习技法》课程是《机器学习基石》的进阶课程,主要介绍了机器学习领域经典的一些算法,包括支持向量机、决策树、随机森林、神经网络等等。难度略高于《机器学习基石》,具有很强的实用性。
+### 中文视频
+[https://www.bilibili.com/video/av36760800](https://www.bilibili.com/video/av36760800)
+### 中文笔记
+[https://redstonewill.com/category/ai-notes/lin-ml-techniques/](https://redstonewill.com/category/ai-notes/lin-ml-techniques/)
+## 5.书籍 - 《机器学习》
+周志华的《机器学习》被大家亲切地称为“西瓜书”。这本书非常经典,讲述了机器学习核心的数学理论和算法,适合作为学校的教材或者供中阶读者自学使用,入门时学习这本书难度稍微偏高了一些。
+![图片](https://uploader.shimo.im/f/P3AKvtlFmbIlJVrN.png!thumbnail)
+这本书配合《机器学习实战》一起看,效果很好!
+### 读书笔记
+[https://www.cnblogs.com/limitlessun/p/8505647.html#_label0](https://www.cnblogs.com/limitlessun/p/8505647.html#_label0)
+### 公式推导
+[https://datawhalechina.github.io/pumpkin-book/#/](https://datawhalechina.github.io/pumpkin-book/#/)
+### 课后习题
+[https://zhuanlan.zhihu.com/c_1013850291887845376](https://zhuanlan.zhihu.com/c_1013850291887845376)
+
+## 6.书籍 - 《统计学习方法》
+李航的这本《统计学习方法》堪称经典,包含更加完备和专业的机器学习理论知识,用来夯实理论非常不错。
+![图片](https://uploader.shimo.im/f/5n2sq6L07OkA1XJg.png!thumbnail)
+### 课件 PPT
+[https://github.com/fengdu78/lihang-code/tree/master/ppt](https://github.com/fengdu78/lihang-code/tree/master/ppt)
+### 读书笔记
+[http://www.cnblogs.com/limitlessun/p/8611103.html](http://www.cnblogs.com/limitlessun/p/8611103.html)
+[https://github.com/SmirkCao/Lihang](https://github.com/SmirkCao/Lihang)
+### 参考笔记
+[https://zhuanlan.zhihu.com/p/36378498](https://zhuanlan.zhihu.com/p/36378498)
+### 代码实现
+[https://github.com/fengdu78/lihang-code/tree/master/code](https://github.com/fengdu78/lihang-code/tree/master/code)
+## 7.书籍 - 《Scikit-Learn 与 TensorFlow 机器学习实用指南》
+在经过前面的学习之后,这本《Scikit-Learn 与 TensorFlow 机器学习实用指南》非常适合提升你的机器学习实战编程能力。这本书分为两大部分: 第一部分介绍机器学习基础算法,每章都配备 Scikit-Learn 实操项目;第二部分介绍神经网络与深度学习,每章配备 TensorFlow 实操项目。如果只学机器学习,可先看第一部分的内容。
+![图片](https://uploader.shimo.im/f/ARnTj6fm9UA67x7z.png!thumbnail)
+### 全书代码
+[https://github.com/ageron/handson-ml](https://github.com/ageron/handson-ml)
+## 8.实战 - Kaggle 比赛
+比赛是提升自己机器学习实战能力的最有效的方式,首选 Kaggle 比赛。
+### Kaggle 主页
+[https://www.kaggle.com/](https://www.kaggle.com/)
+### Kaggle 路线
+[https://github.com/apachecn/kaggle](https://github.com/apachecn/kaggle)
+## 9.工具 - Scikit-Learn 官方文档
+Scikit-Learn 是机器学习领域一个非常全面的库,其官方文档是一份不可多得的实战编程手册。
+### 官方文档
+[https://scikit-learn.org/stable/index.html](https://scikit-learn.org/stable/index.html)
+### 中文文档(0.19)
+[http://sklearn.apachecn.org/#/](http://sklearn.apachecn.org/#/)
+# 深度学习
+![图片](https://uploader.shimo.im/f/CWIQXBU0YksSqDOc.png!thumbnail)
+## 1.公开课 - 吴恩达《Deep Learning》
+在吴恩达开设了机器学习课程之后,发布的《Deep Learning》课程也备受好评。吴恩达老师的课程最大的特点就是将知识循序渐进地传授给你,是入门学习不可多得的优质视频资料。整个专题共包括五门课程: 01.神经网络和深度学习;02.改善深层神经网络-超参数调试、正则化以及优化;03.结构化机器学习项目;04.卷积神经网络;05.序列模型。
+### 课程视频
+网易云课堂: [https://mooc.study.163.com/university/deeplearning_ai#/c](https://mooc.study.163.com/university/deeplearning_ai#/c)
+Coursera: [https://www.coursera.org/specializations/deep-learning](https://www.coursera.org/specializations/deep-learning)
+
+### 课程笔记
+之前编写过吴恩达老师机器学习课程个人笔记的黄海广博士,带领团队整理了这门课程的中文笔记。
+地址: [https://github.com/fengdu78/deeplearning_ai_books](https://github.com/fengdu78/deeplearning_ai_books)
+### 参考论文
+吴恩达老师在课程中提到了很多优秀论文,黄海广博士整理如下:
+[https://github.com/fengdu78/deeplearning_ai_books/tree/master/%E5%8F%82%E8%80%83%E8%AE%BA%E6%96%87](https://github.com/fengdu78/deeplearning_ai_books/tree/master/%E5%8F%82%E8%80%83%E8%AE%BA%E6%96%87)
+
+### 课程PPT及课后作业
+吴恩达深度学习课程,包含课程的课件、课后作业和一些其他资料:
+[https://github.com/stormstone/deeplearning.ai](https://github.com/stormstone/deeplearning.ai)
+
+## 2.公开课 - Fast.ai《程序员深度学习实战》
+说到深度学习的公开课,与吴恩达《Deep Learning》并驾齐驱的另一门公开课便是由Fast.ai出品的《程序员深度学习实战》。这门课最大的特点便是**“自上而下”**而不是**“自下而上”**,是绝佳的通过实战学习深度学习的课程。
+### 视频地址
+B站地址(英文字幕):
+[https://www.bilibili.com/video/av18904696?from=search&seid=10813837536595120136](https://www.bilibili.com/video/av18904696?from=search&seid=10813837536595120136)
+CSDN地址(2017版中文字幕):
+[https://edu.csdn.net/course/detail/5192](https://edu.csdn.net/course/detail/5192)
+### 课程笔记
+英文笔记原文:
+[https://medium.com/@hiromi_suenaga/deep-learning-2-part-1-lesson-1-602f73869197](https://medium.com/@hiromi_suenaga/deep-learning-2-part-1-lesson-1-602f73869197)
+由ApacheCN组织进行的中文翻译:
+[https://github.com/apachecn/fastai-ml-dl-notes-zh](https://github.com/apachecn/fastai-ml-dl-notes-zh)
+
+## 3.公开课 - CS230 Deep Learning
+### 课程介绍
+斯坦福的深度学习课程CS230于4月2日刚刚开课,对应的全套PPT也随之上线。从内容来看,今年的课程与去年的差别不大,涵盖了CNN、RNN、LSTM、Adam、Dropout、BatchNorm、Xavier/He初始化等深度学习的基本模型和技术,涉及医疗、自动驾驶、手语识别、音乐生成和自然语言处理等领域。
+
+Datawhale整理了该门课程的详细介绍及参考资料:
+[https://mp.weixin.qq.com/s/kA-L8t5mGq6jExMBRjyg4g](https://mp.weixin.qq.com/s/kA-L8t5mGq6jExMBRjyg4g)
+## 4.书籍 - 复旦教授邱锡鹏《神经网络与深度学习》
+本书是入门深度学习领域的极佳教材,主要介绍了神经网络与深度学习中的基础知识、主要模型(前馈网络、卷积网络、循环网络等)以及在计算机视觉、自然语言处理等领域的应用。
+[https://mp.weixin.qq.com/s/-NaDpXsxvu4DpXqVNXIAvQ](https://mp.weixin.qq.com/s/-NaDpXsxvu4DpXqVNXIAvQ)
+
+## 5.书籍 - 《深度学习》
+![图片](https://uploader.shimo.im/f/ExWrjEUvH1E0SakQ.png!thumbnail)
+完成以上学习后,想要更加系统地建立深度学习的知识体系,阅读《深度学习》准没错。该书由浅入深地介绍了基础数学知识、机器学习经验以及现阶段深度学习的理论和发展,能帮助人工智能技术爱好者和从业人员在三位专家学者的思维带领下全方位了解深度学习。
+### 书籍介绍
+《深度学习》通常又被称为“花书”,是深度学习领域最经典的畅销书,由全球知名的三位专家 Ian Goodfellow、Yoshua Bengio 和 Aaron Courville 撰写,是深度学习领域奠基性的经典教材。全书的内容包括3个部分: 第1部分介绍基本的数学工具和机器学习的概念,它们是深度学习的预备知识;第2部分系统深入地讲解现今已成熟的深度学习方法和技术;第3部分讨论某些具有前瞻性的方向和想法,它们被公认为是深度学习未来的研究重点。该书被大众尊称为“AI圣经”。
+
+### 在线阅读
+该书由众多网友众包翻译,电子版可在以下地址获得:
+[https://github.com/exacity/deeplearningbook-chinese](https://github.com/exacity/deeplearningbook-chinese)
+
+## 6.书籍 - 《深度学习 500 问》
+当你看完了所有的视频、研习了“AI圣经”之后,脑子里一定充满了问号,此时不如来看看深度学习面试中常见的 500 个问题。
+### 书籍介绍
+DeepLearning-500-questions,作者是川大的一名优秀毕业生谈继勇。该项目以深度学习面试问答的形式,收集了 500 个问题和答案,内容涉及常用的概率知识、线性代数、机器学习、深度学习、计算机视觉等热点问题。该书目前尚未完结,却已经收获了 GitHub 2.4w stars。
+### 项目地址
+[https://github.com/scutan90/DeepLearning-500-questions](https://github.com/scutan90/DeepLearning-500-questions)
+
+## 7.工具 - TensorFlow 官方文档
+进行深度学习,怎么离得开 TensorFlow?
+### 官方文档
+[https://www.tensorflow.org/api_docs/python/tf](https://www.tensorflow.org/api_docs/python/tf)
+### 中文文档
+[https://github.com/jikexueyuanwiki/tensorflow-zh](https://github.com/jikexueyuanwiki/tensorflow-zh)
+
+## 8.工具 - PyTorch 官方文档
+PyTorch 是进行深度学习的另一个主流框架。
+### 官方文档
+[https://pytorch.org/docs/stable/index.html](https://pytorch.org/docs/stable/index.html)
+### 中文文档(版本0.3)
+[https://github.com/apachecn/pytorch-doc-zh](https://github.com/apachecn/pytorch-doc-zh)
+# 强化学习
+![图片](https://uploader.shimo.im/f/E8K8fklpvKceOGQe.png!thumbnail)
+## 1.公开课 - David Silver《Reinforcement Learning》
+同吴恩达的课程对于机器学习和深度学习初学者的意义一样,David Silver 的这门课程绝对是大多数人学习强化学习的必选课程。课程由浅入深,把强化学习的内容娓娓道来,极其详尽。不过由于强化学习本身的难度,听这门课还是有一定的门槛,建议在大致了解这个领域之后再观看该视频,学习效果更佳,也更容易找到学习的重点。另外,由于强化学习领域飞速发展,最前沿的知识(特别是与深度学习相关的)没有被涵盖在这个课程中,需要另外补充。
+### 课程介绍
+该课程对强化学习领域做了相当详尽的讲解,其主要内容有: 马尔可夫决策过程(强化学习的基础理论)、动态规划、免模型预测(蒙特卡洛学习、时序差分学习和λ时序差分强化学习)、免模型控制(On-policy Learning和Off-policy Learning)、价值函数的近似表示、策略梯度算法、学习与规划的整合、探索与利用以及实例演示。
+
+### 视频地址
+B站地址(中文字幕):
+[https://www.bilibili.com/video/av45357759?from=search&seid=9547815852611563503](https://www.bilibili.com/video/av45357759?from=search&seid=9547815852611563503)
+课程原地址:
+[https://www.youtube.com/watch?v=2pWv7GOvuf0](https://www.youtube.com/watch?v=2pWv7GOvuf0)
+
+### 课程资料
+课程PPT:
+[http://www0.cs.ucl.ac.uk/staff/d.silver/web/Teaching.html](http://www0.cs.ucl.ac.uk/staff/d.silver/web/Teaching.html)
+课程笔记:
+[https://www.zhihu.com/people/qqiang00/posts](https://www.zhihu.com/people/qqiang00/posts)
+
+## 2.公开课 - 李宏毅《深度强化学习》
+David Silver 的课程虽然内容详尽,但很多前沿内容并未包括在内。这时,台大李宏毅的《深度强化学习》就是学习前沿动态的不二之选。李宏毅老师讲课非常幽默风趣,并且浅显易懂,而且对于大多数初学者来说,中文教学可谓是福音。当然,这门课程也存在不足,例如没有对理论知识做太多详尽的展开、内容主要围绕深度强化学习进行等,但这并不妨碍其成为初学者的首选之一。
+### 课程介绍
+该课程上线于2018年,基本涵盖了当年的前沿技术,其主要内容有: 策略梯度算法(David Silver课程中提到的算法大多都在这部分内容中讲到,但主要是从神经网络的角度出发)、Q-learning(这部分涵盖了大量关于Q-learning优化的讲解)、Actor-Critic、Sparse Reward 和 Imitation Learning。
+### 视频地址
+B站地址(中文字幕):
+[https://www.bilibili.com/video/av24724071?from=search&seid=9547815852611563503](https://www.bilibili.com/video/av24724071?from=search&seid=9547815852611563503)
+课程原地址:
+[https://www.youtube.com/watch?v=2pWv7GOvuf0](https://www.youtube.com/watch?v=2pWv7GOvuf0)
+
+### 课程资料
+课程PPT:
+[http://speech.ee.ntu.edu.tw/~tlkagk/courses_MLDS18.html](http://speech.ee.ntu.edu.tw/~tlkagk/courses_MLDS18.html)
+课程笔记:
+[https://blog.csdn.net/cindy_1102/article/details/87905272](https://blog.csdn.net/cindy_1102/article/details/87905272)
+
+# 前沿 Paper
+![图片](https://uploader.shimo.im/f/zViCOYrXGokl9Taa.png!thumbnail)
+## Arxiv
+### Arxiv Stats
+Arxiv 机器学习最新论文检索,主页地址:
+[https://arxiv.org/list/stat.ML/recent?ref=bestofml.com](https://arxiv.org/list/stat.ML/recent?ref=bestofml.com)
+### Arxiv Sanity Preserver
+Andrej Karpathy 开发了 Arxiv Sanity Preserver,提供论文的分类、搜索和过滤功能,主页地址:
+[http://www.arxiv-sanity.com/?ref=bestofml.com](http://www.arxiv-sanity.com/?ref=bestofml.com)
+## Papers with Code
+### Papers with Code(Browse state-of-the-art)
+这个网站叫做 Browse state-of-the-art,它将 ArXiv 上的最新深度学习论文与 GitHub 上的开源代码联系起来。该项目目前包含了 651 个排行榜、1016 个深度学习任务、795 个数据集,以及重磅的 10257 篇含复现代码的优秀论文,简直就是一个寻找论文和代码的利器。它将 1016 个深度学习任务分成了 16 大类,涉及深度学习的各个方面。
+
+主页地址:
+[https://paperswithcode.com/sota](https://paperswithcode.com/sota)
+
+举两个例子:
+
+(1)CV: [https://paperswithcode.com/area/computer-vision](https://paperswithcode.com/area/computer-vision)
+(2)NLP: [https://paperswithcode.com/area/natural-language-processing](https://paperswithcode.com/area/natural-language-processing)
+
+介绍: [https://redstonewill.com/2039/](https://redstonewill.com/2039/)
+### Papers with Code(Sorted by stars)
+这份资源收集了 AI 领域 2013-2018 年的论文,并按照在 GitHub 上的标星数量进行排序。GitHub 项目地址:
+[https://github.com/zziz/pwc](https://github.com/zziz/pwc)
+## Deep Learning Papers(Reading Roadmap)
+如果你是深度学习领域的新手,你可能遇到的第一个问题是“我应该从哪篇论文开始阅读?”下面是一份深度学习论文的阅读路线图!GitHub 项目地址:
+[https://github.com/floodsung/Deep-Learning-Papers-Reading-Roadmap](https://github.com/floodsung/Deep-Learning-Papers-Reading-Roadmap)
+
+这份深度学习论文阅读路线分为三大块:
+
+**1 Deep Learning History and Basics**
+**2 Deep Learning Method**
+**3 Applications**
+## Deep Learning Object Detection
+目标检测(Object Detection)是深度学习 CV 领域的一个核心研究方向和重要分支。纵观 2013 年到 2019 年,从最早的 R-CNN、Fast R-CNN 到后来的 YOLO v2、YOLO v3 再到今年的 M2Det,新模型层出不穷,性能也越来越好!本资源对目标检测近几年的发展和相关论文做了系统介绍,并总结了一份超全的 paper 列表。
+
+![图片](https://uploader.shimo.im/f/wIBvBV6ZPYYpL5GA.png!thumbnail)
+
+GitHub 项目地址:
+[https://github.com/hoya012/deep_learning_object_detection](https://github.com/hoya012/deep_learning_object_detection)
+
+介绍: [https://redstonewill.com/1934/](https://redstonewill.com/1934/)
+## 知名会议
+**NeurIPS**: [https://nips.cc/](https://nips.cc/)
+**ICML**: [https://icml.cc/](https://icml.cc/)
+**ICLR**: [https://iclr.cc/](https://iclr.cc/)
+**AAAI**: [https://aaai.org/Conferences/AAAI-19/](https://aaai.org/Conferences/AAAI-19/)
+**IJCAI**: 
[https://www.ijcai.org/](https://www.ijcai.org/) +**UAI**: [http://www.auai.org/uai2019/index.php](http://www.auai.org/uai2019/index.php) + +### 计算机视觉: +**CVPR**: [http://cvpr2019.thecvf.com/](http://cvpr2019.thecvf.com/) +**ECCV**: [https://eccv2018.org/program/main-conference/](https://eccv2018.org/program/main-conference/) +**ICCV**: [http://iccv2019.thecvf.com/](http://iccv2019.thecvf.com/) + +### 自然语言处理: +**ACL**: [http://www.aclcargo.com/](http://www.aclcargo.com/) +**EMNLP**: [https://www.aclweb.org/portal/content/emnlp-2018](https://www.aclweb.org/portal/content/emnlp-2018) +**NAACL**: [https://naacl2019.org/](https://naacl2019.org/) +### 知名期刊 +**JAIR**: [https://www.jair.org/index.php/jair](https://www.jair.org/index.php/jair) +**JMLR**: [http://www.jmlr.org/](http://www.jmlr.org/) +### 其它 +机器人方面,有 CoRL(学习)、ICAPS(规划,包括但不限于机器人)、ICRA、IROS、RSS;对于更理论性的研究,有 AISTATS、COLT、KDD。 +# 理论应用 +![图片](https://uploader.shimo.im/f/O1TdCdH56vcst1dF.png!thumbnail) +## 自然语言处理 + +1. NLP是什么 + +自然语言处理(NLP,Natural Language Processing)是研究计算机处理人类语言的一门技术,目的是弥补人类交流(自然语言)和计算机理解(机器语言)之间的差距。NLP包含句法语义分析、信息抽取、文本挖掘、机器翻译、信息检索、问答系统和对话系统等领域。 + +1. 课程推荐 + +① CS224n 斯坦福深度自然语言处理课 +17版中文字幕 [https://www.bilibili.com/video/av41393758/?p=1](https://www.bilibili.com/video/av41393758/?p=1) +课程笔记: [http://www.hankcs.com/?s=CS224n%E7%AC%94%E8%AE%B0](http://www.hankcs.com/?s=CS224n%E7%AC%94%E8%AE%B0) +2019版课程主页: [http://web.stanford.edu/class/cs224n/](http://web.stanford.edu/class/cs224n/) (需科学上网) +② 自然语言处理 - Dan Jurafsky 和 Chris Manning +B站英文字幕版: [https://www.bilibili.com/video/av35805262/](https://www.bilibili.com/video/av35805262/) +学术激流网: [http://academictorrents.com/details/d2c8f8f1651740520b7dfab23438d89bc8c0c0ab](http://academictorrents.com/details/d2c8f8f1651740520b7dfab23438d89bc8c0c0ab) + +1. 书籍推荐 + +① Python自然语言处理 +中英文版 +>入门读物,整本书不仅涉及了语料库的操作,也对传统的基于规则的方法有所涉及。全书包括了分词(tokenization)、词性标注(POS)、语块(Chunk)标注、句法剖析与语义剖析等方面,是nlp中不错的一本实用教程。 + +② 自然语言处理综论 +中英文版 +>By Daniel Jurafsky和James H. Martin +>权威性杠杠的!经典的NLP教科书,涵盖了经典自然语言处理、统计自然语言处理、语音识别和计算语言学等方面。 + +③ 统计自然语言处理基础 +中英文版 +>By Chris Manning和HinrichSchütze +>更高级的统计NLP方法,在统计基本部分和n元语法部分介绍地都很不错  + +1. 博客推荐 + +我爱自然语言处理网站 [http://www.52nlp.cn/](http://www.52nlp.cn/) +>TFIDF、文档相似度等等网站上都有通俗易懂的解释 + +语言日志博客(Mark Liberman)[http://languagelog.ldc.upenn.edu/nll/](http://languagelog.ldc.upenn.edu/nll/)   +natural language processing blog [https://nlpers.blogspot.com/](https://nlpers.blogspot.com/) +>美国Hal Daumé III维护的一个natural language processing的博客,经常评论最新学术动态,值得关注。有关于ACL、NAACL等学术会议的参会感想和对论文的点评 +### 5.项目推荐 +基于LSTM的中文问答系统 [https://github.com/S-H-Y-GitHub/QA](https://github.com/S-H-Y-GitHub/QA) +基于RNN的文本生成器 [https://github.com/karpathy/char-rnn](https://github.com/karpathy/char-rnn) +基于char-rnn的汪峰歌词生成器 [https://github.com/phunterlau/wangfeng-rnn](https://github.com/phunterlau/wangfeng-rnn) +用RNN生成手写数字 [https://github.com/skaae/lasagne-draw](https://github.com/skaae/lasagne-draw) +1. 
开源NLP工具包
+
+中文NLP相关: [https://github.com/crownpku/Awesome-Chinese-NLP](https://github.com/crownpku/Awesome-Chinese-NLP)
+英文NLP相关:
+>NLTK: [http://www.nltk.org/](http://www.nltk.org/)
+>TextBlob: [http://textblob.readthedocs.org/en/dev/](http://textblob.readthedocs.org/en/dev/)
+>Gensim: [http://radimrehurek.com/gensim/](http://radimrehurek.com/gensim/)
+>Pattern: [http://www.clips.ua.ac.be/pattern](http://www.clips.ua.ac.be/pattern)
+>Spacy: [http://spacy.io](http://spacy.io)
+>Orange: [http://orange.biolab.si/features/](http://orange.biolab.si/features/)
+>PyNLPl(Pineapple): [https://github.com/proycon/pynlpl](https://github.com/proycon/pynlpl)
+1. 相关论文
+
+100 Must-Read NLP Papers: [https://github.com/mhagiwara/100-nlp-papers](https://github.com/mhagiwara/100-nlp-papers)
+## **计算机视觉**
+### 1.计算机视觉的应用
+计算机视觉的主要应用包括: 无人驾驶、无人安防、人脸识别、车辆车牌识别、以图搜图、VR/AR、3D重构、无人机、医学图像分析等。
+
+### 2.课程推荐
+**Stanford CS223B**
+比较适合打基础,适合刚刚入门的同学。这门课跟深度学习的结合相对少一点,不会整门课都讲深度学习,而是主要讲计算机视觉,方方面面都会讲到。
+
+李飞飞: CS231n 课程
+[https://mp.weixin.qq.com/s/-NaDpXsxvu4DpXqVNXIAvQ](https://mp.weixin.qq.com/s/-NaDpXsxvu4DpXqVNXIAvQ)
+### 3.书籍推荐
+1)入门学习: 《**Computer Vision: Models, Learning and Inference**》
+2)经典权威的参考资料: 《**Computer Vision: Algorithms and Applications**》
+3)理论实践: 《**OpenCV3编程入门**》
+## **推荐系统**
+### 1.推荐系统是什么
+推荐系统就是自动联系用户和物品的一种工具,它能够在信息过载的环境中帮助用户发现令他们感兴趣的信息,也能将信息推送给对它们感兴趣的用户。推荐系统属于资讯过滤的一种应用。
+### 2.推荐课程
+推荐系统专项课程《[Recommender Systems Specialization](https://www.coursera.org/specializations/recommender-systems)》
+这个系列由4门子课程和1门毕业项目课程组成,包括推荐系统导论、最近邻协同过滤、推荐系统评价、矩阵分解和高级技术等。
+观看地址: [https://www.coursera.org/specializations/recommender-systems](https://www.coursera.org/specializations/recommender-systems)
+### 3.书籍推荐
+《推荐系统实践》(项亮 著)
+《推荐系统》(Dietmar Jannach等 著,蒋凡 译)
+《用户网络行为画像》(牛温佳等 著)
+《Recommender Systems Handbook》(Paul B·Kantor等 著)
+### 4.算法库
+**LibRec**
+LibRec是一个Java版本的推荐系统开源算法库,覆盖了70余个各类型推荐算法,由国内的推荐系统大牛郭贵冰创办,目前已更新到2.0版本,它有效地解决了评分预测和物品推荐两大关键的推荐问题。
+项目地址: [https://github.com/guoguibing/librec](https://github.com/guoguibing/librec)
+官网: [https://www.librec.net/](https://www.librec.net/)
+**LibMF**
+C++版本的开源推荐系统,主要实现了基于矩阵分解的推荐算法。针对SGD(随机梯度下降)优化方法在并行计算中存在的 locking problem 和 memory discontinuity 问题,提出了一种矩阵分解的高效算法 FPSGD(Fast Parallel SGD),根据计算节点的个数来划分评分矩阵的 block,并分配给各计算节点。
+项目地址: [http://www.csie.ntu.edu.tw/~cjlin/libmf/](http://www.csie.ntu.edu.tw/~cjlin/libmf/)
+**SurPRISE**
+一个Python版本的开源推荐系统,实现了多种经典推荐算法。
+项目地址: [http://surpriselib.com/](http://surpriselib.com/)
+**Neural Collaborative Filtering**
+神经协同过滤推荐算法的Python实现。
+项目地址: [https://github.com/hexiangnan/neural_collaborative_filtering](https://github.com/hexiangnan/neural_collaborative_filtering)
+**Crab**
+基于Python开发的开源推荐软件,实现了基于 item 和基于 user 的协同过滤。
+项目地址: [http://muricoca.github.io/crab/](http://muricoca.github.io/crab/)
+
+### 5.常用数据集
+**MovieLens**
+地址: [https://grouplens.org/datasets/movielens/](https://grouplens.org/datasets/movielens/)
+MovieLens数据集中,用户对自己看过的电影进行评分,分值为1~5。MovieLens包括两个不同大小的库,适用于不同规模的算法: 小规模的库是943个独立用户对1682部电影作的10万次评分的数据;大规模的库是6040个独立用户对3900部电影作的大约100万次评分,适用于传统的推荐任务。
+**Douban**
+地址: [https://www.cse.cuhk.edu.hk/irwin.king.new/pub/data/douban](https://www.cse.cuhk.edu.hk/irwin.king.new/pub/data/douban)
+Douban是豆瓣的匿名数据集,它包含了12万用户和5万条电影数据,是用户对电影的评分信息和用户间的社交信息,适用于社会化推荐任务。
+**BookCrossing**
+地址: [http://www2.informatik.uni-freiburg.de/~cziegler/BX/](http://www2.informatik.uni-freiburg.de/~cziegler/BX/)
+这个数据集是网上的Book-Crossing图书社区的278858个用户对271379本书进行的评分,包括显式和隐式的评分。这些用户的年龄等人口统计学属性(demographic feature)都以匿名的形式保存并供分析。这个数据集是由Cai-Nicolas Ziegler使用爬虫程序在2004年从Book-Crossing图书社区上采集的。
+**Jester Joke**
+地址: [http://eigentaste.berkeley.edu/dataset/](http://eigentaste.berkeley.edu/dataset/)
+Jester Joke是一个网上推荐和分享笑话的网站。这个数据集有73496个用户对100个笑话作的410万次评分,评分范围是−10~10的连续实数。这些数据是由加州大学伯克利分校的Ken Goldberg公布的。
+**Netflix**
+地址: [http://academictorrents.com/details/9b13183dc4d60676b773c9e2cd6de5e5542cee9a](http://academictorrents.com/details/9b13183dc4d60676b773c9e2cd6de5e5542cee9a)
+这个数据集来自于电影租赁网站Netflix的数据库。Netflix于2005年底公布此数据集并设立百万美元的奖金(Netflix Prize),征集能够使其推荐系统性能提升10%的推荐算法和架构。这个数据集包含了480189个匿名用户对大约17770部电影作的大约10亿次评分。
+**Usenet Newsgroups**
+地址: [http://qwone.com/~jason/20Newsgroups/](http://qwone.com/~jason/20Newsgroups/)
+这个数据集包括20个新闻组的用户浏览数据,最新的应用是在KDD 2007上的论文。新闻组的内容和讨论的话题包括计算机技术、摩托车、篮球、政治等,用户们对这些话题进行评价和反馈。
+**UCI库**
+地址: [https://archive.ics.uci.edu/ml/datasets.html](https://archive.ics.uci.edu/ml/datasets.html)
+UCI库是Blake等人在1998年开放的一个用于机器学习和评测的数据库,其中存储了大量用于模型训练的标注样本,可用作推荐系统的性能测试数据。
+### 6.推荐论文
+经典必读论文整理,包括综述文章、传统经典推荐文章、社会化推荐文章、基于深度学习的推荐系统文章、专门用于解决冷启动的文章、POI相关的论文、利用哈希技术来加速推荐的文章,以及推荐系统中经典的探索与利用问题的相关文章等。
+项目地址: [https://github.com/hongleizhang/RSPapers](https://github.com/hongleizhang/RSPapers)
+### 7.推荐项目
+今日头条推荐系统机制介绍,面向内容创作者(分享人: 项亮,今日头条推荐算法架构师)
+[https://v.qq.com/x/page/f0800qavik7.html?](https://v.qq.com/x/page/f0800qavik7.html?)
+
+3分钟了解今日头条推荐系统原理
+[https://v.qq.com/x/page/g05349lb80j.html?](https://v.qq.com/x/page/g05349lb80j.html?)
+
+Facebook是如何为十亿人推荐好友的
+[https://code.facebook.com/posts/861999383875667/recommending-items-to-more-than-a-billion-people/](https://code.facebook.com/posts/861999383875667/recommending-items-to-more-than-a-billion-people/)
+
+Netflix的个性化和推荐系统架构
+[http://techblog.netflix.com/2013/03/system-architectures-for.html](http://techblog.netflix.com/2013/03/system-architectures-for.html)
+
+## 风控模型(评分卡模型)
+### 1.评分卡模型简介
+评分卡模型是在银行、互金等公司与借贷相关的业务中最常见也是最重要的模型之一。简而言之,它的作用就是对客户进行打分,以评判客户是否优质。根据评分卡模型应用的业务阶段不同,评分卡模型主要分为三大类: A卡(Application score card)申请评分卡、B卡(Behavior score card)行为评分卡、C卡(Collection score card)催收评分卡。其中申请评分卡用于贷前,行为评分卡用于贷中,催收评分卡用于贷后,这三种评分卡在信贷业务的整个生命周期中都至关重要。
+### 2.推荐书籍
+《信用风险评分卡研究——基于SAS的开发与实施》
+### 3.评分卡模型建模过程
+(1)样本选取: 确定训练样本、测试样本的观察窗(特征的时间跨度)与表现窗(标签的时间跨度),并明确样本的标签定义。一般情况下,风险评分卡的标签考虑的是客户某一段时间内的延滞情况。
+(2)特征准备: 原始特征、衍生变量。
+(3)数据清洗: 根据业务需求对缺失值或异常值等进行处理。
+(4)特征筛选: 根据特征的IV值(特征对模型的贡献度)、PSI(特征的稳定性)来进行特征筛选。IV值越大越好(但一个特征的IV值超过一定阈值时,要考虑是否用到了未来数据),PSI越小越好(一般建模时取PSI小于等于0.01的特征)。
+(5)WOE转换: 对特征进行分箱并做WOE转换,注意转换时要注重特征的可解释性。
+(6)建立模型: 在建模过程中,可根据包含与不包含某个变量时模型质量的统计量变化,对变量进行二次筛选。
+(7)模型评估: 评分卡模型一般关注的指标是KS值(衡量好坏样本累积分布之间的最大差值)、模型的PSI(即模型整体的稳定性)、AUC值等。
+## **知识图谱**
+### 1.知识图谱是什么
+知识图谱是一种结构化数据的处理方法,它涉及知识的提取、表示、存储、检索等一系列技术。从渊源上讲,它是知识表示与推理、数据库、信息检索、自然语言处理等多种技术发展的融合。
+### 2.推荐资料
+[为什么需要知识图谱?什么是知识图谱?——KG的前世今生](https://zhuanlan.zhihu.com/p/31726910)
+[什么是知识图谱?](https://zhuanlan.zhihu.com/p/34393554)
+[智能搜索时代: 知识图谱有何价值?](https://zhuanlan.zhihu.com/p/35982177?from=1084395010&wm=9848_0009&weiboauthoruid=5249689143)
+[百度王海峰: 知识图谱是 AI 的基石](http://www.infoq.com/cn/news/2017/11/Knowledge-map-cornerstone-AI#0-tsina-1-5001-397232819ff9a47a7b7e80a40613cfe1)
+[译文|从知识抽取到RDF知识图谱可视化](http://rdc.hundsun.com/portal/article/907.html?hmsr=toutiao.io&utm_medium=toutiao.io&utm_source=toutiao.io)
+### **3.主要内容**
+1. 
**知识提取** + +构建kg首先需要解决的是数据,知识提取是要解决结构化数据生成的问题。我们可以用自然语言处理的方法,也可以利用规则。 +* 使用规则: + * 正则表达式: + + 正则表达式(Regular Expression, regex)是字符串处理的基本功。数据爬取、数据清洗、实体提取、关系提取,都离不开regex。 +推荐资料入门: + * [精通正则表达式](https://book.douban.com/subject/2154713/) + * regexper 可视化:  [例 [a-z]*(\d{4}(\D+))](https://regexper.com/#%5Ba-z%5D*(%5Cd%7B4%7D(%5CD%2B))) + * pythex 在线测试正则表达式 [http://pythex.org/](http://pythex.org/) + +推荐资料进阶: + * re2 一个更快的Cython实现 [https://pypi.python.org/pypi/re2/](https://pypi.python.org/pypi/re2/) + * Parsley 更人性化的正则表达语法 [http://parsley.readthedocs.io/en/latest/tutorial.html](http://parsley.readthedocs.io/en/latest/tutorial.html) + + * 中文分词和词性标注 + + 分词也是后续所有处理的基础,词性(Part of Speech, POS)就是中学大家学过的动词、名词、形容词等等的词的分类。一般的分词工具都会有词性标注的选项。 +推荐资料入门: + * jieba 中文分词包 [https://github.com/fxsjy/jieba](https://github.com/fxsjy/jieba) + * 中文词性标记集 [https://github.com/memect/kg-beijing/wiki/中文词性标记集](https://github.com/memect/kg-beijing/wiki/%E4%B8%AD%E6%96%87%E8%AF%8D%E6%80%A7%E6%A0%87%E8%AE%B0%E9%9B%86) + + 推荐资料进阶: + * genius 采用 CRF条件随机场算法 [https://github.com/duanhongyi/genius](https://github.com/duanhongyi/genius) + * Stanford CoreNLP分词 [https://blog.csdn.net/guolindonggld/article/details/72795022](https://blog.csdn.net/guolindonggld/article/details/72795022) + + * 命名实体识别 + + 命名实体识别(NER)是信息提取应用领域的重要基础工具,一般来说,命名实体识别的任务就是识别出待处理文本中三大类(实体类、时间类和数字类)、七小类(人名、机构名、地名、时间、日期、货币和百分比)命名实体。 + +推荐资料: + * Stanford CoreNLP 进行中文命名实体识别 [https://blog.csdn.net/guolindonggld/article/details/72795022](https://blog.csdn.net/guolindonggld/article/details/72795022) +* 使用深度学习: + + 使用自然语言处理的方法,一般是给定schema,从非结构化数据中抽取特定领域的三元组(spo),如最近[百度举办的比赛](http://lic2019.ccf.org.cn/kg)就是使用DL模型进行信息抽取。 + + * 序列标注 + + 使用序列生出模型,主要是标记出三元组中subject及object的起始位置,从而抽取信息。 +推荐资料: + * 序列标注问题 [https://www.cnblogs.com/jiangxinyang/p/9368482.html](https://www.cnblogs.com/jiangxinyang/p/9368482.html) + + + * seq2seq + + 使用seq2seq端到端的模型,主要借鉴文本摘要的思想,将三元组看成是非结构化文本的摘要,从而进行抽取,其中还涉及Attention机制。 + +推荐资料: + * seq2seq详解 [https://blog.csdn.net/irving_zhang/article/details/78889364](https://blog.csdn.net/irving_zhang/article/details/78889364) + * 详解从Seq2Seq模型到Attention模型 [https://caicai.science/2018/10/06/attention%E6%80%BB%E8%A7%88/](https://caicai.science/2018/10/06/attention%E6%80%BB%E8%A7%88/) + +1. **知识表示** + + 知识表示(Knowledge Representation,KR,也译为知识表现)是研究如何将结构化数据组织,以便于机器处理和人的理解的方法。 + +需要熟悉下面内容: +* JSON和YAML + * json库 [https://docs.python.org/2/library/json.html](https://docs.python.org/2/library/json.html) + * PyYAML是Python里的Yaml处理库 [http://pyyaml.org/wiki/PyYAML](http://pyyaml.org/wiki/PyYAML) +* RDF和OWL + * RDF和OWL语义 [http://blog.memect.cn/?p=871](http://blog.memect.cn/?p=871)  +* JSON-LD + * JSON-LD主页 [http://json-ld.org/](http://json-ld.org/) + +1. **知识存储** + +需要熟悉常见的图数据库 +* 知识链接的方式: 字符串、外键、URI +* PostgreSQL及其JSON扩展 + * Psycopg包操作PostgreSQL [http://initd.org/psycopg/docs/](http://initd.org/psycopg/docs/) +* 图数据库 Neo4j和OrientDB + * Neo4j的Python接口 [https://neo4j.com/developer/python/](https://neo4j.com/developer/python/) + * OrientDB: [http://orientdb.com/orientdb/](http://orientdb.com/orientdb/) +* RDF数据库Stardog + * Stardog官网: [http://stardog.com/](http://stardog.com/) + +1. **知识检索** + +需要熟悉常见的检索技术 +* ElasticSearch + +ES教程: [http://joelabrahamsson.com/elasticsearch-101/](http://joelabrahamsson.com/elasticsearch-101/) + +1. 
相关术语及技术路线 +* [本体](https://www.zhihu.com/question/19558514) +* [RDF](https://www.w3.org/RDF/) +* [Apache Jena](https://jena.apache.org/) +* [D2RQ](http://d2rq.org/getting-started) + * Protege构建本体系列 +* [protege](https://protege.stanford.edu/) +* [protege使用](https://zhuanlan.zhihu.com/p/32389370) + * 开发语言 +* python或java + * 图数据库技术 +* [Neo4j](https://neo4j.com/) +* [AllegroGraph](https://franz.com/agraph/allegrograph/) + * 可视化技术 +* [d3.js](https://d3js.org/) +* [Cytoscape.js](http://js.cytoscape.org/) + * 分词技术 +* [jieba](https://github.com/fxsjy/jieba) +* [hanlp](https://github.com/hankcs/HanLP) +### 5.项目实战 +* [基于知识图谱的问答](https://github.com/kangzhun/KnowledgeGraph-QA-Service) +* [Agriculture_KnowledgeGraph](https://github.com/qq547276542/Agriculture_KnowledgeGraph) +# 贡献平台 +由知名开源平台,AI技术平台以及领域专家: ApacheCN,Datawhale,AI有道和黄海广博士联合整理贡献。 +参与名单: +ApacheCN: 片刻,李翔宇,飞龙,王翔 +Datawhale: 范晶晶,马晶敏,李碧涵,李福,光城,居居,康兵兵,郑家豪 +AI有道: 红色石头 +# 平台介绍 +* **Datawhale**: 一个专注于AI领域的开源组织,上海交通大学国家级孵化项目,目前有7个独立团队,聚集了一群有开源精神和探索精神的团队成员,汇聚了来自各个高校和企业的优秀学习者,致力于构建纯粹的学习圈子和优质的开源项目,提供的组队学习涵盖了数据分析,数据挖掘,机器学习,深度学习,编程等16个内容领域。 + +![图片](https://uploader.shimo.im/f/zZmdWtiX9iop46uo.png!thumbnail) +* **AI有道**: 一个专注于 AI 领域的技术公众号。公众号主要涉及人工智能领域 Python、ML 、CV、NLP 等前沿知识、干货笔记和优质资源!我们致力于为广大人工智能爱好者提供优质的 AI 资源和切实可行的 AI 学习路线。 + +![图片](https://uploader.shimo.im/f/SFZ8X6UVSCQom4Ek.png!thumbnail) +* **黄博(机器学习初学者)**: 机器学习课程在国内还不够普及,大部分初学者还是很迷茫,走了很多弯路,黄海广博士希望能尽自己的微薄之力,为机器学习初学者提供一个学习交流的平台。 + +![图片](https://uploader.shimo.im/f/FUax8CI2ZTYyvYcF.png!thumbnail) +* **ApacheCN**: 一个致力于提供优质开源项目的开源组织,致力于AI文档翻译,Kaggle比赛交流、LeetCode算法刷题、大数据交流等项目。我们希望做出广大 AI 爱好者真正需要的东西,打造真正有价值的长尾作品。官方网址: [http://www.apachecn.org/](http://www.apachecn.org/),点击阅读原文即可查看。 + +## + + + diff --git a/机器学习/ApacheCN/apachecn-dl-zh/README.md b/机器学习/ApacheCN/apachecn-dl-zh/README.md new file mode 100644 index 00000000..7f237198 --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/README.md @@ -0,0 +1,110 @@ +# ApacheCN 深度学习译文集 + +> 协议:[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/) +> +> 自豪地采用[谷歌翻译](https://translate.google.cn/) +> +> 不要担心自己的形象,只关心如何实现目标。——《原则》,生活原则 2.3.c + +* [在线阅读](https://dl.apachecn.org) +* [在线阅读(Gitee)](https://apachecn.gitee.io/apachecn-dl-zh/) +* [ApacheCN 面试求职交流群 724187166](https://jq.qq.com/?_wv=1027&k=54ujcL3) +* [ApacheCN 学习资源](http://www.apachecn.org/) + +## 目录 + ++ [Sklearn 与 TensorFlow 机器学习实用指南第二版](docs/hands-on-ml-2e-zh/SUMMARY.md) ++ [PyTorch 自然语言处理](docs/nlp-pytorch-zh/SUMMARY.md) ++ [TensorFlow 1.x 深度学习秘籍](docs/tf-1x-dl-cookbook/SUMMARY.md) ++ [PyTorch 中文官方教程 1.7](docs/pt-tut-17/SUMMARY.md) ++ [使用 TensorFlow 构建机器学习项目中文版](docs/build-ml-proj-tf-zh/SUMMARY.md) ++ [TensorFlow 深度学习中文第二版](docs/dl-tf-2e-zh/SUMMARY.md) ++ [TensorFlow 深度学习实战指南中文版](docs/hands-on-dl-tf-zh/SUMMARY.md) ++ [精通 TensorFlow 1.x](docs/mastering-tf-1x-zh/SUMMARY.md) ++ [TensorFlow 机器学习秘籍中文第二版](docs/tf-ml-cookbook-2e-zh/SUMMARY.md) ++ [与 TensorFlow 的初次接触](docs/first_contact_with_tensorFlow/SUMMARY.md) ++ [TensorFlow 学习指南](docs/learning-tf-zh/SUMMARY.md) ++ [TensorFlow Rager 教程](docs/tf-eager-tut/SUMMARY.md) ++ [TensorFlow 高效编程](docs/effective-tf.md) ++ [图嵌入综述:问题,技术与应用](docs/ge-survey-arxiv-1709-07604-zh/SUMMARY.md) ++ [基于深度学习的推荐系统:综述和新视角](docs/rs-survey-arxiv-1707-07435-zh/SUMMARY.md) ++ [关于卷积神经网络我们理解了什么](docs/what-do-we-understand-about-convnet/SUMMARY.md) ++ [机器学习超级复习笔记](docs/super-machine-learning-revision-notes/SUMMARY.md) ++ [Python 迁移学习实用指南](docs/handson-tl-py/SUMMARY.md) ++ [面向计算机视觉的深度学习](docs/dl-cv/SUMMARY.md) ++ [深度学习快速参考](docs/dl-quick-ref/SUMMARY.md) ++ [TensorFlow 2.0 
快速入门指南](docs/tf-20-quick-start-guide/SUMMARY.md) ++ [TensorFlow 入门](docs/get-start-tf/SUMMARY.md) ++ [TensorFlow 卷积神经网络实用指南](docs/handson-cnn-tf/SUMMARY.md) ++ [Python 人工智能中文版](docs/ai-py/SUMMARY.md) ++ [Python 无监督学习实用指南](docs/handson-unsup-learn-py/SUMMARY.md) ++ [生成对抗网络项目](docs/gan-proj/SUMMARY.md) ++ [TensorFlow 智能移动项目](docs/intel-mobi-proj-tf/SUMMARY.md) ++ [TensorFlow 和 Keras 应用开发入门](docs/begin-app-dev-tf-keras/SUMMARY.md) ++ [TensorFlow 图像深度学习实用指南](docs/handson-dl-img-tf/SUMMARY.md) ++ [Python 元学习实用指南](docs/handson-meta-learn-py/SUMMARY.md) ++ [Python 强化学习实用指南](docs/handson-rl-py/SUMMARY.md) ++ [Python 智能项目](docs/intel-proj-py/SUMMARY.md) ++ [精通 Sklearn 和 TensorFlow 预测性分析](docs/master-pred-anal-sklearn-tf/SUMMARY.md) ++ [TensorFlow 2.0 的新增功能](docs/whats-new-tf2/SUMMARY.md) ++ [UCB CS294-112 深度强化学习中文笔记](docs/ucb-cs294-112-notes-zh/SUMMARY.md) ++ [TensorFlow 2 和 Keras 高级深度学习](docs/adv-dl-tf2-keras/SUMMARY.md) ++ [GCP 上的人工智能实用指南](docs/handson-ai-gcp/SUMMARY.md) ++ [Python 深度学习架构实用指南](docs/handson-dl-arch-py/SUMMARY.md) ++ [Python Web 深度学习实用指南](docs/handson-py-dl-web/SUMMARY.md) ++ [精通 TensorFlow 2.x 计算机视觉](docs/master-cv-tf-2x/SUMMARY.md) ++ [TensorFlow Lite,ML Kit 和 Flutter 移动深度学习](docs/mobi-dl-tflite/SUMMARY.md) ++ [PyTorch 人工智能研讨会](docs/dl-pt-workshop/SUMMARY.md) ++ [Python 一次学习实用指南](docs/handson-1shot-learn-py/SUMMARY.md) ++ [Python 自然语言处理实用指南](docs/handson-nlp-pt-1x/SUMMARY.md) ++ [PyTorch 人工智能基础知识](docs/pt-ai-fund/SUMMARY.md) ++ [PyTorch 深度学习实用指南](docs/pt-dl-handson/SUMMARY.md) ++ [TensorFlow 强化学习](docs/rl-tf/SUMMARY.md) + +## 下载 + +### Docker + +``` +docker pull apachecn0/apachecn-dl-zh +docker run -tid -p :80 apachecn0/apachecn-dl-zh +# 访问 http://localhost:{port} 查看文档 +``` + +### PYPI + +``` +pip install apachecn-dl-zh +apachecn-dl-zh +# 访问 http://localhost:{port} 查看文档 +``` + +### NPM + +``` +npm install -g apachecn-dl-zh +apachecn-dl-zh +# 访问 http://localhost:{port} 查看文档 +``` + +## 贡献指南 + +本项目需要校对,欢迎大家提交 Pull Request。 + +> 请您勇敢地去翻译和改进翻译。虽然我们追求卓越,但我们并不要求您做到十全十美,因此请不要担心因为翻译上犯错——在大部分情况下,我们的服务器已经记录所有的翻译,因此您不必担心会因为您的失误遭到无法挽回的破坏。(改编自维基百科) + +## 联系方式 + +### 负责人 + +* [飞龙](https://github.com/wizardforcel): 562826179 + +### 其他 + +* 在我们的 [apachecn/apachecn-tf-zh](https://github.com/apachecn/apachecn-tf-zh) github 上提 issue. +* 发邮件到 Email: `apachecn@163.com`. +* 在我们的 [组织学习交流群](http://www.apachecn.org/organization/348.html) 中联系群主/管理员即可. 
+ +## 赞助我们 + +![](http://data.apachecn.org/img/about/donate.jpg) diff --git a/机器学习/ApacheCN/apachecn-dl-zh/SUMMARY.md b/机器学习/ApacheCN/apachecn-dl-zh/SUMMARY.md new file mode 100644 index 00000000..e7dfd44e --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/SUMMARY.md @@ -0,0 +1,613 @@ ++ [Sklearn 与 TensorFlow 机器学习实用指南第二版](docs/hands-on-ml-2e-zh/README.md) + + [零、前言](docs/hands-on-ml-2e-zh/0.md) + + [一、机器学习概览](docs/hands-on-ml-2e-zh/1.md) + + [二、端到端的机器学习项目](docs/hands-on-ml-2e-zh/2.md) + + [三、分类](docs/hands-on-ml-2e-zh/3.md) + + [四、训练模型](docs/hands-on-ml-2e-zh/4.md) + + [五、支持向量机](docs/hands-on-ml-2e-zh/5.md) + + [六、决策树](docs/hands-on-ml-2e-zh/6.md) + + [七、集成学习和随机森林](docs/hands-on-ml-2e-zh/7.md) + + [八、降维](docs/hands-on-ml-2e-zh/8.md) + + [十、使用 Keras 搭建人工神经网络](docs/hands-on-ml-2e-zh/10.md) + + [十一、训练深度神经网络](docs/hands-on-ml-2e-zh/11.md) + + [十二、使用 TensorFlow 自定义模型并训练](docs/hands-on-ml-2e-zh/12.md) + + [十三、使用 TensorFlow 加载和预处理数据](docs/hands-on-ml-2e-zh/13.md) + + [十四、使用卷积神经网络实现深度计算机视觉](docs/hands-on-ml-2e-zh/14.md) + + [十五、使用 RNN 和 CNN 处理序列](docs/hands-on-ml-2e-zh/15.md) + + [十六、使用 RNN 和注意力机制进行自然语言处理](docs/hands-on-ml-2e-zh/16.md) + + [十七、使用自编码器和 GAN 做表征学习和生成式学习](docs/hands-on-ml-2e-zh/17.md) + + [十八、强化学习](docs/hands-on-ml-2e-zh/18.md) + + [十九、规模化训练和部署 TensorFlow 模型](docs/hands-on-ml-2e-zh/19.md) ++ [PyTorch 自然语言处理](docs/nlp-pytorch-zh/README.md) + + [一、基础介绍](docs/nlp-pytorch-zh/1.md) + + [二、传统 NLP 快速回顾](docs/nlp-pytorch-zh/2.md) + + [三、神经网络基础组件](docs/nlp-pytorch-zh/3.md) + + [四、自然语言处理的前馈网络](docs/nlp-pytorch-zh/4.md) + + [五、嵌入单词和类型](docs/nlp-pytorch-zh/5.md) + + [六、自然语言处理的序列模型](docs/nlp-pytorch-zh/6.md) + + [七、自然语言处理的进阶序列模型](docs/nlp-pytorch-zh/7.md) + + [八、自然语言处理的高级序列模型](docs/nlp-pytorch-zh/8.md) + + [九、经典, 前沿和后续步骤](docs/nlp-pytorch-zh/9.md) ++ [TensorFlow 1.x 深度学习秘籍](docs/tf-1x-dl-cookbook/README.md) + + [零、前言](docs/tf-1x-dl-cookbook/00.md) + + [一、TensorFlow 简介](docs/tf-1x-dl-cookbook/01.md) + + [二、回归](docs/tf-1x-dl-cookbook/02.md) + + [三、神经网络:感知器](docs/tf-1x-dl-cookbook/03.md) + + [四、卷积神经网络](docs/tf-1x-dl-cookbook/04.md) + + [五、高级卷积神经网络](docs/tf-1x-dl-cookbook/05.md) + + [六、循环神经网络](docs/tf-1x-dl-cookbook/06.md) + + [七、无监督学习](docs/tf-1x-dl-cookbook/07.md) + + [八、自编码器](docs/tf-1x-dl-cookbook/08.md) + + [九、强化学习](docs/tf-1x-dl-cookbook/09.md) + + [十、移动计算](docs/tf-1x-dl-cookbook/10.md) + + [十一、生成模型和 CapsNet](docs/tf-1x-dl-cookbook/11.md) + + [十二、分布式 TensorFlow 和云深度学习](docs/tf-1x-dl-cookbook/12.md) + + [十三、AutoML 和学习如何学习(元学习)](docs/tf-1x-dl-cookbook/13.md) + + [十四、TensorFlow 处理单元](docs/tf-1x-dl-cookbook/14.md) ++ [PyTorch 中文官方教程 1.7](docs/pt-tut-17/README.md) + + [学习 PyTorch](docs/pt-tut-17/01.md) + + [PyTorch 深度学习:60 分钟的突击](docs/pt-tut-17/02.md) + + [张量](docs/pt-tut-17/03.md) + + [`torch.autograd`的简要介绍](docs/pt-tut-17/04.md) + + [神经网络](docs/pt-tut-17/05.md) + + [训练分类器](docs/pt-tut-17/06.md) + + [通过示例学习 PyTorch](docs/pt-tut-17/07.md) + + [热身:NumPy](docs/pt-tut-17/08.md) + + [PyTorch:张量](docs/pt-tut-17/09.md) + + [PyTorch:张量和 Autograd](docs/pt-tut-17/10.md) + + [PyTorch:定义新的 Autograd 函数](docs/pt-tut-17/11.md) + + [PyTorch:`nn`](docs/pt-tut-17/12.md) + + [PyTorch:`optim`](docs/pt-tut-17/13.md) + + [PyTorch:自定义`nn`模块](docs/pt-tut-17/14.md) + + [PyTorch:控制流 + 权重共享](docs/pt-tut-17/15.md) + + [`torch.nn`到底是什么?](docs/pt-tut-17/16.md) + + [使用 TensorBoard 可视化模型,数据和训练](docs/pt-tut-17/17.md) + + [图片/视频](docs/pt-tut-17/18.md) + + [`torchvision`对象检测微调教程](docs/pt-tut-17/19.md) + + [计算机视觉的迁移学习教程](docs/pt-tut-17/20.md) + + [对抗示例生成](docs/pt-tut-17/21.md) + + [DCGAN 教程](docs/pt-tut-17/22.md) + + 
[音频](docs/pt-tut-17/23.md) + + [音频 I/O 和`torchaudio`的预处理](docs/pt-tut-17/24.md) + + [使用`torchaudio`的语音命令识别](docs/pt-tut-17/25.md) + + [文本](docs/pt-tut-17/26.md) + + [使用`nn.Transformer`和`torchtext`的序列到序列建模](docs/pt-tut-17/27.md) + + [从零开始的 NLP:使用字符级 RNN 分类名称](docs/pt-tut-17/28.md) + + [从零开始的 NLP:使用字符级 RNN 生成名称](docs/pt-tut-17/29.md) + + [从零开始的 NLP:使用序列到序列网络和注意力的翻译](docs/pt-tut-17/30.md) + + [使用`torchtext`的文本分类](docs/pt-tut-17/31.md) + + [`torchtext`语言翻译](docs/pt-tut-17/32.md) + + [强化学习](docs/pt-tut-17/33.md) + + [强化学习(DQN)教程](docs/pt-tut-17/34.md) + + [训练玩马里奥的 RL 智能体](docs/pt-tut-17/35.md) + + [在生产中部署 PyTorch 模型](docs/pt-tut-17/36.md) + + [通过使用 Flask 的 REST API 在 Python 中部署 PyTorch](docs/pt-tut-17/37.md) + + [TorchScript 简介](docs/pt-tut-17/38.md) + + [在 C++ 中加载 TorchScript 模型](docs/pt-tut-17/39.md) + + [将模型从 PyTorch 导出到 ONNX 并使用 ONNX 运行时运行它(可选)](docs/pt-tut-17/40.md) + + [前端 API](docs/pt-tut-17/41.md) + + [PyTorch 中的命名张量简介(原型)](docs/pt-tut-17/42.md) + + [PyTorch 中通道在最后的内存格式(beta)](docs/pt-tut-17/43.md) + + [使用 PyTorch C++ 前端](docs/pt-tut-17/44.md) + + [自定义 C++ 和 CUDA 扩展](docs/pt-tut-17/45.md) + + [使用自定义 C++ 运算符扩展 TorchScript](docs/pt-tut-17/46.md) + + [使用自定义 C++ 类扩展 TorchScript](docs/pt-tut-17/47.md) + + [TorchScript 中的动态并行性](docs/pt-tut-17/48.md) + + [C++ 前端中的 Autograd](docs/pt-tut-17/49.md) + + [在 C++ 中注册调度运算符](docs/pt-tut-17/50.md) + + [模型优化](docs/pt-tut-17/51.md) + + [分析您的 PyTorch 模块](docs/pt-tut-17/52.md) + + [使用 Ray Tune 的超参数调整](docs/pt-tut-17/53.md) + + [模型剪裁教程](docs/pt-tut-17/54.md) + + [LSTM 单词语言模型上的动态量化(beta)](docs/pt-tut-17/55.md) + + [BERT 上的动态量化(Beta)](docs/pt-tut-17/56.md) + + [PyTorch 中使用 Eager 模式的静态量化(beta)](docs/pt-tut-17/57.md) + + [计算机视觉的量化迁移学习教程(beta)](docs/pt-tut-17/58.md) + + [并行和分布式训练](docs/pt-tut-17/59.md) + + [PyTorch 分布式概述](docs/pt-tut-17/60.md) + + [单机模型并行最佳实践](docs/pt-tut-17/61.md) + + [分布式数据并行入门](docs/pt-tut-17/62.md) + + [用 PyTorch 编写分布式应用](docs/pt-tut-17/63.md) + + [分布式 RPC 框架入门](docs/pt-tut-17/64.md) + + [使用分布式 RPC 框架实现参数服务器](docs/pt-tut-17/65.md) + + [使用 RPC 的分布式管道并行化](docs/pt-tut-17/66.md) + + [使用异步执行实现批量 RPC 处理](docs/pt-tut-17/67.md) + + [将分布式`DataParallel`与分布式 RPC 框架相结合](docs/pt-tut-17/68.md) ++ [使用 TensorFlow 构建机器学习项目中文版](docs/build-ml-proj-tf-zh/README.md) + + [一、探索和转换数据](docs/build-ml-proj-tf-zh/ch01.md) + + [二、聚类](docs/build-ml-proj-tf-zh/ch02.md) + + [三、线性回归](docs/build-ml-proj-tf-zh/ch03.md) + + [四、逻辑回归](docs/build-ml-proj-tf-zh/ch04.md) + + [五、简单的前馈神经网络](docs/build-ml-proj-tf-zh/ch05.md) + + [六、卷积神经网络](docs/build-ml-proj-tf-zh/ch06.md) + + [七、循环神经网络和 LSTM](docs/build-ml-proj-tf-zh/ch07.md) + + [八、深度神经网络](docs/build-ml-proj-tf-zh/ch08.md) + + [九、大规模运行模型 -- GPU 和服务](docs/build-ml-proj-tf-zh/ch09.md) + + [十、库安装和其他提示](docs/build-ml-proj-tf-zh/ch10.md) ++ [TensorFlow 深度学习中文第二版](docs/dl-tf-2e-zh/README.md) + + [一、人工神经网络](docs/dl-tf-2e-zh/ch01.md) + + [二、TensorFlow v1.6 的新功能是什么?](docs/dl-tf-2e-zh/ch02.md) + + [三、实现前馈神经网络](docs/dl-tf-2e-zh/ch03.md) + + [四、CNN 实战](docs/dl-tf-2e-zh/ch04.md) + + [五、使用 TensorFlow 实现自编码器](docs/dl-tf-2e-zh/ch05.md) + + [六、RNN 和梯度消失或爆炸问题](docs/dl-tf-2e-zh/ch06.md) + + [七、TensorFlow GPU 配置](docs/dl-tf-2e-zh/ch07.md) + + [八、TFLearn](docs/dl-tf-2e-zh/ch08.md) + + [九、使用协同过滤的电影推荐](docs/dl-tf-2e-zh/ch09.md) + + [十、OpenAI Gym](docs/dl-tf-2e-zh/ch10.md) ++ [TensorFlow 深度学习实战指南中文版](docs/hands-on-dl-tf-zh/README.md) + + [一、入门](docs/hands-on-dl-tf-zh/ch01.md) + + [二、深度神经网络](docs/hands-on-dl-tf-zh/ch02.md) + + [三、卷积神经网络](docs/hands-on-dl-tf-zh/ch03.md) + + [四、循环神经网络介绍](docs/hands-on-dl-tf-zh/ch04.md) + + 
[五、总结](docs/hands-on-dl-tf-zh/ch05.md) ++ [精通 TensorFlow 1.x](docs/mastering-tf-1x-zh/README.md) + + [一、TensorFlow 101](docs/mastering-tf-1x-zh/ch01.md) + + [二、TensorFlow 的高级库](docs/mastering-tf-1x-zh/ch02.md) + + [三、Keras 101](docs/mastering-tf-1x-zh/ch03.md) + + [四、TensorFlow 中的经典机器学习](docs/mastering-tf-1x-zh/ch04.md) + + [五、TensorFlow 和 Keras 中的神经网络和 MLP](docs/mastering-tf-1x-zh/ch05.md) + + [六、TensorFlow 和 Keras 中的 RNN](docs/mastering-tf-1x-zh/ch06.md) + + [七、TensorFlow 和 Keras 中的用于时间序列数据的 RNN](docs/mastering-tf-1x-zh/ch07.md) + + [八、TensorFlow 和 Keras 中的用于文本数据的 RNN](docs/mastering-tf-1x-zh/ch08.md) + + [九、TensorFlow 和 Keras 中的 CNN](docs/mastering-tf-1x-zh/ch09.md) + + [十、TensorFlow 和 Keras 中的自编码器](docs/mastering-tf-1x-zh/ch10.md) + + [十一、TF 服务:生产中的 TensorFlow 模型](docs/mastering-tf-1x-zh/ch11.md) + + [十二、迁移学习和预训练模型](docs/mastering-tf-1x-zh/ch12.md) + + [十三、深度强化学习](docs/mastering-tf-1x-zh/ch13.md) + + [十四、生成对抗网络](docs/mastering-tf-1x-zh/ch14.md) + + [十五、TensorFlow 集群的分布式模型](docs/mastering-tf-1x-zh/ch15.md) + + [十六、移动和嵌入式平台上的 TensorFlow 模型](docs/mastering-tf-1x-zh/ch16.md) + + [十七、R 中的 TensorFlow 和 Keras](docs/mastering-tf-1x-zh/ch17.md) + + [十八、调试 TensorFlow 模型](docs/mastering-tf-1x-zh/ch18.md) + + [十九、张量处理单元](docs/mastering-tf-1x-zh/ch19.md) ++ [TensorFlow 机器学习秘籍中文第二版](docs/tf-ml-cookbook-2e-zh/README.md) + + [一、TensorFlow 入门](docs/tf-ml-cookbook-2e-zh/ch01.md) + + [二、TensorFlow 的方式](docs/tf-ml-cookbook-2e-zh/ch02.md) + + [三、线性回归](docs/tf-ml-cookbook-2e-zh/ch03.md) + + [四、支持向量机](docs/tf-ml-cookbook-2e-zh/ch04.md) + + [五、最近邻方法](docs/tf-ml-cookbook-2e-zh/ch05.md) + + [六、神经网络](docs/tf-ml-cookbook-2e-zh/ch06.md) + + [七、自然语言处理](docs/tf-ml-cookbook-2e-zh/ch07.md) + + [八、卷积神经网络](docs/tf-ml-cookbook-2e-zh/ch08.md) + + [九、循环神经网络](docs/tf-ml-cookbook-2e-zh/ch09.md) + + [十、将 TensorFlow 投入生产](docs/tf-ml-cookbook-2e-zh/ch10.md) + + [十一、更多 TensorFlow](docs/tf-ml-cookbook-2e-zh/ch11.md) ++ [与 TensorFlow 的初次接触](docs/first_contact_with_tensorFlow/README.md) + + [前言](docs/first_contact_with_tensorFlow/0.md) + + [1. TensorFlow 基础知识](docs/first_contact_with_tensorFlow/1.md) + + [2. TensorFlow 中的线性回归](docs/first_contact_with_tensorFlow/2.md) + + [3. TensorFlow 中的聚类](docs/first_contact_with_tensorFlow/3.md) + + [4. TensorFlow 中的单层神经网络](docs/first_contact_with_tensorFlow/4.md) + + [5. TensorFlow 中的多层神经网络](docs/first_contact_with_tensorFlow/5.md) + + [6. 
并行](docs/first_contact_with_tensorFlow/6.md) + + [后记](docs/first_contact_with_tensorFlow/7.md) ++ [TensorFlow 学习指南](docs/learning-tf-zh/README.md) + + [一、基础](docs/learning-tf-zh/1.md) + + [二、线性模型](docs/learning-tf-zh/2.md) + + [三、学习](docs/learning-tf-zh/3.md) + + [四、分布式](docs/learning-tf-zh/4.md) ++ [TensorFlow Rager 教程](docs/tf-eager-tut/README.md) + + [一、如何使用 TensorFlow Eager 构建简单的神经网络](docs/tf-eager-tut/1.md) + + [二、在 Eager 模式中使用指标](docs/tf-eager-tut/2.md) + + [三、如何保存和恢复训练模型](docs/tf-eager-tut/3.md) + + [四、文本序列到 TFRecords](docs/tf-eager-tut/4.md) + + [五、如何将原始图片数据转换为 TFRecords](docs/tf-eager-tut/5.md) + + [六、如何使用 TensorFlow Eager 从 TFRecords 批量读取数据](docs/tf-eager-tut/6.md) + + [七、使用 TensorFlow Eager 构建用于情感识别的卷积神经网络(CNN)](docs/tf-eager-tut/7.md) + + [八、用于 TensorFlow Eager 序列分类的动态循坏神经网络](docs/tf-eager-tut/8.md) + + [九、用于 TensorFlow Eager 时间序列回归的递归神经网络](docs/tf-eager-tut/9.md) ++ [TensorFlow 高效编程](docs/effective-tf.md) ++ [图嵌入综述:问题,技术与应用](docs/ge-survey-arxiv-1709-07604-zh/README.md) + + [一、引言](docs/ge-survey-arxiv-1709-07604-zh/1.md) + + [三、图嵌入的问题设定](docs/ge-survey-arxiv-1709-07604-zh/2.md) + + [四、图嵌入技术](docs/ge-survey-arxiv-1709-07604-zh/3.md) + + [基于边重构的优化问题](docs/ge-survey-arxiv-1709-07604-zh/4.md) + + [应用](docs/ge-survey-arxiv-1709-07604-zh/5.md) ++ [基于深度学习的推荐系统:综述和新视角](docs/rs-survey-arxiv-1707-07435-zh/README.md) + + [引言](docs/rs-survey-arxiv-1707-07435-zh/1.md) + + [基于深度学习的推荐:最先进的技术](docs/rs-survey-arxiv-1707-07435-zh/2.md) + + [基于卷积神经网络的推荐](docs/rs-survey-arxiv-1707-07435-zh/3.md) ++ [关于卷积神经网络我们理解了什么](docs/what-do-we-understand-about-convnet/README.md) + + [第1章概论](docs/what-do-we-understand-about-convnet/1.md) + + [第2章多层网络](docs/what-do-we-understand-about-convnet/2.1.1-2.1.3.md) + + [2.1.4生成对抗网络](docs/what-do-we-understand-about-convnet/2.1.4-2.1.6.md) + + [2.2.1最近ConvNets演变中的关键架构](docs/what-do-we-understand-about-convnet/2.2.1.md) + + [2.2.2走向ConvNet不变性](docs/what-do-we-understand-about-convnet/2.2.2-2.2.3.md) + + [2.3时空卷积网络](docs/what-do-we-understand-about-convnet/2.3-2.4.md) + + [第3章了解ConvNets构建块](docs/what-do-we-understand-about-convnet/3.1.md) + + [3.2整改](docs/what-do-we-understand-about-convnet/3.2.md) + + [3.3规范化](docs/what-do-we-understand-about-convnet/3.3.md) + + [3.4汇集](docs/what-do-we-understand-about-convnet/3.4-3.5.md) + + [第四章现状](docs/what-do-we-understand-about-convnet/4.1.md) + + [4.2打开问题](docs/what-do-we-understand-about-convnet/4.2.md) + + [参考](docs/what-do-we-understand-about-convnet/ref.md) ++ [机器学习超级复习笔记](docs/super-machine-learning-revision-notes/README.md) ++ [Python 迁移学习实用指南](docs/handson-tl-py/README.md) + + [零、前言](docs/handson-tl-py/0.md) + + [一、机器学习基础](docs/handson-tl-py/1.md) + + [二、深度学习基础](docs/handson-tl-py/2.md) + + [三、了解深度学习架构](docs/handson-tl-py/3.md) + + [四、迁移学习基础](docs/handson-tl-py/4.md) + + [五、释放迁移学习的力量](docs/handson-tl-py/5.md) + + [六、图像识别与分类](docs/handson-tl-py/6.md) + + [七、文本文件分类](docs/handson-tl-py/7.md) + + [八、音频事件识别与分类](docs/handson-tl-py/8.md) + + [九、DeepDream](docs/handson-tl-py/9.md) + + [十、自动图像字幕生成器](docs/handson-tl-py/10.md) + + [十一、图像着色](docs/handson-tl-py/11.md) ++ [面向计算机视觉的深度学习](docs/dl-cv/README.md) + + [零、前言](docs/dl-cv/00.md) + + [一、入门](docs/dl-cv/01.md) + + [二、图像分类](docs/dl-cv/02.md) + + [三、图像检索](docs/dl-cv/03.md) + + [四、对象检测](docs/dl-cv/04.md) + + [五、语义分割](docs/dl-cv/05.md) + + [六、相似性学习](docs/dl-cv/06.md) + + [七、图像字幕](docs/dl-cv/07.md) + + [八、生成模型](docs/dl-cv/08.md) + + [九、视频分类](docs/dl-cv/09.md) + + [十、部署](docs/dl-cv/10.md) ++ [深度学习快速参考](docs/dl-quick-ref/README.md) + + [零、前言](docs/dl-quick-ref/00.md) + + 
[一、深度学习的基础](docs/dl-quick-ref/01.md) + + [二、使用深度学习解决回归问题](docs/dl-quick-ref/02.md) + + [三、使用 TensorBoard 监控网络训练](docs/dl-quick-ref/03.md) + + [四、使用深度学习解决二分类问题](docs/dl-quick-ref/04.md) + + [五、使用 Keras 解决多分类问题](docs/dl-quick-ref/05.md) + + [六、超参数优化](docs/dl-quick-ref/06.md) + + [七、从头开始训练 CNN](docs/dl-quick-ref/07.md) + + [八、将预训练的 CNN 用于迁移学习](docs/dl-quick-ref/08.md) + + [九、从头开始训练 RNN](docs/dl-quick-ref/09.md) + + [十、使用词嵌入从头开始训练 LSTM](docs/dl-quick-ref/10.md) + + [十一、训练 Seq2Seq 模型](docs/dl-quick-ref/11.md) + + [十二、深度强化学习](docs/dl-quick-ref/12.md) + + [十三、生成对抗网络](docs/dl-quick-ref/13.md) ++ [TensorFlow 2.0 快速入门指南](docs/tf-20-quick-start-guide/README.md) + + [零、前言](docs/tf-20-quick-start-guide/00.md) + + [第 1 部分:TensorFlow 2.00 Alpha 简介](docs/tf-20-quick-start-guide/s1.md) + + [一、TensorFlow 2 简介](docs/tf-20-quick-start-guide/01.md) + + [二、Keras:TensorFlow 2 的高级 API](docs/tf-20-quick-start-guide/02.md) + + [三、TensorFlow 2 和 ANN 技术](docs/tf-20-quick-start-guide/03.md) + + [第 2 部分:TensorFlow 2.00 Alpha 中的监督和无监督学习](docs/tf-20-quick-start-guide/s2.md) + + [四、TensorFlow 2 和监督机器学习](docs/tf-20-quick-start-guide/04.md) + + [五、TensorFlow 2 和无监督学习](docs/tf-20-quick-start-guide/05.md) + + [第 3 部分:TensorFlow 2.00 Alpha 的神经网络应用](docs/tf-20-quick-start-guide/s3.md) + + [六、使用 TensorFlow 2 识别图像](docs/tf-20-quick-start-guide/06.md) + + [七、TensorFlow 2 和神经风格迁移](docs/tf-20-quick-start-guide/07.md) + + [八、TensorFlow 2 和循环神经网络](docs/tf-20-quick-start-guide/08.md) + + [九、TensorFlow 估计器和 TensorFlow HUB](docs/tf-20-quick-start-guide/09.md) + + [十、从 tf1.12 转换为 tf2](docs/tf-20-quick-start-guide/10.md) ++ [TensorFlow 入门](docs/get-start-tf/README.md) + + [零、前言](docs/get-start-tf/ch00.md) + + [一、TensorFlow 基本概念](docs/get-start-tf/ch01.md) + + [二、TensorFlow 数学运算](docs/get-start-tf/ch02.md) + + [三、机器学习入门](docs/get-start-tf/ch03.md) + + [四、神经网络简介](docs/get-start-tf/ch04.md) + + [五、深度学习](docs/get-start-tf/ch05.md) + + [六、TensorFlow GPU 编程和服务](docs/get-start-tf/ch06.md) ++ [TensorFlow 卷积神经网络实用指南](docs/handson-cnn-tf/README.md) + + [零、前言](docs/handson-cnn-tf/0.md) + + [一、TensorFlow 的设置和介绍](docs/handson-cnn-tf/1.md) + + [二、深度学习和卷积神经网络](docs/handson-cnn-tf/2.md) + + [三、TensorFlow 中的图像分类](docs/handson-cnn-tf/3.md) + + [四、目标检测与分割](docs/handson-cnn-tf/4.md) + + [五、VGG,Inception,ResNet 和 MobileNets](docs/handson-cnn-tf/5.md) + + [六、自编码器,变分自编码器和生成对抗网络](docs/handson-cnn-tf/6.md) + + [七、迁移学习](docs/handson-cnn-tf/7.md) + + [八、机器学习最佳实践和故障排除](docs/handson-cnn-tf/8.md) + + [九、大规模训练](docs/handson-cnn-tf/9.md) + + [十、参考文献](docs/handson-cnn-tf/10.md) ++ [Python 人工智能中文版](docs/ai-py/README.md) + + [0 前言](docs/ai-py/00.md) + + [1 人工智能简介](docs/ai-py/01.md) + + [2 人工智能的基本用例](docs/ai-py/02.md) + + [3 机器学习管道](docs/ai-py/03.md) + + [4 特征选择和特征工程](docs/ai-py/04.md) + + [5 使用监督学习的分类和回归](docs/ai-py/05.md) + + [6 集成学习的预测分析](docs/ai-py/06.md) + + [7 通过无监督学习检测模式](docs/ai-py/07.md) + + [8 构建推荐系统](docs/ai-py/08.md) + + [9 逻辑编程](docs/ai-py/09.md) + + [10 启发式搜索技术](docs/ai-py/10.md) + + [11 遗传算法和遗传编程](docs/ai-py/11.md) + + [12 云上的人工智能](docs/ai-py/12.md) + + [13 使用人工智能构建游戏](docs/ai-py/13.md) + + [14 构建语音识别器](docs/ai-py/14.md) + + [15 自然语言处理](docs/ai-py/15.md) + + [16 聊天机器人](docs/ai-py/16.md) + + [17 序列数据和时间序列分析](docs/ai-py/17.md) + + [18 图像识别](docs/ai-py/18.md) + + [19 神经网络](docs/ai-py/19.md) + + [20 将卷积神经网络用于深度学习](docs/ai-py/20.md) + + [21 循环神经网络和其他深度学习模型](docs/ai-py/21.md) + + [22 通过强化学习创建智能体](docs/ai-py/22.md) + + [23 人工智能和大数据](docs/ai-py/23.md) ++ [Python 无监督学习实用指南](docs/handson-unsup-learn-py/README.md) + + [零、前言](docs/handson-unsup-learn-py/00.md) + + 
[一、无监督学习入门](docs/handson-unsup-learn-py/01.md) + + [二、聚类基础](docs/handson-unsup-learn-py/02.md) + + [三、高级聚类](docs/handson-unsup-learn-py/03.md) + + [四、实用的层次聚类](docs/handson-unsup-learn-py/04.md) + + [五、软聚类和高斯混合模型](docs/handson-unsup-learn-py/05.md) + + [六、异常检测](docs/handson-unsup-learn-py/06.md) + + [七、降维和成分分析](docs/handson-unsup-learn-py/07.md) + + [八、无监督神经网络模型](docs/handson-unsup-learn-py/08.md) + + [九、生成对抗网络和 SOM](docs/handson-unsup-learn-py/09.md) + + [十、习题](docs/handson-unsup-learn-py/10.md) ++ [生成对抗网络项目](docs/gan-proj/README.md) + + [零、前言](docs/gan-proj/0.md) + + [一、生成对抗网络简介](docs/gan-proj/1.md) + + [二、3D-GAN -- 使用 GAN 生成形状](docs/gan-proj/2.md) + + [三、使用条件 GAN 进行人脸老化](docs/gan-proj/3.md) + + [四、使用 DCGAN 生成动漫角色](docs/gan-proj/4.md) + + [五、使用 SRGAN 生成逼真的图像](docs/gan-proj/5.md) + + [六、StackGAN - 逼真的文本到图像合成](docs/gan-proj/6.md) + + [七、CycleGAN - 将绘画变成照片](docs/gan-proj/7.md) + + [八、条件 GAN - 使用条件对抗网络的图像到图像翻译](docs/gan-proj/8.md) + + [九、预测 GAN 的未来](docs/gan-proj/9.md) ++ [TensorFlow 智能移动项目](docs/intel-mobi-proj-tf/README.md) + + [零、前言](docs/intel-mobi-proj-tf/00.md) + + [一、移动 TensorFlow 入门](docs/intel-mobi-proj-tf/01.md) + + [二、通过迁移学习对图像进行分类](docs/intel-mobi-proj-tf/02.md) + + [三、检测物体及其位置](docs/intel-mobi-proj-tf/03.md) + + [四、以惊人的艺术风格变换图片](docs/intel-mobi-proj-tf/04.md) + + [五、了解简单的语音命令](docs/intel-mobi-proj-tf/05.md) + + [六、用自然语言描述图像](docs/intel-mobi-proj-tf/06.md) + + [七、使用 CNN 和 LSTM 识别绘画](docs/intel-mobi-proj-tf/07.md) + + [八、用 RNN 预测股价](docs/intel-mobi-proj-tf/08.md) + + [九、使用 GAN 生成和增强图像](docs/intel-mobi-proj-tf/09.md) + + [十、构建类似 AlphaZero 的手机游戏应用](docs/intel-mobi-proj-tf/10.md) + + [十一、在移动设备上使用 TensorFlow Lite 和 Core ML](docs/intel-mobi-proj-tf/11.md) + + [十二、在 Raspberry Pi 上开发 TensorFlow 应用](docs/intel-mobi-proj-tf/12.md) ++ [TensorFlow 和 Keras 应用开发入门](docs/begin-app-dev-tf-keras/README.md) + + [零、前言](docs/begin-app-dev-tf-keras/0.md) + + [一、神经网络和深度学习简介](docs/begin-app-dev-tf-keras/1.md) + + [二、模型架构](docs/begin-app-dev-tf-keras/2.md) + + [三、模型评估和优化](docs/begin-app-dev-tf-keras/3.md) + + [四、产品化](docs/begin-app-dev-tf-keras/4.md) ++ [TensorFlow 图像深度学习实用指南](docs/handson-dl-img-tf/README.md) + + [零、前言](docs/handson-dl-img-tf/0.md) + + [一、机器学习工具包](docs/handson-dl-img-tf/1.md) + + [二、图片数据](docs/handson-dl-img-tf/2.md) + + [三、经典神经网络](docs/handson-dl-img-tf/3.md) ++ [Python 元学习实用指南](docs/handson-meta-learn-py/README.md) + + [零、前言](docs/handson-meta-learn-py/00.md) + + [一、元学习导论](docs/handson-meta-learn-py/01.md) + + [二、使用连体网络的人脸和音频识别](docs/handson-meta-learn-py/02.md) + + [三、原型网络及其变体](docs/handson-meta-learn-py/03.md) + + [四、使用 TensorFlow 的关系和匹配网络](docs/handson-meta-learn-py/04.md) + + [五、记忆增强神经网络](docs/handson-meta-learn-py/05.md) + + [六、MAML 及其变体](docs/handson-meta-learn-py/06.md) + + [七、元 SGD 和 Reptile](docs/handson-meta-learn-py/07.md) + + [八、作为优化目标的梯度一致性](docs/handson-meta-learn-py/08.md) + + [九、最新进展和后续步骤](docs/handson-meta-learn-py/09.md) + + [十、答案](docs/handson-meta-learn-py/10.md) ++ [Python 强化学习实用指南](docs/handson-rl-py/README.md) + + [零、前言](docs/handson-rl-py/00.md) + + [一、强化学习导论](docs/handson-rl-py/01.md) + + [二、OpenAI 和 TensorFlow 入门](docs/handson-rl-py/02.md) + + [三、马尔可夫决策过程与动态规划](docs/handson-rl-py/03.md) + + [四、用于游戏的蒙特卡洛方法](docs/handson-rl-py/04.md) + + [五、时间差异学习](docs/handson-rl-py/05.md) + + [六、多臂老虎机问题](docs/handson-rl-py/06.md) + + [七、深度学习基础](docs/handson-rl-py/07.md) + + [八、深度 Q 网络和 Atari 游戏](docs/handson-rl-py/08.md) + + [九、用深度循环 Q 网络玩《毁灭战士》](docs/handson-rl-py/09.md) + + [十、异步优势演员评论家网络](docs/handson-rl-py/10.md) + + [十一、策略梯度和优化](docs/handson-rl-py/11.md) + + [十二、Capstone 
项目 – 将 DQN 用于赛车](docs/handson-rl-py/12.md) + + [十三、最新进展和后续步骤](docs/handson-rl-py/13.md) + + [十四、答案](docs/handson-rl-py/14.md) ++ [Python 智能项目](docs/intel-proj-py/README.md) + + [零、前言](docs/intel-proj-py/00.md) + + [一、人工智能系统的基础](docs/intel-proj-py/01.md) + + [二、迁移学习](docs/intel-proj-py/02.md) + + [三、神经机器翻译](docs/intel-proj-py/03.md) + + [四、使用 GAN 的时尚行业样式迁移](docs/intel-proj-py/04.md) + + [五、视频字幕应用](docs/intel-proj-py/05.md) + + [六、智能推荐系统](docs/intel-proj-py/06.md) + + [七、电影评论情感分析移动应用](docs/intel-proj-py/07.md) + + [八、用于客户服务的会话式 AI 聊天机器人](docs/intel-proj-py/08.md) + + [九、使用强化学习的自主无人驾驶汽车](docs/intel-proj-py/09.md) + + [十、深度学习视角的验证码](docs/intel-proj-py/10.md) ++ [精通 Sklearn 和 TensorFlow 预测性分析](docs/master-pred-anal-sklearn-tf/README.md) + + [零、前言](docs/master-pred-anal-sklearn-tf/0.md) + + [一、回归和分类的集成方法](docs/master-pred-anal-sklearn-tf/1.md) + + [二、交叉验证和参数调整](docs/master-pred-anal-sklearn-tf/2.md) + + [三、使用特征](docs/master-pred-anal-sklearn-tf/3.md) + + [四、人工神经网络和 TensorFlow 简介](docs/master-pred-anal-sklearn-tf/4.md) + + [五、将 TensorFlow 和深度神经网络用于预测分析](docs/master-pred-anal-sklearn-tf/5.md) ++ [TensorFlow 2.0 的新增功能](docs/whats-new-tf2/README.md) + + [零、前言](docs/whats-new-tf2/0.md) + + [第 1 部分:TensorFlow 2.0 - 架构和 API 更改](docs/whats-new-tf2/pt1.md) + + [一、TensorFlow 2.0 入门](docs/whats-new-tf2/1.md) + + [二、Keras 默认集成和急切执行](docs/whats-new-tf2/2.md) + + [第 2 部分:TensorFlow 2.0 - 数据和模型训练管道](docs/whats-new-tf2/pt2.md) + + [三、设计和构建输入数据管道](docs/whats-new-tf2/3.md) + + [四、TensorBoard 的模型训练和使用](docs/whats-new-tf2/4.md) + + [第 3 部分:TensorFlow 2.0 - 模型推断和部署以及 AIY](docs/whats-new-tf2/pt3.md) + + [五、模型推理管道 - 多平台部署](docs/whats-new-tf2/5.md) + + [六、AIY 项目和 TensorFlow Lite](docs/whats-new-tf2/6.md) + + [第 4 部分:TensorFlow 2.0 - 迁移,总结](docs/whats-new-tf2/pt4.md) + + [七、从 TensorFlow 1.x 迁移到 2.0](docs/whats-new-tf2/7.md) ++ [UCB CS294-112 深度强化学习中文笔记](docs/ucb-cs294-112-notes-zh/README.md) + + [(1) 简介](docs/ucb-cs294-112-notes-zh/1.md) + + [(2) 模仿学习](docs/ucb-cs294-112-notes-zh/2.md) + + [(3) 增强学习简介](docs/ucb-cs294-112-notes-zh/3.md) + + [(4) 策略梯度法](docs/ucb-cs294-112-notes-zh/4.md) + + [(5) 演员-评论家算法](docs/ucb-cs294-112-notes-zh/5.md) + + [(6) 基于值函数的方法](docs/ucb-cs294-112-notes-zh/6.md) + + [(7) 深度增强学习中的 Q 学习方法](docs/ucb-cs294-112-notes-zh/7.md) + + [(8) 最优控制与规划](docs/ucb-cs294-112-notes-zh/8.md) + + [(9) 用数据拟合模型](docs/ucb-cs294-112-notes-zh/9.md) + + [(10) 基于模型的增强学习的策略训练](docs/ucb-cs294-112-notes-zh/10.md) + + [(11) 概率图模型与软化增强学习](docs/ucb-cs294-112-notes-zh/11.md) + + [(12) 逆增强学习](docs/ucb-cs294-112-notes-zh/12.md) ++ [TensorFlow 2 和 Keras 高级深度学习](docs/adv-dl-tf2-keras/README.md) + + [零、前言](docs/adv-dl-tf2-keras/00.md) + + [一、使用 Keras 入门高级深度学习](docs/adv-dl-tf2-keras/01.md) + + [二、深度神经网络](docs/adv-dl-tf2-keras/02.md) + + [三、自编码器](docs/adv-dl-tf2-keras/03.md) + + [四、生成对抗网络(GAN)](docs/adv-dl-tf2-keras/04.md) + + [五、改进的 GAN](docs/adv-dl-tf2-keras/05.md) + + [六、纠缠表示 GAN](docs/adv-dl-tf2-keras/06.md) + + [七、跨域 GAN](docs/adv-dl-tf2-keras/07.md) + + [八、变分自编码器(VAE)](docs/adv-dl-tf2-keras/08.md) + + [九、深度强化学习](docs/adv-dl-tf2-keras/09.md) + + [十、策略梯度方法](docs/adv-dl-tf2-keras/10.md) + + [十一、对象检测](docs/adv-dl-tf2-keras/11.md) + + [十二、语义分割](docs/adv-dl-tf2-keras/12.md) + + [十三、使用互信息的无监督学习](docs/adv-dl-tf2-keras/13.md) ++ [GCP 上的人工智能实用指南](docs/handson-ai-gcp/README.md) + + [零、前言](docs/handson-ai-gcp/00.md) + + [第 1 节:Google Cloud Platform 的基础](docs/handson-ai-gcp/sec1.md) + + [一、AI 和 GCP 概述](docs/handson-ai-gcp/01.md) + + [二、使用 GCP 组件的计算和处理](docs/handson-ai-gcp/02.md) + + [第 2 节:使用 Google Cloud Platform 
的人工智能](docs/handson-ai-gcp/sec2.md) + + [三、XGBoost 的机器学习应用](docs/handson-ai-gcp/03.md) + + [四、使用 Cloud AutoML](docs/handson-ai-gcp/04.md) + + [五、构建大数据云机器学习引擎](docs/handson-ai-gcp/05.md) + + [六、使用 DialogFlow 的智能对话应用](docs/handson-ai-gcp/06.md) + + [第 3 节:Google Cloud Platform 上的 TensorFlow](docs/handson-ai-gcp/sec3.md) + + [七、了解云 TPU](docs/handson-ai-gcp/07.md) + + [八、使用 Cloud ML Engine 实现 TensorFlow 模型](docs/handson-ai-gcp/08.md) + + [九、构建预测应用](docs/handson-ai-gcp/09.md) + + [第 4 节:构建应用和即将发布的功能](docs/handson-ai-gcp/sec4.md) + + [十、构建一个 AI 应用](docs/handson-ai-gcp/10.md) ++ [Python 深度学习架构实用指南](docs/handson-dl-arch-py/README.md) + + [零、前言](docs/handson-dl-arch-py/0.md) + + [第 1 节:深度学习的元素](docs/handson-dl-arch-py/sec1.md) + + [一、深度学习入门](docs/handson-dl-arch-py/1.md) + + [二、深度前馈网络](docs/handson-dl-arch-py/2.md) + + [三、受限玻尔兹曼机和自编码器](docs/handson-dl-arch-py/3.md) + + [第 2 节:卷积神经网络](docs/handson-dl-arch-py/sec2.md) + + [四、CNN 架构](docs/handson-dl-arch-py/4.md) + + [五、移动神经网络和 CNN](docs/handson-dl-arch-py/5.md) + + [第 3 节:序列建模](docs/handson-dl-arch-py/sec3.md) + + [六、循环神经网络](docs/handson-dl-arch-py/6.md) + + [第 4 节:生成对抗网络(GAN)](docs/handson-dl-arch-py/sec4.md) + + [七、生成对抗网络](docs/handson-dl-arch-py/7.md) + + [第 5 节:深度学习和高级人工智能的未来](docs/handson-dl-arch-py/sec5.md) + + [八、深度学习的新趋势](docs/handson-dl-arch-py/8.md) ++ [Python Web 深度学习实用指南](docs/handson-py-dl-web/README.md) + + [零、前言](docs/handson-py-dl-web/00.md) + + [第 1 节:Web 人工智能](docs/handson-py-dl-web/sec1.md) + + [一、揭秘人工智能和机器学习基础](docs/handson-py-dl-web/01.md) + + [第 2 节:使用深度学习的 Web 开发](docs/handson-py-dl-web/sec2.md) + + [二、使用 Python 入门深度学习](docs/handson-py-dl-web/02.md) + + [三、创建您的第一个深度学习 Web 应用](docs/handson-py-dl-web/03.md) + + [四、TensorFlow.js 入门](docs/handson-py-dl-web/04.md) + + [第 3 节:用于 Web 开发的不同深度学习 API 入门](docs/handson-py-dl-web/sec3.md) + + [五、通过 API 进行深度学习](docs/handson-py-dl-web/05.md) + + [六、Google Cloud Platform 上的 Python 深度学习](docs/handson-py-dl-web/06.md) + + [七、AWS 上的 Python DL:对象检测和家庭自动化](docs/handson-py-dl-web/07.md) + + [八、Microsoft Azure 上的 Python 深度学习](docs/handson-py-dl-web/08.md) + + [第 4 节:生产中的深度学习(智能 Web 应用)](docs/handson-py-dl-web/sec4.md) + + [九、启用深度学习的网站的通用生产框架](docs/handson-py-dl-web/09.md) + + [十、通过深度学习保护 Web 应用安全](docs/handson-py-dl-web/10.md) + + [十一、DIY - Web DL 生产环境](docs/handson-py-dl-web/11.md) + + [十二、使用 DL API 和客户支持聊天机器人创建 E2E Web 应用](docs/handson-py-dl-web/12.md) + + [十三、附录:Web 深度学习的成功案例和新兴领域](docs/handson-py-dl-web/13.md) ++ [精通 TensorFlow 2.x 计算机视觉](docs/master-cv-tf-2x/README.md) + + [零、前言](docs/master-cv-tf-2x/0.md) + + [第 1 节:计算机视觉和神经网络概论](docs/master-cv-tf-2x/sec1.md) + + [一、计算机视觉和 TensorFlow 基础知识](docs/master-cv-tf-2x/1.md) + + [二、使用局部二进制模式的内容识别](docs/master-cv-tf-2x/2.md) + + [三、使用 OpenCV 和 CNN 的人脸检测](docs/master-cv-tf-2x/3.md) + + [四、用于图像的深度学习](docs/master-cv-tf-2x/4.md) + + [第 2 节:使用 TensorFlow 的计算机视觉高级概念](docs/master-cv-tf-2x/sec2.md) + + [五、神经网络架构和模型](docs/master-cv-tf-2x/5.md) + + [六、使用迁移学习的视觉搜索](docs/master-cv-tf-2x/6.md) + + [七、YOLO 对象检测](docs/master-cv-tf-2x/7.md) + + [八、语义分割与神经样式迁移](docs/master-cv-tf-2x/8.md) + + [第 3 节:使用 TensorFlow 的计算机视觉的高级实现](docs/master-cv-tf-2x/sec3.md) + + [九、使用多任务深度学习的动作识别](docs/master-cv-tf-2x/9.md) + + [十、R-CNN,SSD 和 R-FCN 对象检测](docs/master-cv-tf-2x/10.md) + + [第 4 节:边缘和云端的 TensorFlow 实现](docs/master-cv-tf-2x/sec4.md) + + [十一、带有 CPU/GPU 优化的边缘设备上的深度学习](docs/master-cv-tf-2x/11.md) + + [十二、用于计算机视觉的云计算平台](docs/master-cv-tf-2x/12.md) ++ [TensorFlow Lite,ML Kit 和 Flutter 移动深度学习](docs/mobi-dl-tflite/README.md) + + [零、前言](docs/mobi-dl-tflite/00.md) + + 
[一、移动深度学习简介](docs/mobi-dl-tflite/01.md) + + [二、移动视觉 - 使用设备上的模型的人脸检测](docs/mobi-dl-tflite/02.md) + + [三、使用 Google Action 的聊天机器人](docs/mobi-dl-tflite/03.md) + + [四、认识植物种类](docs/mobi-dl-tflite/04.md) + + [五、从摄像机源生成实时字幕](docs/mobi-dl-tflite/05.md) + + [六、构建人工智能认证系统](docs/mobi-dl-tflite/06.md) + + [七、语音/多媒体处理 - 使用 AI 生成音乐](docs/mobi-dl-tflite/07.md) + + [八、基于强化神经网络的国际象棋引擎](docs/mobi-dl-tflite/08.md) + + [九、构建图像超分辨率应用](docs/mobi-dl-tflite/09.md) + + [十、前方的路](docs/mobi-dl-tflite/10.md) + + [十一、附录](docs/mobi-dl-tflite/11.md) ++ [PyTorch 人工智能研讨会](docs/dl-pt-workshop/README.md) + + [零、前言](docs/dl-pt-workshop/0.md) + + [一、深度学习和 PyTorch 简介](docs/dl-pt-workshop/1.md) + + [二、神经网络的构建块](docs/dl-pt-workshop/2.md) + + [三、使用 DNN 的分类问题](docs/dl-pt-workshop/3.md) + + [四、卷积神经网络](docs/dl-pt-workshop/4.md) + + [五、样式迁移](docs/dl-pt-workshop/5.md) + + [六、使用 RNN 分析数据序列](docs/dl-pt-workshop/6.md) + + [七、附录](docs/dl-pt-workshop/7.md) ++ [Python 一次学习实用指南](docs/handson-1shot-learn-py/README.md) + + [零、前言](docs/handson-1shot-learn-py/0.md) + + [第一部分:一次学习简介](docs/handson-1shot-learn-py/sec1.md) + + [一、一次学习简介](docs/handson-1shot-learn-py/1.md) + + [第二部分:深度学习架构](docs/handson-1shot-learn-py/sec2.md) + + [二、基于指标的方法](docs/handson-1shot-learn-py/2.md) + + [三、基于模型的方法](docs/handson-1shot-learn-py/3.md) + + [四、基于优化的方法](docs/handson-1shot-learn-py/4.md) + + [第三部分:其他方法和结论](docs/handson-1shot-learn-py/sec3.md) + + [五、基于生成建模的方法](docs/handson-1shot-learn-py/5.md) + + [六、总结和其他方法](docs/handson-1shot-learn-py/6.md) ++ [Python 自然语言处理实用指南](docs/handson-nlp-pt-1x/README.md) + + [零、前言](docs/handson-nlp-pt-1x/0.md) + + [第一部分:用于 NLP 的 PyTorch 1.x 的要点](docs/handson-nlp-pt-1x/sec1.md) + + [一、机器学习和深度学习的基础](docs/handson-nlp-pt-1x/1.md) + + [二、用于 NLP 的 PyTorch 1.x 入门](docs/handson-nlp-pt-1x/2.md) + + [第二部分:自然语言处理基础](docs/handson-nlp-pt-1x/sec2.md) + + [三、NLP 和文本嵌入](docs/handson-nlp-pt-1x/3.md) + + [四、文本预处理,词干提取和词形还原](docs/handson-nlp-pt-1x/4.md) + + [第三部分:使用 PyTorch 1.x 的实际 NLP 应用](docs/handson-nlp-pt-1x/sec3.md) + + [五、循环神经网络和情感分析](docs/handson-nlp-pt-1x/5.md) + + [六、用于文本分类的卷积神经网络](docs/handson-nlp-pt-1x/6.md) + + [七、使用序列到序列神经网络的文本翻译](docs/handson-nlp-pt-1x/7.md) + + [八、使用基于注意力的神经网络构建聊天机器人](docs/handson-nlp-pt-1x/8.md) + + [九、前方的路](docs/handson-nlp-pt-1x/9.md) ++ [PyTorch 人工智能基础知识](docs/pt-ai-fund/README.md) + + [零、前言](docs/pt-ai-fund/0.md) + + [一、使用 PyTorch 使用张量](docs/pt-ai-fund/1.md) + + [二、与神经网络协作](docs/pt-ai-fund/2.md) + + [三、用于计算机视觉的卷积神经网络](docs/pt-ai-fund/3.md) + + [四、用于 NLP 的循环神经网络](docs/pt-ai-fund/4.md) + + [五、迁移学习和 TensorBoard](docs/pt-ai-fund/5.md) + + [六、探索生成对抗网络](docs/pt-ai-fund/6.md) + + [七、深度强化学习](docs/pt-ai-fund/7.md) + + [八、在 PyTorch 中生产 AI 模型](docs/pt-ai-fund/8.md) ++ [PyTorch 深度学习实用指南](docs/pt-dl-handson/README.md) + + [零、前言](docs/pt-dl-handson/0.md) + + [一、深度学习演练和 PyTorch 简介](docs/pt-dl-handson/1.md) + + [二、简单的神经网络](docs/pt-dl-handson/2.md) + + [三、深度学习工作流程](docs/pt-dl-handson/3.md) + + [四、计算机视觉](docs/pt-dl-handson/4.md) + + [五、序列数据处理](docs/pt-dl-handson/5.md) + + [六、生成网络](docs/pt-dl-handson/6.md) + + [七、强化学习](docs/pt-dl-handson/7.md) + + [八、生产中的 PyTorch ](docs/pt-dl-handson/8.md) ++ [TensorFlow 强化学习](docs/rl-tf/README.md) + + [零、前言](docs/rl-tf/00.md) + + [一、深度学习–架构和框架](docs/rl-tf/01.md) + + [二、使用 OpenAI Gym 训练强化学习智能体](docs/rl-tf/02.md) + + [三、马尔可夫决策过程](docs/rl-tf/03.md) + + [四、策略梯度](docs/rl-tf/04.md) + + [五、Q 学习和深度 Q 网络](docs/rl-tf/05.md) + + [六、异步方法](docs/rl-tf/06.md) + + [七、一切都是机器人-真正的战略游戏](docs/rl-tf/07.md) + + [八、AlphaGo –最好的强化学习](docs/rl-tf/08.md) + + [九、自动驾驶中的强化学习](docs/rl-tf/09.md) + + [十、金融投资组合管理](docs/rl-tf/10.md) + + 
[十一、机器人技术中的强化学习](docs/rl-tf/11.md) + + [十二、广告技术中的深度强化学习](docs/rl-tf/12.md) + + [十三、图像处理中的强化学习](docs/rl-tf/13.md) + + [十四、NLP 中的深度强化学习](docs/rl-tf/14.md) + + [十五、强化学习的其他主题](docs/rl-tf/15.md) diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/00.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/00.md new file mode 100644 index 00000000..9d3c83b6 --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/00.md @@ -0,0 +1,137 @@ +# 零、前言 + +近年来,深度学习在视觉,语音,自然语言处理和理解以及所有其他领域的大量数据难题中取得了前所未有的成功案例。 公司,大学,政府和研究组织对该领域的兴趣加速了该领域的发展。 本书通过三个新的章节介绍了深度学习中的重要主题:“对象检测”,“语义分割”和“使用互信息的无监督学习”。 通过提供原理的背景知识,挖掘概念背后的直觉,使用 Keras 实现方程式和算法以及检查结果来解释高级理论。 + +**人工智能**(**AI**)到今天为止还远远不是一个易于理解的领域。 **深度学习**(**DL**)作为 AI 的子字段,处于相同位置。 尽管它还不是一个成熟的领域,但许多现实世界的应用,例如基于视觉的检测和识别,自主导航,产品推荐,语音识别和合成,节能,药物发现,财务和营销,已经在使用 DL 算法。 。 将发现并构建更多应用。 本书的目的是解释高级概念,提供示例实现,并让作为其领域专家的读者识别目标应用。 + +尚未完全成熟的领域是一把双刃剑。 一方面,它为发现和利用提供了很多机会。 深度学习中有许多未解决的问题。 这就意味着可以抢先进入市场的机会–无论是在产品开发,发布还是认可方面。 另一个优势是,在关键任务环境中很难信任一个尚未被完全理解的领域。 我们可以肯定地说,如果被问到,很少有机器学习工程师会乘坐由深度学习系统控制的自动驾驶飞机。 要获得这种信任级别,需要做很多工作。 本书中讨论的高级概念很有可能在获得这种信任级别中扮演重要角色。 + +没有 DL 书能够完全涵盖整个领域。 这本书也不例外。 给定时间和空间,我们可能会涉及到有趣的领域,例如自然语言处理和理解,语音合成,自动机器学习(AutoML),图神经网络(GNN),贝叶斯深度学习等等。 但是,本书相信选择和解释选定的区域,以便读者可以从事其他未涵盖的领域。 + +作为即将着手阅读本书的读者,请记住,您选择的是一个令人兴奋的领域,会对社会产生巨大影响。 我们很幸运能有一份工作,希望我们在早晨醒来时继续努力。 + +# 这本书是给谁的 + +本书面向希望更好地了解深度学习高级主题的机器学习工程师和学生。 每个讨论都通过 Keras 中的代码实现进行了补充。 特别是,使用的是 TensorFlow 2 的 Keras API 或简称为`tf.keras`。这本书适合希望了解如何将理论转化为 Keras 中的工作代码实现的读者。 除了理解理论外,代码实现通常是将机器学习应用于实际问题的艰巨任务之一。 + +# 本书涵盖的内容 + +“第 1 章”,“Keras 高级深度学习入门”涵盖了深度学习的关键概念,例如优化,正则化,损失函数,基本层和网络及其在`tf.keras`中的实现 。 本章回顾了使用顺序 API 的深度学习和`tf.keras`。 + +“第 2 章”,“深度神经网络”讨论了`tf.keras`的函数式 API。 使用函数式 API 在`tf.keras`中检查并实现了两种广泛使用的深度网络架构 ResNet 和 DenseNet。 + +“第 3 章”,“自编码器”涵盖了一种称为自编码器的通用网络结构,该结构用于发现输入数据的潜在表示形式。 `tf.keras`中讨论并实现了自编码器的两个示例应用,即降噪和着色。 + +“第 4 章”,“生成对抗网络(GANs)”讨论了深度学习的最新重大进展之一。 GAN 用于生成看起来真实的新综合数据。 本章介绍 GAN 的原理。 在`tf.keras`中检查并实现了 GAN 的两个示例 DCGAN 和 CGAN。 + +“第 5 章”,“改进的 GAN” 涵盖了改进基本 GAN 的算法。 该算法解决了训练 GAN 的困难,并提高了合成数据的感知质量。 在`tf.keras`中讨论并实现了 WGAN,LSGAN 和 ACGAN。 + +“第 6 章”,“纠缠表示 GAN” 讨论了如何控制 GAN 生成的合成数据的属性。 如果潜在表示被解开,则可以控制属性。 `tf.keras`中介绍了并实现了两种解开表示的技术,即 InfoGAN 和 StackedGAN。 + +“第 7 章”,“跨域 GAN” 涵盖了 GAN 的实际应用,将图像从一个域转换为另一个域,通常称为跨域迁移。 CycleGAN 是一种广泛使用的跨域 GAN,在`tf.keras`中进行了讨论和实现。 本章演示 CycleGAN 执行着色和样式迁移。 + +“第 8 章”,“变分自编码器(VAE)”讨论了 DL 中的另一个重要主题。 与 GAN 类似,VAE 是用于生成综合数据的生成模型。 与 GAN 不同,VAE 专注于可解码的连续潜空间,该空间适合于变化推理。 `tf.keras`涵盖并实现了 VAE 及其变体 CVAE 和 β-VAE。 + +“第 9 章”,“深度强化学习”解释了强化学习和 Q 学习的原理。 提出了两种实现离散动作空间 Q 学习的技术,即 Q 表更新和**深度 Q 网络**(**DQN**)。 在 OpenAI Gym 环境中演示了在`tf.keras`中使用 Python 和 DQN 进行 Q 学习的实现。 + +“第 10 章”,“策略梯度方法”解释了如何使用神经网络来学习强化学习中的决策策略。 在`tf.keras`和 OpenAI Gym 环境中涵盖并实现了四种方法,即 REINFORCE,带有基线的 REINFORCE,演员评论家和优势演员评论家。 本章中的示例演示了连续操作空间上的策略梯度方法。 + +“第 11 章”,“对象检测”讨论了计算机视觉,对象检测或识别和定位图像中对象的最常见应用之一。 涵盖了称为 SSD 的多尺度目标检测算法的关键概念,并使用`tf.keras`逐步构建了实现。 提出了用于数据集收集和标记的示例技术。 之后,使用数据集对 SSD 的`tf.keras`实现进行训练和评估。 + +“第 12 章”,“语义分割”讨论了计算机视觉,语义分割或识别图像中每个像素的对象类别的另一种常见应用。 讨论了分割原理。 然后,将更详细地介绍语义分割。 使用`tf.keras`构建并评估了称为 FCN 的语义分割算法的示例实现。 使用上一章中收集的相同数据集,但重新标记了语义分割。 + +“第 13 章”,“使用互信息的无监督学习”研究了如果 DL 严重依赖人类标签,它将不会继续发展。 无监督学习侧重于不需要人工标签的算法。 一种实现无监督学习的有效技术是利用**互信息**(**MI**)的概念。 通过最大化 MI,可以使用`tf.keras`实现和评估无监督的聚类/分类。 + +# 充分利用这本书 + +* **深度学习和 Python**:读者应该具有深度学习及其在 Python 中的实现的基础知识。 尽管以前使用 Keras 实现深度学习算法的经验很重要,但这不是必需的。“第 1 章”, “Keras 高级深度学习入门”概述了深度学习的概念及其在`tf.keras`中的实现。 +* **数学**:本书中的讨论假定读者熟悉大学级别的微积分,线性代数,统计和概率。 +* **GPU**:本书中的大多数`tf.keras`实现都需要 GPU。 如果没有 GPU,则由于涉及的时间(数小时至数天),因此无法执行许多代码示例。 本书中的示例尽可能多地使用合理数量的数据,以最大程度地减少高性能计算机的使用。 读者应该至少可以使用 NVIDIA GTX 1060。 +* 
**编辑器**:本书的示例代码是在 Ubuntu Linux 18.04 LTS 和 MacOS Catalina 中使用 vim 编辑的。 任何支持 Python 的文本编辑器都是可以接受的。 +* **TensorFlow 2**:本书中的代码示例是使用 TensorFlow 2 的 Keras API 或`tf2`编写的。 请确保正确安装了 NVIDIA GPU 驱动和`tf2`。 +* **GitHub**:我们通过示例和实验学习。 请从其 GitHub 存储库中`git pull`或`fork`这本书的代码包。 获取代码后,对其进行检查。 运行。 更改。 再次运行。 通过调整代码进行创造性的实验。 这是欣赏本章中解释的所有理论的唯一方法。 在[此书的 GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)上点击星星也受到高度赞赏。 + +## 下载示例代码文件 + +[本书的代码包托管在 GitHub 上](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)。 + +我们还从[这里](https://github.com/PacktPublishing/)提供了丰富的书籍和视频目录中的其他代码包。 去看一下! + +## 下载彩色图像 + +我们还提供本书中使用的彩色图像图像。 [您可以在此处下载](https://static.packt-cdn.com/downloads/9787838821654_ColorImages.pdf)。 + +## 使用约定 + +本书中的代码使用 Python。 更具体地说,是 Python3。例如: + +代码块设置如下: + +```py +def build_generator(inputs, image_size): + """Build a Generator Model + Stack of BN-ReLU-Conv2DTranpose to generate fake images + Output activation is sigmoid instead of tanh in [1]. + Sigmoid converges easily. + Arguments: + inputs (Layer): Input layer of the generator + the z-vector) + image_size (tensor): Target size of one side + (assuming square image) + Returns: + generator (Model): Generator Model + """ + image_resize = image_size // 4 + # network parameters + kernel_size = 5 + layer_filters = [128, 64, 32, 1] + x = Dense(image_resize * image_resize * layer_filters[0])(inputs) + x = Reshape((image_resize, image_resize, layer_filters[0]))(x) + for filters in layer_filters: + # first two convolution layers use strides = 2 + # the last two use strides = 1 + if filters > layer_filters[-2]: + strides = 2 + else: + strides = 1 + x = BatchNormalization()(x) + x = Activation('relu')(x) + x = Conv2DTranspose(filters=filters, + kernel_size=kernel_size, + strides=strides, + padding='same')(x) + x = Activation('sigmoid')(x) + generator = Model(inputs, x, name='generator') + return generator +``` + +当我们希望提请您注意代码块的特定部分时,相关的行或项以粗体显示: + +```py +# generate fake images +fake_images = generator.predict([noise, fake_labels]) +# real + fake images = 1 batch of train data +x = np.concatenate((real_images, fake_images)) +# real + fake labels = 1 batch of train data labels +labels = np.concatenate((real_labels, fake_labels)) +``` + +只要有可能,都包括文档字符串。 至少,文本注释用于最小化空间使用。 + +任何命令行代码执行都编写如下: + +```py +python3 dcgan-mnist-4.2.1.py +``` + +上面的示例具有以下布局:`algorithm-dataset-chapter.section.number.py`。 命令行示例是“第 4 章”,“生成对抗网络(GANs)”第二部分和第一列表中 MNIST 数据集上的 DCGAN。 在某些情况下,未编写要执行的显式命令行,但假定是: + +```py +python3 name-of-the-file-in-listing +``` + +该代码示例的文件名包含在“列表”标题中。 本书使用“列表”标识文本中的代码示例。 + +**粗体**:表示新的术语,重要的单词或您在屏幕上看到的单词,例如在菜单或对话框中,也显示在这样的文本中。 例如:StackedGAN 具有两个附加损失函数,即**条件**和**熵**。 + +警告或重要提示如下所示。 diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/01.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/01.md new file mode 100644 index 00000000..458a837b --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/01.md @@ -0,0 +1,934 @@ +# 一、使用 Keras 入门高级深度学习 + +在第一章中,我们将介绍在本书中将使用的三个深度学习人工神经网络。 这些网络是 MLP,CNN 和 RNN(在第 2 节中定义和描述),它们是本书涵盖的所选高级深度学习主题的构建块,例如自回归网络(自编码器,GAN 和 VAE),深度强化学习 ,对象检测和分割以及使用互信息的无监督学习。 + +在本章中,我们将一起讨论如何使用 Keras 库实现基于 MLP,CNN 和 RNN 的模型。 更具体地说,我们将使用名为`tf.keras`的 TensorFlow Keras 库。 我们将首先探讨为什么`tf.keras`是我们的理想选择。 接下来,我们将深入研究三个深度学习网络中的实现细节。 + +本章将: + +* 确定为什么`tf.keras`库是进行高级深度学习的绝佳选择 +* 介绍 MLP,CNN 和 RNN –高级深度学习模型的核心构建模块,我们将在本书中使用它们 +* 提供有关如何使用`tf.keras`实现基于 MLP,CNN 和 RNN 的模型的示例 +* 在此过程中,开始引入重要的深度学习概念,包括优化,正则化和损失函数 + +在本章结束时,我们将使用`tf.keras`实现基本的深度学习网络。 在下一章中,我们将介绍基于这些基础的高级深度学习主题。 让我们通过讨论 Keras 及其作为深度学习库的功能来开始本章。 + 
+# 1\. Keras 为什么是完美的深度学习库? + +Keras [1]是一个受欢迎的深度学习库,在撰写本文时有 370,000 个开发人员在使用它-这个数字每年以大约 35% 的速度增长。 超过 800 位贡献者积极维护它。 我们将在本书中使用的一些示例已添加到 Keras GitHub 官方存储库中。 + +谷歌的 TensorFlow 是一个流行的开源深度学习库,它使用 Keras 作为其库的高级 API。 通常称为`tf.keras`。 在本书中,我们将交替使用 Keras 和`tf.keras`一词。 + +`tf.keras`作为深度学习库是一种流行的选择,因为它已高度集成到 TensorFlow 中,TensorFlow 因其可靠性而在生产部署中广为人知。 TensorFlow 还提供了各种工具,用于生产部署和维护,调试和可视化以及在嵌入式设备和浏览器上运行模型。 在技​​术行业中,Google,Netflix,Uber 和 NVIDIA 使用 Keras。 + +我们选择`tf.keras`作为本书的首选工具,因为它是致力于加速深度学习模型实现的库。 这使得 Keras 非常适合我们想要实用且动手的时候,例如,当我们探索本书中的高级深度学习概念时。 由于 Keras 旨在加速深度学习模型的开发,训练和验证,因此在有人可以最大限度地利用库之前,必须学习该领域的关键概念。 + +[本书的所有示例都可以在 GitHub 的以下链接上找到](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)。 + +在`tf.keras`库中,各层之间就像乐高积木一样相互连接,从而形成了一个干净且易于理解的模型。 模型训练非常简单,只需要数据,大量训练和监控指标即可。 + +最终结果是,与其他深度学习库(例如 PyTorch)相比,大多数深度学习模型可以用更少的代码行来实现。 通过使用 Keras,我们将通过节省代码实现时间来提高生产率,而这些时间可以用于执行更关键的任务,例如制定更好的深度学习算法。 + +同样,Keras 是快速实现深度学习模型的理想选择,就像我们将在本书中使用的那样。 使用**顺序模型 API**,只需几行代码即可构建典型模型。 但是,不要被它的简单性所误导。 + +Keras 还可以使用其函数式 API 以及用于动态图的`Model`和`Layer`类来构建更高级和复杂的模型,可以对其进行定制以满足独特的需求。 函数式 API 支持构建类似图的模型,层重用以及创建行为类似于 Python 函数的模型。 同时,`Model`和`Layer`类提供了用于实现罕见或实验性深度学习模型和层的框架。 + +## 安装 Keras 和 TensorFlow + +Keras 不是独立的深度学习库。 如您在“图 1.1.1”中所看到的,它建立在另一个深度学习库或后端的之上。 这可能是 Google 的 TensorFlow,MILA 的 Theano,微软的 CNTK 或 Apache MXNet。 但是,与本书的上一版不同,我们将使用 TensorFlow 2.0(`tf2`或简称为`tf`)提供的 Keras(更好地称为`tf.keras`),以利用 tf2 所提供的有用工具。 `tf.keras`也被认为是 TensorFlow 的事实上的前端,它在生产环境中表现出了公认的可靠性。 此外,在不久的将来,将不再提供 Keras 对 TensorFlow 以外的后端的支持。 + +从 Keras 迁移到`tf.keras`通常就像更改一样简单: + +```py +from keras... import ... +``` + +至 + +```py +from tensorflow.keras... import ... +``` + +本书中的代码示例全部以 **Python 3** 编写,以支持 **Python 2** 于 2020 年结束。 + +在硬件上,Keras 在 CPU,GPU 和 Google 的 TPU 上运行。 在本书中,我们将在 CPU 和 NVIDIA GPU(特别是 GTX 1060,GTX 1080Ti,RTX 2080Ti,V100 和 Quadro RTX 8000)上进行测试: + +![A screenshot of a cell phone Description automatically generated](img/B14853_01_01.png) + +图 1.1.1:Keras 是位于其他深度学习框架之上的高级库。 CPU,GPU 和 TPU 支持 Keras。 + +在继续进行本书的其余部分之前,我们需要确保正确安装了`tf2`。 有多种执行安装的方法。 一个示例是通过使用`pip3`安装`tf2`: + +```py +$ sudo pip3 install tensorflow +``` + +如果我们具有支持已正确安装驱动的 NVIDIA GPU,以及 NVIDIA CUDA 工具包和 cuDNN 深度神经网络库,则强烈建议您安装启用 GPU 的版本,因为它可以加快训练和预测的速度: + +```py +$ sudo pip3 install tensorflow-gpu +``` + +无需安装 Keras,因为它已经是`tf2`中的包。 如果您不愿意在系统范围内安装库,强烈建议使用 [Anaconda](https://www.anaconda.com/distribution/) 之类的环境。 除了具有隔离环境之外,Anaconda 发行版还安装了用于数据科学的常用第三方包,这些包对于深度学习是必不可少的。 + +本书中提供的示例将需要其他包,例如`pydot`,`pydot_ng`,`vizgraph`,`python3-tk`和`matplotlib`。 在继续本章之前,我们需要安装这些包。 + +如果安装了`tf2`及其依赖项,则以下内容不会产生任何错误: + +```py +$ python3 +>>> import tensorflow as tf +>>> print(tf.__version__) +2.0.0 +>>> from tensorflow.keras import backend as K +>>> print(K.epsilon()) +1e-07 +``` + +本书没有涵盖完整的 Keras API。 我们将仅介绍解释本书中选定的高级深度学习主题所需的材料。 有关更多信息,请查阅 Keras 官方文档,该文档在[这里](https://keras.io)或[这里](https://www.tensorflow.org/guide/keras/overview)。 + +在随后的部分中,将讨论 MLP,CNN 和 RNN 的详细信息。 这些网络将用于使用`tf.keras`构建简单的分类器。 + +# 2\. 
MLP,CNN 和 RNN + +我们已经提到,我们将使用三个深度学习网络,它们是: + +* **MLP**:多层感知器 +* **CNN**:卷积神经网络 +* **RNN**:循环神经网络 + +这些是我们将在本书中使用的三个网络。 稍后,您会发现它们经常结合在一起以利用每个网络的优势。 + +在本章中,我们将更详细地讨论这些构建块。 在以下各节中,将介绍 MLP 以及其他重要主题,例如损失函数,优化器和正则化器。 接下来,我们将介绍 CNN 和 RNN。 + +## MLP,CNN 和 RNN 之间的区别 + +MLP 是**全连接**(**FC**)网络。 在某些文献中,您经常会发现将该称为或深度前馈网络或前馈神经网络。 在本书中,我们将使用术语 MLP。 从已知目标应用的角度了解此网络将有助于我们深入了解高级深度学习模型设计的根本原因。 + +MLP 在简单的逻辑和线性回归问题中很常见。 但是,MLP 对于处理顺序和多维数据模式不是最佳的。 通过设计,MLP 难以记住顺序数据中的模式,并且需要大量参数来处理多维数据。 + +对于顺序数据输入,RNN 很受欢迎,因为内部设计允许网络发现数据历史记录中的依存关系,这对预测很有用。 对于诸如图像和视频之类的多维数据,CNN 擅长提取用于分类,分割,生成和其他下游任务的特征映射。 在某些情况下,一维卷积形式的 CNN 也用于具有顺序输入数据的网络。 但是,在大多数深度学习模型中,将 MLP 和 CNN 或 RNN 结合起来可以充分利用每个网络。 + +MLP,CNN 和 RNN 并不完整整个深度网络。 需要识别**目标**或**损失函数**,**优化器**,和**调节器**。 目标是减少训练期间的损失函数值,因为这样的减少是模型正在学习的一个很好的指标。 + +为了使值最小化,模型使用了优化器。 这是一种算法,它确定在每个训练步骤中应如何调整权重和偏差。 经过训练的模型不仅必须对训练数据起作用,而且还必须对训练环境之外的数据起作用。 正则化器的作用是确保训练后的模型能够推广到新数据。 + +现在,让我们进入这三个网络–我们将从谈论 MLP 网络开始。 + +# 3\. 多层感知器(MLP) + +我们将要看的这三个网络中的第一个是 MLP 网络。 让我们假设目标是创建一个神经网络,用于基于手写数字识别数字。 例如,当网络的输入是手写数字 8 的图像时,相应的预测也必须是数字 8。这是分类器网络的经典工作,可以使用逻辑回归进行训练。 为了训练和验证分类器网络,必须有足够大的手写数字数据集。 *国家标准技术混合研究院*数据集,简称 MNIST [2],通常被视为 **Hello World 深度学习数据集**。 它是用于手写数字分类的合适数据集。 + +在我们讨论 MLP 分类器模型之前,必须了解 MNIST 数据集。 本书中的大量示例都使用 MNIST 数据集。 MNIST 用于来解释并验证许多深度学习理论,因为它包含的 70,000 个样本很小,但是的信息足够丰富: + +![](img/B14853_01_02.png) + +图 1.3.1:来自 MNIST 数据集的示例图像。 每个灰度图像为`28×28`像素。 + +在下面的中,我们将简要介绍 MNIST。 + +## MNIST 数据集 + +MNIST 是从 0 到 9 的手写数字的集合。它具有 60,000 张图像的训练集和 10,000 张测试图像,这些图像被分为相应的类别或标签。 在某些文献中,术语**目标**或**基本事实**也用于指**标签**。 + +在上图中,可以看到 MNIST 数字的样本图像,每个样本的大小为`28 x 28`像素(灰度)。 为了在 Keras 中使用 MNIST 数据集,提供了一个 API,用于下载并自动提取图像和标签。“列表 1.3.1”演示了如何仅在一行中加载 MNIST 数据集,从而使我们既可以计算训练和测试标签,又可以绘制 25 个随机数字图像。 + +“列表 1.3.1”:`mnist-sampler-1.3.1.py` + +```py +import numpy as np +from tensorflow.keras.datasets import mnist +import matplotlib.pyplot as plt +``` + +```py +# load dataset +(x_train, y_train), (x_test, y_test) = mnist.load_data() +``` + +```py +# count the number of unique train labels +unique, counts = np.unique(y_train, return_counts=True) +print("Train labels: ", dict(zip(unique, counts))) +``` + +```py +# count the number of unique test labels +unique, counts = np.unique(y_test, return_counts=True) +print("Test labels: ", dict(zip(unique, counts))) +``` + +```py +# sample 25 mnist digits from train dataset +indexes = np.random.randint(0, x_train.shape[0], size=25) +images = x_train[indexes] +labels = y_train[indexes] +``` + +```py +# plot the 25 mnist digits +plt.figure(figsize=(5,5)) +for i in range(len(indexes)): + plt.subplot(5, 5, i + 1) + image = images[i] + plt.imshow(image, cmap='gray') + plt.axis('off') +``` + +```py +plt.savefig("mnist-samples.png") +plt.show() +plt.close('all') +``` + +`mnist.load_data()`方法很方便,因为不需要分别加载所有 70,000 张图像和标签并将它们存储在数组中。 执行以下命令: + +```py +python3 mnist-sampler-1.3.1.py +``` + +在命令行上,该代码示例打印训练和测试数据集中的标签分布: + +```py +Train labels:{0: 5923, 1: 6742, 2: 5958, 3: 6131, 4: 5842, 5: 5421, 6: 5918, 7: 6265, 8: 5851, 9: 5949} +Test labels:{0: 980, 1: 1135, 2: 1032, 3: 1010, 4: 982, 5: 892, 6: 958, 7: 1028, 8: 974, 9: 1009} +``` + +之后,代码将绘制 25 个随机数字,如先前在“图 1.3.1”中所示。 + +在讨论 MLP 分类器模型之前,必须记住,虽然 MNIST 数据由二维张量组成,但应根据输入层的类型对它进行重塑。 以下“图 1.3.2”显示了如何为 MLP,CNN 和 RNN 输入层重塑`3×3`灰度图像: + +![](img/B14853_01_03.png) + +图 1.3.2:根据输入层的类型,对与 MNIST 数据相似的输入图像进行重塑。 为简单起见,显示了`3×3`灰度图像的重塑。 + +在以下各节中,将介绍 MNIST 的 MLP 分类器模型。 我们将演示如何使用`tf.keras`有效地构建,训练和验证模型。 + +## MNIST 数字分类器模型 + +“图 1.3.3”中显示的建议的 MLP 模型可用于 MNIST 数字分类。 当单元或感知器暴露在外时,MLP 模型是一个全连接网络,如图“图 1.3.4”所示。 我们还将展示如何根据第`n`个单元的权重`w[i]`和偏置`b[n]`的输入来计算感知器的输出。 相应的`tf.keras`实现在“列表 
1.3.2”中进行了说明: + +![](img/B14853_01_04.png) + +图 1.3.3:MLP MNIST 数字分类器模型 + +![](img/B14853_01_05.png) + +图 1.3.4:图 1.3.3 中的 MLP MNIST 数字分类器由全连接层组成。 为简单起见,未显示激活层和退出层。 还详细显示了一个单元或感知器。 + +“列表 1.3.2”:`mlp-mnist-1.3.2.py` + +```py +import numpy as np +from tensorflow.keras.models import Sequential +from tensorflow.keras.layers import Dense, Activation, Dropout +from tensorflow.keras.utils import to_categorical, plot_model +from tensorflow.keras.datasets import mnist +``` + +```py +# load mnist dataset +(x_train, y_train), (x_test, y_test) = mnist.load_data() +``` + +```py +# compute the number of labels +num_labels = len(np.unique(y_train)) +``` + +```py +# convert to one-hot vector +y_train = to_categorical(y_train) +y_test = to_categorical(y_test) + +# image dimensions (assumed square) +image_size = x_train.shape[1] +input_size = image_size * image_size +``` + +```py +# resize and normalize +x_train = np.reshape(x_train, [-1, input_size]) +x_train = x_train.astype('float32') / 255 +x_test = np.reshape(x_test, [-1, input_size]) +x_test = x_test.astype('float32') / 255 +``` + +```py +# network parameters +batch_size = 128 +hidden_units = 256 +dropout = 0.45 +``` + +```py +# model is a 3-layer MLP with ReLU and dropout after each layer +model = Sequential() +model.add(Dense(hidden_units, input_dim=input_size)) +model.add(Activation('relu')) +model.add(Dropout(dropout)) +model.add(Dense(hidden_units)) +model.add(Activation('relu')) +model.add(Dropout(dropout)) +model.add(Dense(num_labels)) +# this is the output for one-hot vector +model.add(Activation('softmax')) +model.summary() +plot_model(model, to_file='mlp-mnist.png', show_shapes=True) +``` + +```py +# loss function for one-hot vector +# use of adam optimizer +# accuracy is good metric for classification tasks +model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) +# train the network +model.fit(x_train, y_train, epochs=20, batch_size=batch_size) +``` + +```py +# validate the model on test dataset to determine generalization +_, acc = model.evaluate(x_test, + y_test, + batch_size=batch_size, + verbose=0) +print("\nTest accuracy: %.1f%%" % (100.0 * acc)) +``` + +在讨论模型实现之前,数据必须具有正确的形状和格式。 加载 MNIST 数据集后,标签的数量计算为: + +```py +# compute the number of labels +num_labels = len(np.unique(y_train)) +``` + +硬编码`num_labels = 10`也可以选择。 但是,让计算机完成工作始终是一个好习惯。 该代码假定`y_train`的标签为 0 到 9。 + +此时,标签为数字格式,即从 0 到 9。标签的这种稀疏标量表示形式不适用于按类别输出概率的神经网络预测层。 一种更合适的格式称为`one-hot vector`,这是一个十维向量,除数字类的索引外,所有元素均为 0。 例如,如果标签为 2,则等效`one-hot vector`为[0,0,1,0,0,0,0,0,0,0]。 第一个标签的索引为 0。 + +以下各行将每个标签转换为`one-hot vector`: + +```py +# convert to one-hot vector +y_train = to_categorical(y_train) +y_test = to_categorical(y_test) +``` + +在深度学习中,数据存储在张量中。 张量一词适用于标量(0D 张量),向量(1D 张量),矩阵(二维张量)和多维张量。 + +从这一点出发,除非标量,向量或矩阵使解释更清楚,否则将使用术语张量。 + +如下所示的其余代码将计算图像尺寸,第一密集层的`input_size`值,并将每个像素值从 0 缩放到 255,范围从 0.0 缩放到 1.0。 尽管可以直接使用原始像素值,但最好对输入数据进行规范化,以避免产生可能会使训练变得困难的较大梯度值。 网络的输出也被标准化。 训练后,可以通过将输出张量乘以 255 来将所有内容恢复为整数像素值。 + +提出的模型基于 MLP 层。 因此,输入应为一维张量。 这样,将`x_train`和`x_test`分别重塑为`[60,000,28 * 28]`和`[10,000,28 * 28]`。 在 NumPy 中,大小为 -1 表示让库计算正确的尺寸。 在`x_train`的情况下为 60,000。 + +```py +# image dimensions (assumed square) 400 +image_size = x_train.shape[1] +input_size = image_size * image_size +``` + +```py +# resize and normalize +x_train = np.reshape(x_train, [-1, input_size]) +x_train = x_train.astype('float32') / 255 +x_test = np.reshape(x_test, [-1, input_size]) +x_test = x_test.astype('float32') / 255 +``` + +在准备好数据集之后,以下内容将重点介绍使用 Keras 的顺序 API 构建 MLP 分类器模型。 + +## 使用 MLP 和 
Keras 构建模型 + +数据准备之后,接下来是构建模型。 所提出的模型由三个 MLP 层组成。 在 Keras 中,将 MLP 层称为**密集**,它表示紧密连接的层。 第一和第二个 MLP 层本质上是相同的,每个都有 256 个单元,然后是**整流线性单元**(**ReLU**)激活和退出。 由于 128、512 和 1,024 个单元的表现指标较低,因此选择 256 个单元。 在 128 个单元的情况下,网络收敛迅速,但测试精度较低。 512 或 1,024 的额外单元数量不会显着提高测试精度。 + +单元数是超参数。 它控制网络的**容量**。 容量是网络可以近似的函数复杂性的度量。 例如,对于多项式,度是超参数。 随着程度的增加,函数的能力也随之增加。 + +如以下代码行所示,使用 Keras 的顺序 API 实现分类器模型。 如果模型需要一个输入和一个输出(由一系列层处理),这就足够了。 为了简单起见,我们现在将使用它。 但是,在“第 2 章”,“深度神经网络”中,将引入 Keras 的函数式 API 来实现高级深度学习模型,该模型需要更复杂的结构(例如多个输入和输出)。 + +```py +# model is a 3-layer MLP with ReLU and dropout after each layer model = Sequential() +model.add(Dense(hidden_units, input_dim=input_size)) +model.add(Activation('relu')) +model.add(Dropout(dropout)) +model.add(Dense(hidden_units)) +model.add(Activation('relu')) +model.add(Dropout(dropout)) +model.add(Dense(num_labels)) +# this is the output for one-hot vector model.add(Activation('softmax')) +``` + +由于`Dense`层是线性运算,因此`Dense`层的序列只能近似线性函数。 问题是 MNIST 数字分类本质上是非线性过程。 在`Dense`层之间插入`relu`激活将使 MLP 网络能够对非线性映射建模。 `relu`或 ReLU 是一个简单的非线性函数。 这很像一个过滤器,它允许正输入不变地通过,同时将其他所有值都钳位为零。 数学上,`relu`用以下公式表示,见“图 1.3.5”: + +![](img/B14853_01_001.png) + +![](img/B14853_01_06.png) + +图 1.3.5:ReLU 函数图。 ReLU 函数在神经网络中引入了非线性。 + +还可以使用其他非线性函数,例如`elu`,`selu`,`softplus`,`sigmoid`和`tanh`。 但是,`relu`是最常用的函数,由于其简单性,在计算上是有效的。 Sigmoid 和 tanh 函数在输出层中用作激活函数,稍后将描述。“表 1.3.1”显示了每个激活函数的方程式: + +| `relu` | `relu(x) = max(0, x)` | 1.3.1 | +| --- | --- | --- | +| `softplus` | `softplus(x) = log(1 + exp(x))` | 1.3.2 | +| `elu` | ![](img/B14853_01_002.png) 其中`a≥0`并且是可调超参数 | 1.3.3 | +| `selu` | `selu(x) = k×elu(x, a)`其中`k = 1.0507009873554804934193193349852946`和`a = 1.6732632423543772848170429916717` | 1.3.4 | +| `sigmoid` | ![](img/B14853_01_003.png) | 1.3.5 | +| `tanh` | ![](img/B14853_01_004.png) | 1.3.6 | + +表 1.3.1:常见非线性激活函数的定义 + +尽管我们已完成 MLP 分类器模型的关键层,但我们尚未解决泛化问题或模型超出训练数据集的能力。 为了解决这个问题,我们将在下一节介绍正则化。 + +## 正则化 + +神经网络倾向于记住其训练数据,特别是如果它包含的容量超过。 在这种情况下,当经受测试数据时,网络将发生灾难性的故障。 这是网络无法推广的经典情况。 为了避免这种趋势,模型使用了正则化层或函数。 常见的正则化层是`Dropout`。 + +丢弃的想法很简单。 给定丢弃率(此处将其设置为`dropout = 0.45`),丢弃层会从参与下一层的单元中随机删除这一部分。 例如,如果第一层具有 256 个单元,则在应用`dropout = 0.45`之后,只有`(1-0.45) * 256`个单元,来自第 1 层的 140 个单元参与第 2 层。 + +丢弃层使神经网络对于无法预见的输入数据具有鲁棒性,因为即使缺少某些单元,训练后的神经网络也可以正确预测。 值得注意的是,输出层中没有使用丢弃,它仅在训练期间处于活动状态。 此外,在预测期间不存在丢弃现象。 + +除了诸如丢弃之类的正则化之外,还可以使用其他正则化器。 在 Keras 中,可以按层对偏置,权重和激活输出进行正则化。 `l1`和`l2`通过添加罚函数来支持较小的参数值。 `l1`和`l2`都使用绝对值(`l1`)或平方(`l2`)之和的分数来执行惩罚。 换句话说,惩罚函数迫使优化器找到较小的参数值。 参数值小的神经网络对来自输入数据的噪声的存在更加不敏感。 + +例如,带有`fraction=0.001`的`l2`权重正则器可以实现为: + +```py +from tensorflow.keras.regularizers import l2 +model.add(Dense(hidden_units, + kernel_regularizer=l2(0.001), + input_dim=input_size)) +``` + +如果使用`l1`或`l2`正则化,则不添加任何附加层。 正则化在内部施加在`Dense`层中。 对于建议的模型,丢弃仍然具有比`l2`更好的表现。 + +我们的模型几乎已经完成。 下一节将重点介绍输出层和损失函数。 + +## 输出激活和损失函数 + +输出的层具有 10 个单元,其后是`softmax`激活层。 这 10 个单元对应于 10 个可能的标签,类或类别。 可以用数学方式表示`softmax`激活,如以下等式所示: + +![](img/B14853_01_005.png) (Equation 1.3.7) + +该方程适用于所有`N = 10`输出,`x[i]`对于`i = 0, 1, ..., 9`作最终预测。 `softmax`的概念非常简单。 通过对预测进行归一化,将输出压缩为概率。 在此,每个预测输出都是该索引是给定输入图像的正确标签的概率。 所有输出的所有概率之和为 1.0。 例如,当`softmax`层生成预测时,它将是一个 10 维一维张量,看起来像以下输出: + +```py +[3.57351579e-11 7.08998016e-08 + 2.30154569e-07 6.35787558e-07 + 5.57471187e-11 4.15353840e-09 + 3.55973775e-16 9.99995947e-01 + 1.29531730e-09 3.06023480e-06] +``` + +预测输出张量建议输入图像的索引具有最高概率,因此将为 7。 `numpy.argmax()`方法可用于确定具有最高值的元素的索引。 + +输出激活层还有其他选择,例如`linear`,`sigmoid`或`tanh`。 `linear`激活是一种恒等函数。 它将其输入复制到其输出。 `sigmoid`函数更具体地是,称为**逻辑 Sigmoid**。 如果预测张量的元素将独立地映射在 0.0 和 1.0 之间,则将使用此方法。 与`softmax`中不同,预测张量的所有元素的总和不限于 1.0。 例如,`sigmoid`用作情感预测(从 
0.0 到 1.0、0.0 不好,1.0 很好)或图像生成(0.0 映射到像素级别 0 和 1.0 映射到像素 255)的最后一层 。 + +`tanh`函数将其输入映射在 -1.0 到 1.0 的范围内。 如果输出可以同时以正值和负值摆幅,则这一点很重要。 `tanh`函数在循环神经网络的内部层中更普遍使用,但也已用作输出层激活。 如果在输出激活中使用 tanh 代替`sigmoid`,则必须适当缩放使用的数据。 例如,不是使用`x = x / 255`缩放`[0.0, 1.0]`范围内的每个灰度像素,而是使用`x = (x - 127.5) / 127.5`将其分配在`[-1.0, 1.0]`范围内。 + +下图“图 1.3.6”显示了`sigmoid`和`tanh`函数。 数学上,Sigmoid 可以用以下公式表示: + +![](img/B14853_01_008.png) (Equation 1.3.5) + +![](img/B14853_01_07.png) + +图 1.3.6:Sigmoid 和正切图 + +预测张量距单热地面真值向量有多远称为损失。 损失函数的一种类型是`mean_squared_error`(**MSE**),或者是目标或标签与预测之间差异的平方的平均值。 在当前示例中,我们使用`categorical_crossentropy`。 它是目标或标签乘积与每个类别的预测对数之和的负数。 Keras 中还有其他损失函数,例如`mean_absolute_error`和`binary_crossentropy`。“表 1.3.2”总结了的常见损失函数。 + +| **损失函数** | **公式** | +| --- | --- | +| `mean_squared_error` | ![](img/B14853_01_009.png) | +| `mean_absolute_error` | ![](img/B14853_01_010.png) | +| `categorical_crossentropy` | ![](img/B14853_01_011.png) | +| `binary_crossentropy` | ![](img/B14853_01_012.png) | + +表 1.3.2:常见损失函数汇总。 类别是指标签和预测中的类别数(例如:MNIST 为 10)。 所示的损失方程式仅适用于一个输出。 平均损失值是整个批量的平均值。 + +损失函数的选择不是任意的,而应作为模型正在学习的标准。 对于按类别进行分类,在`softmax`激活层之后,`categorical_crossentropy`或`mean_squared_error`是一个不错的选择。 `binary_crossentropy`损失函数通常在`sigmoid`激活层之后使用,而`mean_squared_error`是`tanh`输出的选项。 + +在下一部分中,我们将讨论优化算法以最小化我们在此处讨论的损失函数。 + +## 优化 + +通过优化,目标是使损失函数最小化。 这个想法是,如果将损失减少到可接受的水平,则该模型将间接学习将输入映射到输出的函数。 表现指标用于确定模型是否了解了基础数据分布。 Keras 中的默认指标是**损失**。 在训练,验证和测试期间,还可以包括其他指标,例如**准确率**。 准确率是基于地面真实性的正确预测的百分比或分数。 在深度学习中,还有许多其他表现指标。 但是,它取决于模型的目标应用。 在文献中,报告了**测试数据集**上训练后的模型的表现指标,用于与其他深度学习模型进行比较。 + +在 Keras 中,优化器有个选择。 最常用的优化器是**随机梯度下降**(**SGD**),**自适应矩**(**Adam**)和**均方根传播**(**RMSprop**)。 每个优化器均具有可调参数,例如学习率,动量和衰减。 Adam 和 RMSprop 是具有自适应学习率的 SGD 的变体。 在提出的分类器网络中,使用了 Adam,因为它具有最高的测试精度。 + +SGD 被认为是最基本的优化程序。 它是演算中梯度下降的简单版本。 在**梯度下降**(**GD**)中,追踪下坡函数的曲线可找到最小值,就像在山谷中下坡直至到达底部一样。 + +GD 算法如图 1.3.7 所示。 假设`x`是被调整以找到`y`的最小值(例如,损失函数)的参数(例如,权重)。 从`x = -0.5`的任意点开始。 梯度`dy/dx = -2.0`。 GD 算法强加`x`然后更新为`x = -0.5 - ε(-2.0)`。 `x`的新值等于旧值,再加上`ε`缩放的梯度的相反值。 小数字`ε`是指学习率。 如果`ε = 0.01`,则`x`的新值为 -0.48。 GD 是迭代执行的。 在每一步,`y`都将接近其最小值。 在`x = 0.5`时,`dy/dx = 0`。 GD 已找到`y = -1.25`的绝对最小值。 梯度建议不要进一步改变`x`。 + +学习率的选择至关重要。 大的`ε`值可能找不到最小值,因为搜索只会在最小值附近来回摆动。 一方面,在找到最小值之前,较大的`ε`值可能需要进行大量迭代。 在有多个最小值的情况下,搜索可能会陷入局部最小值。 + +![](img/B14853_01_08.png) + +图 1.3.7:GD 类似于在函数曲线上向下走直到到达最低点。 在此图中,全局最小值为`x = 0.5`。 + +多个极小值的示例可以在“图 1.3.8”中看到。 如果由于某种原因从图的左侧开始搜索并且学习率很小,则 GD 很可能会发现`x = -1.51`是*最小值* 。 GD 无法在`x = 1.66`时找到全局最小值。 具有足够值的学习率将使 GD 可以克服`x = 0.0`的问题。 + +在深度学习实践中,通常建议从更高的学习率开始(例如,从 0.1 到 0.001),并随着损失接近最小值而逐渐降低学习率。 + +![](img/B14853_01_09.png) + +图 1.3.8:具有 2 个最小值的函数图,`x = -1.51`和`x = 1.66`。 还显示了该函数的导数。 + +GD 通常不用于深度神经网络,因为遇到数百万个要训练的参数很常见。 执行完整的 GD 在计算上效率低下。 而是使用 SGD。 在 SGD 中,选择一小批样本以计算下降的近似值。 参数(例如权重和偏差)可通过以下公式进行调整: + +![](img/B14853_01_021.png) + +在该等式中,`θ`和`g = 1/m ᐁ[θ] ΣL`分别是损失函数的参数和梯度张量。`g`由损失函数的偏导数计算得出。 出于 GPU 优化的目的,建议最小批量大小为 2 的幂。 在建议的网络中,`batch_size = 128`。 + +“公式 1.3.8”计算最后一层参数更新。 那么,我们如何调整前几层的参数呢? 
在这种情况下,应用微分链式法则将导数传播到较低层并相应地计算梯度。 该算法在深度学习中称为**反向传播**。 反向传播的详细信息超出了本书的范围。 但是,可以在[这里](http://neuralnetworksanddeeplearning.com)找到很好的在线参考。 + +由于优化是基于微分的,因此损失函数的一个重要标准是它必须平滑或可微。 当引入新的损失函数时,这是要牢记的重要约束。 + +给定训练数据集,损失函数,优化器和正则化器的选择,现在可以通过调用`fit()`函数来训练模型: + +```py +# loss function for one-hot vector +# use of adam optimizer +# accuracy is a good metric for classification tasks +model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) +``` + +```py +# train the network +model.fit(x_train, y_train, epochs=20, batch_size=batch_size) +``` + +这是 Keras 的另一个有用函数。 只需提供`x`和`y`数据,要训练的周期数和批量大小,`fit()`就能完成其余工作。 在其他深度学习框架中,这需要完成多项任务,例如以适当的格式准备输入和输出数据,加载,监视等等。 尽管在其他框架中所有这些都必须在`for`循环内完成,但在 Keras 中,一切都只需要一行即可完成。 + +在`fit()`函数中,一个周期是对整个训练数据的一次完整采样。 `batch_size`参数是每个训练步骤要处理的输入样本数量。 为了完成一个周期,`fit()`处理的步数等于训练数据集大小除以批量大小,再加 1 以补偿任何小数部分。 + +训练模型后,我们现在可以评估其表现。 + +## 表现评估 + +至此,MNIST 数字分类器的模型已经完成。 表现评估是下一个关键步骤,用以确定训练后的模型是否给出了令人满意的解决方案。 将模型训练 20 个周期就足以获得可比较的表现指标。 + +下表“表 1.3.3”列出了不同的网络配置和相应的表现指标。 在“层”下,显示第 1 到第 3 层的单元数。 对于每个优化器,将使用`tf.keras`中的默认参数。 可以观察到改变正则化器,优化器和每层单元数的效果。“表 1.3.3”中的另一个重要观察结果是,更大的网络不一定会转化为更好的表现。 + +在训练和测试数据集的准确率方面,增加此网络的深度没有显示出额外的好处。 另一方面,较少的单元(例如 128)也可能会降低测试和训练的准确率。 删除正则化器并且每层使用 256 个单元时,将在`99.93%`处获得最佳的训练精度。 但是,由于网络过拟合,测试精度在`98.0%`时要低得多。 + +最高的测试精度`98.5%`是使用 Adam 优化器和`Dropout(0.45)`获得的。 从技术上讲,鉴于其训练精度为`99.39%`,仍然存在某种程度的过拟合。 对于`256-512-256`,`Dropout(0.45)`和 SGD,训练和测试精度均为`98.2%`。 同时去除正则化器和 ReLU 层会导致最差的表现。 通常,我们会发现`Dropout`层比`l2`具有更好的表现。 + +下表演示了调整期间典型的深度神经网络表现: + +| **层** | **正则化函数** | **优化器** | **ReLU** | **训练准确率(%)** | **测试准确率(%)** | +| --- | --- | --- | --- | --- | --- | +| 256-256-256 | 没有 | SGD | 没有 | 93.65 | 92.5 | +| 256-256-256 | L2(0.001) | SGD | 是 | 99.35 | 98.0 | +| 256-256-256 | L2(0.01) | SGD | 是 | 96.90 | 96.7 | +| 256-256-256 | 没有 | SGD | 是 | 99.93 | 98.0 | +| 256-256-256 | 丢弃(0.4) | SGD | 是 | 98.23 | 98.1 | +| 256-256-256 | 丢弃(0.45) | SGD | 是 | 98.07 | 98.1 | +| 256-256-256 | 丢弃(0.5) | SGD | 是 | 97.68 | 98.1 | +| 256-256-256 | 丢弃(0.6) | SGD | 是 | 97.11 | 97.9 | +| 256-512-256 | 丢弃(0.45) | SGD | 是 | 98.21 | 98.2 | +| 512-512-512 | 丢弃(0.2) | SGD | 是 | 99.45 | 98.3 | +| 512-512-512 | 丢弃(0.4) | SGD | 是 | 98.95 | 98.3 | +| 512-1024-512 | 丢弃(0.45) | SGD | 是 | 98.90 | 98.2 | +| 1024-1024-1024 | 丢弃(0.4) | SGD | 是 | 99.37 | 98.3 | +| 256-256-256 | 丢弃(0.6) | Adam | 是 | 98.64 | 98.2 | +| 256-256-256 | 丢弃(0.55) | Adam | 是 | 99.02 | 98.3 | +| 256-256-256 | 丢弃(0.45) | Adam | 是 | 99.39 | 98.5 | +| 256-256-256 | 丢弃(0.45) | RMSprop | 是 | 98.75 | 98.1 | +| 128-128-128 | 丢弃(0.45) | Adam | 是 | 98.70 | 97.7 | + +表 1.3.3 不同的 MLP 网络配置和表现指标 + +该示例表明网络架构有待改进。 在下一节讨论了 MLP 分类器模型摘要之后,我们将介绍另一个 MNIST 分类器。 下一个模型基于 CNN,并证明了测试准确率的显着提高。 + +## 模型摘要 + +使用 Keras 库为我们提供了一种快速的机制,可以通过调用以下方法来仔细检查模型描述: + +```py +model.summary() +``` + +下面的“列表 1.3.3”显示了所建议网络的模型摘要。 它总共需要 269,322 个参数。 考虑到对 MNIST 数字进行分类只是一项简单的任务,这个参数量相当可观。 MLP 的参数效率不高。 可以通过关注感知器输出的计算方式,从“图 1.3.4”计算参数的数量。 从输入到第一密集层:`784 × 256 + 256 = 200,960`。 从第一密集层到第二密集层:`256 × 256 + 256 = 65,792`。 从第二密集层到输出层:`10 × 256 + 10 = 2,570`。 总数是`269,322`。 + +“列表 1.3.3”:MLP MNIST 数字分类器模型的摘要: + +```py +Layer (type) Output Shape Param # +================================================================= +dense_1 (Dense) (None, 256) 200960 +activation_1 (Activation) (None, 256) 0 +dropout_1 (Dropout) (None, 256) 0 +dense_2 (Dense) (None, 256) 65792 +activation_2 (Activation) (None, 256) 0 +dropout_2 (Dropout) (None, 256) 0 +dense_3 (Dense) (None, 10) 2570 +activation_3 (Activation) (None, 10) 0 +================================================================= +Total 
params: 269,322 +Trainable params: 269,322 +Non-trainable params: 0 +``` + +验证网络的另一种方法是通过调用: + +```py +plot_model(model, to_file='mlp-mnist.png', show_shapes=True) +``` + +“图 1.3.9”显示了该图。 您会发现这类似于`summary()`的结果,但是以图形方式显示了每个层的互连和 I/O。 + +![A screenshot of a cell phone Description automatically generated](img/B14853_01_10.png) + +图 1.3.9:MLP MNIST 数字分类器的图形描述 + +在总结了我们模型的之后,到此结束了我们对 MLP 的讨论。 在下一部分中,我们将基于 CNN 构建 MNIST 数字分类器模型。 + +# 4\. 卷积神经网络(CNN) + +现在,我们将进入第二个人工神经网络 CNN。 在本节中,我们将解决相同的 MNIST 数字分类问题,但这一次使用 CNN。 + +“图 1.4.1”显示了我们将用于 MNIST 数字分类的 CNN 模型,而其实现在“列表 1.4.1”中进行了说明。 实现 CNN 模型将需要对先前模型进行一些更改。 现在,输入张量不再具有输入向量,而具有新尺寸(`height`,`width`,`channels`)或(`image_size`,`image_size`,`1`)=(`28`,`28` ,`1`)用于 MNIST 灰度图像。 需要调整训练和测试图像的大小以符合此输入形状要求。 + +![](img/B14853_01_11.png) + +图 1.4.1:用于 MNIST 数字分类的 CNN 模型 + +实现上图: + +“列表 1.4.1”:`cnn-mnist-1.4.1.py` + +```py +import numpy as np +from tensorflow.keras.models import Sequential +from tensorflow.keras.layers import Activation, Dense, Dropout +from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten +from tensorflow.keras.utils import to_categorical, plot_model +from tensorflow.keras.datasets import mnist +``` + +```py +# load mnist dataset +(x_train, y_train), (x_test, y_test) = mnist.load_data() +``` + +```py +# compute the number of labels +num_labels = len(np.unique(y_train)) +``` + +```py +# convert to one-hot vector +y_train = to_categorical(y_train) +y_test = to_categorical(y_test) +``` + +```py +# input image dimensions +image_size = x_train.shape[1] +# resize and normalize +x_train = np.reshape(x_train,[-1, image_size, image_size, 1]) +x_test = np.reshape(x_test,[-1, image_size, image_size, 1]) +x_train = x_train.astype('float32') / 255 +x_test = x_test.astype('float32') / 255 +``` + +```py +# network parameters +# image is processed as is (square grayscale) +input_shape = (image_size, image_size, 1) +batch_size = 128 +kernel_size = 3 +pool_size = 2 +filters = 64 +dropout = 0.2 +``` + +```py +# model is a stack of CNN-ReLU-MaxPooling +model = Sequential() +model.add(Conv2D(filters=filters, + kernel_size=kernel_size, + activation='relu', + input_shape=input_shape)) +model.add(MaxPooling2D(pool_size)) +model.add(Conv2D(filters=filters, + kernel_size=kernel_size, + activation='relu')) +model.add(MaxPooling2D(pool_size)) +model.add(Conv2D(filters=filters, + kernel_size=kernel_size, + activation='relu')) +model.add(Flatten()) +# dropout added as regularizer +model.add(Dropout(dropout)) +# output layer is 10-dim one-hot vector +model.add(Dense(num_labels)) +model.add(Activation('softmax')) +model.summary() +plot_model(model, to_file='cnn-mnist.png', show_shapes=True) +``` + +```py +# loss function for one-hot vector +# use of adam optimizer +# accuracy is good metric for classification tasks +model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) +# train the network +model.fit(x_train, y_train, epochs=10, batch_size=batch_size) +``` + +```py +_, acc = model.evaluate(x_test, + y_test, + batch_size=batch_size, + verbose=0) +print("\nTest accuracy: %.1f%%" % (100.0 * acc)) +``` + +的主要更改是`Conv2D`层的使用。 `ReLU`激活函数已经是`Conv2D`的参数。 当模型中包含`batch normalization`层时,可以将`ReLU`函数作为`Activation`层使用。 `Batch normalization`用于深层 CNN,因此可以利用较大的学习率而不会引起训练过程中的不稳定。 + +## 卷积 + +如果在 MLP 模型中,单元数量表示密集层,则核表示 CNN 操作。 如图“图 1.4.2”所示,可以将核可视化为矩形补丁或窗口,该补丁或窗口从左到右,从上到下在整个图像中滑动。 此操作称为卷积。 它将输入图像转换成特征映射,该特征映射表示核从输入图像中学到的内容。 然后将特征映射转换为后续层中的另一个特征映射,依此类推。 每个`Conv2D`生成的特征映射的数量由`filters`参数控制。 + +![](img/B14853_01_12.png) + +图 1.4.2:3×3 核与 MNIST 数字图像卷积。 + 
在步骤`t[n]`和`t[n + 1]`中显示了卷积,其中核向右移动了 1 个像素。 + +卷积中涉及的计算显示在“图 1.4.3”中: + +![](img/B14853_01_13.png) + +图 1.4.3:卷积运算显示如何计算特征映射的一个元素 + +为简单起见,显示了应用了`3×3`核的`3×3`输入图像(或输入特征映射)。 卷积后显示结果特征映射。 特征映射中一个元素的值以阴影显示。 您会注意到,结果特征映射小于原始输入图像,这是因为卷积仅在有效元素上执行。 核不能超出图像的边界。 如果希望输入与输出特征映射的尺寸相同,`Conv2D`接受选项`padding='same'`。 输入在其边界周围填充零,以在卷积后保持尺寸不变。 + +## 池化操作 + +最后的更改是添加了`MaxPooling2D`层以及参数`pool_size=2`。 `MaxPooling2D`压缩每个特征映射。 每个大小为`pool_size × pool_size`的补丁都减少为 1 个特征映射点。 该值等于补丁中的最大特征点值。 下图显示了`MaxPooling2D`的两个补丁: + +![](img/B14853_01_14.png) + +图 1.4.4:`MaxPooling2D`操作。 为简单起见,输入特征映射为`4×4`,结果为`2×2`特征映射。 + +`MaxPooling2D`的意义在于特征映射尺寸的减小,这转化为感受野尺寸的增加。 例如,在`MaxPooling2D(2)`之后,`2×2`核现在近似于与`4×4`补丁进行卷积。 CNN 就能针对不同的感受野大小学习一组新的特征映射。 + +还有其他池化和压缩方式。 例如,要实现与`MaxPooling2D(2)`相同的 50% 尺寸缩减,`AveragePooling2D(2)`取补丁的平均值而不是找到最大值。 步幅卷积`Conv2D(strides=2,…)`在卷积过程中每隔一个像素跳过一次,仍具有相同的 50% 缩小效果。 每种缩减技术的效果都有细微的差异。 + +在`Conv2D`和`MaxPooling2D`中,`pool_size`和`kernel`都可以是非正方形的。 在这些情况下,必须同时指定行和列的大小。 例如,`pool_size = (1, 2)`和`kernel = (3, 5)`。 + +最后一个`MaxPooling2D`操作的输出是一组特征映射。 `Flatten`的作用是将特征映射栈转换为向量格式,以便用于`Dropout`或`Dense`层,这与 MLP 模型的输出层类似。 + +在下一部分中,我们将评估经过训练的 MNIST CNN 分类器模型的表现。 + +## 表现评估和模型摘要 + +如“列表 1.4.2”中所示,“列表 1.4.1”中的 CNN 模型仅需要 80,266 个参数,而使用 MLP 层时需要 269,322 个参数。 `conv2d_1`层具有 640 个参数,因为每个核具有`3×3 = 9`个参数,并且 64 个特征映射中的每一个都有一个核和一个偏置参数。 其他卷积层的参数数量可以用类似的方式计算。 + +“列表 1.4.2”:CNN MNIST 数字分类器的摘要 + +```py +Layer (type) Output Shape Param # +================================================================= +conv2d_1 (Conv2D) (None, 26, 26, 64) 640 +max_pooling2d_1 (MaxPooling2) (None, 13, 13, 64) 0 +conv2d_2 (Conv2D) (None, 11, 11, 64) 36928 +max_pooling2d_2 (MaxPooling2) (None, 5, 5, 64) 0 +conv2d_3 (Conv2D) (None, 3, 3, 64) 36928 +flatten_1 (Flatten) (None, 576) 0 +dropout_1 (Dropout) (None, 576) 0 +dense_1 (Dense) (None, 10) 5770 +activation_1 (Activation) (None, 10) 0 +================================================================= +Total params: 80,266 +Trainable params: 80,266 +Non-trainable params: 0 +``` + +“图 1.4.5”显示了 CNN MNIST 数字分类器的图形表示形式。 + +![A screenshot of a cell phone Description automatically generated](img/B14853_01_15.png) + +图 1.4.5:CNN MNIST 数字分类器的图形描述 + +“表 1.4.1”显示了 99.4% 的最大测试准确率,该结果由每层具有 64 个特征映射,使用 Adam 优化器和`dropout=0.2`的 3 层网络实现。 CNN 比 MLP 具有更高的参数效率,并且具有更高的准确率。 同样,CNN 也适合从顺序数据,图像和视频中学习表示形式。 + +| **层** | **优化器** | **正则化函数** | **训练准确率(%)** | **测试准确率(%)** | +| --- | --- | --- | --- | --- | +| 64-64-64 | SGD | 丢弃(0.2) | 97.76 | 98.50 | +| 64-64-64 | RMSprop | 丢弃(0.2) | 99.11 | 99.00 | +| 64-64-64 | Adam | 丢弃(0.2) | 99.75 | 99.40 | +| 64-64-64 | Adam | 丢弃(0.4) | 99.64 | 99.30 | + +表 1.4.1:CNN MNIST 数字分类器的不同 CNN 网络配置和表现指标。 + +看了 CNN 并评估了训练好的模型之后,让我们看一下我们将在本章中讨论的最后一个核心网络:RNN。 + +# 5\. 
循环神经网络(RNN) + +现在,我们来看一下三个人工神经网络中的最后一个,即 RNN。 + +RNN 是一族适用于学习顺序数据表示的网络,例如**自然语言处理**(**NLP**)中的文本或仪器中的传感器数据流。 尽管每个 MNIST 数据样本本质上都不是顺序的,但不难想象每个图像都可以解释为像素行或列的序列。 因此,基于 RNN 的模型可以将每个 MNIST 图像作为 28 个元素的输入向量序列进行处理,时间步数等于 28。“图 1.5.1”展示了该 RNN 模型,下面的清单给出其代码: + +![](img/B14853_01_16.png) + +图 1.5.1:用于 MNIST 数字分类的 RNN 模型 + +“列表 1.5.1”:`rnn-mnist-1.5.1.py` + +```py +import numpy as np +from tensorflow.keras.models import Sequential +from tensorflow.keras.layers import Dense, Activation, SimpleRNN +from tensorflow.keras.utils import to_categorical, plot_model +from tensorflow.keras.datasets import mnist +``` + +```py +# load mnist dataset +(x_train, y_train), (x_test, y_test) = mnist.load_data() +``` + +```py +# compute the number of labels +num_labels = len(np.unique(y_train)) +``` + +```py +# convert to one-hot vector +y_train = to_categorical(y_train) +y_test = to_categorical(y_test) +``` + +```py +# resize and normalize +image_size = x_train.shape[1] +x_train = np.reshape(x_train,[-1, image_size, image_size]) +x_test = np.reshape(x_test,[-1, image_size, image_size]) +x_train = x_train.astype('float32') / 255 +x_test = x_test.astype('float32') / 255 +``` + +```py +# network parameters +input_shape = (image_size, image_size) +batch_size = 128 +units = 256 +dropout = 0.2 +``` + +```py +# model is RNN with 256 units, input is 28-dim vector 28 timesteps +model = Sequential() +model.add(SimpleRNN(units=units, + dropout=dropout, + input_shape=input_shape)) +model.add(Dense(num_labels)) +model.add(Activation('softmax')) +model.summary() +plot_model(model, to_file='rnn-mnist.png', show_shapes=True) +``` + +```py +# loss function for one-hot vector +# use of sgd optimizer +# accuracy is good metric for classification tasks +model.compile(loss='categorical_crossentropy', + optimizer='sgd', + metrics=['accuracy']) +# train the network +model.fit(x_train, y_train, epochs=20, batch_size=batch_size) +``` + +```py +_, acc = model.evaluate(x_test, + y_test, + batch_size=batch_size, + verbose=0) +print("\nTest accuracy: %.1f%%" % (100.0 * acc)) +``` + +RNN 分类器与之前的两个模型之间有两个主要区别。 首先是`input_shape = (image_size, image_size)`,它实际上是`input_shape = (timesteps, input_dim)`,即由`timesteps`个`input_dim`维向量组成的序列。 其次是使用`SimpleRNN`层以`units=256`表示 RNN 单元。 `units`变量代表输出单元的数量。 如果说 CNN 的特征是核在输入特征映射上的卷积,那么 RNN 的输出不仅是当前输入的函数,而且是先前输出或隐藏状态的函数。 由于前一个输出也是前一个输入的函数,因此当前输出也是前一个输出和输入的函数,依此类推。 Keras 中的`SimpleRNN`层是真实 RNN 的简化版本。 以下等式描述了`SimpleRNN`的输出: + +![](img/B14853_01_024.png) (Equation 1.5.1) + +在此等式中,`b`是偏差,而`W`和`U`分别被称为循环核(先前输出的权重)和核(当前输入的权重)。 下标`t`用于指示序列中的位置。 对于具有`units=256`的`SimpleRNN`层,参数总数为`256 + 256×256 + 256×28 = 72,960`,分别对应于`b`,`W`和`U`的贡献。 + +下图显示了用于分类任务的`SimpleRNN`和 RNN 的图。 `SimpleRNN`比 RNN 更简单之处在于,它在计算`softmax`函数之前缺少输出值`o[t] = Vh[t] + c`的计算: + +![](img/B14853_01_17.png) + +图 1.5.2:`SimpleRNN`和 RNN 图 + +与 MLP 或 CNN 相比,RNN 最初可能较难理解。 在 MLP 中,感知器是基本单元。 一旦了解了感知器的概念,MLP 就是感知器的网络。 在 CNN 中,核是一个补丁或窗口,可在特征映射中滑动以生成另一个特征映射。 在 RNN 中,最重要的是自环的概念。 实际上只有一个单元。 + +出现多个单元的错觉是因为每个时间步都有一个单元,但实际上,除非网络展开,否则它只是重复使用的同一单元。 RNN 的基础神经网络在单元之间共享。 + +“列表 1.5.2”中的摘要表明,使用`SimpleRNN`所需的参数数量更少。 + +“列表 1.5.2”:RNN MNIST 数字分类器的摘要 + +```py +Layer (type) Output Shape Param # +================================================================= +simple_rnn_1 (SimpleRNN) (None, 256) 72960 +dense_1 (Dense) (None, 10) 2570 +activation_1 (Activation) (None, 10) 0 +================================================================= +Total params: 75,530 +Trainable params: 75,530 +Non-trainable params: 0 +``` + +“图 1.5.3”显示了 RNN MNIST 数字分类器的图形描述。 该模型非常简洁: + +![A screenshot of a cell phone 
Description automatically generated](img/B14853_01_18.png) + +图 1.5.3:RNN MNIST 数字分类器图形说明 + +“表 1.5.1”显示 SimpleRNN 在所呈现的网络中具有最低的准确率: + +| **层** | **优化器** | **正则化函数** | **训练准确率(%)** | **测试准确率(%)** | +| --- | --- | --- | --- | --- | +| 256 | SGD | 丢弃(0.2) | 97.26 | 98.00 | +| 256 | RMSprop | 丢弃(0.2) | 96.72 | 97.60 | +| 256 | Adam | 丢弃(0.2) | 96.79 | 97.40 | +| 512 | SGD | 丢弃(0.2) | 97.88 | 98.30 | + +表 1.5.1:不同的`SimpleRNN`网络配置和表现指标 + +在许多深度神经网络中,更常使用的是 RNN 家族的其他成员。 例如,机器翻译和问答问题都使用了**长短期记忆**(**LSTM**)。 LSTM 解决了长期依赖问题,即记住与当前输出相关的过去信息。 + +与 RNN 或`SimpleRNN`不同,LSTM 单元的内部结构更为复杂。“图 1.5.4”显示了 LSTM 的示意图。 LSTM 不仅使用当前输入和过去的输出或隐藏状态,还引入了一个单元状态`s[t]`,该状态将信息从一个单元传送到另一个单元。 单元状态之间的信息流由三个门`f[t]`,`i[t]`和`q[t]`控制。 这三个门的作用是确定哪些信息应该保留或替换,以及过去的信息和当前的输入中有多少对当前单元状态或输出有贡献。 我们不会在本书中讨论 LSTM 单元内部结构的细节。 但是,可以在[这个页面](http://colah.github.io/posts/2015-08-Understanding-LSTMs)上找到 LSTM 的直观指南。 + +`LSTM()`层可以用作`SimpleRNN()`的直接替代。 如果对于手头的任务而言 LSTM 开销过大,则可以使用更简单的版本,称为**门控循环单元**(**GRU**)。 GRU 通过将单元状态和隐藏状态组合在一起来简化 LSTM。 GRU 还将门的数量减少了一个。 `GRU()`函数也可以用作`SimpleRNN()`的直接替代品。 + +![](img/B14853_01_19.png) + +图 1.5.4:LSTM 图。为了清楚起见,未显示参数。 + +还有许多其他方法可以配置 RNN。 一种方法是构建双向 RNN 模型。 默认情况下,RNN 是单向的,即当前输出仅受过去状态和当前输入的影响。 + +在双向 RNN 中,未来状态还可以通过允许信息向后流动来影响当前状态和过去状态。 收到新信息后,将根据需要更新过去的输出。 可以通过调用包装器函数使 RNN 双向。 例如,双向 LSTM 的实现是`Bidirectional(LSTM())`。 + +对于所有类型的 RNN,增加单元数量都会增加容量。 但是,增加容量的另一种方法是堆叠 RNN 层。 不过需要注意的是,作为一般经验法则,只有在需要时才应增加模型的容量。 容量过大可能会导致过拟合,结果可能导致训练时间延长和预测期间的表现降低。 + +# 6\. 总结 + +本章概述了三种深度学习模型(MLP,RNN,CNN),并介绍了 TensorFlow 2 `tf.keras`,这是一个用于快速开发,训练和测试适合于生产环境的深度学习模型的库。 还讨论了 Keras 的顺序 API。 在下一章中,将介绍函数式 API,这将使我们能够构建更复杂的模型,专门用于高级深度神经网络。 + +本章还回顾了深度学习的重要概念,例如优化,正则化和损失函数。 为了便于理解,这些概念是在 MNIST 数字分类的背景下提出的。 + +还讨论了使用人工神经网络(特别是 MLP,CNN 和 RNN)进行 MNIST 数字分类的不同解决方案,它们是深度神经网络的重要组成部分,并讨论了它们的表现指标。 + +了解了深度学习概念以及如何将 Keras 用作工具之后,我们现在可以分析高级深度学习模型。 在下一章讨论了函数式 API 之后,我们将继续实现流行的深度学习模型。 随后的章节将讨论选定的高级主题,例如自回归模型(自编码器,GAN,VAE),深度强化学习,对象检测和分割以及使用互信息的无监督学习。 随附的 Keras 代码实现将在理解这些主题方面发挥重要作用。 + +# 7\. 参考 + +1. `Chollet, François. Keras (2015). https://github.com/keras-team/keras.` +2. `LeCun, Yann, Corinna Cortes, and C. J. Burges. MNIST handwritten digit database. AT&T Labs [Online]. Available: http://yann.lecun.com/exdb/mnist (2010).` \ No newline at end of file diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/02.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/02.md new file mode 100644 index 00000000..d6ddd212 --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/02.md @@ -0,0 +1,897 @@ +# 二、深度神经网络 + +在本章中,我们将研究深度神经网络。 这些网络已在更具挑战性的数据集(如 ImageNet,[CIFAR10](https://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf) 和 CIFAR100)上得到验证。 为简洁起见,我们仅关注两个网络:**ResNet** [2][4] 和 **DenseNet** [5]。 尽管稍后我们会进行更详细的讨论,但仍有必要先花一点时间介绍这些网络。 + +ResNet 引入了残差学习的概念,通过解决深度卷积网络中的梯度消失问题(在第 2 节中讨论),使构建非常深的网络成为可能。 + +DenseNet 允许每个卷积直接访问输入和较低层的特征映射,从而进一步改进了 ResNet。 通过利用**瓶颈层**和**过渡层**,它还能在深层网络中将参数数量保持在较低水平。 + +但是,为什么选择这两个模型,而不是其他模型? 
好吧,自从引入它们以来,已经有无数的模型,例如 **ResNeXt** [6]和 **WideResNet** [7],它们受到这两个网络使用的技术的启发。 同样,在了解 ResNet 和 DenseNet 的情况下,我们将能够使用他们的设计指南来构建我们自己的模型。 通过使用迁移学习,这也将使我们能够将预训练的 ResNet 和 DenseNet 模型用于我们自己的目的,例如对象检测和分割。 仅出于这些原因,以及与 Keras 的兼容性,这两个模型非常适合探索和补充本书的高级深度学习范围。 + +尽管本章的重点是深度神经网络; 在本章中,我们将讨论 Keras 的重要功能,称为**函数式 API**。 该 API 充当在`tf.keras`中构建网络的替代方法,使我们能够构建更复杂的网络,而这是顺序模型 API 无法实现的。 我们之所以专注于此 API 的原因是,它将成为构建诸如本章重点介绍的两个之类的深度网络的非常有用的工具。 建议您先完成“第 1 章”,“Keras 的高级深度学习介绍”,然后再继续本章,因为我们将参考在本章中探讨的入门级代码和概念,我们将它们带入了更高的层次。 + +本章的目的是介绍: + +* Keras 中的函数式 API,以及探索运行该 API 的网络示例 +* `tf.keras`中的深度残差网络(ResNet 版本 1 和 2)实现 +* `tf.keras`中密集连接卷积网络(DenseNet)的实现 +* 探索两种流行的深度学习模型,即 **ResNet** 和 **DenseNet** + +让我们开始讨论函数式 API。 + +# 1\. 函数式 API + +在我们首先在“第 1 章”,“Keras 高级深度学习入门”的顺序模型 API 中,一层堆叠在另一层之上。 通常,将通过其输入和输出层访问模型。 我们还了解到,如果我们发现自己想要在网络中间添加辅助输入,或者甚至想在最后一层之前提取辅助输出,则没有简单的机制。 + +这种模式也有缺点。 例如,它不支持类似图的模型或行为类似于 Python 函数的模型。 此外,在两个模型之间共享层也很困难。函数式 API 解决了这些局限性,这就是为什么它对于想要使用深度学习模型的任何人来说都是至关重要的工具的原因。 + +函数式 API 遵循以下两个概念: + +* 层是接受张量作为参数的实例。 一层的输出是另一个张量。 为了构建模型,层实例是通过输入和输出张量彼此链接的对象。 这与在顺序模型中堆叠多个层有类似的最终结果。 但是,使用层实例会使模型更容易具有辅助或多个输入和输出,因为每个层的输入/输出将易于访问。 +* 模型是一个或多个输入张量和输出张量之间的函数。 在模型输入和输出之间,张量是通过层输入和输出张量彼此链接的层实例。 因此,模型是一个或多个输入层和一个或多个输出层的函数。 该模型实例将数据从输入流到输出流的形式的计算图形式化。 + +在完成函数式 API 模型的构建之后,训练和评估将由顺序模型中使用的相同函数执行。 为了说明,在函数式 API 中,二维卷积层`Conv2D`带有 32 个过滤器,并且`x`作为层输入张量,`y`作为层输出张量可以写为: + +```py +y = Conv2D(32)(x) +``` + +我们也可以堆叠多层来构建模型。 例如,我们可以使用函数式 API 重写 MNIST `cnn-mnist-1.4.1.py`上的**卷积神经网络**(**CNN**),如下所示: + +“列表 2.1.1”:`cnn-functional-2.1.1.py` + +```py +import numpy as np +from tensorflow.keras.layers import Dense, Dropout, Input +from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten +from tensorflow.keras.models import Model +from tensorflow.keras.datasets import mnist +from tensorflow.keras.utils import to_categorical +``` + +```py +# load MNIST dataset +(x_train, y_train), (x_test, y_test) = mnist.load_data() +``` + +```py +# from sparse label to categorical +num_labels = len(np.unique(y_train)) +y_train = to_categorical(y_train) +y_test = to_categorical(y_test) +``` + +```py +# reshape and normalize input images +image_size = x_train.shape[1] +x_train = np.reshape(x_train,[-1, image_size, image_size, 1]) +x_test = np.reshape(x_test,[-1, image_size, image_size, 1]) +x_train = x_train.astype('float32') / 255 +x_test = x_test.astype('float32') / 255 +``` + +```py +# network parameters +input_shape = (image_size, image_size, 1) +batch_size = 128 +kernel_size = 3 +filters = 64 +dropout = 0.3 +``` + +```py +# use functional API to build cnn layers +inputs = Input(shape=input_shape) +y = Conv2D(filters=filters, + kernel_size=kernel_size, + activation='relu')(inputs) +y = MaxPooling2D()(y) +y = Conv2D(filters=filters, + kernel_size=kernel_size, + activation='relu')(y) +y = MaxPooling2D()(y) +y = Conv2D(filters=filters, + kernel_size=kernel_size, + activation='relu')(y) +# image to vector before connecting to dense layer +y = Flatten()(y) +# dropout regularization +y = Dropout(dropout)(y) +outputs = Dense(num_labels, activation='softmax')(y) +``` + +```py +# build the model by supplying inputs/outputs +model = Model(inputs=inputs, outputs=outputs) +# network model in text +model.summary() +# classifier loss, Adam optimizer, classifier accuracy +model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) +``` + +```py +# train the model with input images and labels +model.fit(x_train, + y_train, + validation_data=(x_test, y_test), + epochs=20, + batch_size=batch_size) +``` + +```py +# model accuracy on test 
dataset +score = model.evaluate(x_test, + y_test, + batch_size=batch_size, + verbose=0) +print("\nTest accuracy: %.1f%%" % (100.0 * score[1])) +``` + +默认情况下,使用`pool_size=2`作为参数,因此`MaxPooling2D`已被删除。 + +在前面的清单中,每一层都是张量的函数。 每一层生成一个张量作为输出,该张量成为下一层的输入。 要创建此模型,我们可以调用`Model()`并提供`inputs`和`outputs`张量,或者提供张量列表。 其他一切保持不变。 + +类似于顺序模型,也可以使用`fit()`和`evaluate()`函数来训练和评估相同的列表。 实际上,`Sequential`类是`Model`类的子类。 我们需要记住,我们在`fit()`函数中插入了`validation_data`参数,以查看训练期间验证准确率的进度。 在 20 个周期内,准确率范围从 99.3% 到 99.4%。 + +## 创建两输入一输出模型 + +现在,我们将做一些令人兴奋的事情,创建一个具有两个输入和一个输出的高级模型。 在开始之前,重要的是要知道序列模型 API 是为仅构建 1 输入和 1 输出模型而设计的。 + +假设发明了一种用于 MNIST 数字分类的新模型,它称为 Y 网络,如图“图 2.1.1”所示。 Y 网络在左 CNN 分支和右 CNN 分支两次使用相同的输入。 网络使用`concatenate`层合并结果。 合并操作`concatenate`类似于沿连接轴堆叠两个相同形状的张量以形成一个张量。 例如,沿着最后一个轴连接两个形状为`(3, 3, 16)`的张量将导致一个形状为`(3, 3, 32)`的张量。 + +`concatenate`层之后的所有其他内容将与上一章的 CNN MNIST 分类器模型相同:`Flatten`,然后是`Dropout`,然后是`Dense`: + +![](img/B14853_02_01.png) + +图 2.1.1:Y 网络接受两次相同的输入,但是在卷积网络的两个分支中处理输入。 分支的输出使用连接层进行合并。最后一层的预测将类似于上一章的 CNN MNIST 分类器模型。 + +为了提高“列表 2.1.1”中模型的表现,我们可以提出一些更改。 首先,Y 网络的分支将过滤器数量加倍,以补偿`MaxPooling2D()`之后特征映射尺寸的减半。 例如,如果第一个卷积的输出为`(28, 28, 32)`,则在最大池化之后,新形状为`(14, 14, 32)`。 下一个卷积的过滤器大小为 64,输出尺寸为`(14, 14, 64)`。 + +其次,尽管两个分支的核大小相同,但右分支使用 2 的扩展率。“图 2.1.2”显示了不同的扩展率对大小为 3 的核的影响。 这个想法是,通过使用扩张率增加核的有效接受域大小,CNN 将使正确的分支能够学习不同的特征映射。 使用大于 1 的扩张速率是一种计算有效的近似方法,可以增加接收场的大小。 这是近似值,因为该核实际上不是成熟的核。 这是有效的,因为我们使用与膨胀率等于 1 相同的操作数。 + +要了解接受域的概念,请注意,当核计算特征映射的每个点时,其输入是前一层特征映射中的补丁,该补丁也取决于其前一层特征映射。 如果我们继续将此依赖关系一直跟踪到输入图像,则核将依赖于称为接收场的图像补丁。 + +我们将使用选项`padding='same'`来确保使用扩张的 CNN 时不会出现负张量。 通过使用`padding='same'`,我们将使输入的尺寸与输出特征映射相同。 这是通过用零填充输入以确保输出的**大小**相同来实现的。 + +![](img/B14853_02_02.png) + +图 2.1.2:通过从 1 增加膨胀率,有效的核接受域大小也增加了 + +“列表 2.1.2”的`cnn-y-network-2.1.2.py`显示了使用函数式 API 的 Y 网络的实现。 两个分支由两个`for`循环创建。 两个分支期望输入形状相同。 两个`for`循环将创建两个`Conv2D-Dropout-MaxPooling2D`的三层栈。 虽然我们使用`concatenate`层组合了左右分支的输出,但我们还可以利用`tf.keras`的其他合并函数,例如`add`,`dot`和`multiply`。 合并函数的选择并非纯粹是任意的,而必须基于合理的模型设计决策。 + +在 Y 网络中,`concatenate`不会丢弃特征映射的任何部分。 取而代之的是,我们让`Dense`层确定如何处理连接的特征映射。 + +“列表 2.1.2”:`cnn-y-network-2.1.2.py` + +```py +import numpy as np +from tensorflow.keras.layers import Dense, Dropout, Input +from tensorflow.keras.layers import Conv2D, MaxPooling2D +from tensorflow.keras.layers import Flatten, concatenate +from tensorflow.keras.models import Model +from tensorflow.keras.datasets import mnist +from tensorflow.keras.utils import to_categorical +from tensorflow.keras.utils import plot_model +``` + +```py +# load MNIST dataset +(x_train, y_train), (x_test, y_test) = mnist.load_data() +``` + +```py +# from sparse label to categorical +num_labels = len(np.unique(y_train)) +y_train = to_categorical(y_train) +y_test = to_categorical(y_test) +``` + +```py +# reshape and normalize input images +image_size = x_train.shape[1] +x_train = np.reshape(x_train,[-1, image_size, image_size, 1]) +x_test = np.reshape(x_test,[-1, image_size, image_size, 1]) +x_train = x_train.astype('float32') / 255 +x_test = x_test.astype('float32') / 255 +``` + +```py +# network parameters +input_shape = (image_size, image_size, 1) +batch_size = 32 +kernel_size = 3 +dropout = 0.4 +n_filters = 32 +``` + +```py +# left branch of Y network +left_inputs = Input(shape=input_shape) +x = left_inputs +filters = n_filters +# 3 layers of Conv2D-Dropout-MaxPooling2D +# number of filters doubles after each layer (32-64-128) +for i in range(3): + x = Conv2D(filters=filters, + kernel_size=kernel_size, + padding='same', + activation='relu')(x) + x = Dropout(dropout)(x) + x = MaxPooling2D()(x) + filters *= 2 +``` + +```py +# 
right branch of Y network +right_inputs = Input(shape=input_shape) +y = right_inputs +filters = n_filters +# 3 layers of Conv2D-Dropout-MaxPooling2Do +# number of filters doubles after each layer (32-64-128) +for i in range(3): + y = Conv2D(filters=filters, + kernel_size=kernel_size, + padding='same', + activation='relu', + dilation_rate=2)(y) + y = Dropout(dropout)(y) + y = MaxPooling2D()(y) + filters *= 2 +``` + +```py +# merge left and right branches outputs +y = concatenate([x, y]) +# feature maps to vector before connecting to Dense +y = Flatten()(y) +y = Dropout(dropout)(y) +outputs = Dense(num_labels, activation='softmax')(y) +``` + +```py +# build the model in functional API +model = Model([left_inputs, right_inputs], outputs) +# verify the model using graph +plot_model(model, to_file='cnn-y-network.png', show_shapes=True) +# verify the model using layer text description +model.summary() +``` + +```py +# classifier loss, Adam optimizer, classifier accuracy +model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) +``` + +```py +# train the model with input images and labels +model.fit([x_train, x_train], + y_train, + validation_data=([x_test, x_test], y_test), + epochs=20, + batch_size=batch_size) +``` + +```py +# model accuracy on test dataset +score = model.evaluate([x_test, x_test], + y_test, + batch_size=batch_size, + verbose=0) +print("\nTest accuracy: %.1f%%" % (100.0 * score[1])) +``` + +退后一步,我们可以注意到 Y 网络期望有两个输入用于训练和验证。 输入是相同的,因此提供了`[x_train, x_train]`。 + +在 20 个周期的过程中,Y 网络的准确率为 99.4% 至 99.5%。 与 3 叠 CNN 相比,这是一个微小的改进,CNN 的精度在 99.3% 到 99.4% 之间。 但是,这是以更高的复杂度和两倍以上的参数数量为代价的。 + +下图“图 2.1.3”显示了 Keras 理解并由`plot_model()`函数生成的 Y 网络的架构: + +![A close up of text on a white background Description automatically generated](img/B14853_02_03.png) + +图 2.1.3:清单 2.1.2 中实现的 CNN Y 网络 + +总结我们对函数式 API 的了解。 我们应该花时间记住本章的重点是构建深度神经网络,特别是 ResNet 和 DenseNet。 因此,我们只讨论构建它们所需的函数式 API 材料,因为涵盖整个的 API 将超出本书的范围。 话虽如此,让我们继续讨论 ResNet。 + +有关函数式 API 的其他信息,请阅读[这里](https://keras.io/)。 + +# 2\. 
深度残差网络(ResNet) + +深度网络的一个主要优点是,它们具有从输入和特征映射中学习不同级别表示的能力。 在分类,分割,检测和许多其他计算机视觉问题中,学习不同的特征映射通常可以提高性能。 + +但是,您会发现训练深层网络并不容易,因为在反向传播过程中,梯度可能会随着深度的增加而在浅层中消失(或爆炸)。“图 2.2.1”说明了梯度消失的问题。 网络参数通过从输出层到所有先前层的反向传播来更新。 由于反向传播是基于链式法则的,因此当梯度到达浅层时会逐渐减小。 这是由于小数值的连乘,尤其是当损失函数值和参数值都很小时。 + +乘法运算的数量将与网络深度成正比。 还要注意的是,如果梯度变得过小,参数将无法得到适当的更新。 + +因此,网络将无法提高其表现。 + +![](img/B14853_02_04.png) + +图 2.2.1:深层网络中的一个常见问题是,在反向传播过程中,梯度在到达浅层时会消失。 + +为了缓解深度网络中梯度的退化,ResNet 引入了深度残差学习框架的概念。 让我们分析一个块:深度网络的一小部分。 + +“图 2.2.2”显示了典型 CNN 块和 ResNet 残差块之间的比较。 ResNet 的想法是,为了防止梯度退化,我们将让信息通过快捷连接流到浅层。 + +![](img/B14853_02_05.png) + +图 2.2.2:典型 CNN 中的块与 ResNet 中的块之间的比较。 为了防止反向传播期间梯度的退化,引入了快捷连接。 + +接下来,我们将更详细地讨论这两种块之间的差异。“图 2.2.3”显示了另一个常用深层网络 **VGG** [3]和 ResNet 的 CNN 块的更多细节。 我们将层特征映射表示为`x`。 层`l`的特征映射为`x[l]`。 CNN 层中的操作是 **Conv2D-批量规范化(BN)-ReLU**。 + +假设我们将这组操作表示为`H() = Conv2D-Batch Normalization(BN)-ReLU`,则: + +`x[l-1] = H(x[l-2])`(公式 2.2.1) + +`x[l] = H(x[l-1])`(公式 2.2.2) + +换句话说,通过`H() = Conv2D-Batch Normalization(BN)-ReLU`将`l-2`层上的特征映射转换为`x[l-1]`。 应用相同的操作集将`x[l-1]`转换为`x[l]`。 换句话说,如果我们有一个 18 层的 VGG,则在将输入图像转换为第 18 层特征映射之前,有 18 个`H()`操作。 + +一般而言,我们可以观察到`l`层输出特征映射仅直接受先前的特征映射影响。 同时,对于 ResNet: + +`x[l-1] = H(x[l-2])`(公式 2.2.3) + +`x[l] = ReLU(F(x[l-1]) + x[l-2])`(公式 2.2.4) + +![](img/B14853_02_06.png) + +图 2.2.3:普通 CNN 块和残差块的详细层操作 + +`F(x[l-1])`由`Conv2D-BN`构成,也被称为残差映射。 `+`符号表示快捷连接与`F(x[l-1])`输出之间的逐元素张量加法。 快捷连接不会增加额外的参数,也不会增加计算复杂度。 + +可以通过`add()`合并函数在`tf.keras`中实现加法操作。 但是,`F(x[l-1])`和`x[l-2]`应该具有相同的尺寸。 + +如果尺寸不同,例如在更改特征映射尺寸时,我们应该对`x[l-2]`进行线性投影,以匹配`F(x[l-1])`的尺寸。 在原始论文中,当特征映射的大小减半时,线性投影是通过核大小为 1 且`strides=2`的`Conv2D`完成的。 + +在“第 1 章”,“Keras 高级深度学习入门”中,我们讨论了`stride > 1`等效于在卷积期间跳过像素。 例如,如果`strides=2`,则在卷积过程中滑动核时,会每隔一个像素跳过一次。 + +前面的“公式 2.2.3”和“公式 2.2.4”都对 ResNet 残差块操作进行建模。 它们表明,如果较深的层可以被训练得误差更小,那么较浅的层就没有理由产生更高的误差。 + +知道 ResNet 的基本构建块后,我们就可以设计一个深度残差网络来进行图像分类。 但是,这一次,我们将处理更具挑战性的数据集。 + +在我们的示例中,我们将考虑 CIFAR10,它是原始论文所基于的数据集之一。 在此示例中,`tf.keras`提供了一个 API,可以方便地访问 CIFAR10 数据集,如下所示: + +```py +from tensorflow.keras.datasets import cifar10 +(x_train, y_train), (x_test, y_test) = cifar10.load_data() +``` + +与 MNIST 一样,CIFAR10 数据集也有 10 个类别。 数据集是小型(`32×32`)RGB 真实世界图像的集合,分别对应飞机,汽车,鸟,猫,鹿,狗,青蛙,马,船和卡车这 10 个类别。“图 2.2.4”显示了来自 CIFAR10 的示例图像。 + +在数据集中,有 50,000 个标记的训练图像和 10,000 个标记的测试图像用于验证: + +![](img/B14853_02_07.png) + +图 2.2.4:来自 CIFAR10 数据集的样本图像。 完整的数据集包含 50,000 张带标签的训练图像和 10,000 张带标签的测试图像以进行验证。 + +对于 CIFAR10 数据,可以使用“表 2.2.1”中所示的不同网络架构来构建 ResNet。“表 2.2.1”表示我们有三组残差块。 每组具有与`n`个残差块相对应的`2n`层。 额外的一层是处理`32×32`输入图像的第一个卷积层。 + +| **层** | **输出大小** | **过滤器尺寸** | **操作** | +| --- | --- | --- | --- | +| 卷积 | `32 × 32` | 16 | `3 x 3 Conv2D` | +| 残差块(1) | `32 × 32` | | ![](img/B14853_02_001.png) | +| 过渡层(1) | `32 × 32` | | `{1 x 1 Conv2D, stride = 2}` | +| | `16 × 16` | | | +| 残差块(2) | `16 × 16` | 32 | ![](img/B14853_02_002.png) | +| 过渡层(2) | `16 × 16` | | `{1 x 1 Conv2D, stride = 2}` | +| | `8 × 8` | | | +| 残差块(3) | `8 × 8` | 64 | ![](img/B14853_02_003.png) | +| 平均池化 | `1 × 1` | | `8 x 8 AveragePooling2D` | + +表 2.2.1:ResNet 网络架构配置 + +核大小均为 3,只有在两个不同大小的特征映射之间过渡时例外,此时实现的是线性映射,例如核大小为 1 且`strides=2`的`Conv2D`。 为了与 DenseNet 保持一致,当我们连接两个大小不同的残差块时,我们将使用“过渡层”这一术语。 + +ResNet 使用`kernel_initializer='he_normal'`,以便在进行反向传播时帮助收敛[1]。 最后一层由`AveragePooling2D-Flatten-Dense`构成。 在这一点上值得注意的是,ResNet 不使用丢弃。 似乎`add`合并操作和`1 x 1`卷积具有自正则化效果。“图 2.2.5”显示了 CIFAR10 数据集的 ResNet 模型架构,如“表 2.2.1”中所述。 + +![A screenshot of a cell phone Description automatically generated](img/B14853_02_08.png) + +图 2.2.5:用于 CIFAR10 数据集分类的 ResNet 的模型架构 + +以下代码段显示了`tf.keras`中的部分 ResNet 实现。 该代码已添加到 Keras GitHub 存储库中。 从“表 
2.2.2”(稍后显示)中,我们还可以看到,通过修改`n`的值,我们可以增加网络的深度。 + +例如,对于`n = 18`,我们已经拥有 ResNet110,这是一个具有 110 层的深度网络。 要构建 ResNet20,我们使用`n = 3`: + +```py +n = 3 +``` + +```py +# model version +# orig paper: version = 1 (ResNet v1), +# improved ResNet: version = 2 (ResNet v2) +version = 1 +``` + +```py +# computed depth from supplied model parameter n +if version == 1: + depth = n * 6 + 2 +elif version == 2: + depth = n * 9 + 2 +``` + +```py + if version == 2: + model = resnet_v2(input_shape=input_shape, depth=depth) +else: + model = resnet_v1(input_shape=input_shape, depth=depth) +``` + +`resnet_v1()`方法是 ResNet 的模型构建器。 它使用工具函数`resnet_layer(),`来帮助构建`Conv2D-BN-ReLU`的栈。 + +它将称为版本 1,正如我们将在下一节中看到的那样,提出了一种改进的 ResNet,该版本称为 ResNet 版本 2 或 v2。 通过 ResNet,ResNet v2 改进了残差块设计,从而提高了表现。 + +以下清单显示了`resnet-cifar10-2.2.1.py`的部分代码,它是 ResNet v1 的`tf.keras`模型实现。 + +“列表 2.2.1”:`resnet-cifar10-2.2.1.py` + +```py +def resnet_v1(input_shape, depth, num_classes=10): + """ResNet Version 1 Model builder [a] +``` + +```py + Stacks of 2 x (3 x 3) Conv2D-BN-ReLU + Last ReLU is after the shortcut connection. + At the beginning of each stage, the feature map size is halved + (downsampled) by a convolutional layer with strides=2, while + the number of filters is doubled. Within each stage, + the layers have the same number filters and the + same number of filters. + Features maps sizes: + stage 0: 32x32, 16 + stage 1: 16x16, 32 + stage 2: 8x8, 64 + The Number of parameters is approx the same as Table 6 of [a]: + ResNet20 0.27M + ResNet32 0.46M + ResNet44 0.66M + ResNet56 0.85M + ResNet110 1.7M +``` + +```py + Arguments: + input_shape (tensor): shape of input image tensor + depth (int): number of core convolutional layers + num_classes (int): number of classes (CIFAR10 has 10) +``` + +```py + Returns: + model (Model): Keras model instance + """ + if (depth - 2) % 6 != 0: + raise ValueError('depth should be 6n+2 (eg 20, 32, in [a])') + # Start model definition. + num_filters = 16 + num_res_blocks = int((depth - 2) / 6) +``` + +```py + inputs = Input(shape=input_shape) + x = resnet_layer(inputs=inputs) + # instantiate the stack of residual units + for stack in range(3): + for res_block in range(num_res_blocks): + strides = 1 + # first layer but not first stack + if stack > 0 and res_block == 0: + strides = 2 # downsample + y = resnet_layer(inputs=x, + num_filters=num_filters, + strides=strides) + y = resnet_layer(inputs=y, + num_filters=num_filters, + activation=None) + # first layer but not first stack + if stack > 0 and res_block == 0: + # linear projection residual shortcut + # connection to match changed dims + x = resnet_layer(inputs=x, + num_filters=num_filters, + kernel_size=1, + strides=strides, + activation=None, + batch_normalization=False) + x = add([x, y]) + x = Activation('relu')(x) + num_filters *= 2 +``` + +```py + # add classifier on top. + # v1 does not use BN after last shortcut connection-ReLU + x = AveragePooling2D(pool_size=8)(x) + y = Flatten()(x) + outputs = Dense(num_classes, + activation='softmax', + kernel_initializer='he_normal')(y) +``` + +```py + # instantiate model. 
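+    # (added note) per the functional API, Model() ties the input
+    # tensor to the softmax output tensor, turning the stack of
+    # residual blocks built above into a single trainable graph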
+ model = Model(inputs=inputs, outputs=outputs) + return model +``` + +ResNet 在`n`的各种值上的表现显示在“表 2.2.2”中。 + +| **层** | `n` | **CIFAR10 的准确率百分比(原始论文)** | **CIFAR10 的准确率百分比(本书)** | +| --- | --- | --- | --- | +| ResNet20 | 3 | 91.25 | 92.16 | +| ResNet32 | 5 | 92.49 | 92.46 | +| ResNet44 | 7 | 92.83 | 92.50 | +| ResNet56 | 9 | 93.03 | 92.71 | +| ResNet110 | 18 | 93.57 | 92.65 | + +表 2.2.2:针对不同的 n 值,使用 CIFAR10 验证的 ResNet 架构 + +与 ResNet 的原始实现有一些细微的差异。 特别是,我们不使用 SGD,而是使用 Adam。 这是因为 ResNet 更容易与 Adam 融合。 我们还将使用学习率调度器`lr_schedule()`,以便将`lr`的减少量从默认的`1e-3`缩短为 80、120、160 和 180 个周期。 在训练期间的每个周期之后,都会将`lr_schedule()`函数作为回调变量的一部分进行调用。 + +每当验证准确率方面取得进展时,另一个回调将保存检查点。 训练深层网络时,保存模型或权重检查点是一个好习惯。 这是因为训练深度网络需要大量时间。 + +当您想使用网络时,您只需要做的就是重新加载检查点,然后恢复经过训练的模型。 这可以通过调用`tf.keras load_model()`来完成。 包含`lr_reducer()`函数。 如果指标在排定的减少之前已稳定在上,则如果在`patience = 5`周期之后验证损失没有改善,则此回调将以参数中提供的某个因子来降低学习率。 + +调用`model.fit()`方法时,会提供**回调**变量。 与原始论文相似,`tf.keras`实现使用数据扩充`ImageDataGenerator()`来提供其他训练数据作为正则化方案的一部分。 随着训练数据数量的增加,概括性将会提高。 + +例如,简单的数据扩充就是翻转一条狗的照片,如图“图 2.2.6”(`horizontal_flip = True`)所示。 如果它是狗的图像,则翻转的图像仍然是狗的图像。 您还可以执行其他变换,例如缩放,旋转,变白等等,并且标签将保持不变: + +![A brown and white dog looking at the camera Description automatically generated](img/B14853_02_09.png) + +图 2.2.6:一个简单的数据扩充就是翻转原始图像 + +[完整的代码可在 GitHub 上获得](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)。 + +准确复制原始论文的实现通常很困难。 在本书中,我们使用了不同的优化器和数据扩充。 这可能会导致本书中所实现的`tf.keras` ResNet 和原始模型中的表现略有不同。 + +在 **ResNet** [4]的第二篇论文发布之后,本节中介绍的原始模型为,称为 ResNet v1。 改进的 ResNet 通常称为 ResNet v2,我们将在下一部分讨论。 + +# 3\. ResNet v2 + +ResNet v2 的改进主要体现在残块中各层的排列中,如图“图 2.3.1”所示。 + +ResNet v2 的主要变化是: + +* 使用`1 x 1 – 3 x 3 – 1 × 1`的栈`BN-ReLU-Conv2D` +* 批量标准化和 ReLU 激活先于二维卷积 + +![](img/B14853_02_10.png) + +图 2.3.1:ResNet v1 和 ResNet v2 之间的剩余块比较 + +ResNet v2 也以与`resnet-cifar10-2.2.1.py`相同的代码实现,如“列表 2.2.1”所示: + +“列表 2.2.1”:`resnet-cifar10-2.2.1.py` + +```py +def resnet_v2(input_shape, depth, num_classes=10): + """ResNet Version 2 Model builder [b] +``` + +```py + Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or + also known as bottleneck layer. + First shortcut connection per layer is 1 x 1 Conv2D. + Second and onwards shortcut connection is identity. + At the beginning of each stage, + the feature map size is halved (downsampled) + by a convolutional layer with strides=2, + while the number of filter maps is + doubled. Within each stage, the layers have + the same number filters and the same filter map sizes. + Features maps sizes: + conv1 : 32x32, 16 + stage 0: 32x32, 64 + stage 1: 16x16, 128 + stage 2: 8x8, 256 +``` + +```py + Arguments: + input_shape (tensor): shape of input image tensor + depth (int): number of core convolutional layers + num_classes (int): number of classes (CIFAR10 has 10) +``` + +```py + Returns: + model (Model): Keras model instance + """ + if (depth - 2) % 9 != 0: + raise ValueError('depth should be 9n+2 (eg 110 in [b])') + # start model definition. 
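+    # (added note) the v2 depth must satisfy 9n+2, as validated
+    # above; each stage stacks bottleneck residual units of
+    # BN-ReLU-Conv2D with kernel sizes 1-3-1, i.e. the
+    # pre-activation ordering that distinguishes ResNet v2 from v1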
+
+与原始论文相似,`tf.keras`实现使用数据扩充`ImageDataGenerator()`来提供额外的训练数据,作为正则化方案的一部分。 随着训练数据数量的增加,模型的泛化能力将会提高。
+
+例如,一种简单的数据扩充是翻转一张狗的照片(`horizontal_flip = True`),如“图 2.2.6”所示。 如果它是狗的图像,则翻转后的图像仍然是狗的图像。 您还可以执行其他变换,例如缩放,旋转,白化等等,并且标签将保持不变:
+
+![A brown and white dog looking at the camera Description automatically generated](img/B14853_02_09.png)
+
+图 2.2.6:一种简单的数据扩充就是翻转原始图像
+
+[完整的代码可在 GitHub 上获得](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)。
+
+准确复制原始论文的实现通常很困难。 在本书中,我们使用了不同的优化器和数据扩充。 这可能会导致本书中实现的`tf.keras` ResNet 与原始模型的表现略有不同。
+
+在关于 **ResNet** 的第二篇论文 [4] 发布之后,本节中介绍的原始模型被称为 ResNet v1。 改进的 ResNet 通常称为 ResNet v2,我们将在下一节讨论。
+
+# 3\. ResNet v2
+
+ResNet v2 的改进主要体现在残差块中各层的排列上,如“图 2.3.1”所示。
+
+ResNet v2 的主要变化是:
+
+* 使用`1 x 1 - 3 x 3 - 1 x 1`的`BN-ReLU-Conv2D`栈
+* 批量归一化和 ReLU 激活先于二维卷积
+
+![](img/B14853_02_10.png)
+
+图 2.3.1:ResNet v1 和 ResNet v2 之间的残差块比较
+
+ResNet v2 也在同一份代码`resnet-cifar10-2.2.1.py`中实现,如“列表 2.2.1”所示:
+
+“列表 2.2.1”:`resnet-cifar10-2.2.1.py`
+
+```py
+def resnet_v2(input_shape, depth, num_classes=10):
+    """ResNet Version 2 Model builder [b]
+```
+
+```py
+    Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or
+    also known as bottleneck layer.
+    First shortcut connection per layer is 1 x 1 Conv2D.
+    Second and onwards shortcut connection is identity.
+    At the beginning of each stage,
+    the feature map size is halved (downsampled)
+    by a convolutional layer with strides=2,
+    while the number of filter maps is
+    doubled. Within each stage, the layers have
+    the same number of filters and the same filter map sizes.
+    Features maps sizes:
+    conv1 : 32x32, 16
+    stage 0: 32x32, 64
+    stage 1: 16x16, 128
+    stage 2: 8x8, 256
+```
+
+```py
+    Arguments:
+        input_shape (tensor): shape of input image tensor
+        depth (int): number of core convolutional layers
+        num_classes (int): number of classes (CIFAR10 has 10)
+```
+
+```py
+    Returns:
+        model (Model): Keras model instance
+    """
+    if (depth - 2) % 9 != 0:
+        raise ValueError('depth should be 9n+2 (eg 110 in [b])')
+    # start model definition.
+    num_filters_in = 16
+    num_res_blocks = int((depth - 2) / 9)
+```
+
+```py
+    inputs = Input(shape=input_shape)
+    # v2 performs Conv2D with BN-ReLU
+    # on input before splitting into 2 paths
+    x = resnet_layer(inputs=inputs,
+                     num_filters=num_filters_in,
+                     conv_first=True)
+```
+
+```py
+    # instantiate the stack of residual units
+    for stage in range(3):
+        for res_block in range(num_res_blocks):
+            activation = 'relu'
+            batch_normalization = True
+            strides = 1
+            if stage == 0:
+                num_filters_out = num_filters_in * 4
+                # first layer and first stage
+                if res_block == 0:
+                    activation = None
+                    batch_normalization = False
+            else:
+                num_filters_out = num_filters_in * 2
+                # first layer but not first stage
+                if res_block == 0:
+                    # downsample
+                    strides = 2
+```
+
+```py
+            # bottleneck residual unit
+            y = resnet_layer(inputs=x,
+                             num_filters=num_filters_in,
+                             kernel_size=1,
+                             strides=strides,
+                             activation=activation,
+                             batch_normalization=batch_normalization,
+                             conv_first=False)
+            y = resnet_layer(inputs=y,
+                             num_filters=num_filters_in,
+                             conv_first=False)
+            y = resnet_layer(inputs=y,
+                             num_filters=num_filters_out,
+                             kernel_size=1,
+                             conv_first=False)
+            if res_block == 0:
+                # linear projection residual shortcut connection
+                # to match changed dims
+                x = resnet_layer(inputs=x,
+                                 num_filters=num_filters_out,
+                                 kernel_size=1,
+                                 strides=strides,
+                                 activation=None,
+                                 batch_normalization=False)
+            x = add([x, y])
+```
+
+```py
+        num_filters_in = num_filters_out
+```
+
+```py
+    # add classifier on top.
+    # v2 has BN-ReLU before Pooling
+    x = BatchNormalization()(x)
+    x = Activation('relu')(x)
+    x = AveragePooling2D(pool_size=8)(x)
+    y = Flatten()(x)
+    outputs = Dense(num_classes,
+                    activation='softmax',
+                    kernel_initializer='he_normal')(y)
+```
+
+```py
+    # instantiate model.
+    model = Model(inputs=inputs, outputs=outputs)
+    return model
+```
+
+下面的代码显示了如何调用 ResNet v2 的模型构建器。 例如,要构建 ResNet110 v2,我们将使用`n = 12`和`version = 2`:
+
+```py
+n = 12
+```
+
+```py
+# model version
+# orig paper: version = 1 (ResNet v1),
+# improved ResNet: version = 2 (ResNet v2)
+version = 2
+```
+
+```py
+# computed depth from supplied model parameter n
+if version == 1:
+    depth = n * 6 + 2
+elif version == 2:
+    depth = n * 9 + 2
+```
+
+```py
+if version == 2:
+    model = resnet_v2(input_shape=input_shape, depth=depth)
+else:
+    model = resnet_v1(input_shape=input_shape, depth=depth)
+```
+
+ResNet v2 的准确率显示在下面的“表 2.3.1”中:
+
+| **层** | `n` | **CIFAR10 的准确率百分比(原始论文)** | **CIFAR10 的准确率百分比(本书)** |
+| --- | --- | --- | --- |
+| ResNet56 | 9 | 不适用 | 93.01 |
+| ResNet110 | 18 | 93.63 | 93.15 |
+
+表 2.3.1:在 CIFAR10 数据集上验证的 ResNet v2 架构
+
+在 Keras 应用(applications)包中,已经实现了某些 ResNet v1 和 v2 模型(例如 ResNet50、101、152)。 这些是带有预训练权重的替代实现,可以轻松地复用于迁移学习。 本书中使用的模型在层数方面提供了灵活性。
+
+我们已经完成了对最常用的深度神经网络之一 ResNet(v1 和 v2)的讨论。 在下一节中,将介绍另一种流行的深度神经网络架构 DenseNet。
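+
+作为迁移学习的一个简单示例,下面的草图展示了如何从 Keras 应用包加载带 ImageNet 预训练权重的 ResNet50V2,并将其复用为特征提取器(输入尺寸与类别数仅为示意性假设):
+
+```py
+from tensorflow.keras.applications import ResNet50V2
+from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
+from tensorflow.keras.models import Model
+
+# 加载在 ImageNet 上预训练的 ResNet50V2,去掉顶部的分类层
+base = ResNet50V2(weights='imagenet',
+                  include_top=False,
+                  input_shape=(224, 224, 3))
+base.trainable = False    # 冻结主干网络,仅训练新的分类头
+
+inputs = Input(shape=(224, 224, 3))
+x = base(inputs, training=False)
+x = GlobalAveragePooling2D()(x)
+outputs = Dense(10, activation='softmax')(x)    # 假设 10 个目标类别
+model = Model(inputs, outputs)
+```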
+# 4\. 紧密连接的卷积网络(DenseNet)
+
+![](img/B14853_02_11.png)
+
+图 2.4.1:DenseNet 中的一个 4 层`Dense`块,每层的输入均由所有先前的特征映射组成
+
+DenseNet 使用另一种方法来解决梯度消失的问题。 它不使用快捷连接,而是让所有先前的特征映射都成为下一层的输入。 上图显示了一个`Dense`块中密集互连的示例。
+
+为简单起见,在此图中,我们仅显示四层。 注意,第`l`层的输入是所有先前特征映射的连接。 如果用`H(x)`表示`BN-ReLU-Conv2D`操作,则第`l`层的输出为:
+
+`x[l] = H([x[0], x[1], ..., x[l-1]])`(公式 2.4.1)
+
+`Conv2D`使用大小为 3 的核。每层生成的特征映射的数量称为增长率`k`。 通常`k = 12`,但 Huang 等人的论文《密集连接卷积网络》[5]中也使用了`k = 24`。 因此,如果特征映射`x[0]`的数量为`k[0]`,则“图 2.4.1”中 4 层`Dense`块末尾的特征映射总数为`4 x k + k[0]`。
+
+DenseNet 还建议在`Dense`块之前加上一个`BN-ReLU-Conv2D`,其特征映射的数量为增长率的两倍,即`k[0] = 2 x k`。 这样,在`Dense`块的末尾,特征映射的总数将为`4 x 12 + 2 x 12 = 72`。
+
+在输出层,DenseNet 建议我们在带有`softmax`的`Dense()`层之前执行平均池化。 如果未使用数据扩充,则`Dense`块中的`Conv2D`之后必须跟随一个丢弃层。
+
+随着网络的深入,将出现两个新问题。 首先,由于每一层都贡献`k`个特征映射,因此第`l`层的输入数量为`(l - 1) x k + k[0]`。 特征映射的数量在深层中会快速增长,从而减慢计算速度。 例如,对于 101 层网络,在`k = 12`时,这将是`1200 + 24 = 1224`。
+
+其次,类似于 ResNet,随着网络的不断加深,特征映射的大小将减小,从而增大核的感受野。 由于 DenseNet 在合并操作中使用连接,因此必须解决大小上的差异。
+
+为了防止特征映射的数量增长到计算效率低下的程度,DenseNet 引入了`Bottleneck`层,如图“图 2.4.2”所示。 这个想法是,在每次连接之后,先应用一个`1 x 1`卷积,其过滤器数量等于`4k`。 这种降维技术阻止了`Conv2D(3)`要处理的特征映射数量的快速增加。
+
+![](img/B14853_02_12.png)
+
+图 2.4.2:DenseNet 的 Dense 块中的一层,带有和不带有瓶颈层 BN-ReLU-Conv2D(1)。 为了清楚起见,我们将核大小作为 Conv2D 的参数。
+
+于是,`Bottleneck`层将 DenseNet 的层修改为`BN-ReLU-Conv2D(1)-BN-ReLU-Conv2D(3)`,而不仅仅是`BN-ReLU-Conv2D(3)`。 为了清楚起见,我们将核大小作为`Conv2D`的参数。 有了瓶颈层,每个`Conv2D(3)`仅需处理`4k`个特征映射,而不是第`l`层的`(l - 1) x k + k[0]`个。 例如,对于 101 层网络,在`k = 12`时,最后一个`Conv2D(3)`的输入仍然是 48 个特征映射,而不是先前计算出的 1224 个。
+
+为了解决特征映射大小不匹配的问题,DenseNet 将深度网络划分为多个`Dense`块,这些块通过过渡层连接在一起,如图“图 2.4.3”所示。 在每个`Dense`块中,特征映射的大小(即宽度和高度)将保持不变。
+
+过渡层的作用是在两个`Dense`块之间,从一个特征映射大小过渡到较小的特征映射大小。 尺寸通常减少一半。 这是通过平均池化层完成的。 例如,默认值为`pool_size=2`的`AveragePooling2D`会将大小从`(64, 64, 256)`减小为`(32, 32, 256)`。 过渡层的输入是前一个`Dense`块中最后一个连接层的输出。
+
+![A close up of a logo Description automatically generated](img/B14853_02_13.png)
+
+图 2.4.3:两个密集块之间的过渡层
+
+但是,在将特征映射传递到平均池化之前,会先使用`Conv2D(1)`将其数量按某个压缩因子`0 < θ < 1`减少。DenseNet 在实验中使用`θ = 0.5`。 例如,如果前一个`Dense`块最后一次连接的输出是`(64, 64, 512)`,则在`Conv2D(1)`之后,特征映射的新尺寸将是`(64, 64, 256)`。 将压缩和降维放在一起,过渡层就由`BN-Conv2D(1)-AveragePooling2D`层组成。 实际上,批量归一化在卷积层之前。
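+
+在继续构建模型之前,可以用几行代码核对上述特征映射数量的计算(纯粹是说明性的草图,数值沿用正文中`k = 12`、`θ = 0.5`的假设):
+
+```py
+growth_rate = 12          # 增长率 k
+k0 = 2 * growth_rate      # Dense 块之前的特征映射数 k0 = 2k
+
+# 无瓶颈层时,第 l 层的输入特征映射数为 (l - 1) * k + k0
+l = 101
+print((l - 1) * growth_rate + k0)    # 1224,与正文一致
+
+# 有瓶颈层时,每个 Conv2D(3) 的输入固定为 4k 个特征映射
+print(4 * growth_rate)               # 48
+
+# 过渡层按压缩因子 theta 压缩特征映射数量
+theta = 0.5
+print(int(512 * theta))              # 512 -> 256,对应正文示例
+```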
+
+现在,我们已经涵盖了 DenseNet 的重要概念。 接下来,我们将在`tf.keras`中为 CIFAR10 数据集构建并验证一个 DenseNet-BC。
+
+## 为 CIFAR10 构建 100 层 DenseNet-BC
+
+现在,我们将为 CIFAR10 数据集构建一个具有 100 层的 **DenseNet-BC**(**瓶颈-压缩**)网络,也就是我们在上面讨论过的结构。
+
+“表 2.4.1”显示了模型配置,而“图 2.4.4”显示了模型架构。 “列表 2.4.1”为我们展示了具有 100 层的 DenseNet-BC 的部分 Keras 实现。 我们需要注意的是,这里使用`RMSprop`,因为在训练 DenseNet 时,它的收敛性优于 SGD 或 Adam。
+
+| **层** | **输出大小** | **DenseNet-100 BC** |
+| --- | --- | --- |
+| 卷积 | `32 x 32` | `3 x 3 Conv2D` |
+| 密集块(1) | `32 x 32` | ![](img/B14853_02_006.png) |
+| 过渡层(1) | `32 x 32` → `16 x 16` | ![](img/B14853_02_007.png) |
+| 密集块(2) | `16 x 16` | ![](img/B14853_02_008.png) |
+| 过渡层(2) | `16 x 16` → `8 x 8` | ![](img/B14853_02_009.png) |
+| 密集块(3) | `8 x 8` | ![](img/B14853_02_006.png) |
+| 平均池化 | `1 x 1` | `8 x 8 AveragePooling2D` |
+| 分类层 | | `Flatten-Dense(10)-softmax` |
+
+表 2.4.1:100 层的 DenseNet-BC 用于 CIFAR10 分类
+
+从配置转到架构:
+
+![A screenshot of a cell phone Description automatically generated](img/B14853_02_14.png)
+
+图 2.4.4:用于 CIFAR10 分类的 100 层 DenseNet-BC 模型架构
+
+下面的“列表 2.4.1”是具有 100 层的 DenseNet-BC 的部分 Keras 实现,如“表 2.4.1”所示。
+
+“列表 2.4.1”:`densenet-cifar10-2.4.1.py`
+
+```py
+# start model definition
+# densenet CNNs (composite function) are made of BN-ReLU-Conv2D
+inputs = Input(shape=input_shape)
+x = BatchNormalization()(inputs)
+x = Activation('relu')(x)
+x = Conv2D(num_filters_bef_dense_block,
+           kernel_size=3,
+           padding='same',
+           kernel_initializer='he_normal')(x)
+x = concatenate([inputs, x])
+```
+
+```py
+# stack of dense blocks bridged by transition layers
+for i in range(num_dense_blocks):
+    # a dense block is a stack of bottleneck layers
+    for j in range(num_bottleneck_layers):
+        y = BatchNormalization()(x)
+        y = Activation('relu')(y)
+        y = Conv2D(4 * growth_rate,
+                   kernel_size=1,
+                   padding='same',
+                   kernel_initializer='he_normal')(y)
+        if not data_augmentation:
+            y = Dropout(0.2)(y)
+        y = BatchNormalization()(y)
+        y = Activation('relu')(y)
+        y = Conv2D(growth_rate,
+                   kernel_size=3,
+                   padding='same',
+                   kernel_initializer='he_normal')(y)
+        if not data_augmentation:
+            y = Dropout(0.2)(y)
+        x = concatenate([x, y])
+```
+
+```py
+    # no transition layer after the last dense block
+    if i == num_dense_blocks - 1:
+        continue
+    # transition layer compresses num of feature maps
+    # and reduces the size by 2
+    num_filters_bef_dense_block += num_bottleneck_layers * growth_rate
+    num_filters_bef_dense_block = int(num_filters_bef_dense_block * compression_factor)
+    y = BatchNormalization()(x)
+    y = Conv2D(num_filters_bef_dense_block,
+               kernel_size=1,
+               padding='same',
+               kernel_initializer='he_normal')(y)
+    if not data_augmentation:
+        y = Dropout(0.2)(y)
+    x = AveragePooling2D()(y)
+```
+
+```py
+# add classifier on top
+# after average pooling, size of feature map is 1 x 1
+x = AveragePooling2D(pool_size=8)(x)
+y = Flatten()(x)
+outputs = Dense(num_classes,
+                kernel_initializer='he_normal',
+                activation='softmax')(y)
+# instantiate and compile model
+# orig paper uses SGD but RMSprop works better for DenseNet
+model = Model(inputs=inputs, outputs=outputs)
+model.compile(loss='categorical_crossentropy',
+              optimizer=RMSprop(1e-3),
+              metrics=['accuracy'])
+model.summary()
+```
+
+对 DenseNet 的`tf.keras`实现训练 200 个周期,可以达到 93.74% 的准确率,而论文中报道的是 95.49%。 训练中使用了数据扩充。 我们为 DenseNet 使用了与 ResNet v1/v2 相同的回调函数。
+
+对于更深的网络,必须按照论文中的表在 Python 代码中更改`growth_rate`和`depth`变量。 但是,如论文中所述,以深度 190 或 250 训练网络将需要大量时间。 为了对训练时间有个概念,每个周期在 1060Ti GPU 上大约需要运行一个小时。 与 ResNet 相似,Keras 应用包提供了 DenseNet121 及更高版本的预训练模型。
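+
+作为参考,下面的草图给出了这些配置变量的一种可能设置(变量名与“列表 2.4.1”一致;深度与瓶颈层数的换算按 DenseNet-BC 的结构推算,具体取值请以随书仓库为准):
+
+```py
+# DenseNet-BC 配置(示意):depth = 2 * num_bottleneck_layers * num_dense_blocks + 4
+num_dense_blocks = 3
+depth = 100               # 更深的网络可改为 190 或 250,但训练时间大幅增加
+growth_rate = 12
+num_bottleneck_layers = (depth - 4) // (2 * num_dense_blocks)    # 100 层时为 16
+num_filters_bef_dense_block = 2 * growth_rate
+compression_factor = 0.5
+```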
+
+DenseNet 的介绍到此完成了我们对深度神经网络的讨论。 与 ResNet 一起,这两个网络已成为许多下游任务中不可或缺的特征提取器网络。
+
+# 5\. 总结
+
+在本章中,我们介绍了函数式 API,它是使用`tf.keras`构建复杂深度神经网络模型的高级方法。 我们还演示了如何使用函数式 API 来构建多输入单输出的 Y 网络。 与单分支 CNN 网络相比,该网络具有更高的准确率。 在本书的其余部分中,我们将发现函数式 API 在构建更复杂、更高级的模型时必不可少。 例如,在下一章中,函数式 API 将使我们能够构建模块化的编码器,解码器和自编码器。
+
+我们还花费了大量时间探索两个重要的深度网络:ResNet 和 DenseNet。 这两个网络不仅用于分类,还用于其他领域,例如分割,检测,跟踪,生成和视觉语义理解。 在“第 11 章”,“对象检测”和“第 12 章”,“语义分割”中,我们将使用 ResNet 进行对象检测和分割。 我们需要记住,重要的不是简单照搬原始实现,而是更仔细地理解 ResNet 和 DenseNet 中的模型设计决策。 这样,我们就可以将 ResNet 和 DenseNet 的关键概念用于自己的目的。
+
+# 6\. 参考
+
+1. `Kaiming He et al. Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification. Proceedings of the IEEE international conference on computer vision, 2015 (https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf).`
+1. `Kaiming He et al. Deep Residual Learning for Image Recognition. Proceedings of the IEEE conference on computer vision and pattern recognition, 2016a (http://openaccess.thecvf.com/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf).`
+1. `Karen Simonyan and Andrew Zisserman. Very Deep Convolutional Networks for Large-Scale Image Recognition. ICLR, 2015 (https://arxiv.org/pdf/1409.1556/).`
+1. `Kaiming He et al. Identity Mappings in Deep Residual Networks. European Conference on Computer Vision. Springer International Publishing, 2016b (https://arxiv.org/pdf/1603.05027.pdf).`
+1. `Gao Huang et al. Densely Connected Convolutional Networks. Proceedings of the IEEE conference on computer vision and pattern recognition, 2017 (http://openaccess.thecvf.com/content_cvpr_2017/papers/Huang_Densely_Connected_Convolutional_CVPR_2017_paper.pdf).`
+1. `Saining Xie et al. Aggregated Residual Transformations for Deep Neural Networks. Computer Vision and Pattern Recognition (CVPR), 2017 IEEE Conference on. IEEE, 2017 (http://openaccess.thecvf.com/content_cvpr_2017/papers/Xie_Aggregated_Residual_Transformations_CVPR_2017_paper.pdf).`
+1. `Sergey Zagoruyko and Nikos Komodakis. Wide Residual Networks. arXiv preprint arXiv:1605.07146, 2016 (https://arxiv.org/pdf/1605.07146.pdf).`
\ No newline at end of file
diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/03.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/03.md
new file mode 100644
index 00000000..a59f4ebf
--- /dev/null
+++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/03.md
@@ -0,0 +1,891 @@
+# 三、自编码器
+
+在上一章“第 2 章”,“深度神经网络”中,我们介绍了深度神经网络的概念。 现在,我们将继续研究自编码器,它是一种神经网络架构,试图找到给定输入数据的压缩表示形式。
+
+与前面的章节相似,输入数据可以采用多种形式,包括语音,文本,图像或视频。 自编码器将尝试找到一种表示形式或代码,以便对输入数据执行有用的转换。 例如,在去噪自编码器中,神经网络将尝试找到可用于把噪声数据转换为干净数据的代码。 嘈杂的数据可以是带有静态噪声的录音,随后它被转换为清晰的声音。 自编码器会自动从数据中学习代码,而无需人工标注。 这样,自编码器可以归类为**无监督**学习算法。
+
+在本书的后续章节中将研究的**生成对抗网络**(**GAN**)和**变分自编码器**(**VAE**)也是无监督学习算法的代表形式。 这与我们在前几章中讨论过的监督学习算法相反,后者需要人工标注。
+
+总之,本章介绍:
+
+* 自编码器的原理
+* 如何使用`tf.keras`实现自编码器
+* 去噪和着色自编码器的实际应用
+
+让我们从了解自编码器是什么以及自编码器的原理开始。
+
+# 1\. 自编码器的原理
+
+自编码器以最简单的形式,通过尝试将输入复制到输出中来学习表示形式或代码。 但是,使用自编码器并不像将输入复制到输出那样简单。 否则,神经网络将无法发现输入分布中的隐藏结构。
+
+自编码器将输入分布编码为低维张量,通常采用向量形式。 这近似于隐藏结构,后者通常称为潜在表示,代码或向量。 该处理构成编码部分。 然后,潜向量将由解码器部分解码,以恢复原始输入。
+
+由于潜向量是输入分布的低维压缩表示,因此应该预期,解码器恢复的输出只能近似输入。 输入和输出之间的差异可以通过损失函数来衡量。
+
+但是为什么我们要使用自编码器? 简而言之,无论是以原始形式使用,还是作为更复杂神经网络的一部分,自编码器都有实际应用。
+
+它们是理解深度学习高级主题的关键工具,因为它们为我们提供了适合密度估计的低维数据表示。 此外,可以对其进行有效的处理,以对输入数据执行结构化操作。 常见的操作包括去噪,着色,特征级算术,检测,跟踪和分割,仅举几例。
+
+在本节中,我们将介绍自编码器的原理。 我们将在前几章介绍过的 MNIST 数据集上讲解自编码器。
+
+首先,我们需要意识到自编码器具有两个运算符,它们是:
+
+* **编码器**:这会将输入`x`转换为低维潜向量`z = f(x)`。 由于潜向量是低维的,编码器被迫仅学习输入数据的最重要特征。 例如,在 MNIST 数字的情况下,要学习的重要特征可能包括书写风格,倾斜角度,笔触圆度,厚度等。 从本质上讲,这些是表示数字 0 至 9 所需的最重要的信息位。
+* **解码器**:这尝试从潜向量中恢复输入,即`x_tilde = g(z)`。
+
+尽管潜向量的维数较小,但它的大小足以使解码器恢复输入数据。
+
+解码器的目标是使`x_tilde`尽可能接近`x`。 通常,编码器和解码器都是非线性函数。`z`的尺寸是它可以表示的重要特征数量的度量。 该维数通常比输入维数小得多,这样既可以提高效率,又可以限制潜在代码仅学习输入分布的最显着属性[1]。
+
+当潜码的维数明显大于`x`时,自编码器倾向于记忆输入。
+
+合适的损失函数`L(x, x_tilde)`衡量输入`x`与输出(即恢复后的输入)`x_tilde`的相异程度。 如下式所示,均方误差(MSE)是此类损失函数的一个示例:
+
+![](img/B14853_03_006.png) (Equation 3.1.1)
+
+在此示例中,`m`是输出尺寸(例如,在 MNIST 中,`m = width × height × channels = 28 × 28 × 1 = 784`)。`x[i]`和`x_tilde[i]`分别是`x`和`x_tilde`的元素。 由于损失函数是输入和输出之间差异的量度,因此我们也可以使用其他重建损失函数,例如二进制交叉熵或结构相似性指数(SSIM)。
+
+与其他神经网络类似,自编码器会在训练过程中尝试使此误差或损失函数尽可能小。“图 3.1.1”显示了一个自编码器。 编码器是将输入`x`压缩为低维潜向量`z`的函数。 该潜向量代表输入分布的重要特征。 然后,解码器尝试以`x_tilde`的形式从潜向量中恢复原始输入。
+
+![](img/B14853_03_01.png)
+
+图 3.1.1:自编码器的框图
+
+为了将自编码器放到具体的上下文中,`x`可以是尺寸为`28×28×1 = 784`的 MNIST 数字。编码器将输入转换为低维的`z`,例如 16 维的潜向量。 解码器将尝试从`z`中以`x_tilde`的形式恢复输入。
+
+在视觉上,每个 MNIST 数字`x`看起来都类似于`x_tilde`。“图 3.1.2”向我们演示了此自编码过程。
+
+![](img/B14853_03_02.png)
+
+图 3.1.2:带有 MNIST 数字输入和输出的自编码器。 潜向量为 16 维
+
+我们可以看到,虽然解码后的数字 7 并不完全相同,但仍然足够接近。
+
+由于编码器和解码器都是非线性函数,因此我们可以使用神经网络来实现两者。 例如,在 MNIST 数据集中,自编码器可以由 MLP 或 CNN 实现。 通过反向传播使损失函数最小化,就可以训练自编码器。 与其他神经网络类似,反向传播的要求是损失函数必须是可微的。
+
+如果将输入视为分布,则可以将编码器解释为分布的编码器`p(z | x)`,将解码器解释为分布的解码器`p(x | z)`。 自编码器的损失函数表示为:
+
+![](img/B14853_03_012.png) (Equation 3.1.2)
+
+该损失函数的含义是,我们要在给定潜向量分布的情况下,最大化恢复输入分布的机会。 如果假设解码器的输出分布为高斯分布,则损失函数归结为 MSE,因为:
+
+![](img/B14853_03_013.png) (Equation 3.1.3)
+
+在此示例中,`N(x[i]; x_tilde[i], σ²)`表示均值为`x_tilde[i]`且方差为`σ²`的高斯分布。 假设方差恒定,并且解码器的各输出`x_tilde[i]`相互独立。`m`是输出尺寸。
+
+了解自编码器背后的原理将有助于我们实现代码。 在下一节中,我们将研究如何使用`tf.keras`函数式 API 来构建编码器,解码器和自编码器。
+
+# 2\. 
使用 Keras 构建自编码器 + +现在,我们要使用进行一些令人兴奋的事情,使用`tf.keras`库构建一个自编码器。 为了简单起见,我们将使用 MNIST 数据集作为第一组示例。 然后,自编码器将根据输入数据生成潜向量,并使用解码器恢复输入。 在该第一示例中,潜向量是 16 维。 + +首先,我们将通过构建编码器来实现自编码器。 + +“列表 3.2.1”显示了将 MNIST 数字压缩为 16 维潜在向量的编码器。 编码器是两个`Conv2D`的栈。 最后阶段是具有 16 个单元的`Dense`层,以生成潜向量。 + +“列表 3.2.1”:`autoencoder-mnist-3.2.1.py` + +```py +from tensorflow.keras.layers import Dense, Input +from tensorflow.keras.layers import Conv2D, Flatten +from tensorflow.keras.layers import Reshape, Conv2DTranspose +from tensorflow.keras.models import Model +from tensorflow.keras.datasets import mnist +from tensorflow.keras.utils import plot_model +from tensorflow.keras import backend as K +``` + +```py +import numpy as np +import matplotlib.pyplot as plt +``` + +```py +# load MNIST dataset +(x_train, _), (x_test, _) = mnist.load_data() +# reshape to (28, 28, 1) and normalize input images +image_size = x_train.shape[1] +x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) +x_test = np.reshape(x_test, [-1, image_size, image_size, 1]) +x_train = x_train.astype('float32') / 255 +x_test = x_test.astype('float32') / 255 +``` + +```py +# network parameters +input_shape = (image_size, image_size, 1) +batch_size = 32 +kernel_size = 3 +latent_dim = 16 +# encoder/decoder number of CNN layers and filters per layer +layer_filters = [32, 64] +# build the autoencoder model +# first build the encoder model +inputs = Input(shape=input_shape, name='encoder_input') +x = inputs +# stack of Conv2D(32)-Conv2D(64) +for filters in layer_filters: + x = Conv2D(filters=filters, + kernel_size=kernel_size, + activation='relu', + strides=2, + padding='same')(x) +``` + +```py +# shape info needed to build decoder model +# so we don't do hand computation +# the input to the decoder's first +# Conv2DTranspose will have this shape +# shape is (7, 7, 64) which is processed by +# the decoder back to (28, 28, 1) +shape = K.int_shape(x) +``` + +```py +# generate latent vector +x = Flatten()(x) +latent = Dense(latent_dim, name='latent_vector')(x) +``` + +```py +# instantiate encoder model +encoder = Model(inputs, + latent, + name='encoder') +encoder.summary() +plot_model(encoder, + to_file='encoder.png', + show_shapes=True) +``` + +```py +# build the decoder model +latent_inputs = Input(shape=(latent_dim,), name='decoder_input') +# use the shape (7, 7, 64) that was earlier saved +x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs) +# from vector to suitable shape for transposed conv +x = Reshape((shape[1], shape[2], shape[3]))(x) +``` + +```py +# stack of Conv2DTranspose(64)-Conv2DTranspose(32) +for filters in layer_filters[::-1]: + x = Conv2DTranspose(filters=filters, + kernel_size=kernel_size, + activation='relu', + strides=2, + padding='same')(x) +``` + +```py +# reconstruct the input +outputs = Conv2DTranspose(filters=1, + kernel_size=kernel_size, + activation='sigmoid', + padding='same', + name='decoder_output')(x) +``` + +```py +# instantiate decoder model +decoder = Model(latent_inputs, outputs, name='decoder') +decoder.summary() +plot_model(decoder, to_file='decoder.png', show_shapes=True) +``` + +```py +# autoencoder = encoder + decoder +# instantiate autoencoder model +autoencoder = Model(inputs, + decoder(encoder(inputs)), + name='autoencoder') +autoencoder.summary() +plot_model(autoencoder, + to_file='autoencoder.png', + show_shapes=True) +``` + +```py +# Mean Square Error (MSE) loss function, Adam optimizer +autoencoder.compile(loss='mse', optimizer='adam') +``` + +```py +# train the autoencoder +autoencoder.fit(x_train, + x_train, + 
validation_data=(x_test, x_test), + epochs=1, + batch_size=batch_size) +``` + +```py +# predict the autoencoder output from test data +x_decoded = autoencoder.predict(x_test) +``` + +```py +# display the 1st 8 test input and decoded images +imgs = np.concatenate([x_test[:8], x_decoded[:8]]) +imgs = imgs.reshape((4, 4, image_size, image_size)) +imgs = np.vstack([np.hstack(i) for i in imgs]) +plt.figure() +plt.axis('off') +plt.title('Input: 1st 2 rows, Decoded: last 2 rows') +plt.imshow(imgs, interpolation='none', cmap='gray') +plt.savefig('input_and_decoded.png') +plt.show() +``` + +“图 3.2.1”显示了`plot_model()`生成的架构模型图,与`encoder.summary()`生成的文本版本相同。 保存最后一个`Conv2D`的输出形状以计算解码器输入层的尺寸,以便轻松重建 MNIST 图像:`shape = K.int_shape(x)`。 + +![A screenshot of a cell phone Description automatically generated](img/B14853_03_03.png) + +图 3.2.1:编码器模型由`Conv2D(32) - Conv2D(64) - Dense(16)`组成,以生成低维潜向量 + +列表 3.2.1 中的解码器对潜在向量进行解压缩,以恢复 MNIST 数字。 解码器输入级是`Dense`层,它将接受潜在向量。 单元的数量等于从编码器保存的`Conv2D`输出尺寸的乘积。 这样做是为了便于我们调整`Dense`层`Dense`层的输出大小,以最终恢复原始 MNIST 图像尺寸。 + +解码器由三个`Conv2DTranspose`的栈组成。 在我们的案例中,我们将使用**转置的 CNN**(有时称为**反卷积**),它是解码器中常用的。 我们可以将转置的 CNN(`Conv2DTranspose`)想象成 CNN 的逆过程。 + +在一个简单的示例中,如果 CNN 将图像转换为特征映射,则转置的 CNN 将生成给定特征映射的图像。“图 3.2.2”显示了解码器模型: + +![A screenshot of a cell phone Description automatically generated](img/B14853_03_04.png) + +图 3.2.2:解码器模型由`Dense(16) - Conv2DTranspose(64) - Conv2DTranspose(32) - Conv2DTranspose(1)`组成。 输入是经过解码以恢复原始输入的潜向量 + +通过将编码器和解码器连接在一起,我们可以构建自编码器。“图 3.2.3”说明了自编码器的模型图: + +![A screenshot of a cell phone Description automatically generated](img/B14853_03_05.png) + +图 3.2.3:通过将编码器模型和解码器模型结合在一起来构建自编码器模型。 此自编码器有 178 k 个参数 + +编码器的张量输出也是解码器的输入,该解码器生成自编码器的输出。 在此示例中,我们将使用 MSE 损失函数和 Adam 优化器。 在训练期间,输入与输出`x_train`相同。 我们应该注意,在我们的示例中,只有几层足以将验证损失在一个周期内驱动到 0.01。 对于更复杂的数据集,我们可能需要更深的编码器和解码器,以及更多的训练时间。 + +在对自编码器进行了一个周期的验证损失为 0.01 的训练之后,我们能够验证它是否可以对以前从未见过的 MNIST 数据进行编码和解码。“图 3.2.4”向我们展示了来自测试数据和相应解码图像的八个样本: + +![](img/B14853_03_06.png) + +图 3.2.4:根据测试数据预测自编码器。 前两行是原始输入测试数据。 最后两行是预测数据 + +除了图像中的轻微模糊之外,我们能够轻松识别出自编码器能够以良好的质量恢复输入。 随着我们训练更多的周期,结果将有所改善。 + +在这一点上,我们可能想知道:我们如何可视化空间中的潜在向量? 
一种简单的可视化方法是强制自编码器使用 2 维潜在向量来学习 MNIST 数字特征。 从那里,我们可以将该潜在向量投影到二维空间上,以查看 MNIST 潜在向量的分布方式。“图 3.2.5”和“图 3.2.6”显示了 MNIST 数字的分布与潜在代码尺寸的关系。 + +![A close up of a mans face Description automatically generated](img/B14853_03_07.png) + +图 3.2.5:MNIST 数字分布与潜在代码尺寸`z[0]`和`z[1]`的关系。 原始照片可以在本书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/blob/master/chapter3-autoencoders/README.md)中找到。 + +在“图 3.2.5”中,我们可以看到特定数字的潜向量聚集在空间的某个区域上。 例如,数字 0 在左下象限中,而数字 1 在右上象限中。 这种群集在图中得到了反映。 实际上,同一图显示了导航或从潜在空间生成新数字的结果,如图“图 3.2.5”所示。 + +例如,从中心开始,向右上象限改变 2 维潜向量的值,这表明数字从 9 变为 1。这是可以预期的,因为从“图 3.2.5”开始,我们可以看到数字 9 群集的潜在代码值在中心附近,数字 1 群集的潜在代码值在右上象限。 + +对于“图 3.2.5”和“图 3.2.6”,我们仅研究了每个潜在向量维在 -4.0 和 +4.0 之间的区域: + +![](img/B14853_03_08.png) + +图 3.2.6:导航 2 维潜在向量空间时生成的数字 + +从“图 3.2.5”中可以看出,潜在代码分布不是连续的。 理想情况下,应该看起来像一个圆圈,其中到处都有有效值。 由于这种不连续性,因此如果解码潜伏向量,则几乎不会产生任何可识别的数字。 + +“图 3.2.5”和“图 3.2.6”经过 20 个训练周期后生成。 通过设置`latent_dim = 2`修改了`autoencoder-mnist-3.2.1.py`代码。 `plot_ results()`函数将 MNIST 数字绘制为 2 维潜在向量的函数。 为了方便起见,该程序另存为`autoencoder-2dim-mnist-3.2.2.py`,其部分代码显示在“列表 3.2.2”中。 其余代码实际上类似于“列表 3.2.1”,在此不再显示。 + +“列表 3.2.2”:`autoencoder-2dim-mnist-3.2.2.py` + +```py +def plot_results(models, + data, + batch_size=32, + model_name="autoencoder_2dim"): + """Plots 2-dim latent values as scatter plot of digits + then, plot MNIST digits as function of 2-dim latent vector +``` + +```py + Arguments: + models (list): encoder and decoder models + data (list): test data and label + batch_size (int): prediction batch size + model_name (string): which model is using this function + """ +``` + +```py + encoder, decoder = models + x_test, y_test = data + xmin = ymin = -4 + xmax = ymax = +4 + os.makedirs(model_name, exist_ok=True) +``` + +```py + filename = os.path.join(model_name, "latent_2dim.png") + # display a 2D plot of the digit classes in the latent space + z = encoder.predict(x_test, + batch_size=batch_size) + plt.figure(figsize=(12, 10)) +``` + +```py + # axes x and y ranges + axes = plt.gca() + axes.set_xlim([xmin,xmax]) + axes.set_ylim([ymin,ymax]) +``` + +```py + # subsample to reduce density of points on the plot + z = z[0::2] + y_test = y_test[0::2] + plt.scatter(z[:, 0], z[:, 1], marker="") + for i, digit in enumerate(y_test): + axes.annotate(digit, (z[i, 0], z[i, 1])) + plt.xlabel("z[0]") + plt.ylabel("z[1]") + plt.savefig(filename) + plt.show() +``` + +```py + filename = os.path.join(model_name, "digits_over_latent.png") + # display a 30x30 2D manifold of the digits + n = 30 + digit_size = 28 + figure = np.zeros((digit_size * n, digit_size * n)) + # linearly spaced coordinates corresponding to the 2D plot + # of digit classes in the latent space + grid_x = np.linspace(xmin, xmax, n) + grid_y = np.linspace(ymin, ymax, n)[::-1] +``` + +```py + for i, yi in enumerate(grid_y): + for j, xi in enumerate(grid_x): + z = np.array([[xi, yi]]) + x_decoded = decoder.predict(z) + digit = x_decoded[0].reshape(digit_size, digit_size) + figure[i * digit_size: (i + 1) * digit_size, + j * digit_size: (j + 1) * digit_size] = digit +``` + +```py + plt.figure(figsize=(10, 10)) + start_range = digit_size // 2 + end_range = n * digit_size + start_range + 1 + pixel_range = np.arange(start_range, end_range, digit_size) + sample_range_x = np.round(grid_x, 1) + sample_range_y = np.round(grid_y, 1) + plt.xticks(pixel_range, sample_range_x) + plt.yticks(pixel_range, sample_range_y) + plt.xlabel("z[0]") + plt.ylabel("z[1]") + plt.imshow(figure, cmap='Greys_r') + plt.savefig(filename) + plt.show() +``` + +这样就完成了和自编码器的检查。 接下来的章节将重点介绍其实际应用。 
我们将从去噪自编码器开始。
+
+# 3\. 去噪自编码器(DAE)
+
+现在,我们将构建具有实际应用价值的自编码器。 首先,让我们设想这样一个场景:MNIST 的数字图像被噪声破坏了,从而使人类更难以阅读。 我们能够构建一个去噪自编码器(DAE),以消除这些图像中的噪声。“图 3.3.1”向我们展示了三组 MNIST 数字。 每组的顶行(例如,MNIST 数字 7、2、1、9、0、6、3、4 和 9)是原始图像。 中间的行显示了 DAE 的输入,即被噪声破坏的原始图像。 作为人类,我们会发现很难读取损坏的 MNIST 数字。 最后一行显示 DAE 的输出。
+
+![](img/B14853_03_09.png)
+
+图 3.3.1:原始 MNIST 数字(顶行),损坏的原始图像(中间行)和去噪图像(最后一行)
+
+如图“图 3.3.2”所示,去噪自编码器的结构实际上与我们在上一节中介绍的 MNIST 自编码器相同。
+
+![](img/B14853_03_10.png)
+
+图 3.3.2:去噪自编码器的输入是损坏的图像。 输出是干净或去噪的图像。 假定潜向量为 16 维
+
+“图 3.3.2”中的输入定义为:
+
+`x = x_ori + noise`(公式 3.3.1)
+
+在该公式中,`x_ori`表示被*噪声*破坏的原始 MNIST 图像。 编码器的目的是发现如何产生潜向量`z`,使解码器能够通过最小化相异损失函数(例如 MSE)来恢复`x_ori`,如下所示:
+
+![](img/B14853_03_018.png) (Equation 3.3.2)
+
+在此示例中,`m`是输出尺寸(例如,在 MNIST 中,`m = width × height × channels = 28 × 28 × 1 = 784`)。 `x_ori[i]`和`x_tilde[i]`分别是`x_ori`和`x_tilde`的元素。
+
+为了实现 DAE,我们需要对上一节中介绍的自编码器进行一些更改。 首先,训练输入数据应是损坏的 MNIST 数字。 训练输出数据则是原始的干净 MNIST 数字。 这就像告诉自编码器校正后的图像应该是什么,或者要求它找出在图像损坏的情况下如何消除噪声。 最后,我们必须在损坏的 MNIST 测试数据上验证自编码器。
+
+“图 3.3.2”左侧所示的 MNIST 数字 7 是实际损坏的图像输入。 右侧是经过训练的去噪自编码器输出的干净图像。
+
+“列表 3.3.1”:`denoising-autoencoder-mnist-3.3.1.py`
+
+```py
+from tensorflow.keras.layers import Dense, Input
+from tensorflow.keras.layers import Conv2D, Flatten
+from tensorflow.keras.layers import Reshape, Conv2DTranspose
+from tensorflow.keras.models import Model
+from tensorflow.keras import backend as K
+from tensorflow.keras.datasets import mnist
+import numpy as np
+import matplotlib.pyplot as plt
+from PIL import Image
+```
+
+```py
+np.random.seed(1337)
+```
+
+```py
+# load MNIST dataset
+(x_train, _), (x_test, _) = mnist.load_data()
+```
+
+```py
+# reshape to (28, 28, 1) and normalize input images
+image_size = x_train.shape[1]
+x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
+x_test = np.reshape(x_test, [-1, image_size, image_size, 1])
+x_train = x_train.astype('float32') / 255
+x_test = x_test.astype('float32') / 255
+```
+
+```py
+# generate corrupted MNIST images by adding noise with normal dist
+# centered at 0.5 and std=0.5
+noise = np.random.normal(loc=0.5, scale=0.5, size=x_train.shape)
+x_train_noisy = x_train + noise
+```
+
+```py
+noise = np.random.normal(loc=0.5, scale=0.5, size=x_test.shape)
+x_test_noisy = x_test + noise
+# adding noise may exceed normalized pixel values>1.0 or <0.0
+# clip pixel values >1.0 to 1.0 and <0.0 to 0.0
+x_train_noisy = np.clip(x_train_noisy, 0., 1.)
+x_test_noisy = np.clip(x_test_noisy, 0., 1.)
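+# 注:上面的噪声取自均值 0.5、标准差 0.5 的高斯分布;均值取 0.5
+# 是因为归一化后的像素位于 [0, 1] 区间。np.clip() 将越界值裁剪回
+# [0, 1],保证加噪后的张量仍是有效的图像数据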
+# network parameters +input_shape = (image_size, image_size, 1) +batch_size = 32 +kernel_size = 3 +latent_dim = 16 +# encoder/decoder number of CNN layers and filters per layer +layer_filters = [32, 64] +``` + +```py +# build the autoencoder model +# first build the encoder model +inputs = Input(shape=input_shape, name='encoder_input') +x = inputs +``` + +```py +# stack of Conv2D(32)-Conv2D(64) +for filters in layer_filters: + x = Conv2D(filters=filters, + kernel_size=kernel_size, + strides=2, + activation='relu', + padding='same')(x) +``` + +```py +# shape info needed to build decoder model so we don't do hand computation +# the input to the decoder's first Conv2DTranspose will have this shape +# shape is (7, 7, 64) which can be processed by the decoder back to (28, 28, 1) +shape = K.int_shape(x) +``` + +```py +# generate the latent vector +x = Flatten()(x) +latent = Dense(latent_dim, name='latent_vector')(x) +``` + +```py +# instantiate encoder model +encoder = Model(inputs, latent, name='encoder') +encoder.summary() +``` + +```py +# build the decoder model +latent_inputs = Input(shape=(latent_dim,), name='decoder_input') +# use the shape (7, 7, 64) that was earlier saved +x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs) +# from vector to suitable shape for transposed conv +x = Reshape((shape[1], shape[2], shape[3]))(x) +``` + +```py +# stack of Conv2DTranspose(64)-Conv2DTranspose(32) +for filters in layer_filters[::-1]: + x = Conv2DTranspose(filters=filters, + kernel_size=kernel_size, + strides=2, + activation='relu', + padding='same')(x) +``` + +```py +# reconstruct the denoised input +outputs = Conv2DTranspose(filters=1, + kernel_size=kernel_size, + padding='same', + activation='sigmoid', + name='decoder_output')(x) +``` + +```py +# instantiate decoder model +decoder = Model(latent_inputs, outputs, name='decoder') +decoder.summary() +``` + +```py +# autoencoder = encoder + decoder +# instantiate autoencoder model +autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder') +autoencoder.summary() +``` + +```py +# Mean Square Error (MSE) loss function, Adam optimizer +autoencoder.compile(loss='mse', optimizer='adam') +``` + +```py +# train the autoencoder +autoencoder.fit(x_train_noisy, + x_train, + validation_data=(x_test_noisy, x_test), + epochs=10, + batch_size=batch_size) +``` + +```py +# predict the autoencoder output from corrupted test images +x_decoded = autoencoder.predict(x_test_noisy) +``` + +```py +# 3 sets of images with 9 MNIST digits +# 1st rows - original images +# 2nd rows - images corrupted by noise +# 3rd rows - denoised images +rows, cols = 3, 9 +num = rows * cols +imgs = np.concatenate([x_test[:num], x_test_noisy[:num], x_decoded[:num]]) +imgs = imgs.reshape((rows * 3, cols, image_size, image_size)) +imgs = np.vstack(np.split(imgs, rows, axis=1)) +imgs = imgs.reshape((rows * 3, -1, image_size, image_size)) +imgs = np.vstack([np.hstack(i) for i in imgs]) +imgs = (imgs * 255).astype(np.uint8) +plt.figure() +plt.axis('off') +plt.title('Original images: top rows, ' + 'Corrupted Input: middle rows, ' + 'Denoised Input: third rows') +plt.imshow(imgs, interpolation='none', cmap='gray') +Image.fromarray(imgs).save('corrupted_and_denoised.png') +plt.show() +``` + +“列表 3.3.1”显示了去噪自编码器,该编码器已添加到官方 Keras GitHub 存储库中。 使用相同的 MNIST 数据集,我们可以通过添加随机噪声来模拟损坏的图像。 添加的噪声是高斯分布,平均值为`μ = 0.5`,标准差为`σ = 0.5`。 由于添加随机噪声可能会将像素数据推入小于 0 或大于 1 的无效值,因此像素值会被裁剪为`[0.1, 1.0]`范围。 + +其他所有内容实际上都与上一节中的自编码器相同。 我们将使用相同的 MSE 损失函数和 Adam 优化器。 但是,训练的周期数已增加到 10。这是为了进行足够的参数优化。 + +“图 3.3.3”显示了 
DAE 在噪声级别从`σ = 0.5`增至`σ = 0.75`和`σ = 1.0`时表现出的鲁棒性。 在`σ = 0.75`处,DAE 仍能够恢复原始图像。 但是,在`σ = 1.0`处,第二组和第三组中的一些数字(例如 4 和 5)已无法正确恢复。
+
+![](img/B14853_03_11.png)
+
+图 3.3.3:噪声水平提高时去噪自编码器的表现
+
+至此,我们完成了去噪自编码器的讨论和实现。 尽管此概念是在 MNIST 数字上演示的,但该思想也适用于其他信号。 在下一节中,我们将介绍自编码器的另一种实际应用,称为着色自编码器。
+
+# 4\. 自动着色自编码器
+
+现在,我们将致力于自编码器的另一个实际应用。 在这种情况下,我们将想象我们有一张灰度照片,并且想要构建一个可以自动为其添加颜色的工具。 我们要复制人类的这种能力:识别出海洋和天空为蓝色,草地和树木为绿色,云层为白色,依此类推。
+
+如图“图 3.4.1”所示,如果给我们一张前景是稻田,背景是火山,顶部是天空的灰度照片(左),我们可以为其添加适当的颜色(右)。
+
+![](img/B14853_03_12.png)
+
+图 3.4.1:为 Mayon 火山的灰度照片添加颜色。 着色网络应复制人类向灰度照片添加颜色的能力。 左边的照片是灰度的。 右边的照片是彩色的。 原始彩色照片可以在本书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/blob/master/chapter3-autoencoders/README.md)中找到。
+
+简单的自动着色算法对自编码器来说似乎是一个合适的问题。 如果我们可以使用足够数量的灰度照片作为输入,并以相应的彩色照片作为输出来训练自编码器,它就可能发现正确应用颜色的隐藏结构。 大致上,这是去噪的反向过程。 问题是,自编码器能否向原始灰度图像添加颜色(即好的“噪声”)?
+
+“列表 3.4.1”显示了着色自编码器网络。 着色自编码器网络是我们用于 MNIST 数据集的去噪自编码器的修改版本。 首先,我们需要一个与彩色照片对应的灰度数据集。 我们之前使用过的 CIFAR10 数据集包含 50,000 张训练照片和 10,000 张测试照片(均为`32×32`的 RGB 图像),可以将它们转换为灰度图像。 如下清单所示,我们可以使用`rgb2gray()`函数对 R,G 和 B 分量应用权重,以从彩色转换为灰度:
+
+“列表 3.4.1”:`colorization-autoencoder-cifar10-3.4.1.py`
+
+```py
+from tensorflow.keras.layers import Dense, Input
+from tensorflow.keras.layers import Conv2D, Flatten
+from tensorflow.keras.layers import Reshape, Conv2DTranspose
+from tensorflow.keras.models import Model
+from tensorflow.keras.callbacks import ReduceLROnPlateau
+from tensorflow.keras.callbacks import ModelCheckpoint
+from tensorflow.keras.datasets import cifar10
+from tensorflow.keras.utils import plot_model
+from tensorflow.keras import backend as K
+```
+
+```py
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+```
+
+```py
+def rgb2gray(rgb):
+    """Convert from color image (RGB) to grayscale.
+ Source: opencv.org + grayscale = 0.299*red + 0.587*green + 0.114*blue + Argument: + rgb (tensor): rgb image + Return: + (tensor): grayscale image + """ + return np.dot(rgb[...,:3], [0.299, 0.587, 0.114]) +``` + +```py +# load the CIFAR10 data +(x_train, _), (x_test, _) = cifar10.load_data() +``` + +```py +# input image dimensions +# we assume data format "channels_last" +img_rows = x_train.shape[1] +img_cols = x_train.shape[2] +channels = x_train.shape[3] +# create saved_images folder +imgs_dir = 'saved_images' +save_dir = os.path.join(os.getcwd(), imgs_dir) +if not os.path.isdir(save_dir): + os.makedirs(save_dir) +``` + +```py +# display the 1st 100 input images (color and gray) +imgs = x_test[:100] +imgs = imgs.reshape((10, 10, img_rows, img_cols, channels)) +imgs = np.vstack([np.hstack(i) for i in imgs]) +plt.figure() +plt.axis('off') +plt.title('Test color images (Ground Truth)') +plt.imshow(imgs, interpolation='none') +plt.savefig('%s/test_color.png' % imgs_dir) +plt.show() +``` + +```py +# convert color train and test images to gray +x_train_gray = rgb2gray(x_train) +x_test_gray = rgb2gray(x_test) +``` + +```py +# display grayscale version of test images +imgs = x_test_gray[:100] +imgs = imgs.reshape((10, 10, img_rows, img_cols)) +imgs = np.vstack([np.hstack(i) for i in imgs]) +plt.figure() +plt.axis('off') +plt.title('Test gray images (Input)') +plt.imshow(imgs, interpolation='none', cmap='gray') +plt.savefig('%s/test_gray.png' % imgs_dir) +plt.show() +``` + +```py +# normalize output train and test color images +x_train = x_train.astype('float32') / 255 +x_test = x_test.astype('float32') / 255 +``` + +```py +# normalize input train and test grayscale images +x_train_gray = x_train_gray.astype('float32') / 255 +x_test_gray = x_test_gray.astype('float32') / 255 +``` + +```py +# reshape images to row x col x channel for CNN output/validation +x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, channels) +x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, channels) +``` + +```py +# reshape images to row x col x channel for CNN input +x_train_gray = x_train_gray.reshape(x_train_gray.shape[0], img_rows, img_cols, 1) +x_test_gray = x_test_gray.reshape(x_test_gray.shape[0], img_rows, img_cols, 1) +``` + +```py +# network parameters +input_shape = (img_rows, img_cols, 1) +batch_size = 32 +kernel_size = 3 +latent_dim = 256 +# encoder/decoder number of CNN layers and filters per layer +layer_filters = [64, 128, 256] +``` + +```py +# build the autoencoder model +# first build the encoder model +inputs = Input(shape=input_shape, name='encoder_input') +x = inputs +# stack of Conv2D(64)-Conv2D(128)-Conv2D(256) +for filters in layer_filters: + x = Conv2D(filters=filters, + kernel_size=kernel_size, + strides=2, + activation='relu', + padding='same')(x) +``` + +```py +# shape info needed to build decoder model so we don't do hand computation +# the input to the decoder's first Conv2DTranspose will have this shape +# shape is (4, 4, 256) which is processed by the decoder back to (32, 32, 3) +shape = K.int_shape(x) +``` + +```py +# generate a latent vector +x = Flatten()(x) +latent = Dense(latent_dim, name='latent_vector')(x) +``` + +```py +# instantiate encoder model +encoder = Model(inputs, latent, name='encoder') +encoder.summary() +# build the decoder model +latent_inputs = Input(shape=(latent_dim,), name='decoder_input') +x = Dense(shape[1]*shape[2]*shape[3])(latent_inputs) +x = Reshape((shape[1], shape[2], shape[3]))(x) +``` + +```py +# stack of 
Conv2DTranspose(256)-Conv2DTranspose(128)-Conv2DTranspose(64) +for filters in layer_filters[::-1]: + x = Conv2DTranspose(filters=filters, + kernel_size=kernel_size, + strides=2, + activation='relu', + padding='same')(x) +``` + +```py +outputs = Conv2DTranspose(filters=channels, + kernel_size=kernel_size, + activation='sigmoid', + padding='same', + name='decoder_output')(x) +``` + +```py +# instantiate decoder model +decoder = Model(latent_inputs, outputs, name='decoder') +decoder.summary() +# autoencoder = encoder + decoder +# instantiate autoencoder model +autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder') +autoencoder.summary() +``` + +```py +# prepare model saving directory. +save_dir = os.path.join(os.getcwd(), 'saved_models') +model_name = 'colorized_ae_model.{epoch:03d}.h5' +if not os.path.isdir(save_dir): + os.makedirs(save_dir) +filepath = os.path.join(save_dir, model_name) +``` + +```py +# reduce learning rate by sqrt(0.1) if the loss does not improve in 5 epochs +lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), + cooldown=0, + patience=5, + verbose=1, + min_lr=0.5e-6) +# save weights for future use (e.g. reload parameters w/o training) +checkpoint = ModelCheckpoint(filepath=filepath, + monitor='val_loss', + verbose=1, + save_best_only=True) +``` + +```py +# Mean Square Error (MSE) loss function, Adam optimizer +autoencoder.compile(loss='mse', optimizer='adam') +``` + +```py +# called every epoch +callbacks = [lr_reducer, checkpoint] +``` + +```py +# train the autoencoder +autoencoder.fit(x_train_gray, + x_train, + validation_data=(x_test_gray, x_test), + epochs=30, + batch_size=batch_size, + callbacks=callbacks) +# predict the autoencoder output from test data +x_decoded = autoencoder.predict(x_test_gray) +``` + +```py +# display the 1st 100 colorized images +imgs = x_decoded[:100] +imgs = imgs.reshape((10, 10, img_rows, img_cols, channels)) +imgs = np.vstack([np.hstack(i) for i in imgs]) +plt.figure() +plt.axis('off') +plt.title('Colorized test images (Predicted)') +plt.imshow(imgs, interpolation='none') +plt.savefig('%s/colorized.png' % imgs_dir) +plt.show() +``` + +通过添加更多卷积和转置卷积,我们提高了自编码器的容量。 我们还将每个 CNN 块的过滤器数量增加了一倍。 潜向量现在为 256 维,以增加其可以表示的显着属性的数量,如自编码器部分所述。 最后,输出过滤器的大小已增加到三倍,或等于预期的彩色输出的 RGB 中的通道数。 + +现在使用灰度作为输入,原始 RGB 图像作为输出来训练着色自编码器。 训练将花费更多的时间,并在验证损失没有改善的情况下使用学习率降低器来缩小学习率。 通过告诉`tf.keras fit()`函数中的 callbacks 参数调用`lr_reducer()`函数,可以轻松完成此操作。 + +“图 3.4.2”演示了来自 CIFAR10 测试数据集的灰度图像的着色。 + +![A picture containing photo, many, indoor, different Description automatically generated](img/B14853_03_13.png) + +图 3.4.2:使用自编码器将灰度自动转换为彩色图像。 CIFAR10 测试灰度输入图像(左)和预测的彩色图像(右)。 原始彩色照片可以在本书的 GitHub 存储库中找到,网址为 https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/blob/master/chapter3-autoencoders/README.md + +“图 3.4.3”将基本事实与着色自编码器预测进行了比较: + +![A picture containing photo, indoor Description automatically generated](img/B14853_03_14.png) + +图 3.4.3:地面真彩色图像与预测彩色图像的并排比较。 原始彩色照片可以在本书的 GitHub 存储库中找到,网址为 https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/blob/master/chapter3-autoencoders/README.md + +自编码器执行可接受的着色作业。 预计大海或天空为蓝色,动物的阴影为棕色,云为白色,依此类推。 + +有一些明显的错误预测,例如红色车辆变成蓝色或蓝色车辆变成红色,偶尔的绿色领域被误认为是蓝天,而黑暗或金色的天空被转换为蓝天。 + +这是关于自编码器的最后一部分。 在以下各章中,我们将重新讨论以一种或另一种形式进行编码和解码的概念。 表示学习的概念在深度学习中非常基础。 + +# 5\. 
总结
+
+在本章中,我们介绍了自编码器——一种将输入数据压缩为低维表示形式的神经网络,以便高效地执行诸如去噪和着色之类的结构转换。 我们为 GAN 和 VAE 等更高级的主题奠定了基础,后面的章节将介绍它们。 我们已经演示了如何用两个构建模块模型(编码器和解码器)来实现自编码器。 我们还了解到,提取输入分布的隐藏结构是 AI 的常见任务之一。
+
+一旦学习了潜在代码,就可以对原始输入分布执行许多结构操作。 为了更好地了解输入分布,可以使用类似本章的低级嵌入,或者更复杂的降维技术(例如 t-SNE 或 PCA),来可视化潜向量形式的隐藏结构。
+
+除了去噪和着色外,自编码器还用于将输入分布转换为低维潜向量,以便针对其他任务(例如,分割,检测,跟踪,重建和视觉理解)进一步处理。 在“第 8 章”,“变分自编码器(VAE)”中,我们将讨论 VAE。VAE 在结构上与自编码器相同,不同之处在于它具有可解释的潜在代码,可以产生连续的潜向量投影。
+
+在下一章中,我们将着手介绍 AI 最近最重要的突破之一,即 GAN。 我们将学习 GAN 的核心优势,即其合成看起来真实的数据的能力。
+
+# 6\. 参考
+
+1. `Ian Goodfellow et al. Deep Learning. Vol. 1. Cambridge: MIT press, 2016 (http://www.deeplearningbook.org/).`
\ No newline at end of file
diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/04.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/04.md
new file mode 100644
index 00000000..c4c08c1a
--- /dev/null
+++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/04.md
@@ -0,0 +1,784 @@
+# 四、生成对抗网络(GAN)
+
+在本章中,我们将研究**生成对抗网络**(**GAN**)[1]。 GAN 属于生成模型家族。 但是,与自编码器不同,生成模型能够在给定任意编码的情况下创建新的有意义的输出。
+
+本章将讨论 GAN 的工作原理。 我们还将使用`tf.keras`回顾几个早期 GAN 的实现,而在本章的后面,我们将演示实现稳定训练所需的技术。 本章的范围涵盖了 GAN 实现的两个流行示例:**深度卷积 GAN**(**DCGAN**)[2]和**条件 GAN**(**CGAN**)[3]。
+
+总之,本章的目标是:
+
+* GAN 的原理简介
+* GAN 早期成功实现之一 DCGAN 的简介
+* 使用条件的改进版 DCGAN,称为 CGAN
+* 在`tf.keras`中实现 DCGAN 和 CGAN
+
+让我们从 GAN 的概述开始。
+
+# 1\. GAN 概述
+
+在进入 GAN 的更高级概念之前,让我们先从总体上研究 GAN,并介绍它们背后的基本概念。 GAN 非常强大。 它们可以通过执行潜在空间插值,生成并非真实人物的新面孔,这一事实足以证明这个简单的论断。
+
+可以在以下 YouTube 视频中看到 GAN 的高级功能:
+
+* [Progressive GAN [4]](https://youtu.be/G06dEcZ-QTg)
+* [StyleGAN v1 [5]](https://youtu.be/kSLJriaOumA)
+* [StyleGAN v2 [6]](https://youtu.be/c-NJtV9Jvp0)
+
+这些展示如何利用 GAN 产生逼真面孔的视频演示了它们的功能。 这个主题比我们之前看过的任何内容都先进得多。 例如,上面的视频演示了自编码器无法轻松完成的事情,我们在“第 3 章”,“自编码器”中介绍了自编码器。
+
+GAN 可以通过训练两个相互竞争(且相互配合)的网络来学习如何对输入分布建模,这两个网络分别称为**生成器**和**判别器**(有时称为**评论家**)。 生成器的任务是不断学习如何生成能够骗过判别器的伪造数据或信号(包括音频和图像)。 同时,判别器被训练来区分真实信号和伪造信号。 随着训练的进行,判别器将不再能够区分合成生成的数据与真实数据。 此后,可以丢弃判别器,使用生成器来创建从未见过的、逼真的新数据。
+
+GAN 的基本概念很简单。 但是,我们会发现,最具挑战性的问题是如何实现生成器-判别器网络的稳定训练。 为了使两个网络都能同时学习,生成器和判别器之间必须存在健康的竞争。 由于损失函数是根据判别器的输出计算得出的,因此判别器的参数更新很快。 当判别器收敛得更快时,生成器的参数就不再能得到足够的梯度更新,从而无法收敛。 除了难以训练之外,GAN 还可能遭受部分或全部模式崩溃的影响,在这种情况下,生成器针对不同的潜在编码生成几乎相同的输出。
+## GAN 的原理
+
+如图“图 4.1.1”所示,GAN 类似于伪造者(生成器)与警察(判别器)的博弈场景。 在警察学院,警察被教导如何判断一张美钞是真是假。 来自银行的真实美钞样本和来自伪造者的假钞样本被用来训练警察。 但是,伪造者会时不时地假装自己印制了真的美钞。 最初,警察不会上当,并且会告诉伪造者这笔钱为什么是假的。 考虑到此反馈,伪造者再次磨练他的技能,并尝试制作新的假美钞。 如预期的那样,警察既能发现这笔钱是伪造的,也能说明为什么美钞是伪造的:
+
+![A picture containing toy Description automatically generated](img/B14853_04_01.png)
+
+图 4.1.1:GAN 的生成器和判别器类似于伪造者和警察。 伪造者的目的是欺骗警察,使他们相信美钞是真实的
+
+此过程会一直持续,直到伪造者掌握的技术使伪造品与真钞几乎无法区分,即使是经验最丰富的警察也无法分辨。 此后,伪造者可以无限次地印制美钞,而不会被警察抓获,因为这些钞票已无法再被识别为伪造的。
+
+如图“图 4.1.2”所示,GAN 由两个网络组成:一个生成器和一个判别器:
+
+![A close up of a logo Description automatically generated](img/B14853_04_02.png)
+
+图 4.1.2:GAN 由两个网络组成,一个生成器和一个判别器。 判别器经过训练,可以区分真实信号和虚假信号或数据。 生成器的工作是生成伪造的信号或数据,这些伪造的信号或数据最终会欺骗判别器
+
+生成器的输入是噪声,输出是合成数据。 同时,判别器的输入可能是真实数据或合成数据。 真实数据来自真实的采样数据,而虚假数据来自生成器。 所有真实数据均标记为 1.0(即为真的概率是 100%),而所有合成数据均标记为 0.0(即为真的概率是 0%)。 由于标记过程是自动化的,因此 GAN 仍被认为是深度学习中无监督学习方法的一部分。
+
+判别器的目标是从提供的数据集中学习如何区分真实数据与伪造数据。 在 GAN 训练的这一部分中,只有判别器的参数会被更新。 像典型的二元分类器一样,判别器经过训练,可以在 0.0 到 1.0 的范围内预测一个置信度值,表示给定输入数据与真实数据的接近程度。 但是,这只是故事的一半。
+
+生成器会时不时地假装其输出是真实数据,并要求 GAN 将其标记为 1.0。 然后,当将伪造数据提供给判别器时,自然会将其分类为伪造,标签接近 0.0。
+
+优化器根据给出的标签(即 1.0)计算生成器参数的更新。 在对该新数据进行训练时,它还会考虑判别器自己的预测。 换句话说,判别器对其预测有一些疑问,因此,GAN 将其考虑在内。 这次,GAN 将让梯度从判别器的最后一层反向传播到生成器的第一层。 但是,在大多数实践中,在训练的此阶段,判别器参数会暂时冻结。 生成器将使用梯度来更新其参数,并提高其合成伪数据的能力。
+
+总体而言,整个过程类似于两个网络在相互竞争的同时仍在相互合作。 当 GAN 训练收敛时,最终结果是一个可以合成看似真实数据的生成器。 判别器会认为该合成数据是真实的,即给出接近 1.0 的标签,这意味着判别器可以被丢弃。 生成器部分将有助于从任意噪声输入中产生有意义的输出。
+
+下面的“图 4.1.3”中概述了该过程:
+
+![](img/B14853_04_03.png)
+
+图 4.1.3:训练判别器类似于使用二进制交叉熵损失训练二分类器网络。 伪数据由生成器提供,而真实数据来自真实样本
+
+如上图所示,可以通过最小化以下等式中的损失函数来训练判别器:
+
+![](img/B14853_04_001.png) (Equation 4.1.1)
+
+该方程只是标准的二进制交叉熵代价函数。 损失是正确识别真实数据的期望`D(x)`与正确识别合成数据的期望`1 - D(g(z))`之和的负值。 取对数不会改变局部极小值的位置。
+
+训练过程中会将两小批数据提供给判别器:
+
+1. `x`,来自采样数据的真实数据(换言之,`x ~ p_data`),标签为 1.0
+
+1. `x' = g(z)`,来自生成器的带有标签 0.0 的伪造数据
+
+为了使损失函数最小,将通过反向传播,根据正确识别真实数据`D(x)`和合成数据`1 - D(g(z))`来更新判别器参数`θ^(D)`。 正确识别真实数据等同于`D(x) -> 1.0`,而正确分类伪造数据则与`D(g(z)) -> 0.0`或`1 - D(g(z)) -> 1.0`相同。 在此等式中,`z`是生成器用来合成新信号的任意编码或噪声向量。 两者都有助于最小化损失函数。
+
+为了训练生成器,GAN 将判别器和生成器损失的总和视为一个零和博弈。 生成器损失函数只是判别器损失函数的负数:
+
+![](img/B14853_04_012.png) (Equation 4.1.2)
+
+然后可以将其更恰当地重写为值函数:
+
+![](img/B14853_04_013.png) (Equation 4.1.3)
+
+从生成器的角度来看,应将“公式 4.1.3”最小化。 从判别器的角度来看,值函数应最大化。 因此,生成器训练准则可以写成极小极大问题:
+
+![](img/B14853_04_014.png) (Equation 4.1.4)
+
+有时,我们会假装合成数据是带有标签 1.0 的真实数据,以此来欺骗判别器。 通过关于`θ^(D)`最大化,优化器将梯度更新发送给判别器参数,以将该合成数据视为真实数据。 同时,通过关于`θ^(G)`最小化,优化器将训练生成器的参数,从而欺骗判别器。 但是,实际上,判别器对其将合成数据分类为伪造数据的预测很有信心,因此不会更新 GAN 参数。 此外,梯度更新很小,并且在传播到生成器层时已大大减小。 结果,生成器无法收敛。
+
+![](img/B14853_04_04.png)
+
+图 4.1.4:训练生成器就像使用二进制交叉熵损失函数训练网络一样。 来自生成器的虚假数据被当作真实数据呈现
+
+解决方案是按以下形式重新构造生成器的损失函数:
+
+![](img/B14853_04_017.png) (Equation 4.1.5)
+
+该损失函数只是通过训练生成器,最大化判别器认为合成数据是真实数据的机会。 新公式不再是零和的,而是纯粹由启发式驱动的。“图 4.1.4”显示了训练过程中的生成器。 在此图中,仅在训练整个对抗网络时才更新生成器参数。 这是因为梯度从判别器向下传递到生成器。 但是,实际上,判别器权重仅在对抗训练期间临时冻结。
+
+在深度学习中,可以使用合适的神经网络架构来实现生成器和判别器。 如果数据或信号是图像,则生成器和判别器网络都将使用 CNN。 对于诸如音频之类的一维序列,两个网络通常都是循环网络(RNN,LSTM 或 GRU)。
+
+在本节中,我们了解到 GAN 的原理很简单。 我们还了解了如何通过熟悉的网络层实现 GAN。 GAN 与其他网络的区别在于,它们是出了名的难以训练。 只需稍作更改,就可能使网络变得不稳定。 在下一节中,我们将研究使用深度 CNN 的 GAN 早期成功实现之一。 它称为 DCGAN [2]。
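+
+为了更直观地理解上述损失函数,下面给出一个极简的示意片段(仅为说明用途,并非书中实现):它用`tf.keras`的二进制交叉熵计算一批真实/伪造样本上的判别器损失,以及对应“公式 4.1.5”的生成器损失:
+
+```py
+import tensorflow as tf
+
+bce = tf.keras.losses.BinaryCrossentropy()
+
+# 假设 d_real、d_fake 是判别器对一批真实/伪造样本输出的概率
+d_real = tf.constant([0.9, 0.8, 0.7])    # D(x)
+d_fake = tf.constant([0.2, 0.3, 0.1])    # D(g(z))
+
+# 公式 4.1.1:真实样本标记为 1、伪造样本标记为 0 的交叉熵之和
+d_loss = bce(tf.ones_like(d_real), d_real) + \
+         bce(tf.zeros_like(d_fake), d_fake)
+
+# 公式 4.1.5:把伪造样本“假装为真”(标记为 1)的交叉熵
+g_loss = bce(tf.ones_like(d_fake), d_fake)
+print(float(d_loss), float(g_loss))
+```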
+
+# 2\. 在 Keras 中实现 DCGAN
+
+“图 4.2.1”显示了用于生成伪造 MNIST 图像的 DCGAN:
+
+![](img/B14853_04_05.png)
+
+图 4.2.1:DCGAN 模型
+
+DCGAN 实现以下设计原则:
+
+* 使用`strides > 1`的卷积代替`MaxPooling2D`或`UpSampling2D`。 通过`strides > 1`,CNN 可以学习如何调整特征映射的大小。
+* 避免使用`Dense`层。 在所有层中使用 CNN。 `Dense`层仅用作生成器的第一层,以接受`z`向量。 `Dense`层的输出随后被调整大小,成为后续 CNN 层的输入。
+* 使用**批量归一化**(**BN**),通过将每一层的输入归一化为零均值和单位方差,来稳定学习。 生成器输出层和判别器输入层中没有 BN。 在此处要介绍的实现示例中,判别器中没有使用批量归一化。
+* **整流线性单元**(**ReLU**)在生成器的所有层中均使用,但在输出层中则使用`tanh`激活。 在此处要介绍的实现示例中,生成器的输出使用`sigmoid`代替`tanh`,因为这通常会使 MNIST 数字的训练更稳定。
+* 在判别器的所有层中使用 **Leaky ReLU**。 与 ReLU 不同,Leaky ReLU 不会在输入小于零时将所有输出清零,而是生成一个等于`alpha x input`的小梯度。 在以下示例中,`alpha = 0.2`。
+
+生成器学习从 100 维输入向量(`[-1.0, 1.0]`范围内均匀分布的 100 维随机噪声)生成伪图像。 判别器将真实图像与伪图像分类,但是在训练对抗网络时,它会在无意中指导生成器如何生成真实图像。 在我们的 DCGAN 实现中使用的核大小为 5,这是为了增大卷积的感受野和表达能力。
+
+生成器接受由 -1.0 到 1.0 范围内的均匀分布生成的 100 维`z`向量。 生成器的第一层是`7 x 7 x 128 = 6,272`个单元的密集层。 单元数量是基于输出图像的预期最终尺寸(`28 x 28 x 1`,28 是 7 的倍数)和第一个`Conv2DTranspose`的过滤器数量(等于 128)计算的。
+
+我们可以将转置的 CNN(`Conv2DTranspose`)想象成 CNN 的逆过程。 在一个简单的示例中,如果 CNN 将图像转换为特征映射,则转置的 CNN 将根据给定的特征映射生成图像。 因此,转置的 CNN 被用在上一章的解码器和本章的生成器中。
+
+在对`strides = 2`进行两次`Conv2DTranspose`之后,特征映射的大小将为`28 x 28 x n_filter`。 每个`Conv2DTranspose`之前都有批量归一化和 ReLU。 最后一层具有 Sigmoid 激活,可生成`28 x 28 x 1`的伪 MNIST 图像。 每个像素被归一化为`[0.0, 1.0]`,对应于`[0, 255]`灰度级。 下面的“列表 4.2.1”显示了`tf.keras`中生成器网络的实现。 我们定义了一个函数来构建生成器模型。 由于整个代码较长,我们将列表限制在正在讨论的特定行。
+
+[完整的代码可在 GitHub 上获得](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)。
+
+“列表 4.2.1”:`dcgan-mnist-4.2.1.py`
+
+```py
+def build_generator(inputs, image_size):
+    """Build a Generator Model
+```
+
+```py
+    Stack of BN-ReLU-Conv2DTranspose to generate fake images
+    Output activation is sigmoid instead of tanh in [1].
+    Sigmoid converges easily.
+```
+
+```py
+    Arguments:
+        inputs (Layer): Input layer of the generator
+            (the z-vector)
+        image_size (tensor): Target size of one side
+            (assuming square image)
+```
+
+```py
+    Returns:
+        generator (Model): Generator Model
+    """
+```
+
+```py
+    image_resize = image_size // 4
+    # network parameters
+    kernel_size = 5
+    layer_filters = [128, 64, 32, 1]
+```
+
+```py
+    x = Dense(image_resize * image_resize * layer_filters[0])(inputs)
+    x = Reshape((image_resize, image_resize, layer_filters[0]))(x)
+```
+
+```py
+    for filters in layer_filters:
+        # first two convolution layers use strides = 2
+        # the last two use strides = 1
+        if filters > layer_filters[-2]:
+            strides = 2
+        else:
+            strides = 1
+        x = BatchNormalization()(x)
+        x = Activation('relu')(x)
+        x = Conv2DTranspose(filters=filters,
+                            kernel_size=kernel_size,
+                            strides=strides,
+                            padding='same')(x)
+```
+
+```py
+    x = Activation('sigmoid')(x)
+    generator = Model(inputs, x, name='generator')
+    return generator
+```
+
+判别器与许多基于 CNN 的分类器相似。 输入是`28 x 28 x 1`的 MNIST 图像,被分类为真实(1.0)或伪造(0.0)。 网络有四个 CNN 层。 除最后一个卷积外,每个`Conv2D`都使用`strides = 2`将特征映射下采样两倍。 每个`Conv2D`之前都有一个 Leaky ReLU 层。 初始过滤器数量为 32,并随每个卷积层加倍,最终达到 256。 最终过滤器数量取 128 也可行,但我们会发现取 256 时生成的图像效果更好。 最终输出被展平,然后由单个单元的`Dense`层经 Sigmoid 激活缩放后,生成 0.0 到 1.0 之间的预测。 输出被建模为伯努利分布,因此使用了二进制交叉熵损失函数。
+
+建立生成器和判别器模型后,通过将生成器和判别器网络连接起来,构建对抗模型。 判别器网络和对抗网络都使用 RMSprop 优化器。 判别器的学习率是`2e-4`,而对抗网络的学习率是`1e-4`。 判别器的 RMSprop 衰减率为`6e-8`,对抗网络的 RMSprop 衰减率为`3e-8`。
+
+将对抗网络的学习率设置为判别器的一半,可以使训练更加稳定。 您会从“图 4.1.3”和“图 4.1.4”中回忆起,GAN 训练包含两个部分:判别器训练和生成器训练,后者是冻结判别器权重的对抗训练。
+
+“列表 4.2.2”显示了`tf.keras`中判别器的实现。 我们定义了一个函数来构建判别器模型。
+
+“列表 4.2.2”:`dcgan-mnist-4.2.1.py`
+
+```py
+def build_discriminator(inputs):
+    """Build a Discriminator Model
+```
+
+```py
+    Stack of LeakyReLU-Conv2D to discriminate real from fake.
+ The network does not converge with BN so it is not used here + unlike in [1] or original paper. +``` + +```py + Arguments: + inputs (Layer): Input layer of the discriminator (the image) +``` + +```py + Returns: + discriminator (Model): Discriminator Model + """ + kernel_size = 5 + layer_filters = [32, 64, 128, 256] +``` + +```py + x = inputs + for filters in layer_filters: + # first 3 convolution layers use strides = 2 + # last one uses strides = 1 + if filters == layer_filters[-1]: + strides = 1 + else: + strides = 2 + x = LeakyReLU(alpha=0.2)(x) + x = Conv2D(filters=filters, + kernel_size=kernel_size, + strides=strides, + padding='same')(x) +``` + +```py + x = Flatten()(x) + x = Dense(1)(x) + x = Activation('sigmoid')(x) + discriminator = Model(inputs, x, name='discriminator') + return discriminator +``` + +在“列表 4.2.3”中,我们将说明如何构建 GAN 模型。 首先,建立鉴别模型,然后实例化生成器模型。 对抗性模型只是生成器和判别器组合在一起。 在许多 GAN 中,批大小为 64 似乎是最常见的。 网络参数显示在“列表 4.2.3”中。 + +“列表 4.2.3”:`dcgan-mnist-4.2.1.py` + +建立 DCGAN 模型并调用训练例程的函数: + +```py +def build_and_train_models(): + # load MNIST dataset + (x_train, _), (_, _) = mnist.load_data() +``` + +```py + # reshape data for CNN as (28, 28, 1) and normalize + image_size = x_train.shape[1] + x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) + x_train = x_train.astype('float32') / 255 +``` + +```py + model_name = "dcgan_mnist" + # network parameters + # the latent or z vector is 100-dim + latent_size = 100 + batch_size = 64 + train_steps = 40000 + lr = 2e-4 + decay = 6e-8 + input_shape = (image_size, image_size, 1) +``` + +```py + # build discriminator model + inputs = Input(shape=input_shape, name='discriminator_input') + discriminator = build_discriminator(inputs) + # [1] or original paper uses Adam, + # but discriminator converges easily with RMSprop + optimizer = RMSprop(lr=lr, decay=decay) + discriminator.compile(loss='binary_crossentropy', + optimizer=optimizer, + metrics=['accuracy']) + discriminator.summary() +``` + +```py + # build generator model + input_shape = (latent_size, ) + inputs = Input(shape=input_shape, name='z_input') + generator = build_generator(inputs, image_size) + generator.summary() +``` + +```py + # build adversarial model + optimizer = RMSprop(lr=lr * 0.5, decay=decay * 0.5) + # freeze the weights of discriminator during adversarial training + discriminator.trainable = False + # adversarial = generator + discriminator + adversarial = Model(inputs, + discriminator(generator(inputs)), + name=model_name) + adversarial.compile(loss='binary_crossentropy', + optimizer=optimizer, + metrics=['accuracy']) + adversarial.summary() +``` + +```py + # train discriminator and adversarial networks + models = (generator, discriminator, adversarial) + params = (batch_size, latent_size, train_steps, model_name) + train(models, x_train, params) +``` + +从“列表 4.2.1”和“列表 4.2.2”中可以看出,DCGAN 模型很简单。 使它们难以构建的原因是,网络中的较小更改设计很容易破坏训练收敛。 例如,如果在判别器中使用批量归一化,或者如果生成器中的`strides = 2`传输到后面的 C​​NN 层,则 DCGAN 将无法收敛。 + +“列表 4.2.4”显示了专用于训练判别器和对抗网络的函数。 由于自定义训练,将不使用常规的`fit()`函数。 取而代之的是,调用`train_on_batch()`对给定的数据批量运行单个梯度更新。 然后通过对抗网络训练生成器。 训练首先从数据集中随机选择一批真实图像。 这被标记为实数(1.0)。 然后,生成器将生成一批伪图像。 这被标记为假(0.0)。 这两个批量是连接在一起的,用于训练判别器。 + +完成此操作后,生成器将生成一批新的伪图像,并将其标记为真实(1.0)。 这批将用于训练对抗网络。 交替训练这两个网络约 40,000 步。 定期将基于特定噪声向量生成的 MNIST 数字保存在文件系统中。 在最后的训练步骤中,网络已收敛。 生成器模型也保存在文件中,因此我们可以轻松地将训练后的模型重新用于未来的 MNIST 数字生成。 但是,仅保存生成器模型,因为这是该 DCGAN 在生成新 MNIST 数字时的有用部分。 例如,我们可以通过执行以下操作来生成新的和随机的 MNIST 数字: + +```py +python3 dcgan-mnist-4.2.1.py --generator=dcgan_mnist.h5 +``` + +“列表 4.2.4”:`dcgan-mnist-4.2.1.py` + 
+训练判别器和对抗网络的函数: + +```py +def train(models, x_train, params): + """Train the Discriminator and Adversarial Networks +``` + +```py + Alternately train Discriminator and Adversarial networks by batch. + Discriminator is trained first with properly real and fake images. + Adversarial is trained next with fake images pretending to be real + Generate sample images per save_interval. +``` + +```py + Arguments: + models (list): Generator, Discriminator, Adversarial models + x_train (tensor): Train images + params (list) : Networks parameters +``` + +```py + """ + # the GAN component models + generator, discriminator, adversarial = models + # network parameters + batch_size, latent_size, train_steps, model_name = params + # the generator image is saved every 500 steps + save_interval = 500 + # noise vector to see how the generator output evolves during training + noise_input = np.random.uniform(-1.0, 1.0, size=[16, latent_size]) + # number of elements in train dataset + train_size = x_train.shape[0] + for i in range(train_steps): + # train the discriminator for 1 batch + # 1 batch of real (label=1.0) and fake images (label=0.0) + # randomly pick real images from dataset + rand_indexes = np.random.randint(0, train_size, size=batch_size) + real_images = x_train[rand_indexes] + # generate fake images from noise using generator + # generate noise using uniform distribution + noise = np.random.uniform(-1.0, + 1.0, + size=[batch_size, latent_size]) + # generate fake images + fake_images = generator.predict(noise) + # real + fake images = 1 batch of train data + x = np.concatenate((real_images, fake_images)) + # label real and fake images + # real images label is 1.0 + y = np.ones([2 * batch_size, 1]) + # fake images label is 0.0 + y[batch_size:, :] = 0.0 + # train discriminator network, log the loss and accuracy + loss, acc = discriminator.train_on_batch(x, y) + log = "%d: [discriminator loss: %f, acc: %f]" % (i, loss, acc) +``` + +```py + # train the adversarial network for 1 batch + # 1 batch of fake images with label=1.0 + # since the discriminator weights + # are frozen in adversarial network + # only the generator is trained + # generate noise using uniform distribution + noise = np.random.uniform(-1.0, + 1.0, + size=[batch_size, latent_size]) + # label fake images as real or 1.0 + y = np.ones([batch_size, 1]) + # train the adversarial network + # note that unlike in discriminator training, + # we do not save the fake images in a variable + # the fake images go to the discriminator input of the adversarial + # for classification + # log the loss and accuracy + loss, acc = adversarial.train_on_batch(noise, y) + log = "%s [adversarial loss: %f, acc: %f]" % (log, loss, acc) + print(log) + if (i + 1) % save_interval == 0: + # plot generator images on a periodic basis + plot_images(generator, + noise_input=noise_input, + show=False, + step=(i + 1), + model_name=model_name) +``` + +```py + # save the model after training the generator + # the trained generator can be reloaded for + # future MNIST digit generation + generator.save(model_name + ".h5") +``` + +“图 4.2.2”显示了生成器伪造图像根据训练步骤的演变。 生成器已经以 5,000 步的速度生成了可识别的图像。 非常像拥有一个知道如何绘制数字的智能体。 值得注意的是,某些数字从一种可识别的形式(例如,最后一行的第二列中的 8)变为另一种形式(例如,0)。 当训练收敛时,判别器损失接近 0.5,而对抗性损失接近 1.0,如下所示: + +```py +39997: [discriminator loss: 0.423329, acc: 0.796875] [adversarial loss: +0.819355, acc: 0.484375] +39998: [discriminator loss: 0.471747, acc: 0.773438] [adversarial loss: +1.570030, acc: 0.203125] +39999: [discriminator loss: 0.532917, acc: 0.742188] [adversarial loss: 
+0.824350, acc: 0.453125]
+```
+
+我们可以看到以下结果:
+
+![](img/B14853_04_06.png)
+
+图 4.2.2:DCGAN 生成器在不同训练步骤生成的伪造图像
+
+在本节中,DCGAN 生成的伪造图像是随机的。
+
+生成器无法控制要生成哪个具体数字,也没有机制可以向生成器请求特定的数字。 这个问题可以通过称为 CGAN [3] 的 GAN 变体来解决,我们将在下一节中进行讨论。
+
+# 3\. 条件 GAN(CGAN)
+
+CGAN 使用与上一节相同的 GAN,但对生成器和判别器的输入都施加了一个条件。 该条件是数字的单热(one-hot)向量形式。 它与要生成的图像(生成器)或要被分类为真实或伪造的图像(判别器)相关联。 CGAN 模型显示在“图 4.3.1”中。
+
+CGAN 与 DCGAN 相似,除了附加的单热向量输入。 对于生成器,单热标签在`Dense`层之前与潜向量连接在一起。 对于判别器,添加了一个新的`Dense`层。 新层用于处理单热向量,并对其进行整形,以使其适合与另一个输入(图像)连接,再馈入后续的 CNN 层。
+
+![](img/B14853_04_07.png)
+
+图 4.3.1:CGAN 模型与 DCGAN 相似,只是多了一个单热向量,用于调节生成器和判别器的输出
+
+生成器学习从 100 维输入向量和指定的数字生成伪图像。 判别器基于真实和伪造图像及其对应的标签,将真实图像与伪造图像分类。
+
+CGAN 的基础仍然与原始 GAN 原理相同,区别在于判别器和生成器的输入均以单热标签`y`为条件。
+
+通过在“公式 4.1.1”和“公式 4.1.5”中合并此条件,判别器和生成器的损失函数分别显示在“公式 4.3.1”和“公式 4.3.2”中:
+
+![](img/B14853_04_018.png) (Equation 4.3.1)
+
+![](img/B14853_04_019.png) (Equation 4.3.2)
+
+结合“图 4.3.2”和“图 4.3.3”,损失函数也可以写为:
+
+![](img/B14853_04_020.png) (Equation 4.3.3)
+
+![](img/B14853_04_021.png) (Equation 4.3.4)
+
+判别器的新损失函数旨在最小化下列两类预测的误差:来自数据集的真实图像,以及来自生成器、以单热标签为条件的伪造图像。“图 4.3.2”显示了如何训练判别器。
+
+![](img/B14853_04_08.png)
+
+图 4.3.2:训练 CGAN 判别器类似于训练 GAN 判别器。 唯一的区别是,生成的伪造图像和数据集中的真实图像均以其相应的单热标签为条件
+
+生成器的新损失函数使判别器对以指定单热标签为条件的伪造图像做出正确判别的可能性最小化。 生成器学习如何在给定单热向量的情况下,生成能骗过判别器的特定 MNIST 数字。“图 4.3.3”显示了如何训练生成器。
+
+![](img/B14853_04_09.png)
+
+图 4.3.3:通过对抗网络训练 CGAN 生成器类似于训练 GAN 生成器。 唯一的区别是,生成的伪造图像以单热标签为条件
+
+“列表 4.3.1”突出显示了判别器模型中所需的微小更改。 该代码使用`Dense`层处理单热向量,并将其与输入图像连接在一起。 `Model`实例也经过修改,以同时接受图像和单热向量输入。
+
+“列表 4.3.1”:`cgan-mnist-4.3.1.py`
+
+突出显示了 DCGAN 中所做的更改:
+
+```py
+def build_discriminator(inputs, labels, image_size):
+    """Build a Discriminator Model
+```
+
+```py
+    Inputs are concatenated after Dense layer.
+    Stack of LeakyReLU-Conv2D to discriminate real from fake.
+    The network does not converge with BN so it is not used here
+    unlike in DCGAN paper.
+```
+
+```py
+    Arguments:
+        inputs (Layer): Input layer of the discriminator (the image)
+        labels (Layer): Input layer for one-hot vector to condition
+            the inputs
+        image_size: Target size of one side (assuming square image)
+    Returns:
+        discriminator (Model): Discriminator Model
+    """
+    kernel_size = 5
+    layer_filters = [32, 64, 128, 256]
+```
+
+```py
+    x = inputs
+```
+
+```py
+    y = Dense(image_size * image_size)(labels)
+    y = Reshape((image_size, image_size, 1))(y)
+    x = concatenate([x, y])
+```
+
+```py
+    for filters in layer_filters:
+        # first 3 convolution layers use strides = 2
+        # last one uses strides = 1
+        if filters == layer_filters[-1]:
+            strides = 1
+        else:
+            strides = 2
+        x = LeakyReLU(alpha=0.2)(x)
+        x = Conv2D(filters=filters,
+                   kernel_size=kernel_size,
+                   strides=strides,
+                   padding='same')(x)
+```
+
+```py
+    x = Flatten()(x)
+    x = Dense(1)(x)
+    x = Activation('sigmoid')(x)
+    # input is conditioned by labels
+    discriminator = Model([inputs, labels], x, name='discriminator')
+    return discriminator
+```
+
+以下“列表 4.3.2”突出显示了在生成器构建函数中合并条件化单热标签所做的代码更改。 `Model`实例针对`z`向量和单热向量两个输入进行了修改。
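+
+在继续之前,先用一个小示例说明这些单热向量标签的构造方式(仅为说明用途;第二种写法与本章稍后`train()`函数中的代码一致):
+
+```py
+import numpy as np
+from tensorflow.keras.utils import to_categorical
+
+num_labels = 10
+# 方式一:to_categorical 将整数标签转换为单热向量
+y = to_categorical([3, 7], num_classes=num_labels)
+
+# 方式二:用 np.eye 索引随机生成单热标签
+fake_labels = np.eye(num_labels)[np.random.choice(num_labels, 2)]
+print(y.shape, fake_labels.shape)    # (2, 10) (2, 10)
+```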
+
+“列表 4.3.2”:`cgan-mnist-4.3.1.py`
+
+突出显示了 DCGAN 中所做的更改:
+
+```py
+def build_generator(inputs, labels, image_size):
+    """Build a Generator Model
+    Inputs are concatenated before Dense layer.
+    Stack of BN-ReLU-Conv2DTranspose to generate fake images.
+    Output activation is sigmoid instead of tanh in orig DCGAN.
+    Sigmoid converges easily.
+```
+
+```py
+    Arguments:
+        inputs (Layer): Input layer of the generator (the z-vector)
+        labels (Layer): Input layer for one-hot vector to condition the inputs
+        image_size: Target size of one side (assuming square image)
+    Returns:
+        generator (Model): Generator Model
+    """
+    image_resize = image_size // 4
+    # network parameters
+    kernel_size = 5
+    layer_filters = [128, 64, 32, 1]
+```
+
+```py
+    x = concatenate([inputs, labels], axis=1)
+    x = Dense(image_resize * image_resize * layer_filters[0])(x)
+    x = Reshape((image_resize, image_resize, layer_filters[0]))(x)
+```
+
+```py
+    for filters in layer_filters:
+        # first two convolution layers use strides = 2
+        # the last two use strides = 1
+        if filters > layer_filters[-2]:
+            strides = 2
+        else:
+            strides = 1
+        x = BatchNormalization()(x)
+        x = Activation('relu')(x)
+        x = Conv2DTranspose(filters=filters,
+                            kernel_size=kernel_size,
+                            strides=strides,
+                            padding='same')(x)
+```
+
+```py
+    x = Activation('sigmoid')(x)
+    # input is conditioned by labels
+    generator = Model([inputs, labels], x, name='generator')
+    return generator
+```
+
+“列表 4.3.3”突出显示了在`train()`函数中所做的更改,以适应判别器和生成器的条件化单热向量。 首先使用一批真实和伪造的数据训练 CGAN 判别器,这些数据均以各自的单热标签为条件。 然后,通过训练对抗网络来更新生成器参数,此时以单热标签为条件的伪造数据被当作真实数据。 与 DCGAN 相似,在对抗训练中,判别器权重被冻结。
+
+“列表 4.3.3”:`cgan-mnist-4.3.1.py`
+
+突出显示了 DCGAN 中所做的更改:
+
+```py
+def train(models, data, params):
+    """Train the Discriminator and Adversarial Networks
+```
+
+```py
+    Alternately train Discriminator and Adversarial networks by batch.
+    Discriminator is trained first with properly labelled real and fake images.
+    Adversarial is trained next with fake images pretending to be real.
+    Discriminator inputs are conditioned by train labels for real images,
+    and random labels for fake images.
+    Adversarial inputs are conditioned by random labels.
+    Generate sample images per save_interval.
+``` + +```py + Arguments: + models (list): Generator, Discriminator, Adversarial models + data (list): x_train, y_train data + params (list): Network parameters +``` + +```py + """ + # the GAN models + generator, discriminator, adversarial = models + # images and labels + x_train, y_train = data + # network parameters + batch_size, latent_size, train_steps, num_labels, model_name = params + # the generator image is saved every 500 steps + save_interval = 500 + # noise vector to see how the generator output evolves during training + noise_input = np.random.uniform(-1.0, 1.0, size=[16, latent_size]) + # one-hot label the noise will be conditioned to + noise_class = np.eye(num_labels)[np.arange(0, 16) % num_labels] + # number of elements in train dataset + train_size = x_train.shape[0] +``` + +```py + print(model_name, + "Labels for generated images: ", + np.argmax(noise_class, axis=1)) +``` + +```py + for i in range(train_steps): + # train the discriminator for 1 batch + # 1 batch of real (label=1.0) and fake images (label=0.0) + # randomly pick real images from dataset + rand_indexes = np.random.randint(0, train_size, size=batch_size) + real_images = x_train[rand_indexes] + # corresponding one-hot labels of real images + real_labels = y_train[rand_indexes] + # generate fake images from noise using generator + noise = np.random.uniform(-1.0, + 1.0, + size=[batch_size, latent_size]) +``` + +```py + # assign random one-hot labels + fake_labels = np.eye(num_labels)[np.random.choice(num_labels,batch_size)] + # generate fake images conditioned on fake labels + fake_images = generator.predict([noise, fake_labels]) + # real + fake images = 1 batch of train data + x = np.concatenate((real_images, fake_images)) + # real + fake one-hot labels = 1 batch of train one-hot labels + labels = np.concatenate((real_labels, fake_labels)) + # label real and fake images + # real images label is 1.0 + y = np.ones([2 * batch_size, 1]) + # fake images label is 0.0 + y[batch_size:, :] = 0.0 + # train discriminator network, log the loss and accuracy + loss, acc = discriminator.train_on_batch([x, labels], y) + log = "%d: [discriminator loss: %f, acc: %f]" % (i, loss, acc) + # train the adversarial network for 1 batch + # 1 batch of fake images conditioned on fake 1-hot labels + # w/ label=1.0 + # since the discriminator weights are frozen in + # adversarial network only the generator is trained + # generate noise using uniform distribution + noise = np.random.uniform(-1.0, + 1.0, + size=[batch_size, latent_size]) + # assign random one-hot labels + fake_labels = np.eye(num_labels)[np.random.choice(num_labels,batch_size)] +``` + +```py + # label fake images as real or 1.0 + y = np.ones([batch_size, 1]) + # train the adversarial network + # note that unlike in discriminator training, + # we do not save the fake images in a variable + # the fake images go to the discriminator input of the adversarial + # for classification + # log the loss and accuracy + loss, acc = adversarial.train_on_batch([noise, fake_labels], y) + log = "%s [adversarial loss: %f, acc: %f]" % (log, loss, acc) + print(log) + if (i + 1) % save_interval == 0: + # plot generator images on a periodic basis + plot_images(generator, + noise_input=noise_input, + noise_class=noise_class, + show=False, + step=(i + 1), + model_name=model_name) +``` + +```py + # save the model after training the generator + # the trained generator can be reloaded for + # future MNIST digit generation + generator.save(model_name + ".h5") +``` + +“图 
4.3.4”显示了当生成器被调整为产生带有以下标签的数字时生成的 MNIST 数字的演变:
+
+```py
+[0 1 2 3
+4 5 6 7
+8 9 0 1
+2 3 4 5]
+```
+
+我们可以看到以下结果:
+
+![](img/B14853_04_10.png)
+
+图 4.3.4:使用标签`[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5]`作为条件,CGAN 在不同训练步骤中生成的伪造图像
+
+鼓励您运行经过训练的生成器模型,以查看新的合成 MNIST 数字图像:
+
+```py
+python3 cgan-mnist-4.3.1.py --generator=cgan_mnist.h5
+```
+
+或者,也可以请求要生成的特定数字(例如 8):
+
+```py
+python3 cgan-mnist-4.3.1.py --generator=cgan_mnist.h5 --digit=8
+```
+
+使用 CGAN,就像拥有一个可以按我们的要求绘制数字的智能体,这类似于人类书写数字的方式。 与 DCGAN 相比,CGAN 的主要优势在于我们可以指定希望智能体绘制的数字。
+
+# 4\. 结论
+
+本章讨论了 GAN 的一般原理,为我们接下来要讨论的更高级的主题(包括改进的 GAN、解缠表示 GAN 和跨域 GAN)奠定了基础。 本章首先介绍了 GAN 由两个网络组成,分别称为生成器和判别器。 判别器的作用是区分真实信号和伪造信号。 生成器的目的是欺骗判别器。 生成器通常与判别器结合以形成对抗网络。 生成器通过训练对抗网络来学习如何生成能够欺骗判别器的伪造数据。
+
+我们还了解了 GAN 的构建方法;但众所周知,GAN 训练起来非常困难。 本章给出了`tf.keras`中的两个示例实现。 DCGAN 证明了可以训练 GAN,使用深层 CNN 生成伪造图像。 这些伪造图像是 MNIST 数字。 但是,DCGAN 生成器无法控制应绘制的特定数字。 CGAN 通过调节生成器来绘制特定数字,从而解决了此问题。 该条件采用单热标签的形式。 如果我们要构建能够生成特定类别数据的智能体,则 CGAN 非常有用。
+
+在下一章中,将介绍 DCGAN 和 CGAN 的改进。 特别是,重点将放在如何稳定 DCGAN 的训练以及如何提高 CGAN 的感知质量上。 这将通过引入新的损失函数和稍有不同的模型架构来完成。
+
+# 5\. 参考
+
+1. `Ian Goodfellow. NIPS 2016 Tutorial: Generative Adversarial Networks. arXiv preprint arXiv:1701.00160, 2016 (https://arxiv.org/pdf/1701.00160.pdf).`
+1. `Alec Radford, Luke Metz, and Soumith Chintala. Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks. arXiv preprint arXiv:1511.06434, 2015 (https://arxiv.org/pdf/1511.06434.pdf).`
+1. `Mehdi Mirza and Simon Osindero. Conditional Generative Adversarial Nets. arXiv preprint arXiv:1411.1784, 2014 (https://arxiv.org/pdf/1411.1784.pdf).`
+1. `Tero Karras et al. Progressive Growing of GANs for Improved Quality, Stability, and Variation. ICLR, 2018 (https://arxiv.org/pdf/1710.10196.pdf).`
+1. `Tero Karras, Samuli Laine, and Timo Aila. A Style-Based Generator Architecture for Generative Adversarial Networks. Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2019.`
+1. `Tero Karras et al. Analyzing and Improving the Image Quality of StyleGAN. 2019 (https://arxiv.org/abs/1912.04958).`
\ No newline at end of file
diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/05.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/05.md
new file mode 100644
index 00000000..1815e4e3
--- /dev/null
+++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/05.md
@@ -0,0 +1,1136 @@
+# 五、改进的 GAN
+
+自 2014 年引入**生成对抗网络**(**GAN**)以来,其流行度迅速提高。 GAN 已被证明是有用的生成模型,可以合成看起来真实的新数据。 此后,深度学习领域的许多研究论文相继提出了一些措施,以解决原始 GAN 的困难和局限性。
+
+正如我们在前几章中讨论的那样,众所周知,GAN 很难训练,并且易于发生模式崩溃。 模式崩溃是指即使损失函数已经被优化,生成器仍会产生看起来几乎相同的输出的情况。 以 MNIST 数字为例,发生模式崩溃时,生成器可能只产生数字 4 和 9,因为这两个数字看起来很相似。 **Wasserstein GAN**(**WGAN**)[2]解决了这些问题,其观点是:只需将 GAN 的损失函数替换为基于 **Wasserstein** 距离(也称为**陆地移动距离**,**EMD**)的损失函数,即可实现稳定的训练并避免模式崩溃。
+
+但是,稳定性问题并不是 GAN 的唯一问题。 人们也越来越需要提高所生成图像的感知质量。 **最小二乘 GAN**(**LSGAN**)[3]建议同时解决这两个问题。 其基本前提是,在训练过程中,Sigmoid 交叉熵损失会导致梯度消失,从而导致较差的图像质量。 最小二乘损失不会导致梯度消失。 与原始 GAN 生成的图像相比,所生成的图像具有更高的感知质量。
+
+在上一章中,CGAN 介绍了一种调节生成器输出的方法。 例如,如果要获取数字 8,则可以在生成器的输入中包含条件标签。 受 CGAN 的启发,**辅助分类器 GAN**(**ACGAN**)[4]提出了一种改进的条件算法,可产生更好的感知质量和输出多样性。
+
+总之,本章的目的是介绍:
+
+* WGAN 的理论描述
+* 对 LSGAN 原理的理解
+* 对 ACGAN 原理的理解
+* 改进的 GAN 的`tf.keras`实现 – WGAN,LSGAN 和 ACGAN
+
+让我们从讨论 WGAN 开始。
+
+# 1\. 
Wasserstein GAN + +如前所述,众所周知,GAN 很难训练。 判别器和生成器这两个网络的相反目标很容易导致训练不稳定。 判别器尝试从真实数据中正确分类伪造数据。 同时,生成器将尽最大努力欺骗判别器。 如果判别器的学习速度比生成器快,则生成器参数将无法优化。 另一方面,如果判别器学习较慢,则梯度可能会在到达生成器之前消失。 在最坏的情况下,如果判别器无法收敛,则生成器将无法获得任何有用的反馈。 + +WGAN 认为 GAN 固有的不稳定性是由于它的损失函数引起的,该函数基于 **Jensen-Shannon**(**JS**)距离。 在 GAN 中,生成器的目的是学习如何将一种源分布(例如噪声)从转换为估计的目标分布(例如 MNIST 数字)。 使用 GAN 的原始公式,损失函数实际上是使目标分布与其估计值之间的距离最小。 问题是,对于某些分布对,没有平滑的路径可以最小化此 JS 距离。 因此,训练将无法收敛。 + +在以下部分中,我们将研究三个距离函数,并分析什么可以替代更适合 GAN 优化的 JS 距离函数。 + +## 距离函数 + +可以通过检查其损失函数来了解训练 GAN 的稳定性。 为了更好地理解 GAN 损失函数,我们将回顾两个概率分布之间的公共距离或散度函数。 + +我们关注的是用于真实数据分配的`p_data`与用于生成器数据分配的`p_g`之间的距离。 GAN 的目标是制造`p_g -> p_data`。“表 5.1.1”显示了散度函数。 + +在大多数个最大似然任务中,我们将使用 **Kullback-Leibler**(**KL**)散度,或`D[KL]`损失函数可以衡量我们的神经网络模型预测与真实分布函数之间的距离。 如“公式 5.1.1”所示,由于`D[KL](p_data || p_g) ≠ D[KL](p_g || p_data)`,所以`D[KL]`不对称。 + +**JS** 或`D[JS]`是基于`D[KL]`的差异。 但是,与`D[KL]`不同,`D[JS]`是对称的并且是有限的。 在本节中,我们将演示优化 GAN 损失函数等同于优化`D[JS]`: + +| **散度** | **表达式** | +| --- | --- | +| Kullback-Leibler(KL)“公式 5.1.1” | ![](img/B14853_05_003.png) | +| | ![](img/B14853_05_004.png) | +| *詹森·香农(JS)“公式 5.1.2” | ![](img/B14853_05_005.png) | +| 陆地移动距离(EMD)或 Wasserstein 1 “公式 5.1.3” | ![](img/B14853_05_006.png) | +| | 其中`Π(p_data, p_g)`是所有联合分布`γ(x, y)`的集合,其边际为`p_data`和`p_g`。 | + +表 5.1.1:两个概率分布函数`p_data`和`p_g`之间的散度函数 + +EMD 背后的想法是,它是`d = ||x - y||`传输多少质量`γ(x, y)`,为了让概率分布`p_data`匹配`p_g`的度量。 `γ(x, y)`是所有可能的联合分布`Π(p_data, p_g)`的空间中的联合分布。 `γ(x, y)`也被称为运输计划,以反映运输质量以匹配两个概率分布的策略。 给定两个概率分布,有许多可能的运输计划。 大致而言, `inf`表示成本最低的运输计划。 + +例如,“图 5.1.1”向我们展示了两个简单的离散分布`x`和`y`: + +![](img/B14853_05_01.png) + +图 5.1.1:EMD 是从`x`传输以匹配目标分布`y`的质量的加权数量。 + +在位置`i = 1, 2, 3, 4`上,`x`在具有质量`m[i], i = 1, 2, 3, 4`。同时,位置`y[i], i = 1, 2`上,`y`的质量为`m[i], i = 1, 2`。为了匹配分布`y`,图中的箭头显示了将每个质量`x[i]`移动`d[i]`的最小运输计划。 EMD 计算如下: + +![](img/B14853_05_014.png) (Equation 5.1.4) + +在“图 5.1.1”中,EMD 可解释为移动一堆污物`x`填充孔`y`所需的最少工作量。 尽管在此示例中,也可以从图中推导出`inf`,但在大多数情况下,尤其是在连续分布中,用尽所有可能的运输计划是很棘手的。 我们将在本章中稍后回到这个问题。 同时,我们将向您展示 GAN 损失函数的作用,实际上是如何使 **JS** 的差异最小化。 + +## GAN 中的距离函数 + +现在,在上一章的损失函数给定任何生成器的情况下,我们将计算最佳判别器。 我们将回顾上一章中的以下等式: + +![](img/B14853_05_015.png) (Equation 4.1.1) + +除了从噪声分布中采样外,前面的等式也可以表示为从生成器分布中采样: + +![](img/B14853_05_016.png) (Equation 5.1.5) + +找出最小的`L^(D)`: + +![](img/B14853_05_018.png) (Equation 5.1.6) + +![](img/B14853_05_019.png) (Equation 5.1.7) + +积分内部的项为`y -> a log(y) + b log(1 - y)`的形式,对于不包括`{0, 0}`的任何`a, b ∈ R^2`,在`y ∈ [0. 
1]`的`a / (a + b)`处都有一个已知的最大值。 由于该积分不会更改此表达式的最大值(或`L^(D)`的最小值)的位置,因此最佳判别器为: + +![](img/B14853_05_026.png) (Equation 5.1.8) + +因此,给定最佳判别器的损失函数为: + +![](img/B14853_05_027.png) (Equation 5.1.9) + +![](img/B14853_05_028.png) (Equation 5.1.10) + +![](img/B14853_05_029.png) (Equation 5.1.11) + +![](img/B14853_05_030.png) (Equation 5.1.12) + +我们可以从“公式 5.1.12”观察到,最佳判别器的损失函数为常数减去真实分布`p_data`和任何生成器分布`p_g`之间的 JS 散度的两倍。 最小化`L^(D*)`意味着最大化`D[JS](p_data || p_g)`,否则判别器必须正确地将真实数据中的伪造物分类。 + +同时,我们可以放心地说,最佳生成器是当生成器分布等于真实数据分布时: + +![](img/B14853_05_034.png) (Equation 5.1.13) + +这是有道理的,因为生成器的目的是通过学习真实的数据分布来欺骗判别器。 有效地,我们可以通过最小化`D[JS]`或通过制作`p_g -> p_data`来获得最佳生成器。 给定最佳生成器,最佳判别器为`D*(x) = 1 / 2`和`L^(D*) = 2log2 = 0.60`。 + +问题在于,当两个分布没有重叠时,就没有平滑函数可以帮助缩小它们之间的差距。 训练 GAN 不会因梯度下降而收敛。 例如,假设: + +`p_data = (x, y) where x = 0, y ~ U(0, 1)` (Equation 5.1.14) + +`p_g = (x, y) where x = θ, y ~ U(0, 1)` (Equation 5.1.15) + +这两个分布显示在“图 5.1.2”中: + +![](img/B14853_05_02.png) + +图 5.1.2:没有重叠的两个分布的示例。 对于`p_g`,`θ = 0.5` + +![](img/B14853_05_044.png)是均匀分布。 每个距离函数的差异如下: + +* ![](img/B14853_05_045.png) +* ![](img/B14853_05_046.png) +* ![](img/B14853_05_047.png) +* ![](img/B14853_05_048.png) + +由于`D[JS]`是一个常数,因此 GAN 将没有足够的梯度来驱动`p_g -> p_data`。 我们还会发现`D[KL]`或反向`D[KL]`也不起作用。 但是,通过`W(p_data, p_g)`,我们可以拥有平滑函数,以便通过梯度下降获得`p_g -> p_data`。 为了优化 GAN,EMD 或 Wasserstein 1 似乎是一个更具逻辑性的损失函数,因为在两个分布具有极小或没有重叠的情况下,`D[JS]`会失败。 + +为了帮助进一步理解,可以在以下位置找到[有关距离函数的精彩讨论](https://lilianweng.github.io/lil-log/2017/08/20/from-GAN-to-WGAN.html)。 + +在下一节中,我们将重点介绍使用 EMD 或 Wasserstein 1 距离函数来开发替代损失函数,以鼓励稳定训练 GAN。 + +## 使用 Wasserstein 损失 + +在使用 EMD 或 Wasserstein 1 之前,还有一个要解决的问题。 耗尽`Π(p_data, p_g)`的空间来找到`γ ~ Π(p_data, p_g)`是很棘手的。 提出的解决方案是使用其 Kantorovich-Rubinstein 对偶: + +![](img/B14853_05_053.png) (Equation 5.1.16) + +等效地,EMD `sup ||f||_L <= 1`是所有 K-Lipschitz 函数上的最高值(大约是最大值):`f: x -> R`。 K-Lipschitz 函数满足以下约束: + +![](img/B14853_05_056.png) (Equation 5.1.17) + +对于所有`x[1], x[2] ∈ R`。 K-Lipschitz 函数具有有界导数,并且几乎总是连续可微的(例如,`f(x) = |x|`具有有界导数并且是连续的,但在`x = 0`时不可微分)。 + +“公式 5.1.16”可以通过找到 K-Lipschitz 函数`{f[w]}, w ∈ W`的族来求解: + +![](img/B14853_05_060.png) (Equation 5.1.18) + +在 GAN 中,可以通过从`z`-噪声分布采样并用`f[w]`替换“公式 5.1.18”来重写。 鉴别函数,`D[w]`: + +![](img/B14853_05_061.png) (Equation 5.1.19) + +我们使用粗体字母突出显示多维样本的一般性。 最后一个问题是如何找到函数族`w ∈ W`。 所提出的解决方案是在每次梯度更新时进行的。 判别器`w`的权重被限制在上下限之间(例如,-0.01 和 0.01): + +![](img/B14853_05_063.png) (Equation 5.1.20) + +`w`的较小值将判别器约束到紧凑的参数空间,从而确保 Lipschitz 连续性。 + +我们可以使用“公式 5.1.19”作为我们新的 GAN 损失函数的基础。 EMD 或 Wasserstein 1 是生成器旨在最小化的损失函数,以及判别器试图最大化的损失函数(或最小化`-W(p_data, p_g)`: + +![](img/B14853_05_064.png) (Equation 5.1.21) + +![](img/B14853_05_065.png) (Equation 5.1.22) + +在生成器损失函数中,第一项消失了,因为它没有针对实际数据进行直接优化。 + +“表 5.1.2”显示了 GAN 和 WGAN 的损失函数之间的差异。 为简洁起见,我们简化了`L^(D)`和`L^(G)`的表示法: + +| **网络** | **损失函数** | **公式** | +| --- | --- | --- | +| GAN | ![](img/B14853_05_068.png) | 4.1.1 | +| | ![](img/B14853_05_069.png) | 4.1.5 | +| WGAN | ![](img/B14853_05_070.png) | 5.1.21 | +| | ![](img/B14853_05_071.png) | 5.1.22 | +| | ![](img/B14853_05_072.png) | 5.1.20 | + +表 5.1.2:GAN 和 WGAN 的损失函数之间的比较 + +这些损失函数用于训练 WGAN,如“算法 5.1.1”中所示。 + +**算法 5.1.1 WGAN**。 参数的值为`α = 0.00005`,`c = 0.01`,`m = 64`和`n_critic = 5`。 + +要求:`α`,学习率。`c`是削波参数。`m`,批量大小。 `n_critic`,即每个生成器迭代的评论(鉴别)迭代次数。 + +要求:`w[D]`,初始判别器(discriminator)参数。 `θ[D]`,初始生成器参数: + +1. 当`θ[D]`尚未收敛,执行: +2. 对于`t = 1, ..., n_critic`,执行: +3. 从真实数据中抽样一批`{x^(i)} ~ p_data, i = 1, ..., m` +4. 从均匀的噪声分布中采样一批`{z^(i)} ~ p_x, i = 1, ..., m` +5. ![](img/B14853_05_085.png) + + 计算判别器梯度 +6. ![](img/B14853_05_086.png) + + 更新判别器参数 +7. 
![](img/B14853_05_087.png) + + 剪辑判别器权重 +8. `end for` +9. 从均匀的噪声分布中采样一批`{z^(i)} ~ p_x, i = 1, ..., m` +10. ![](img/B14853_05_089.png) + + 计算生成器梯度 +11. ![](img/B14853_05_090.png) + + 更新生成器参数 +12. `end while` + +“图 5.1.3”展示了 WGAN 模型实际上与 DCGAN 相同,除了伪造的/真实的数据标签和损失函数: + +![](img/B14853_05_03.png) + +图 5.1.3:顶部:训练 WGAN 判别器需要来自生成器的虚假数据和来自真实分发的真实数据。 下:训练 WGAN 生成器要求生成器中假冒的真实数据是真实的 + +与 GAN 相似,WGAN 交替训练判别器和生成器(通过对抗)。 但是,在 WGAN 中,判别器(也称为评论者)在训练生成器进行一次迭代(第 9 至 11 行)之前,先训练`n_critic`迭代(第 2 至 8 行)。 这与对于判别器和生成器具有相同数量的训练迭代的 GAN 相反。 换句话说,在 GAN 中,`n_critic = 1`。 + +训练判别器意味着学习判别器的参数(权重和偏差)。 这需要从真实数据中采样一批(第 3 行),并从伪数据中采样一批(第 4 行),然后将采样数据馈送到判别器网络,然后计算判别器参数的梯度(第 5 行)。 判别器参数使用 RMSProp(第 6 行)进行了优化。 第 5 行和第 6 行都是“公式 5.1.21”的优化。 + +最后,EM 距离优化中的 Lipschitz 约束是通过裁剪判别器参数(第 7 行)来施加的。 第 7 行是“公式 5.1.20”的实现。 在`n_critic`迭代判别器训练之后,判别器参数被冻结。 生成器训练通过对一批伪造数据进行采样开始(第 9 行)。 采样的数据被标记为实数(1.0),以致愚弄判别器网络。 在第 10 行中计算生成器梯度,并在第 11 行中使用 RMSProp 对其进行优化。第 10 行和第 11 行执行梯度更新以优化“公式 5.1.22”。 + +训练生成器后,将解冻判别器参数,并开始另一个`n_critic`判别器训练迭代。 我们应该注意,在判别器训练期间不需要冻结生成器参数,因为生成器仅涉及数据的制造。 类似于 GAN,可以将判别器训练为一个单独的网络。 但是,训练生成器始终需要判别器通过对抗网络参与,因为损失是根据生成器网络的输出计算得出的。 + +与 GAN 不同,在 WGAN 中,将实际数据标记为 1.0,而将伪数据标记为 -1.0,作为计算第 5 行中的梯度的一种解决方法。第 5-6 和 10-11 行执行梯度更新以优化“公式 5.1.21”和“5.1.22”。 第 5 行和第 10 行中的每一项均建模为: + +![](img/B14853_05_091.png) (Equation 5.1.23) + +对于真实数据,其中`y_label = 1.0`,对于假数据,`y_label= -1.0`。 为了简化符号,我们删除了上标`(i)`。 对于判别器,当使用实际数据进行训练时,WGAN 增加`y_pred = D[w](x)`以最小化损失函数。 + +使用伪造数据进行训练时,WGAN 会降低`y_pred = D[w](g(z))`以最大程度地减少损失函数。 对于生成器,当在训练过程中将伪数据标记为真实数据时,WGAN 增加`y_pred = D[w](g(z))`以最小化损失函数。 请注意,`y_label`除了其符号外,对损失函数没有直接贡献。 在`tf.keras`中,“公式 5.1.23”实现为: + +```py +def wasserstein_loss(y_label, y_pred): + return -K.mean(y_label * y_pred) +``` + +本节最重要的部分是用于稳定训练 GAN 的新损失函数。 它基于 EMD 或 Wasserstein1。“算法 5.1.1”形式化了 WGAN 的完整训练算法,包括损失函数。 在下一节中,将介绍`tf.keras`中训练算法的实现。 + +## 使用 Keras 的 WGAN 实现 + +为了在`tf.keras`中实现 WGAN,我们可以重用 GAN 的 DCGAN 实现,这是我们在上一一章中介绍的。 DCGAN 构建器和工具函数在`lib`文件夹的`gan.py`中作为模块实现。 + +函数包括: + +* `generator()`:生成器模型构建器 +* `discriminator()`:判别器模型构建器 +* `train()`:DCGAN 训练师 +* `plot_images()`:通用生成器输出绘图仪 +* `test_generator()`:通用的生成器测试工具 + +如“列表 5.1.1”所示,我们可以通过简单地调用以下命令来构建一个判别器: + +```py +discriminator = gan.discriminator(inputs, activation='linear') +``` + +WGAN 使用线性输出激活。 对于生成器,我们执行: + +```py +generator = gan.generator(inputs, image_size) +``` + +`tf.keras`中的整体网络模型类似于 DCGAN 的“图 4.2.1”中看到的模型。 + +“列表 5.1.1”突出显示了 RMSprop 优化器和 Wasserstein 损失函数的使用。 在训练期间使用“算法 5.1.1”中的超参数。 + +[完整的代码可在 GitHub 上获得](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)。 + +“列表 5.1.1”:`wgan-mnist-5.1.2.py` + +```py +def build_and_train_models(): + """Load the dataset, build WGAN discriminator, + generator, and adversarial models. + Call the WGAN train routine. 
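+    Note: the hyperparameters below follow Algorithm 5.1.1:
+    n_critic=5, clip_value=0.01, batch_size=64 and RMSprop
+    with lr=5e-5.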
+ """ + # load MNIST dataset + (x_train, _), (_, _) = mnist.load_data() +``` + +```py + # reshape data for CNN as (28, 28, 1) and normalize + image_size = x_train.shape[1] + x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) + x_train = x_train.astype('float32') / 255 +``` + +```py + model_name = "wgan_mnist" + # network parameters + # the latent or z vector is 100-dim + latent_size = 100 + # hyper parameters from WGAN paper [2] + n_critic = 5 + clip_value = 0.01 + batch_size = 64 + lr = 5e-5 + train_steps = 40000 + input_shape = (image_size, image_size, 1) +``` + +```py + # build discriminator model + inputs = Input(shape=input_shape, name='discriminator_input') + # WGAN uses linear activation in paper [2] + discriminator = gan.discriminator(inputs, activation='linear') + optimizer = RMSprop(lr=lr) + # WGAN discriminator uses wassertein loss + discriminator.compile(loss=wasserstein_loss, + optimizer=optimizer, + metrics=['accuracy']) + discriminator.summary() +``` + +```py + # build generator model + input_shape = (latent_size, ) + inputs = Input(shape=input_shape, name='z_input') + generator = gan.generator(inputs, image_size) + generator.summary() +``` + +```py + # build adversarial model = generator + discriminator + # freeze the weights of discriminator during adversarial training + discriminator.trainable = False + adversarial = Model(inputs, + discriminator(generator(inputs)), + name=model_name) + adversarial.compile(loss=wasserstein_loss, + optimizer=optimizer, + metrics=['accuracy']) + adversarial.summary() +``` + +```py + # train discriminator and adversarial networks + models = (generator, discriminator, adversarial) + params = (batch_size, + latent_size, + n_critic, + clip_value, + train_steps, + model_name) + train(models, x_train, params) +``` + +“列表 5.1.2”是紧跟“算法 5.1.1”的训练函数。 但是,在判别器的训练中有一个小的调整。 与其在单个合并的真实数据和虚假数据中组合训练权重,不如先训练一批真实数据,然后再训练一批虚假数据。 这种调整将防止梯度消失,因为真实和伪造数据标签中的符号相反,并且由于裁剪而导致的权重较小。 + +“列表 5.1.2”:`wgan-mnist-5.1.2.py` + +为 WGAN 训练算法: + +```py +def train(models, x_train, params): + """Train the Discriminator and Adversarial Networks +``` + +```py + Alternately train Discriminator and Adversarial + networks by batch. + Discriminator is trained first with properly labelled + real and fake images for n_critic times. + Discriminator weights are clipped as a requirement + of Lipschitz constraint. + Generator is trained next (via Adversarial) with + fake images pretending to be real. 
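+    The critic loss and accuracy are averaged over the n_critic
+    updates before they are logged.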
+ Generate sample images per save_interval +``` + +```py + Arguments: + models (list): Generator, Discriminator, + Adversarial models + x_train (tensor): Train images + params (list) : Networks parameters +``` + +```py + """ + # the GAN models + generator, discriminator, adversarial = models + # network parameters + (batch_size, latent_size, n_critic, + clip_value, train_steps, model_name) = params + # the generator image is saved every 500 steps + save_interval = 500 + # noise vector to see how the + # generator output evolves during training + noise_input = np.random.uniform(-1.0, + 1.0, + size=[16, latent_size]) + # number of elements in train dataset + train_size = x_train.shape[0] + # labels for real data + real_labels = np.ones((batch_size, 1)) + for i in range(train_steps): + # train discriminator n_critic times + loss = 0 + acc = 0 + for _ in range(n_critic): + # train the discriminator for 1 batch + # 1 batch of real (label=1.0) and + # fake images (label=-1.0) + # randomly pick real images from dataset + rand_indexes = np.random.randint(0, + train_size, + size=batch_size) + real_images = x_train[rand_indexes] + # generate fake images from noise using generator + # generate noise using uniform distribution + noise = np.random.uniform(-1.0, + 1.0, + size=[batch_size, latent_size]) + fake_images = generator.predict(noise) +``` + +```py + # train the discriminator network + # real data label=1, fake data label=-1 + # instead of 1 combined batch of real and fake images, + # train with 1 batch of real data first, then 1 batch + # of fake images. + # this tweak prevents the gradient + # from vanishing due to opposite + # signs of real and fake data labels (i.e. +1 and -1) and + # small magnitude of weights due to clipping. + real_loss, real_acc = \ + discriminator.train_on_batch(real_images, + real_labels) + fake_loss, fake_acc = \ + discriminator.train_on_batch(fake_images, + -real_labels) + # accumulate average loss and accuracy + loss += 0.5 * (real_loss + fake_loss) + acc += 0.5 * (real_acc + fake_acc) + # clip discriminator weights to satisfy Lipschitz constraint + for layer in discriminator.layers: + weights = layer.get_weights() + weights = [np.clip(weight, + -clip_value, + clip_value) for weight in weights] + layer.set_weights(weights) +``` + +```py + # average loss and accuracy per n_critic training iterations + loss /= n_critic + acc /= n_critic + log = "%d: [discriminator loss: %f, acc: %f]" % (i, loss, acc) +``` + +```py + # train the adversarial network for 1 batch + # 1 batch of fake images with label=1.0 + # since the discriminator weights are frozen in + # adversarial network only the generator is trained + # generate noise using uniform distribution + noise = np.random.uniform(-1.0, + 1.0, + size=[batch_size, latent_size]) + # train the adversarial network + # note that unlike in discriminator training, + # we do not save the fake images in a variable + # the fake images go to the discriminator + # input of the adversarial for classification + # fake images are labelled as real + # log the loss and accuracy + loss, acc = adversarial.train_on_batch(noise, real_labels) + log = "%s [adversarial loss: %f, acc: %f]" % (log, loss, acc) + print(log) + if (i + 1) % save_interval == 0: + # plot generator images on a periodic basis + gan.plot_images(generator, + noise_input=noise_input, + show=False, + step=(i + 1), + model_name=model_name) +``` + +```py + # save the model after training the generator + # the trained generator can be reloaded + # for future MNIST digit generation 
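+    # (illustrative sketch, not part of the original listing: the
+    #  saved generator can later be reloaded along the lines of
+    #      from tensorflow.keras.models import load_model
+    #      generator = load_model("wgan_mnist.h5")
+    #      noise = np.random.uniform(-1.0, 1.0, size=[16, latent_size])
+    #      fake_images = generator.predict(noise)
+    #  which is what the --generator command-line option shown in
+    #  the text does)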
+ generator.save(model_name + ".h5") +``` + +“图 5.1.4”显示了 MNIST 数据集上 WGAN 输出的演变: + +![](img/B14853_05_04.png) + +图 5.1.4:WGAN 与训练步骤的示例输出。 在训练和测试期间,WGAN 的任何输出均不会遭受模式崩溃 + +即使在网络配置更改的情况下,WGAN 也稳定。 例如,当在识别符网络的 ReLU 之前插入批量规范化时,已知 DCGAN 不稳定。 在 WGAN 中,相同的配置是稳定的。 + +下图“图 5.1.5”向我们展示了 DCGAN 和 WGAN 的输出,并在判别器网络上进行了批量归一化: + +![](img/B14853_05_05.png) + +图 5.1.5:在判别器网络中的 ReLU 激活之前插入批量归一化时,DCGAN(左)和 WGAN(右)的输出比较 + +与上一章中的 GAN 训练相似,经过 40,000 个训练步骤,将训练后的模型保存在文件中。 使用训练有素的生成器模型,通过运行以下命令来生成新的合成 MNIST 数字图像: + +```py +python3 wgan-mnist-5.1.2.py --generator=wgan_mnist.h5 +``` + +正如我们所讨论的,原始 GAN 很难训练。 当 GAN 优化的损失函数时,就会出现问题。 实际上是在优化 *JS* 差异,`D[JS]`。 当两个分布函数之间几乎没有重叠时,很难优化`D[JS]`。 + +WGAN 提出通过使用 EMD 或 Wasserstein 1 损失函数来解决该问题,该函数即使在两个分布之间很少或没有重叠时也具有平滑的微分函数。 但是,WGAN 与生成的图像质量无关。 除了稳定性问题之外,原始 GAN 生成的图像在感知质量方面还有很多改进的地方。 LSGAN 理论上可以同时解决两个问题。 在下一节中,我们将介绍 LSGAN。 + +# 2\. 最小二乘 GAN(LSGAN) + +LSGAN 提出最小二乘损失。“图 5.2.1”演示了为什么在 GAN 中使用 Sigmoid 交叉熵损失会导致生成的数据质量较差: + +![](img/B14853_05_06.png) + +图 5.2.1:真实样本和虚假样本分布均除以各自的决策边界:Sigmoid 和最小二乘 + +理想情况下,假样本分布应尽可能接近真实样本的分布。 但是,对于 GAN,一旦伪样本已经位于决策边界的正确一侧,梯度就消失了。 + +这会阻止生成器具有足够的动机来提高生成的伪数据的质量。 远离决策边界的伪样本将不再试图靠近真实样本的分布。 使用最小二乘损失函数,只要假样本分布与真实样本的分布相距甚远,梯度就不会消失。 即使假样本已经位于决策边界的正确一侧,生成器也将努力改善其对实际密度分布的估计。 + +“表 5.2.1”显示了 GAN,WGAN 和 LSGAN 之间的损失函数的比较: + +| **网络** | **损失函数** | **公式** | +| --- | --- | --- | +| GAN | ![](img/B14853_05_095.png) | 4.1.1 | +| | ![](img/B14853_05_096.png) | 4.1.5 | +| WGAN | ![](img/B14853_05_097.png) | 5.1.21 | +| | ![](img/B14853_05_098.png) | 5.1.22 | +| | ![](img/B14853_05_099.png) | 5.1.20 | +| LSGAN | ![](img/B14853_05_100.png) | 5.2.1 | +| | ![](img/B14853_05_101.png) | 5.2.2 | + +表 5.2.1:GAN,WGAN 和 LSGAN 损失函数之间的比较 + +最小化“公式 5.2.1”或判别器损失函数意味着实际数据分类与真实标签 1.0 之间的 MSE 应该接近零。 此外,假数据分类和真实标签 0.0 之间的 MSE 应该接近零。 + +与其他 GAN 相似,对 LSGAN 判别器进行了训练,可以从假数据样本中对真实数据进行分类。 最小化公式 5.2.2 意味着在标签 1.0 的帮助下,使判别器认为生成的假样本数据是真实的。 + +以上一章中的 DCGAN 代码为基础来实现 LSGAN 仅需进行一些更改。 如“列表 5.2.1”所示,删除了判别器 Sigmoid 激活。 判别器是通过调用以下命令构建的: + +```py +discriminator = gan.discriminator(inputs, activation=None) +``` + +生成器类似于原始的 DCGAN: + +```py +generator = gan.generator(inputs, image_size) +``` + +鉴别函数和对抗损失函数都被`mse`代替。 所有网络参数均与 DCGAN 中的相同。 `tf.keras`中 LSGAN 的网络模型类似于“图 4.2.1”,除了存在线性激活或无输出激活外。 训练过程类似于 DCGAN 中的训练过程,由工具函数提供: + +```py +gan.train(models, x_train, params) +``` + +“列表 5.2.1”:`lsgan-mnist-5.2.1.py` + +```py +def build_and_train_models(): + """Load the dataset, build LSGAN discriminator, + generator, and adversarial models. + Call the LSGAN train routine. 
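+    Note: the only changes from the DCGAN setup are the removal of
+    the sigmoid output activation in the discriminator and the use
+    of 'mse' loss for both the discriminator and adversarial models.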
+ """ + # load MNIST dataset + (x_train, _), (_, _) = mnist.load_data() + # reshape data for CNN as (28, 28, 1) and normalize + image_size = x_train.shape[1] + x_train = np.reshape(x_train, + [-1, image_size, image_size, 1]) + x_train = x_train.astype('float32') / 255 + model_name = "lsgan_mnist" + # network parameters + # the latent or z vector is 100-dim + latent_size = 100 + input_shape = (image_size, image_size, 1) + batch_size = 64 + lr = 2e-4 + decay = 6e-8 + train_steps = 40000 + # build discriminator model + inputs = Input(shape=input_shape, name='discriminator_input') + discriminator = gan.discriminator(inputs, activation=None) + # [1] uses Adam, but discriminator easily + # converges with RMSprop + optimizer = RMSprop(lr=lr, decay=decay) + # LSGAN uses MSE loss [2] + discriminator.compile(loss='mse', + optimizer=optimizer, + metrics=['accuracy']) + discriminator.summary() + # build generator model + input_shape = (latent_size, ) + inputs = Input(shape=input_shape, name='z_input') + generator = gan.generator(inputs, image_size) + generator.summary() + # build adversarial model = generator + discriminator + optimizer = RMSprop(lr=lr*0.5, decay=decay*0.5) + # freeze the weights of discriminator + # during adversarial training + discriminator.trainable = False + adversarial = Model(inputs, + discriminator(generator(inputs)), + name=model_name) + # LSGAN uses MSE loss [2] + adversarial.compile(loss='mse', + optimizer=optimizer, + metrics=['accuracy']) + adversarial.summary() + # train discriminator and adversarial networks + models = (generator, discriminator, adversarial) + params = (batch_size, latent_size, train_steps, model_name) + gan.train(models, x_train, params) +``` + +“图 5.2.2”显示了使用 MNIST 数据集对 40,000 个训练步骤进行 LSGAN 训练后生成的样本: + +![](img/B14853_05_07.png) + +图 5.2.2:LSGAN 的示例输出与训练步骤 + +与上一章中 DCGAN 中的“图 4.2.1”相比,输出图像的感知质量更好。 + +使用训练有素的生成器模型,通过运行以下命令来生成新的合成 MNIST 数字图像: + +```py +python3 lsgan-mnist-5.2.1.py --generator=lsgan_mnist.h5 +``` + +在本节中,我们讨论了损失函数的另一种改进。 通过使用 MSE 或 L2,我们解决了训练 GAN 的稳定性和感知质量的双重问题。 在下一节中,提出了相对于 CGAN 的另一项改进,这已在上一章中进行了讨论。 + +# 3\. 
辅助分类器 GAN (ACGAN) + +ACGAN 在原理上类似于我们在上一章中讨论的**条件 GAN**(**CGAN**)。 我们将比较 CGAN 和 ACGAN。 对于 CGAN 和 ACGAN,生成器输入均为噪声及其标签。 输出是属于输入类标签的伪图像。 对于 CGAN,判别器的输入是图像(假的或真实的)及其标签。 输出是图像真实的概率。 对于 ACGAN,判别器的输入是一幅图像,而输出是该图像是真实的且其类别是标签的概率。 + +“图 5.3.1”突出显示了生成器训练期间 CGAN 和 ACGAN 之间的区别: + +![](img/B14853_05_08.png) + +图 5.3.1:CGAN 与 ACGAN 生成器训练。 主要区别是判别器的输入和输出 + +本质上,在 CGAN 中,我们向网络提供了边信息(标签)。 在 ACGAN 中,我们尝试使用辅助类解码器网络重建辅助信息。 ACGAN 理论认为,强制网络执行其他任务可以提高原始任务的表现。 在这种情况下,附加任务是图像分类。 原始任务是生成伪造图像。 + +“表 5.3.1”显示了 ACGAN 损失函数与 CGAN 损失函数的比较: + +| **网络** | **损失函数** | **编号** | +| --- | --- | --- | +| CGAN | ![](img/B14853_05_102.png) | 4.3.1 | +| | ![](img/B14853_05_103.png) | 4.3.2 | +| ACGAN | ![](img/B14853_05_104.png) | 5.3.1 | +| | ![](img/B14853_05_105.png) | 5.3.2 | + +表 5.3.1:CGAN 和 ACGAN 损失函数之间的比较 + +ACGAN 损失函数与 CGAN 相同,除了附加的分类器损失函数。 除了从假图片中识别真实图像的原始任务之外,判别器的“公式 5.3.1”还具有对真假图像正确分类的附加任务。 生成器的“公式 5.3.2”意味着,除了尝试用伪造的图像来欺骗判别器(`-E[z] log D(g(z | y))`)之外,它还要求判别器正确地对那些伪造的图像进行分类(`-E[z] log P(c | g(z | y))`)。 + +从 CGAN 代码开始,仅修改判别器和训练函数以实现 ACGAN。 `gan.py`还提供了判别器和生成器构建器函数。 要查看判别器上所做的更改,清单 5.3.1 显示了构建器函数,其中突出显示了执行图像分类的辅助解码器网络和双输出。 + +“列表 5.3.1”:`gan.py` + +```py +def discriminator(inputs, + activation='sigmoid', + num_labels=None, + num_codes=None): + """Build a Discriminator Model +``` + +```py + Stack of LeakyReLU-Conv2D to discriminate real from fake + The network does not converge with BN so it is not used here + unlike in [1] + Arguments: + inputs (Layer): Input layer of the discriminator (the image) + activation (string): Name of output activation layer + num_labels (int): Dimension of one-hot labels for ACGAN & InfoGAN + num_codes (int): num_codes-dim Q network as output + if StackedGAN or 2 Q networks if InfoGAN + + Returns: + Model: Discriminator Model + """ + kernel_size = 5 + layer_filters = [32, 64, 128, 256] +``` + +```py + x = inputs + for filters in layer_filters: + # first 3 convolution layers use strides = 2 + # last one uses strides = 1 + if filters == layer_filters[-1]: + strides = 1 + else: + strides = 2 + x = LeakyReLU(alpha=0.2)(x) + x = Conv2D(filters=filters, + kernel_size=kernel_size, + strides=strides, + padding='same')(x) +``` + +```py + x = Flatten()(x) + # default output is probability that the image is real + outputs = Dense(1)(x) + if activation is not None: + print(activation) + outputs = Activation(activation)(outputs) +``` + +```py + if num_labels: + # ACGAN and InfoGAN have 2nd output + # 2nd output is 10-dim one-hot vector of label + layer = Dense(layer_filters[-2])(x) + labels = Dense(num_labels)(layer) + labels = Activation('softmax', name='label')(labels) + if num_codes is None: + outputs = [outputs, labels] + else: + # InfoGAN have 3rd and 4th outputs + # 3rd output is 1-dim continous Q of 1st c given x + code1 = Dense(1)(layer) + code1 = Activation('sigmoid', name='code1')(code1) +``` + +```py + # 4th output is 1-dim continuous Q of 2nd c given x + code2 = Dense(1)(layer) + code2 = Activation('sigmoid', name='code2')(code2) +``` + +```py + outputs = [outputs, labels, code1, code2] + elif num_codes is not None: + # StackedGAN Q0 output + # z0_recon is reconstruction of z0 normal distribution + z0_recon = Dense(num_codes)(x) + z0_recon = Activation('tanh', name='z0')(z0_recon) + outputs = [outputs, z0_recon] +``` + +```py + return Model(inputs, outputs, name='discriminator') +``` + +然后通过调用以下命令来构建判别器: + +```py +discriminator = gan.discriminator(inputs, num_labels=num_labels) +``` + +生成器与 WGAN 和 LSGAN 中的生成器相同。 回想一下,在以下“列表 5.3.2”中显示了生成器生成器。 我们应该注意,“列表 5.3.1”和“5.3.2”与上一节中 WGAN 和 LSGAN 使用的生成器函数相同。 
重点介绍了适用于 LSGAN 的部件。 + +“列表 5.3.2”:`gan.py` + +```py +def generator(inputs, + image_size, + activation='sigmoid', + labels=None, + codes=None): + """Build a Generator Model +``` + +```py + Stack of BN-ReLU-Conv2DTranpose to generate fake images. + Output activation is sigmoid instead of tanh in [1]. + Sigmoid converges easily. +``` + +```py + Arguments: + inputs (Layer): Input layer of the generator (the z-vector) + image_size (int): Target size of one side + (assuming square image) + activation (string): Name of output activation layer + labels (tensor): Input labels + codes (list): 2-dim disentangled codes for InfoGAN +``` + +```py + Returns: + Model: Generator Model + """ + image_resize = image_size // 4 + # network parameters + kernel_size = 5 + layer_filters = [128, 64, 32, 1] +``` + +```py + if labels is not None: + if codes is None: + # ACGAN labels + # concatenate z noise vector and one-hot labels + inputs = [inputs, labels] + else: + # infoGAN codes + # concatenate z noise vector, + # one-hot labels and codes 1 & 2 + inputs = [inputs, labels] + codes + x = concatenate(inputs, axis=1) + elif codes is not None: + # generator 0 of StackedGAN + inputs = [inputs, codes] + x = concatenate(inputs, axis=1) + else: + # default input is just 100-dim noise (z-code) + x = inputs +``` + +```py + x = Dense(image_resize * image_resize * layer_filters[0])(x) + x = Reshape((image_resize, image_resize, layer_filters[0]))(x) +``` + +```py + for filters in layer_filters: + # first two convolution layers use strides = 2 + # the last two use strides = 1 + if filters > layer_filters[-2]: + strides = 2 + else: + strides = 1 + x = BatchNormalization()(x) + x = Activation('relu')(x) + x = Conv2DTranspose(filters=filters, + kernel_size=kernel_size, + strides=strides, + padding='same')(x) +``` + +```py + if activation is not None: + x = Activation(activation)(x) +``` + +```py + # generator output is the synthesized image x + return Model(inputs, x, name='generator') +``` + +在 ACGAN 中,生成器实例化为: + +```py +generator = gan.generator(inputs, image_size, labels=labels) +``` + +“图 5.3.2”显示了`tf.keras`中 ACGAN 的网络模型: + +![](img/B14853_05_09.png) + +图 5.3.2:ACGAN 的`tf.keras`模型 + +如“列表 5.3.3”所示,对判别器和对抗模型进行了修改,以适应判别器网络中的更改。 现在,我们有两个损失函数。 首先是原始的二进制交叉熵,用于训练判别器来估计输入图像为实的概率。 + +第二个是图像分类器,用于预测类别标签。 输出是一个 10 维的单热向量。 + +“列表 5.3.3”:`acgan-mnist-5.3.1.py` + +重点介绍了在判别器和对抗网络中实现的更改: + +```py +def build_and_train_models(): + """Load the dataset, build ACGAN discriminator, + generator, and adversarial models. + Call the ACGAN train routine. 
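+    Note: both the discriminator and adversarial models are compiled
+    with two losses: binary crossentropy for the real/fake source
+    and categorical crossentropy for the class label.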
+ """ + # load MNIST dataset + (x_train, y_train), (_, _) = mnist.load_data() +``` + +```py + # reshape data for CNN as (28, 28, 1) and normalize + image_size = x_train.shape[1] + x_train = np.reshape(x_train, + [-1, image_size, image_size, 1]) + x_train = x_train.astype('float32') / 255 +``` + +```py + # train labels + num_labels = len(np.unique(y_train)) + y_train = to_categorical(y_train) +``` + +```py + model_name = "acgan_mnist" + # network parameters + latent_size = 100 + batch_size = 64 + train_steps = 40000 + lr = 2e-4 + decay = 6e-8 + input_shape = (image_size, image_size, 1) + label_shape = (num_labels, ) +``` + +```py + # build discriminator Model + inputs = Input(shape=input_shape, + name='discriminator_input') + # call discriminator builder + # with 2 outputs, pred source and labels + discriminator = gan.discriminator(inputs, + num_labels=num_labels) +``` + +```py + # [1] uses Adam, but discriminator + # easily converges with RMSprop + optimizer = RMSprop(lr=lr, decay=decay) + # 2 loss fuctions: 1) probability image is real + # 2) class label of the image + loss = ['binary_crossentropy', 'categorical_crossentropy'] + discriminator.compile(loss=loss, + optimizer=optimizer, + metrics=['accuracy']) + discriminator.summary() +``` + +```py + # build generator model + input_shape = (latent_size, ) + inputs = Input(shape=input_shape, name='z_input') + labels = Input(shape=label_shape, name='labels') + # call generator builder with input labels + generator = gan.generator(inputs, + image_size, + labels=labels) + generator.summary() +``` + +```py + # build adversarial model = generator + discriminator + optimizer = RMSprop(lr=lr*0.5, decay=decay*0.5) + # freeze the weights of discriminator + # during adversarial training + discriminator.trainable = False + adversarial = Model([inputs, labels], + discriminator(generator([inputs, labels])), + name=model_name) + # same 2 loss fuctions: 1) probability image is real + # 2) class label of the image + adversarial.compile(loss=loss, + optimizer=optimizer, + metrics=['accuracy']) + adversarial.summary() +``` + +```py + # train discriminator and adversarial networks + models = (generator, discriminator, adversarial) + data = (x_train, y_train) + params = (batch_size, latent_size, \ + train_steps, num_labels, model_name) + train(models, data, params) +``` + +在“列表 5.3.4”中,我们重点介绍了训练例程中实现的更改。 将与 CGAN 代码进行比较的主要区别在于,必须在鉴别和对抗训练中提供输出标签。 + +“列表 5.3.4”:`acgan-mnist-5.3.1.py` + +```py +def train(models, data, params): + """Train the discriminator and adversarial Networks + Alternately train discriminator and adversarial + networks by batch. + Discriminator is trained first with real and fake + images and corresponding one-hot labels. + Adversarial is trained next with fake images pretending + to be real and corresponding one-hot labels. + Generate sample images per save_interval. 
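+    Unlike in CGAN, the one-hot labels are passed to train_on_batch()
+    as output targets as well, not only as generator inputs.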
+ # Arguments + models (list): Generator, Discriminator, + Adversarial models + data (list): x_train, y_train data + params (list): Network parameters + """ + # the GAN models + generator, discriminator, adversarial = models + # images and their one-hot labels + x_train, y_train = data + # network parameters + batch_size, latent_size, train_steps, num_labels, model_name \ + = params + # the generator image is saved every 500 steps + save_interval = 500 + # noise vector to see how the generator + # output evolves during training + noise_input = np.random.uniform(-1.0, + 1.0, + size=[16, latent_size]) + # class labels are 0, 1, 2, 3, 4, 5, + # 6, 7, 8, 9, 0, 1, 2, 3, 4, 5 + # the generator must produce these MNIST digits + noise_label = np.eye(num_labels)[np.arange(0, 16) % num_labels] + # number of elements in train dataset + train_size = x_train.shape[0] + print(model_name, + "Labels for generated images: ", + np.argmax(noise_label, axis=1)) +``` + +```py + for i in range(train_steps): + # train the discriminator for 1 batch + # 1 batch of real (label=1.0) and fake images (label=0.0) + # randomly pick real images and + # corresponding labels from dataset + rand_indexes = np.random.randint(0, + train_size, + size=batch_size) + real_images = x_train[rand_indexes] + real_labels = y_train[rand_indexes] + # generate fake images from noise using generator + # generate noise using uniform distribution + noise = np.random.uniform(-1.0, + 1.0, + size=[batch_size, latent_size]) + # randomly pick one-hot labels + fake_labels = np.eye(num_labels)[np.random.choice(num_labels, + batch_size)] + # generate fake images + fake_images = generator.predict([noise, fake_labels]) + # real + fake images = 1 batch of train data + x = np.concatenate((real_images, fake_images)) + # real + fake labels = 1 batch of train data labels + labels = np.concatenate((real_labels, fake_labels)) +``` + +```py + # label real and fake images + # real images label is 1.0 + y = np.ones([2 * batch_size, 1]) + # fake images label is 0.0 + y[batch_size:, :] = 0 + # train discriminator network, log the loss and accuracy + # ['loss', 'activation_1_loss', + # 'label_loss', 'activation_1_acc', 'label_acc'] + metrics = discriminator.train_on_batch(x, [y, labels]) + fmt = "%d: [disc loss: %f, srcloss: %f," + fmt += "lblloss: %f, srcacc: %f, lblacc: %f]" + log = fmt % (i, metrics[0], metrics[1], \ + metrics[2], metrics[3], metrics[4]) +``` + +```py + # train the adversarial network for 1 batch + # 1 batch of fake images with label=1.0 and + # corresponding one-hot label or class + # since the discriminator weights are frozen + # in adversarial network only the generator is trained + # generate noise using uniform distribution + noise = np.random.uniform(-1.0, + 1.0, + size=[batch_size, latent_size]) + # randomly pick one-hot labels + fake_labels = np.eye(num_labels)[np.random.choice(num_labels, + batch_size)] + # label fake images as real + y = np.ones([batch_size, 1]) + # train the adversarial network + # note that unlike in discriminator training, + # we do not save the fake images in a variable + # the fake images go to the discriminator input + # of the adversarial for classification + # log the loss and accuracy + metrics = adversarial.train_on_batch([noise, fake_labels], + [y, fake_labels]) + fmt = "%s [advr loss: %f, srcloss: %f," + fmt += "lblloss: %f, srcacc: %f, lblacc: %f]" + log = fmt % (log, metrics[0], metrics[1],\ + metrics[2], metrics[3], metrics[4]) + print(log) + if (i + 1) % save_interval == 0: + # plot generator images on 
a periodic basis + gan.plot_images(generator, + noise_input=noise_input, + noise_label=noise_label, + show=False, + step=(i + 1), + model_name=model_name) +``` + +```py + # save the model after training the generator + # the trained generator can be reloaded + # for future MNIST digit generation + generator.save(model_name + ".h5") +``` + +可以看出,与其他任务相比,与我们之前讨论的所有 GAN 相比,ACGAN 的表现显着提高。 ACGAN 训练是稳定的,如“图 5.3.3”的 ACGAN 示例输出的以下标签所示: + +```py +[0 1 2 3 + 4 5 6 7 + 8 9 0 1 + 2 3 4 5] +``` + +与 CGAN 不同,样本输出的外观在训练过程中变化不大。 MNIST 数字图像的感知质量也更好。 + +![](img/B14853_05_10.png) + +图 5.3.3:ACGAN 根据标签的训练步骤生成的示例输出`[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5]` + +使用训练有素的生成器模型,通过运行以下命令来生成新的合成 MNIST 数字图像: + +```py +python3 acgan-mnist-5.3.1.py --generator=acgan_mnist.h5 +``` + +或者,也可以请求生成要生成的特定数字(例如 3): + +```py +python3 acgan-mnist-5.3.1.py --generator=acgan_mnist.h5 --digit=3 +``` + +“图 5.3.4”显示了 CGAN 和 ACGAN 产生的每个 MNIST 数字的并排比较。 ACGAN 中的数字 2-6 比 CGAN 中的数字质量更好: + +![](img/B14853_05_11.png) + +图 5.3.4:以数字 0 到 9 为条件的 CGAN 和 ACGAN 输出的并排比较 + +与 WGAN 和 LSGAN 相似,ACGAN 通过微调的损失函数,对现有 GAN CGAN 进行了改进。 在接下来的章节中,我们将发现新的损失函数,这些函数将使 GAN 能够执行新的有用任务。 + +# 4\. 总结 + +在本章中,我们介绍了对原始 GAN 算法的各种改进,这些改进在上一章中首次介绍。 WGAN 提出了一种通过使用 EMD 或 Wasserstein 1 损失来提高训练稳定性的算法。 LSGAN 认为,与最小二乘损失不同,GANs 的原始交叉熵函数倾向于消失梯度。 LSGAN 提出了一种实现稳定训练和高质量输出的算法。 ACGAN 通过要求判别器在确定输入图像是假的还是真实的基础上执行分类任务,来令人信服地提高了 MNIST 数字有条件生成的质量。 + +在下一章中,我们将研究如何控制生成器输出的属性。 尽管 CGAN 和 ACGAN 可以指示要生成的期望数字,但我们尚未分析可以指定输出属性的 GAN。 例如,我们可能想要控制 MNIST 数字的书写风格,例如圆度,倾斜角度和厚度。 因此,目标是引入具有纠缠表示的 GAN,以控制生成器输出的特定属性。 + +# 5\. 参考 + +1. `Ian Goodfellow et al.: Generative Adversarial Nets. Advances in neural information processing systems, 2014 (http://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf).` +1. `Martin Arjovsky, Soumith Chintala, and Léon Bottou: Wasserstein GAN. arXiv preprint, 2017 (https://arxiv.org/pdf/1701.07875.pdf).` +1. `Xudong Mao et al.: Least Squares Generative Adversarial Networks. 2017 IEEE International Conference on Computer Vision (ICCV). IEEE 2017 (http://openaccess.thecvf.com/content_ICCV_2017/papers/Mao_Least_Squares_Generative_ICCV_2017_paper.pdf).` +1. `Augustus Odena, Christopher Olah, and Jonathon Shlens. Conditional Image Synthesis with Auxiliary Classifier GANs. ICML, 2017 (http://proceedings.mlr.press/v70/odena17a/odena17a.pdf).` \ No newline at end of file diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/06.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/06.md new file mode 100644 index 00000000..925fb4fa --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/06.md @@ -0,0 +1,1374 @@ +# 六、纠缠表示 GAN + +正如我们已经探索的那样,GAN 可以通过学习数据分布来产生有意义的输出。 但是,无法控制所生成输出的属性。 GAN 的一些变体,例如**条件 GAN**(**CGAN**)和**辅助分类器 GAN**(**ACGAN**),如前两章所讨论的,都可以训练生成器,该生成器可以合成特定的输出。 例如,CGAN 和 ACGAN 都可以诱导生成器生成特定的 MNIST 数字。 这可以通过同时使用 100 维噪声代码和相应的一号热标签作为输入来实现。 但是,除了单热标签外,我们没有其他方法可以控制生成的输出的属性。 + +有关 CGAN 和 ACGAN 的评论,请参阅“第 4 章”,“生成对抗网络(GANs)”和“第 5 章”,“改进的 GANs”。 + +在本章中,我们将介绍使我们能够修改生成器输出的 GAN 的变体。 在 MNIST 数据集的上下文中,除了要生成的数字外,我们可能会发现我们想要控制书写样式。 这可能涉及所需数字的倾斜度或宽度。 换句话说,GAN 也可以学习纠缠的潜在代码或表示形式,我们可以使用它们来改变生成器输出的属性。 解开的代码或表示形式是张量,可以在不影响其他属性的情况下更改输出数据的特定特征或属性。 + +在本章的第一部分中,我们将讨论《InfoGAN:通过最大化生成对抗网络的信息进行可解释的表示学习》[1],这是 GAN 的扩展。 InfoGAN 通过最大化输入代码和输出观察值之间的互信息来以无监督的方式学习解缠结的表示形式。 在 MNIST 数据集上,InfoGAN 从数字数据集中解开了写作风格。 + +在本章的以下部分中,我们还将讨论《栈式生成对抗网络或 StackedGAN》[2],这是 GAN 的另一种扩展。 + +StackedGAN 使用预训练的编码器或分类器,以帮助解开潜在代码。 StackedGAN 可以看作是一堆模型,每个模型都由编码器和 GAN 组成。 通过使用相应编码器的输入和输出数据,以对抗性方式训练每个 GAN。 + +总之,本章的目的是介绍: + +* 纠缠表示的概念 +* InfoGAN 和 StackedGAN 的原理 +* 使用`tf.keras`实现 InfoGAN 和 StackedGAN + +让我们从讨论纠缠的表示开始。 + +# 1\. 
纠缠表示 + +最初的 GAN 能够产生有意义的输出,但是缺点是它的属性无法控制。 例如,如果我们训练 GAN 来学习名人面孔的分布,则生成器将产生名人形象的新图像。 但是,没有任何方法可以影响生成器有关所需脸部的特定属性。 例如,我们无法向生成器询问女性名人的脸,该女性名人是黑发,白皙的肤色,棕色的眼睛,微笑着。 这样做的根本原因是因为我们使用的 100 维噪声代码纠缠了生成器输出的所有显着属性。 我们可以回想一下,在`tf.keras`中,`100-dim`代码是由均匀噪声分布的随机采样生成的: + +```py + # generate fake images from noise using generator + # generate noise using uniform distribution + noise = np.random.uniform(-1.0, + 1.0, + size=[batch_size, latent_size]) + # generate fake images + fake_images = generator.predict(noise) +``` + +如果我们能够修改原始 GAN,以便将表示形式分为纠缠的和解缠的可解释的潜在代码向量,则我们将能够告诉生成器合成什么。 + +“图 6.1.1”向我们展示了一个带纠缠代码的 GAN,以及它的纠缠和解缠表示的混合形式。 在假设的名人脸生成的情况下,使用解开的代码,我们可以指出我们希望生成的脸的性别,发型,面部表情,肤色和肤色。 仍然需要`n–dim`纠缠代码来表示我们尚未纠缠的所有其他面部属性,例如面部形状,面部毛发,眼镜等,仅是三个示例。 纠缠和解纠缠的代码向量的连接用作生成器的新输入。 级联代码的总维不一定是 100: + +![](img/B14853_06_01.png) + +图 6.1.1:带有纠缠码的 GAN 及其随纠缠码和解缠码的变化。 此示例在名人脸生成的背景下显示 + +查看上图中的,似乎可以以与原始 GAN 相同的方式优化具有解缠表示的 GAN。 这是因为生成器的输出可以表示为: + +![](img/B14853_06_001.png) (Equation 6.1.1) + +代码`z = (z, c)`包含两个元素: + +* 类似于 GANs`z`或噪声向量的不可压缩纠缠噪声代码。 +* 潜在代码`c[1]`,`c[2]`,…,`c[L]`, 代表数据分配的可解译的纠缠码。 所有潜在代码共同表示为`c`。 + +为简单起见,假定所有潜在代码都是独立的: + +![](img/B14853_06_002.png) (Equation 6.1.2) + +生成器函数`x = g(z, c) = g(z)`带有不可压缩的噪声代码和潜在代码。 从生成器的角度来看,优化`z = (z, c)`与优化`z`相同。 + +当提出解决方案时,生成器网络将仅忽略解纠结代码所施加的约束。 + +生成器学习分布`p_g(x | c) = p_g(x)`。 这实际上将打乱分散表示的目的。 + +InfoGAN 的关键思想是强制 GAN 不要忽略潜在代码`c`。 这是通过最大化`c`和`g(z, c)`之间的相互信息来完成的。 在下一节中,我们将公式化 InfoGAN 的损失函数。 + +# InfoGAN + +为了加强对代码的纠缠,InfoGAN 提出了一种针对原始损失函数的正则化函数,该函数可最大化潜在代码`c`和`g(z, c)`之间的互信息: + +![](img/B14853_06_007.png) (Equation 6.1.3) + +正则化器在生成用于合成伪图像的函数时,会强制生成器考虑潜在代码。 在信息论领域,潜码`c`和`g(z, c)`之间的互信息定义为: + +![](img/B14853_06_009.png) (Equation 6.1.4) + +其中`H(c)`是潜码`c`的熵,`H(c | g(z | c))`是观察生成器的输出后`c`的条件熵, `g(z, c)`。 熵是对随机变量或事件的不确定性的度量。 例如,**在东方升起**之类的信息具有较低的熵,而**在彩票中赢得大奖**具有较高的熵。 可以在“第 13 章”,“使用互信息的无监督学习”中找到有关互信息的更详细讨论。 + +在“公式 6.1.4”中,最大化互信息意味着在观察生成的输出时,将`H(c | g(z | c))`最小化或减小潜码中的不确定性。 这是有道理的,因为例如在 MNIST 数据集中,如果 GAN 看到生成器 8 看到了数字 8,则生成器对合成数字 8 变得更有信心。 + +但是,`H(c | g(z | c))`很难估计,因为它需要后验`P(c | g(z | c)) = P(c | x)`的知识,这是我们无法获得的。 为简单起见,我们将使用常规字母`x`表示数据分布。 + +解决方法是通过使用辅助分布`Q(c | x)`估计后验来估计互信息的下界。 InfoGAN 估计相互信息的下限为: + +![](img/B14853_06_016.png) (Equation 6.1.5) + +在 InfoGAN 中,`H(c)`被假定为常数。 因此,使相互信息最大化是使期望最大化的问题。 生成器必须确信已生成具有特定属性的输出。 我们应注意,此期望的最大值为零。 因此,相互信息的下限的最大值为`H(c)`。 在 InfoGAN 中,离散隐码的`Q(c | x)`可以由`softmax`非线性表示。 期望是`tf.keras`中的负`categorical_crossentropy`损失。 + +对于一维连续代码,期望是`c`和`x`的双整数。 这是由于期望从纠缠的代码分布和生成器分布中采样。 估计期望值的一种方法是通过假设样本是连续数据的良好度量。 因此,损失估计为`c log Q(c | x)`。 在“第 13 章”,“使用互信息的无监督学习”中,我们将提供对互信息的更精确估计。 + +为了完成 InfoGAN 的网络,我们应该有`Q(c | x)`的实现。 为了简单起见,网络 Q 是一个附加到判别器第二到最后一层的辅助网络。 因此,这对原始 GAN 的训练影响很小。 + +“图 6.1.2”显示了 InfoGAN 网络图: + +![](img/B14853_06_02.png) + +图 6.1.2 网络图显示 InfoGAN 中的判别器和生成器训练 + +“表 6.1.1”显示了与 GAN 相比 InfoGAN 的损失函数: + +| **网络** | **损失函数** | **编号** | +| --- | --- | --- | +| GAN | ![](img/B14853_06_019.png) | 4.1.1 | +| | ![](img/B14853_06_020.png) | 4.1.5 | +| InfoGAN | ![](img/B14853_06_021.png) | 6.1.1 | +| | ![](img/B14853_06_022.png) | 6.1.2 | +| | 对于连续代码,InfoGAN 建议使用`λ < 1`的值。 在我们的示例中,我们设置`λ = 0.5`。 对于离散代码,InfoGAN 建议使用`λ = 1`。 | | + +表 6.1.1:GAN 和 InfoGAN 的损失函数之间的比较 + +InfoGAN 的损失函数与 GAN 的区别是附加项`-λI(c; g(z, c))`,其中`λ`是一个小的正常数。 最小化 InfoGAN 的损失函数可以将原始 GAN 的损失最小化,并将互信息最大化`I(c; g(z, c))`。 + +如果将其应用于 MNIST 数据集,InfoGAN 可以学习解开的离散码和连续码,以修改生成器输出属性。 例如,像 CGAN 和 ACGAN 一样,将使用`10-dim`一键标签形式的离散代码来指定要生成的数字。 但是,我们可以添加两个连续的代码,一个用于控制书写样式的角度,另一个用于调整笔划宽度。“图 6.1.3”显示了 InfoGAN 中 MNIST 数字的代码。 我们保留较小尺寸的纠缠代码以表示所有其他属性: + +![](img/B14853_06_03.png) + +图 6.1.3:MNIST 数据集中 GAN 和 InfoGAN 的代码 + +在讨论了 InfoGAN 
背后的一些概念之后,让我们看一下`tf.keras`中的 InfoGAN 实现。 + +## 在 Keras 中实现 InfoGAN + +为了在 MNIST 数据集上实现 InfoGAN,需要对 ACGAN 的基本代码进行一些更改。 如“列表 6.1.1”中突出显示的那样,生成器将纠缠的(`z`噪声代码)和解纠结的代码(单标签和连续代码)连接起来作为输入: + +```py +inputs = [inputs, labels] + codes +``` + +`generator`和`discriminator`的构建器函数也在`lib`文件夹的`gan.py`中实现。 + +[完整的代码可在 GitHub 上获得](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)。 + +“列表 6.1.1”:`infogan-mnist-6.1.1.py` + +突出显示了特定于 InfoGAN 的行: + +```py +def generator(inputs, + image_size, + activation='sigmoid', + labels=None, + codes=None): + """Build a Generator Model +``` + +```py + Stack of BN-ReLU-Conv2DTranpose to generate fake images. + Output activation is sigmoid instead of tanh in [1]. + Sigmoid converges easily. +``` + +```py + Arguments: + inputs (Layer): Input layer of the generator (the z-vector) + image_size (int): Target size of one side + (assuming square image) + activation (string): Name of output activation layer + labels (tensor): Input labels + codes (list): 2-dim disentangled codes for InfoGAN +``` + +```py + Returns: + Model: Generator Model + """ + image_resize = image_size // 4 + # network parameters + kernel_size = 5 + layer_filters = [128, 64, 32, 1] +``` + +```py + if labels is not None: + if codes is None: + # ACGAN labels + # concatenate z noise vector and one-hot labels + inputs = [inputs, labels] + else: + # infoGAN codes + # concatenate z noise vector, + # one-hot labels and codes 1 & 2 + inputs = [inputs, labels] + codes + x = concatenate(inputs, axis=1) + elif codes is not None: + # generator 0 of StackedGAN + inputs = [inputs, codes] + x = concatenate(inputs, axis=1) + else: + # default input is just 100-dim noise (z-code) + x = inputs +``` + +```py + x = Dense(image_resize * image_resize * layer_filters[0])(x) + x = Reshape((image_resize, image_resize, layer_filters[0]))(x) +``` + +```py + for filters in layer_filters: + # first two convolution layers use strides = 2 + # the last two use strides = 1 + if filters > layer_filters[-2]: + strides = 2 + else: + strides = 1 + x = BatchNormalization()(x) + x = Activation('relu')(x) + x = Conv2DTranspose(filters=filters, + kernel_size=kernel_size, + strides=strides, + padding='same')(x) +``` + +```py + if activation is not None: + x = Activation(activation)(x) +``` + +```py + # generator output is the synthesized image x + return Model(inputs, x, name='generator') +``` + +“列表 6.1.2”显示了具有原始默认 GAN 输出的判别器和 Q 网络。 高亮显示了三个辅助输出,它们对应于离散代码(用于单热标签)`softmax`预测的和给定输入 MNIST 数字图像的连续代码概率。 + +“列表 6.1.2”:`infogan-mnist-6.1.1.py` + +突出显示了特定于 InfoGAN 的行: + +```py +def discriminator(inputs, + activation='sigmoid', + num_labels=None, + num_codes=None): + """Build a Discriminator Model +``` + +```py + Stack of LeakyReLU-Conv2D to discriminate real from fake + The network does not converge with BN so it is not used here + unlike in [1] +``` + +```py + Arguments: + inputs (Layer): Input layer of the discriminator (the image) + activation (string): Name of output activation layer + num_labels (int): Dimension of one-hot labels for ACGAN & InfoGAN + num_codes (int): num_codes-dim Q network as output + if StackedGAN or 2 Q networks if InfoGAN +``` + +```py + Returns: + Model: Discriminator Model + """ + kernel_size = 5 + layer_filters = [32, 64, 128, 256] +``` + +```py + x = inputs + for filters in layer_filters: + # first 3 convolution layers use strides = 2 + # last one uses strides = 1 + if filters == layer_filters[-1]: + strides = 1 + else: + strides = 2 + x = LeakyReLU(alpha=0.2)(x) + x = Conv2D(filters=filters, + 
kernel_size=kernel_size, + strides=strides, + padding='same')(x) +``` + +```py + x = Flatten()(x) + # default output is probability that the image is real + outputs = Dense(1)(x) + if activation is not None: + print(activation) + outputs = Activation(activation)(outputs) +``` + +```py + if num_labels: + # ACGAN and InfoGAN have 2nd output + # 2nd output is 10-dim one-hot vector of label + layer = Dense(layer_filters[-2])(x) + labels = Dense(num_labels)(layer) + labels = Activation('softmax', name='label')(labels) + if num_codes is None: + outputs = [outputs, labels] + else: + # InfoGAN have 3rd and 4th outputs + # 3rd output is 1-dim continous Q of 1st c given x + code1 = Dense(1)(layer) + code1 = Activation('sigmoid', name='code1')(code1) + # 4th output is 1-dim continuous Q of 2nd c given x + code2 = Dense(1)(layer) + code2 = Activation('sigmoid', name='code2')(code2) +``` + +```py + outputs = [outputs, labels, code1, code2] + elif num_codes is not None: + # StackedGAN Q0 output + # z0_recon is reconstruction of z0 normal distribution + z0_recon = Dense(num_codes)(x) + z0_recon = Activation('tanh', name='z0')(z0_recon) + outputs = [outputs, z0_recon] +``` + +```py + return Model(inputs, outputs, name='discriminator') +``` + +“图 6.1.4”显示了`tf.keras`中的 InfoGAN 模型: + +![](img/B14853_06_04.png) + +图 6.1.4:InfoGAN Keras 模型 + +建立判别器和对抗模型还需要进行许多更改。 更改取决于所使用的损失函数。 原始的判别器损失函数`binary_crossentropy`,用于离散码的`categorical_crossentropy`和每个连续码的`mi_loss`函数构成了整体损失函数。 除`mi_loss`函数的权重为 0.5(对应于连续代码的`λ = 0.5`)外,每个损失函数的权重均为 1.0。 + +“列表 6.1.3”突出显示了所做的更改。 但是,我们应该注意,通过使用构造器函数,判别器被实例化为: + +```py + # call discriminator builder with 4 outputs: + # source, label, and 2 codes + discriminator = gan.discriminator(inputs, + num_labels=num_labels, + num_codes=2) +``` + +生成器通过以下方式创建: + +```py + # call generator with inputs, + # labels and codes as total inputs to generator + generator = gan.generator(inputs, + image_size, + labels=labels, + codes=[code1, code2]) +``` + +“列表 6.1.3”:`infogan-mnist-6.1.1.py` + +以下代码演示了互信息损失函数以及建立和训练 InfoGAN 判别器和对抗网络的过程: + +```py +def mi_loss(c, q_of_c_given_x): + """ Mutual information, Equation 5 in [2], + assuming H(c) is constant + """ + # mi_loss = -c * log(Q(c|x)) + return K.mean(-K.sum(K.log(q_of_c_given_x + K.epsilon()) * c, + axis=1)) +``` + +```py +def build_and_train_models(latent_size=100): + """Load the dataset, build InfoGAN discriminator, + generator, and adversarial models. + Call the InfoGAN train routine. 
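+    Note: the two continuous codes are trained with the mi_loss
+    function at a loss weight of 0.5 (the lambda for continuous
+    codes in Table 6.1.1).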
+ """ +``` + +```py + # load MNIST dataset + (x_train, y_train), (_, _) = mnist.load_data() +``` + +```py + # reshape data for CNN as (28, 28, 1) and normalize + image_size = x_train.shape[1] + x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) + x_train = x_train.astype('float32') / 255 +``` + +```py + # train labels + num_labels = len(np.unique(y_train)) + y_train = to_categorical(y_train) +``` + +```py + model_name = "infogan_mnist" + # network parameters + batch_size = 64 + train_steps = 40000 + lr = 2e-4 + decay = 6e-8 + input_shape = (image_size, image_size, 1) + label_shape = (num_labels, ) + code_shape = (1, ) +``` + +```py + # build discriminator model + inputs = Input(shape=input_shape, name='discriminator_input') + # call discriminator builder with 4 outputs: + # source, label, and 2 codes + discriminator = gan.discriminator(inputs, + num_labels=num_labels, + num_codes=2) + # [1] uses Adam, but discriminator converges easily with RMSprop + optimizer = RMSprop(lr=lr, decay=decay) + # loss functions: 1) probability image is real + # (binary crossentropy) + # 2) categorical cross entropy image label, + # 3) and 4) mutual information loss + loss = ['binary_crossentropy', + 'categorical_crossentropy', + mi_loss, + mi_loss] + # lamda or mi_loss weight is 0.5 + loss_weights = [1.0, 1.0, 0.5, 0.5] + discriminator.compile(loss=loss, + loss_weights=loss_weights, + optimizer=optimizer, + metrics=['accuracy']) + discriminator.summary() +``` + +```py + # build generator model + input_shape = (latent_size, ) + inputs = Input(shape=input_shape, name='z_input') + labels = Input(shape=label_shape, name='labels') + code1 = Input(shape=code_shape, name="code1") + code2 = Input(shape=code_shape, name="code2") + # call generator with inputs, + # labels and codes as total inputs to generator + generator = gan.generator(inputs, + image_size, + labels=labels, + codes=[code1, code2]) + generator.summary() +``` + +```py + # build adversarial model = generator + discriminator + optimizer = RMSprop(lr=lr*0.5, decay=decay*0.5) + discriminator.trainable = False + # total inputs = noise code, labels, and codes + inputs = [inputs, labels, code1, code2] + adversarial = Model(inputs, + discriminator(generator(inputs)), + name=model_name) + # same loss as discriminator + adversarial.compile(loss=loss, + loss_weights=loss_weights, + optimizer=optimizer, + metrics=['accuracy']) + adversarial.summary() +``` + +```py + # train discriminator and adversarial networks + models = (generator, discriminator, adversarial) + data = (x_train, y_train) + params = (batch_size, + latent_size, + train_steps, + num_labels, + model_name) + train(models, data, params) +``` + +就训练而言,我们可以看到 InfoGAN 与 ACGAN 类似,除了我们需要为连续代码提供`c`。`c`是从正态分布中提取的,标准差为 0.5,平均值为 0.0。 我们将对伪数据使用随机采样的标签,对实际数据使用数据集的类标签来表示离散的潜在代码。 + +“列表 6.1.4”突出显示了对训练函数所做的更改。 与以前的所有 GAN 相似,判别器和生成器(通过对抗性训练)被交替训练。 在对抗训练期间,判别器的权重被冻结。 + +通过使用`gan.py plot_images()`函数,样本生成器输出图像每 500 个间隔步被保存一次。 + +“列表 6.1.4”:`infogan-mnist-6.1.1.py` + +```py +def train(models, data, params): + """Train the Discriminator and Adversarial networks +``` + +```py + Alternately train discriminator and adversarial networks by batch. + Discriminator is trained first with real and fake images, + corresponding one-hot labels and continuous codes. + Adversarial is trained next with fake images pretending + to be real, corresponding one-hot labels and continous codes. + Generate sample images per save_interval. 
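+    Continuous codes for both real and fake images are sampled from
+    a normal distribution with mean 0.0 and standard deviation 0.5.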
+``` + +```py + # Arguments + models (Models): Generator, Discriminator, Adversarial models + data (tuple): x_train, y_train data + params (tuple): Network parameters + """ + # the GAN models + generator, discriminator, adversarial = models + # images and their one-hot labels + x_train, y_train = data + # network parameters + batch_size, latent_size, train_steps, num_labels, model_name = \ + params + # the generator image is saved every 500 steps + save_interval = 500 + # noise vector to see how the generator output + # evolves during training + noise_input = np.random.uniform(-1.0, + 1.0, + size=[16, latent_size]) + # random class labels and codes + noise_label = np.eye(num_labels)[np.arange(0, 16) % num_labels] + noise_code1 = np.random.normal(scale=0.5, size=[16, 1]) + noise_code2 = np.random.normal(scale=0.5, size=[16, 1]) + # number of elements in train dataset + train_size = x_train.shape[0] + print(model_name, + "Labels for generated images: ", + np.argmax(noise_label, axis=1)) +``` + +```py + for i in range(train_steps): + # train the discriminator for 1 batch + # 1 batch of real (label=1.0) and fake images (label=0.0) + # randomly pick real images and + # corresponding labels from dataset + rand_indexes = np.random.randint(0, + train_size, + size=batch_size) + real_images = x_train[rand_indexes] + real_labels = y_train[rand_indexes] + # random codes for real images + real_code1 = np.random.normal(scale=0.5, + size=[batch_size, 1]) + real_code2 = np.random.normal(scale=0.5, + size=[batch_size, 1]) + # generate fake images, labels and codes + noise = np.random.uniform(-1.0, + 1.0, + size=[batch_size, latent_size]) + fake_labels = np.eye(num_labels)[np.random.choice(num_labels, + batch_size)] + fake_code1 = np.random.normal(scale=0.5, + size=[batch_size, 1]) + fake_code2 = np.random.normal(scale=0.5, + size=[batch_size, 1]) + inputs = [noise, fake_labels, fake_code1, fake_code2] + fake_images = generator.predict(inputs) + # real + fake images = 1 batch of train data + x = np.concatenate((real_images, fake_images)) + labels = np.concatenate((real_labels, fake_labels)) + codes1 = np.concatenate((real_code1, fake_code1)) + codes2 = np.concatenate((real_code2, fake_code2)) + # label real and fake images + # real images label is 1.0 + y = np.ones([2 * batch_size, 1]) + # fake images label is 0.0 + y[batch_size:, :] = 0 + # train discriminator network, + # log the loss and label accuracy + outputs = [y, labels, codes1, codes2] + # metrics = ['loss', 'activation_1_loss', 'label_loss', + # 'code1_loss', 'code2_loss', 'activation_1_acc', + # 'label_acc', 'code1_acc', 'code2_acc'] + # from discriminator.metrics_names + metrics = discriminator.train_on_batch(x, outputs) + fmt = "%d: [discriminator loss: %f, label_acc: %f]" + log = fmt % (i, metrics[0], metrics[6]) + # train the adversarial network for 1 batch + # 1 batch of fake images with label=1.0 and + # corresponding one-hot label or class + random codes + # since the discriminator weights are frozen + # in adversarial network only the generator is trained + # generate fake images, labels and codes + noise = np.random.uniform(-1.0, + 1.0, + size=[batch_size, latent_size]) + fake_labels = np.eye(num_labels)[np.random.choice(num_labels, + batch_size)] + fake_code1 = np.random.normal(scale=0.5, + size=[batch_size, 1]) + fake_code2 = np.random.normal(scale=0.5, + size=[batch_size, 1]) + # label fake images as real + y = np.ones([batch_size, 1]) + # train the adversarial network + # note that unlike in discriminator training, + # we do not save 
# the fake images in a variable
        # the fake images go to the discriminator
        # input of the adversarial for classification
        # log the loss and label accuracy
        inputs = [noise, fake_labels, fake_code1, fake_code2]
        outputs = [y, fake_labels, fake_code1, fake_code2]
        metrics = adversarial.train_on_batch(inputs, outputs)
        fmt = "%s [adversarial loss: %f, label_acc: %f]"
        log = fmt % (log, metrics[0], metrics[6])
        print(log)
        if (i + 1) % save_interval == 0:
            # plot generator images on a periodic basis
            gan.plot_images(generator,
                            noise_input=noise_input,
                            noise_label=noise_label,
                            noise_codes=[noise_code1, noise_code2],
                            show=False,
                            step=(i + 1),
                            model_name=model_name)

    # save the model after training the generator
    # the trained generator can be reloaded for
    # future MNIST digit generation
    generator.save(model_name + ".h5")
```

Given the `tf.keras` implementation of InfoGAN, the next section presents the generator MNIST outputs with disentangled attributes.

## Generator outputs of InfoGAN

Similar to all the previous GANs presented to us, we trained InfoGAN for 40,000 steps. After the training is completed, we can run the InfoGAN generator to produce new outputs using the model saved in the `infogan_mnist.h5` file. The following validations are performed:

1. Generate digits 0 to 9 by varying the discrete label from 0 to 9\. Both continuous codes are set to zero. The results are shown in Figure 6.1.5, where we can see that the InfoGAN discrete code controls which digit the generator produces:

    ```py
    python3 infogan-mnist-6.1.1.py --generator=infogan_mnist.h5
    --digit=0 --code1=0 --code2=0
    ```

    to

    ```py
    python3 infogan-mnist-6.1.1.py --generator=infogan_mnist.h5
    --digit=9 --code1=0 --code2=0
    ```

    In Figure 6.1.5, we can see the images generated by InfoGAN:

    ![](img/B14853_06_05.png)

    Figure 6.1.5: Images generated by InfoGAN as the discrete code is varied from 0 to 9\. Both continuous codes are set to zero

2. Examine the effect of the first continuous code to find out which attribute it affects. We vary the first continuous code from -2.0 to 2.0 for digits 0 to 9\. The second continuous code is set to 0.0\. Figure 6.1.6 shows that the first continuous code controls the thickness of the digit:

    ```py
    python3 infogan-mnist-6.1.1.py --generator=infogan_mnist.h5
    --digit=0 --code1=0 --code2=0 --p1
    ```

    ![](img/B14853_06_06.png)

    Figure 6.1.6: Images generated by InfoGAN as the first continuous code is varied from -2.0 to 2.0 for digits 0 to 9\. The second continuous code is set to zero. The first continuous code controls the thickness of the digit

3. Similar to the previous step, but focusing more on the second continuous code. Figure 6.1.7 shows that the second continuous code controls the rotation angle (tilt) of the writing style:

    ```py
    python3 infogan-mnist-6.1.1.py --generator=infogan_mnist.h5
    --digit=0 --code1=0 --code2=0 --p2
    ```

![](img/B14853_06_07.png)

Figure 6.1.7: Images generated by InfoGAN as the second continuous code is varied from -2.0 to 2.0 for digits 0 to 9\. The first continuous code is set to zero. The second continuous code controls the rotation angle (tilt) of the writing style

From these validation results, we can see that, beyond the ability to generate MNIST-looking digits, InfoGAN extends the capability of conditional GANs such as CGAN and ACGAN. The network automatically learned two arbitrary codes that can control specific attributes of the generator outputs. It would be interesting to see which additional attributes could be controlled if we increased the number of continuous codes beyond 2, which could be achieved by extending the code in the highlighted lines of Listing 6.1.1 to Listing 6.1.4.

The results in this section demonstrate that the attributes of the generator outputs can be disentangled by maximizing the mutual information between the codes and the data distribution. In the following section, a different approach to disentanglement is presented. The idea of StackedGAN is to inject the codes at the feature level.

# 3\.
StackedGAN + +与 InfoGAN 一样,StackedGAN 提出了一种用于分解潜在表示的方法,以调节生成器输出。 但是,StackedGAN 使用不同的方法来解决此问题。 与其学习如何调节噪声以产生所需的输出,不如将 StackedGAN 分解为 GAN 栈。 每个 GAN 均以通常的区分对手的方式进行独立训练,并带有自己的潜在代码。 + +“图 6.2.1”向我们展示了 StackedGAN 在假设名人脸生成的背景下如何工作,假设已经训练了*编码器*网络对名人脸进行分类: + +![](img/B14853_06_08.png) + +图 6.2.1:在名人脸生成的背景下 StackedGAN 的基本思想。 假设有一个假设的深层编码器网络可以对名人脸进行分类,那么 StackedGAN 可以简单地反转编码器的过程 + +*编码器*网络是由一堆简单的编码器组成的,`Encoder[i]`,其中`i = 0 … n-1`对应`n`个特征。 每个编码器都提取某些面部特征。 例如,`Encoder[0]`可能是发型特征的编码器,`Feature[1]`。 所有简单的编码器都有助于使整个*编码器*执行正确的预测。 + +StackedGAN 背后的想法是,如果我们想构建一个可生成假名人面孔的 GAN,则只需将*编码器*反转即可。 StackedGAN 由一堆更简单的 GAN 组成,`GAN[i]`,其中`i = 0 … n-1`与`n`个特征相对应。 每个`GAN[i]`学会反转其相应编码器`Encoder[i]`的过程。 例如,`GAN[0]`从假发型特征生成假名人脸,这是`Encoder[0]`处理的逆过程。 + +每个`GAN[i]`使用潜码`z[i]`,以调节其生成器输出。 例如,潜在代码`z[0]`可以将发型从卷曲更改为波浪形。 GAN 的栈也可以用作合成假名人面孔的对象,从而完成整个*编码器*的逆过程。 每个`GAN[i]`,`z[i]`的潜在代码都可以用来更改假名人面孔的特定属性。 + +有了 StackedGAN 的工作原理的关键思想,让我们继续下一节,看看如何在`tf.keras`中实现它。 + +## Keras 中 StackedGAN 的实现 + +StackedGAN 的详细网络模型可以在“图 6.2.2”中看到。 为简洁起见,每个栈仅显示两个编码器 GAN。 该图最初可能看起来很复杂,但这只是一个编码器 GAN 的重复,这意味着如果我们了解如何训练一个编码器 GAN,其余的将使用相同的概念。 + +在本节中,我们假设 StackedGAN 是为 MNIST 数字生成而设计的。 + +![](img/B14853_06_09.png) + +图 6.2.2:StackedGAN 包含编码器和 GAN 的栈。 对编码器进行预训练以执行分类。 `Generator[1]`,`G[1]`学会合成特征`f[1f]`,假标签`y[f]`和潜在代码`z[1f]`。 `Generator[0]`,`G[0]`均使用这两个伪特征`f[1f]`生成伪图像和潜在代码`z[0f]`。 + +StackedGAN 以*编码器*开头。 它可能是训练有素的分类器,可以预测正确的标签。 可以将中间特征向量`f[1r]`用于 GAN 训练。 对于 MNIST,我们可以使用基于 CNN 的分类器,类似于在“第 1 章”,“Keras 高级深度学习”中讨论的分类器。 + +“图 6.2.3”显示了*编码器*及其在`tf.keras`中的网络模型实现: + +![](img/B14853_06_10.png) + +图 6.2.3:StackedGAN 中的编码器是一个基于 CNN 的简单分类器 + +“列表 6.2.1”显示了上图的`tf.keras`代码。 它与“第 1 章”,“Keras 高级深度学习”中的基于 CNN 的分类器相似,不同之处在于,我们使用`Dense`层来提取`256-dim` 特征。 有两个输出模型,`Encoder[0]`和`Encoder[1]`。 两者都将用于训练 StackedGAN。 + +“列表 6.2.1”:`stackedgan-mnist-6.2.1.py` + +```py +def build_encoder(inputs, num_labels=10, feature1_dim=256): + """ Build the Classifier (Encoder) Model sub networks +``` + +```py + Two sub networks: + 1) Encoder0: Image to feature1 (intermediate latent feature) + 2) Encoder1: feature1 to labels +``` + +```py + # Arguments + inputs (Layers): x - images, feature1 - + feature1 layer output + num_labels (int): number of class labels + feature1_dim (int): feature1 dimensionality +``` + +```py + # Returns + enc0, enc1 (Models): Description below + """ + kernel_size = 3 + filters = 64 +``` + +```py + x, feature1 = inputs + # Encoder0 or enc0 + y = Conv2D(filters=filters, + kernel_size=kernel_size, + padding='same', + activation='relu')(x) + y = MaxPooling2D()(y) + y = Conv2D(filters=filters, + kernel_size=kernel_size, + padding='same', + activation='relu')(y) + y = MaxPooling2D()(y) + y = Flatten()(y) + feature1_output = Dense(feature1_dim, activation='relu')(y) +``` + +```py + # Encoder0 or enc0: image (x or feature0) to feature1 + enc0 = Model(inputs=x, outputs=feature1_output, name="encoder0") +``` + +```py + # Encoder1 or enc1 + y = Dense(num_labels)(feature1) + labels = Activation('softmax')(y) + # Encoder1 or enc1: feature1 to class labels (feature2) + enc1 = Model(inputs=feature1, outputs=labels, name="encoder1") +``` + +```py + # return both enc0 and enc1 + return enc0, enc1 +``` + +`Encoder[0]`输出`f[1r]`是我们想要的`256`维特征向量*生成器* 1 学习合成。 可以将用作`Encoder[0]`,`E[0]`的辅助输出。 训练整个*编码器*以对 MNIST 数字进行分类,即`x[r]`。 正确的标签`y[r]`由`Encoder[1]`,`E[1]`。 在此过程中,学习了的中间特征集`f[1r]`,可用于`Generator[0]`训练。 当针对该编码器训练 GAN 时,下标`r`用于强调和区分真实数据与伪数据。 + +假设*编码器*输入(`x[r]`)中间特征(`f[1r]`)和标签(`y[r]`),每个 GAN 都采用通常的区分-对抗方式进行训练。 损失函数由“表 6.2.1”中的“公式 6.2.1”至“公式 6.2.5”给出。“公式 6.2.1”和“公式 6.2.2”是通用 GAN 的常见损失函数。 StackedGAN 
具有两个附加损失函数,即**有条件**和**熵**。 + +| **网络** | **损失函数** | **编号** | +| --- | --- | --- | +| GAN | ![](img/B14853_06_030.png) | 4.1.1 | +| | ![](img/B14853_06_031.png) | 4.1.5 | +| 栈式 | ![](img/B14853_06_032.png) | 6.2.1 | +| | ![](img/B14853_06_033.png) | 6.2.2 | +| | ![](img/B14853_06_034.png) | 6.2.3 | +| | ![](img/B14853_06_035.png) | 6.2.4 | +| | ![](img/B14853_06_036.png) | 6.2.5 | +| | 其中`λ1, λ2, λ3`是权重,`i`是编码器和 GAN ID | | + +表 6.2.1:GAN 和 StackedGAN 的损失函数之间的比较。 `~p_data`表示从相应的编码器数据(输入,特征或输出)采样 + +条件“公式 6.2.3”中的损失函数`L_i^(G_cond)`确保生成器不会忽略输入`f[i + 1]`, 当从输入噪声代码`z[i]`合成输出`f[i]`时。 编码器`Encoder[i]`必须能够通过反转生成器的过程`Generator[i]`来恢复生成器输入。 通过`L2`或欧几里德距离(**均方误差**(**MSE**))来测量生成器输入和使用编码器恢复的输入之间的差异。 + +“图 6.2.4”显示了`L_0^(G_cond)`计算所涉及的网络元素: + +![](img/B14853_06_11.png) + +图 6.2.4:图 6.2.3 的简化版本,仅显示`L_0^(G_cond)`计算中涉及的网络元素 + +但是,条件损失函数引入了一个新问题。 生成器忽略输入噪声代码`z[i]`,仅依赖`f[i + 1]`。 熵损失函数“公式 6.2.4”中的`L_0^(G_ent)`确保生成器不会忽略噪声代码`z[i]`。 *Q 网络*从生成器的输出中恢复噪声代码。 恢复的噪声和输入噪声之间的差异也通过`L2`或欧几里德距离(MSE)进行测量。 + +“图 6.2.5”显示了`L_0^(G_ent)`计算中涉及的网络元素: + +![](img/B14853_06_12.png) + +图 6.2.5:图 6.2.3 的简单版本仅向我们显示了`L_0^(G_ent)`计算中涉及的网络元素 + +最后的损失函数类似于通常的 GAN 损失。 它包括判别器损失`L_i^(D)`和生成器(通过对抗性)损失`L_i^(G_adv)`。“图 6.2.6”显示了 GAN 损失所涉及的元素。 + +![](img/B14853_06_13.png) + +图 6.2.6:图 6.2.3 的简化版本,仅显示了`L_i^(D)`和`L_0^(G_adv)`计算中涉及的网络元素 + +在“公式 6.2.5”中,三个生成器损失函数的加权和为最终生成器损失函数。 在我们将要介绍的 Keras 代码中,除的熵损失设置为 10.0 之外,所有权重都设置为 1.0。 在“公式 6.2.1”至“公式 6.2.5”中,`i`是指编码器和 GAN 组 ID 或级别。 在原始论文中,首先对网络进行独立训练,然后进行联合训练。 在独立训练期间,编码器将首先进行训练。 在联合训练期间,将使用真实数据和虚假数据。 + +`tf.keras`中 StackedGAN 生成器和判别器的实现只需进行少量更改即可提供辅助点来访问中间特征。“图 6.2.7”显示了生成器`tf.keras`模型。 + +![](img/B14853_06_14.png) + +图 6.2.7:Keras 中的 StackedGAN 生成器模型 + +“列表 6.2.2”说明了构建与`Generator[0]`和`Generator[1]`相对应的两个生成器(`gen0`和`gen1`)的函数。 `gen1`生成器由三层`Dense`层组成,标签为和噪声代码`z[1f]`作为输入。 第三层生成伪造的`f[1f]`特征。 `gen0`生成器类似于我们介绍的其他 GAN 生成器,可以使用`gan.py`中的生成器生成器实例化: + +```py +# gen0: feature1 + z0 to feature0 (image) +gen0 = gan.generator(feature1, image_size, codes=z0) +``` + +`gen0`输入为`f[1]`特征,并且噪声代码为`z[0]`。 输出是生成的伪图像`x[f]`: + +“列表 6.2.2”:`stackedgan-mnist-6.2.1.py` + +```py +def build_generator(latent_codes, image_size, feature1_dim=256): + """Build Generator Model sub networks +``` + +```py + Two sub networks: 1) Class and noise to feature1 + (intermediate feature) + 2) feature1 to image +``` + +```py + # Arguments + latent_codes (Layers): dicrete code (labels), + noise and feature1 features + image_size (int): Target size of one side + (assuming square image) + feature1_dim (int): feature1 dimensionality +``` + +```py + # Returns + gen0, gen1 (Models): Description below + """ +``` + +```py + # Latent codes and network parameters + labels, z0, z1, feature1 = latent_codes + # image_resize = image_size // 4 + # kernel_size = 5 + # layer_filters = [128, 64, 32, 1] +``` + +```py + # gen1 inputs + inputs = [labels, z1] # 10 + 50 = 62-dim + x = concatenate(inputs, axis=1) + x = Dense(512, activation='relu')(x) + x = BatchNormalization()(x) + x = Dense(512, activation='relu')(x) + x = BatchNormalization()(x) + fake_feature1 = Dense(feature1_dim, activation='relu')(x) + # gen1: classes and noise (feature2 + z1) to feature1 + gen1 = Model(inputs, fake_feature1, name='gen1') +``` + +```py + # gen0: feature1 + z0 to feature0 (image) + gen0 = gan.generator(feature1, image_size, codes=z0) +``` + +```py + return gen0, gen1 +``` + +“图 6.2.8”显示了判别器`tf.keras`模型: + +![](img/B14853_06_15.png) + +图 6.2.8:Keras 中的 StackedGAN 判别器模型 + +我们提供函数来构建`Discriminator[0]`和`Discriminator[1]`(`dis0`和`dis1`)。 `dis0`判别器类似于 GAN 判别器,除了特征向量输入和辅助网络`Q[0]`,其恢复`z[0]`。 
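The recovery of `z[0]` is exactly what the entropy loss of Equation 6.2.4 trains: an MSE between the noise code fed to the generator and the code that the Q-network reads back from the generated output. A minimal sketch of the idea follows; `entropy_loss` is a hypothetical helper for illustration only, since in the listings below this term is simply the `'mse'` entry in each model's `loss` list:

```py
import tensorflow as tf

def entropy_loss(z_in, z_recon):
    """MSE between the noise code z_in given to the generator and
    the code z_recon recovered by the Q-network (Equation 6.2.4)."""
    return tf.reduce_mean(tf.square(z_in - z_recon))
```
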
The builder function in `gan.py` is used to create `dis0`:

```py
dis0 = gan.discriminator(inputs, num_codes=z_dim)
```

The `dis1` discriminator is made of a three-layer MLP, as shown in Listing 6.2.3. The last layer discriminates between the real and fake `f[1]`. The `Q[1]` network shares the first two layers of `dis1`. Its third layer recovers `z[1]`.

"Listing 6.2.3": `stackedgan-mnist-6.2.1.py`

```py
def build_discriminator(inputs, z_dim=50):
    """Build Discriminator 1 Model
```

```py
    Classifies feature1 (features) as real/fake image and recovers
    the input noise or latent code (by minimizing entropy loss)
```

```py
    # Arguments
        inputs (Layer): feature1
        z_dim (int): noise dimensionality
```

```py
    # Returns
        dis1 (Model): feature1 as real/fake and recovered latent code
    """
```

```py
    # input is 256-dim feature1
    x = Dense(256, activation='relu')(inputs)
    x = Dense(256, activation='relu')(x)
```

```py
    # first output is probability that feature1 is real
    f1_source = Dense(1)(x)
    f1_source = Activation('sigmoid',
                           name='feature1_source')(f1_source)
```

```py
    # z1 reconstruction (Q1 network)
    z1_recon = Dense(z_dim)(x)
    z1_recon = Activation('tanh', name='z1')(z1_recon)
```

```py
    discriminator_outputs = [f1_source, z1_recon]
    dis1 = Model(inputs, discriminator_outputs, name='dis1')
    return dis1
```

With all the builder functions available, StackedGAN is assembled in Listing 6.2.4. Before training StackedGAN, the encoder is pretrained. Note that we have already incorporated the three generator loss functions (adversarial, conditional, and entropy) in the adversarial model training. The `Q` network shares some common layers with the discriminator models. Therefore, its loss function is also incorporated in the discriminator model training.

"Listing 6.2.4": `stackedgan-mnist-6.2.1.py`

```py
def build_and_train_models():
    """Load the dataset, build StackedGAN discriminator,
    generator, and adversarial models.
    Call the StackedGAN train routine.
    """
    # load MNIST dataset
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
```

```py
    # reshape and normalize images
    image_size = x_train.shape[1]
    x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
    x_train = x_train.astype('float32') / 255
```

```py
    x_test = np.reshape(x_test, [-1, image_size, image_size, 1])
    x_test = x_test.astype('float32') / 255
```

```py
    # number of labels
    num_labels = len(np.unique(y_train))
    # to one-hot vector
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
```

```py
    model_name = "stackedgan_mnist"
    # network parameters
    batch_size = 64
    train_steps = 10000
    lr = 2e-4
    decay = 6e-8
    input_shape = (image_size, image_size, 1)
    label_shape = (num_labels, )
    z_dim = 50
    z_shape = (z_dim, )
    feature1_dim = 256
    feature1_shape = (feature1_dim, )
```

```py
    # build discriminator 0 and Q network 0 models
    inputs = Input(shape=input_shape, name='discriminator0_input')
    dis0 = gan.discriminator(inputs, num_codes=z_dim)
    # [1] uses Adam, but discriminator converges easily with RMSprop
    optimizer = RMSprop(lr=lr, decay=decay)
    # loss functions: 1) probability image is real (adversarial0 loss)
    # 2) MSE z0 recon loss (Q0 network loss or entropy0 loss)
    loss = ['binary_crossentropy', 'mse']
    loss_weights = [1.0, 10.0]
    dis0.compile(loss=loss,
                 loss_weights=loss_weights,
                 optimizer=optimizer,
                 metrics=['accuracy'])
    dis0.summary() # image discriminator, z0 estimator
```
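Keras combines the per-output losses above into a single weighted sum, so `loss=['binary_crossentropy', 'mse']` with `loss_weights=[1.0, 10.0]` trains the discriminator and its Q-network jointly. A hand-written equivalent of that composite objective, shown only for illustration (this helper is not part of the book's code):

```py
import tensorflow as tf

def dis0_total_loss(y_real, y_pred, z0, z0_recon):
    """Weighted sum implemented by dis0.compile():
    real/fake loss + 10 x entropy0 (z0 recovery) loss."""
    adv = tf.keras.losses.binary_crossentropy(y_real, y_pred)
    ent = tf.keras.losses.mse(z0, z0_recon)
    return tf.reduce_mean(adv) + 10.0 * tf.reduce_mean(ent)
```
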
```py
    # build discriminator 1 and Q network 1 models
    input_shape = (feature1_dim, )
    inputs = Input(shape=input_shape, name='discriminator1_input')
    dis1 = build_discriminator(inputs, z_dim=z_dim)
    # loss functions: 1) probability feature1 is real
    # (adversarial1 loss)
    # 2) MSE z1 recon loss (Q1 network loss or entropy1 loss)
    loss = ['binary_crossentropy', 'mse']
    loss_weights = [1.0, 1.0]
    dis1.compile(loss=loss,
                 loss_weights=loss_weights,
                 optimizer=optimizer,
                 metrics=['accuracy'])
    dis1.summary() # feature1 discriminator, z1 estimator
```

```py
    # build generator models
    feature1 = Input(shape=feature1_shape, name='feature1_input')
    labels = Input(shape=label_shape, name='labels')
    z1 = Input(shape=z_shape, name="z1_input")
    z0 = Input(shape=z_shape, name="z0_input")
    latent_codes = (labels, z0, z1, feature1)
    gen0, gen1 = build_generator(latent_codes, image_size)
    gen0.summary() # image generator
    gen1.summary() # feature1 generator
```

```py
    # build encoder models
    input_shape = (image_size, image_size, 1)
    inputs = Input(shape=input_shape, name='encoder_input')
    enc0, enc1 = build_encoder((inputs, feature1), num_labels)
    enc0.summary() # image to feature1 encoder
    enc1.summary() # feature1 to labels encoder (classifier)
    encoder = Model(inputs, enc1(enc0(inputs)))
    encoder.summary() # image to labels encoder (classifier)
```

```py
    data = (x_train, y_train), (x_test, y_test)
    train_encoder(encoder, data, model_name=model_name)
```

```py
    # build adversarial0 model =
    # generator0 + discriminator0 + encoder0
    optimizer = RMSprop(lr=lr*0.5, decay=decay*0.5)
    # encoder0 weights frozen
    enc0.trainable = False
    # discriminator0 weights frozen
    dis0.trainable = False
    gen0_inputs = [feature1, z0]
    gen0_outputs = gen0(gen0_inputs)
    adv0_outputs = dis0(gen0_outputs) + [enc0(gen0_outputs)]
    # feature1 + z0 to prob feature1 is
    # real + z0 recon + feature0/image recon
    adv0 = Model(gen0_inputs, adv0_outputs, name="adv0")
    # loss functions: 1) prob feature1 is real (adversarial0 loss)
    # 2) Q network 0 loss (entropy0 loss)
    # 3) conditional0 loss
    loss = ['binary_crossentropy', 'mse', 'mse']
    loss_weights = [1.0, 10.0, 1.0]
    adv0.compile(loss=loss,
                 loss_weights=loss_weights,
                 optimizer=optimizer,
                 metrics=['accuracy'])
    adv0.summary()
```

```py
    # build adversarial1 model =
    # generator1 + discriminator1 + encoder1
    # encoder1 weights frozen
    enc1.trainable = False
    # discriminator1 weights frozen
    dis1.trainable = False
    gen1_inputs = [labels, z1]
    gen1_outputs = gen1(gen1_inputs)
    adv1_outputs = dis1(gen1_outputs) + [enc1(gen1_outputs)]
    # labels + z1 to prob labels are real + z1 recon + feature1 recon
    adv1 = Model(gen1_inputs, adv1_outputs, name="adv1")
    # loss functions: 1) prob labels are real (adversarial1 loss)
    # 2) Q network 1 loss (entropy1 loss)
    # 3) conditional1 loss (classifier error)
    loss_weights = [1.0, 1.0, 1.0]
    loss = ['binary_crossentropy',
            'mse',
            'categorical_crossentropy']
    adv1.compile(loss=loss,
                 loss_weights=loss_weights,
                 optimizer=optimizer,
                 metrics=['accuracy'])
    adv1.summary()
```

```py
    # train discriminator and adversarial networks
    models = (enc0, enc1, gen0, gen1, dis0, dis1, adv0, adv1)
    params = (batch_size, train_steps, num_labels, z_dim, model_name)
    train(models, data, params)
```

Finally, the training function is similar to a typical GAN training, except that we only train one GAN at a time (that is, `GAN[1]` and then `GAN[0]`). The code is shown in Listing 6.2.5. It is worth noting that the training sequence is:

1. `Discriminator[1]` and `Q[1]` networks by minimizing the discriminator and entropy losses
2. `Discriminator[0]` and `Q[0]` networks by minimizing the discriminator and entropy losses
3. `Adversarial[1]` network by minimizing the adversarial, entropy, and conditional losses
4. `Adversarial[0]` network by minimizing the adversarial, entropy, and conditional losses

"Listing 6.2.5": `stackedgan-mnist-6.2.1.py`

```py
def train(models, data, params):
    """Train the discriminator and adversarial Networks
```

```py
    Alternately train discriminator and adversarial networks by batch.
    Discriminator is trained first with real and fake images,
    corresponding one-hot labels and latent codes.
+ Adversarial is trained next with fake images pretending + to be real, corresponding one-hot labels and latent codes. + Generate sample images per save_interval. +``` + +```py + # Arguments + models (Models): Encoder, Generator, Discriminator, + Adversarial models + data (tuple): x_train, y_train data + params (tuple): Network parameters +``` + +```py + """ + # the StackedGAN and Encoder models + enc0, enc1, gen0, gen1, dis0, dis1, adv0, adv1 = models + # network parameters + batch_size, train_steps, num_labels, z_dim, model_name = params + # train dataset + (x_train, y_train), (_, _) = data + # the generator image is saved every 500 steps + save_interval = 500 +``` + +```py + # label and noise codes for generator testing + z0 = np.random.normal(scale=0.5, size=[16, z_dim]) + z1 = np.random.normal(scale=0.5, size=[16, z_dim]) + noise_class = np.eye(num_labels)[np.arange(0, 16) % num_labels] + noise_params = [noise_class, z0, z1] + # number of elements in train dataset + train_size = x_train.shape[0] + print(model_name, + "Labels for generated images: ", + np.argmax(noise_class, axis=1)) +``` + +```py + for i in range(train_steps): + # train the discriminator1 for 1 batch + # 1 batch of real (label=1.0) and fake feature1 (label=0.0) + # randomly pick real images from dataset + rand_indexes = np.random.randint(0, + train_size, + size=batch_size) + real_images = x_train[rand_indexes] + # real feature1 from encoder0 output + real_feature1 = enc0.predict(real_images) + # generate random 50-dim z1 latent code + real_z1 = np.random.normal(scale=0.5, + size=[batch_size, z_dim]) + # real labels from dataset + real_labels = y_train[rand_indexes] +``` + +```py + # generate fake feature1 using generator1 from + # real labels and 50-dim z1 latent code + fake_z1 = np.random.normal(scale=0.5, + size=[batch_size, z_dim]) + fake_feature1 = gen1.predict([real_labels, fake_z1]) +``` + +```py + # real + fake data + feature1 = np.concatenate((real_feature1, fake_feature1)) + z1 = np.concatenate((fake_z1, fake_z1)) +``` + +```py + # label 1st half as real and 2nd half as fake + y = np.ones([2 * batch_size, 1]) + y[batch_size:, :] = 0 +``` + +```py + # train discriminator1 to classify feature1 as + # real/fake and recover + # latent code (z1). 
        # real = from encoder1,
        # fake = from generator1
        # joint training using discriminator part of
        # adversarial1 loss and entropy1 loss
        metrics = dis1.train_on_batch(feature1, [y, z1])
        # log the overall loss only
        log = "%d: [dis1_loss: %f]" % (i, metrics[0])
```

```py
        # train the discriminator0 for 1 batch
        # 1 batch of real (label=1.0) and fake images (label=0.0)
        # generate random 50-dim z0 latent code
        fake_z0 = np.random.normal(scale=0.5, size=[batch_size, z_dim])
        # generate fake images from real feature1 and fake z0
        fake_images = gen0.predict([real_feature1, fake_z0])
        # real + fake data
        x = np.concatenate((real_images, fake_images))
        z0 = np.concatenate((fake_z0, fake_z0))
        # train discriminator0 to classify image
        # as real/fake and recover latent code (z0)
        # joint training using discriminator part of adversarial0 loss
        # and entropy0 loss
        metrics = dis0.train_on_batch(x, [y, z0])
        # log the overall loss only (use dis0.metrics_names)
        log = "%s [dis0_loss: %f]" % (log, metrics[0])
```

```py
        # adversarial training
        # generate fake z1, labels
        fake_z1 = np.random.normal(scale=0.5,
                                   size=[batch_size, z_dim])
        # input to generator1 is sampling from real labels and
        # 50-dim z1 latent code
        gen1_inputs = [real_labels, fake_z1]
```

```py
        # label fake feature1 as real
        y = np.ones([batch_size, 1])
```

```py
        # train generator1 (thru adversarial) by fooling
        # the discriminator
        # and approximating encoder1 feature1 generator
        # joint training: adversarial1, entropy1, conditional1
        metrics = adv1.train_on_batch(gen1_inputs,
                                      [y, fake_z1, real_labels])
        fmt = "%s [adv1_loss: %f, enc1_acc: %f]"
        # log the overall loss and classification accuracy
        log = fmt % (log, metrics[0], metrics[6])
```

```py
        # input to generator0 is real feature1 and
        # 50-dim z0 latent code
        fake_z0 = np.random.normal(scale=0.5,
                                   size=[batch_size, z_dim])
        gen0_inputs = [real_feature1, fake_z0]
```

```py
        # train generator0 (thru adversarial) by fooling
        # the discriminator and approximating encoder1 image
        # source generator joint training:
        # adversarial0, entropy0, conditional0
        metrics = adv0.train_on_batch(gen0_inputs,
                                      [y, fake_z0, real_feature1])
        # log the overall loss only
        log = "%s [adv0_loss: %f]" % (log, metrics[0])
```

```py
        print(log)
        if (i + 1) % save_interval == 0:
            generators = (gen0, gen1)
            plot_images(generators,
                        noise_params=noise_params,
                        show=False,
                        step=(i + 1),
                        model_name=model_name)
```

```py
    # save the models after training generator0 & 1
    # the trained generator can be reloaded for
    # future MNIST digit generation
    gen1.save(model_name + "-gen1.h5")
    gen0.save(model_name + "-gen0.h5")
```

The code implementation of StackedGAN in `tf.keras` is now complete. After training, the generator outputs can be evaluated to check whether certain attributes of the synthesized MNIST digits can be controlled in a similar way to what we did in InfoGAN.

## Generator outputs of StackedGAN

After training StackedGAN for 10,000 steps, the `Generator[0]` and `Generator[1]` models are saved to files. Stacked together, `Generator[0]` and `Generator[1]` can synthesize fake images conditioned on the label and the noise codes, `z[0]` and `z[1]`.
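Before walking through the validations, here is a minimal sketch of how the two saved generators can be chained at inference time. The file names follow the `train()` code above; treat this as an illustration rather than the book's own test script (`stackedgan-mnist-6.2.1.py` provides the command-line interface used below):

```py
import numpy as np
from tensorflow.keras.models import load_model

# load the two trained generators saved by train()
gen1 = load_model("stackedgan_mnist-gen1.h5")
gen0 = load_model("stackedgan_mnist-gen0.h5")

digit = 3
noise_class = np.eye(10)[[digit]]               # one-hot label
z1 = np.random.normal(scale=0.5, size=[1, 50])  # 50-dim z1 code
z0 = np.random.normal(scale=0.5, size=[1, 50])  # 50-dim z0 code

# label + z1 -> fake feature1, then feature1 + z0 -> fake image
fake_feature1 = gen1.predict([noise_class, z1])
fake_image = gen0.predict([fake_feature1, z0])
```
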
The StackedGAN generator can be qualitatively validated by performing the following:

1. Vary the discrete label from 0 to 9 with both noise codes, `z[0]` and `z[1]`, sampled from a normal distribution with a mean of 0 and a standard deviation of 0.5\. The results are shown in Figure 6.2.9\. We can see that the StackedGAN discrete code can control which digit the generator produces:

    ```py
    python3 stackedgan-mnist-6.2.1.py --generator0=stackedgan_mnist-gen0.h5 --generator1=stackedgan_mnist-gen1.h5 --digit=0
    ```

    to

    ```py
    python3 stackedgan-mnist-6.2.1.py --generator0=stackedgan_mnist-gen0.h5 --generator1=stackedgan_mnist-gen1.h5 --digit=9
    ```

    ![](img/B14853_06_16.png)

    Figure 6.2.9: Images generated by StackedGAN as the discrete code is varied from 0 to 9\. Both z0 and z1 are sampled from a normal distribution with a mean of 0 and a standard deviation of 0.5

2. Vary the first noise code, `z[0]`, as a constant vector from -4.0 to 4.0 for digits 0 to 9, as shown below. The second noise code, `z[1]`, is set to a zero vector. Figure 6.2.10 shows that the first noise code controls the thickness of the digit. For example, for digit 8:

    ```py
    python3 stackedgan-mnist-6.2.1.py --generator0=stackedgan_mnist-gen0.h5 --generator1=stackedgan_mnist-gen1.h5 --z0=0 --z1=0 --p0 --digit=8
    ```

    ![](img/B14853_06_17.png)

    Figure 6.2.10: Images generated by StackedGAN as the first noise code, z0, is varied as a constant vector from -4.0 to 4.0 for digits 0 to 9\. z0 appears to control the thickness of each digit

3. Vary the second noise code, `z[1]`, as a constant vector from -1.0 to 1.0 for digits 0 to 9, as shown below. The first noise code, `z[0]`, is set to a zero vector. Figure 6.2.11 shows that the second noise code controls the rotation (tilt) and, to a certain extent, the thickness of the digit. For example, for digit 8:

    ```py
    python3 stackedgan-mnist-6.2.1.py --generator0=stackedgan_mnist-gen0.h5 --generator1=stackedgan_mnist-gen1.h5 --z0=0 --z1=0 --p1 --digit=8
    ```

![](img/B14853_06_18.png)

Figure 6.2.11: Images generated by StackedGAN as the second noise code, z1, is varied as a constant vector from -1.0 to 1.0 for digits 0 to 9\. z1 appears to control the rotation (tilt) and stroke thickness of each digit

Figures 6.2.9 to 6.2.11 demonstrate that StackedGAN provides additional control over the attributes of the generator outputs. The controls and attributes are (label, which digit), (z0, digit thickness), and (z1, digit tilt). From this example, there are other possible experiments that we could try, such as:

* Increasing the number of stack elements from the current number of 2
* Decreasing the dimensionality of the codes `z[0]` and `z[1]`, as in InfoGAN

Figure 6.2.12 shows the differences between the latent codes of InfoGAN and StackedGAN:

![](img/B14853_06_19.png)

Figure 6.2.12: Latent representations of different GANs

The basic idea of disentangling the codes is to impose a constraint on the loss functions such that only specific attributes are affected by each code. Structure-wise, InfoGAN is easier to implement compared to StackedGAN. InfoGAN is also faster to train.

# 4\. Conclusion

In this chapter, we discussed how to disentangle the latent representations of GANs. Earlier in the chapter, we discussed how InfoGAN maximizes the mutual information in order to force the generator to learn disentangled latent vectors. In the MNIST dataset example, InfoGAN uses three representations and one noise code as inputs. The noise represents the rest of the attributes in an entangled form. StackedGAN approaches the problem in a different way: it uses a stack of encoder GANs to learn how to synthesize fake features and images. The encoder is first trained to provide a dataset of features. Then, the encoder GANs are trained jointly to learn how to use the noise codes to control attributes of the generator outputs.

In the next chapter, we will embark on a new type of GAN that is able to generate new data in another domain. For example, given an image of a horse, the GAN can perform an automatic transformation to an image of a zebra. The interesting feature of this type of GAN is that it can be trained without supervision, and it does not require paired sample data.

# 5\. References

1. `Xi Chen et al.: InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets. Advances in Neural Information Processing Systems, 2016 (http://papers.nips.cc/paper/6399-infogan-interpretable-representation-learning-by-information-maximizing-generative-adversarial-nets.pdf).`
1. `Xun Huang et al. Stacked Generative Adversarial Networks. IEEE Conference on Computer Vision and Pattern Recognition (CVPR). Vol.
2, 2017 (http://openaccess.thecvf.com/content_cvpr_2017/papers/Huang_Stacked_Generative_Adversarial_CVPR_2017_paper.pdf).` \ No newline at end of file diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/07.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/07.md new file mode 100644 index 00000000..a259129a --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/07.md @@ -0,0 +1,990 @@ +# 七、跨域 GAN + +在计算机视觉,计算机图形学和图像处理中,许多任务涉及将图像从一种形式转换为另一种形式。 灰度图像的着色,将卫星图像转换为地图,将一位艺术家的艺术品风格更改为另一位艺术家,将夜间图像转换为白天,将夏季照片转换为冬天只是几个例子。 这些任务被称为**跨域迁移**,将成为本章的重点。 源域中的图像将迁移到目标域,从而生成新的转换图像。 + +跨域迁移在现实世界中具有许多实际应用。 例如,在自动驾驶研究中,收集公路现场驾驶数据既费时又昂贵。 为了在该示例中覆盖尽可能多的场景变化,将在不同的天气条件,季节和时间中遍历道路,从而为我们提供了大量不同的数据。 使用跨域迁移,可以通过转换现有图像来生成看起来真实的新合成场景。 例如,我们可能只需要在夏天从一个区域收集道路场景,在冬天从另一地方收集道路场景。 然后,我们可以将夏季图像转换为冬季,并将冬季图像转换为夏季。 在这种情况下,它将必须完成的任务数量减少了一半。 + +现实的合成图像的生成是 GAN 擅长的领域。 因此,跨域翻译是 GAN 的应用之一。 在本章中,我们将重点介绍一种流行的跨域 GAN 算法,称为 *CycleGAN* [2]。 与其他跨域迁移算法(例如 *pix2pix* [3])不同,CycleGAN 不需要对齐的训练图像即可工作。 在对齐的图像中,训练数据应该是由源图像及其对应的目标图像组成的一对图像; 例如,卫星图像和从该图像得出的相应地图。 + +CycleGAN 仅需要卫星数据图像和地图。 这些地图可以来自其他卫星数据,而不必事先从训练数据中生成。 + +在本章中,我们将探讨以下内容: + +* CycleGAN 的原理,包括其在`tf.keras`中的实现 +* CycleGAN 的示例应用,包括使用 CIFAR10 数据集对灰度图像进行着色和应用于 MNIST 数字和*街景门牌号码(SVHN)* [1]数据集的样式迁移 + +让我们开始讨论 CycleGAN 背后的原理。 + +# 1\. CycleGAN 的原理 + +将图像从一个域转换到另一个域是计算机视觉,计算机图形学和图像处理中的常见任务。“图 7.1.1”显示了边缘检测,这是常见的图像转换任务: + +![](img/B14853_07_01.png) + +图 7.1.1:对齐图像对的示例:使用 Canny 边缘检测器的左,原始图像和右,变换后的图像。 原始照片是作者拍摄的。 + +在此示例中,我们可以将真实照片(左)视为源域中的图像,将边缘检测的照片(右)视为目标域中的样本。 还有许多其他具有实际应用的跨域翻译过程,例如: + +* 卫星图像到地图 +* 脸部图像到表情符号,漫画或动画 +* 身体图像到头像 +* 灰度照片的着色 +* 医学扫描到真实照片 +* 真实照片到画家的绘画 + +在不同领域中还有许多其他示例。 例如,在计算机视觉和图像处理中,我们可以通过发明一种从源图像中提取特征并将其转换为目标图像的算法来执行翻译。 坎尼边缘算子就是这种算法的一个例子。 但是,在很多情况下,翻译对于手工工程师而言非常复杂,因此几乎不可能找到合适的算法。 源域分布和目标域分布都是高维且复杂的。 + +解决图像翻译问题的一种方法是使用深度学习技术。 如果我们具有来自源域和目标域的足够大的数据集,则可以训练神经网络对转换进行建模。 由于必须在给定源图像的情况下自动生成目标域中的图像,因此它们必须看起来像是来自目标域的真实样本。 GAN 是适合此类跨域任务的网络。 *pix2pix* [3]算法是跨域算法的示例。 + +pix2pix 算法与**条件 GAN**(**CGAN**)[4]相似,我们在“第 4 章”,“生成对抗网络(GAN)”。 我们可以回想起在 CGAN 中,除了`z`噪声输入之外,诸如单热向量之类的条件会限制生成器的输出。 例如,在 MNIST 数字中,如果我们希望生成器输出数字 8,则条件为单热向量`[0, 0, 0, 0, 0, 0, 0, 0, 1, 0]`。 在 pix2pix 中,条件是要翻译的图像。 生成器的输出是翻译后的图像。 通过优化 CGAN 损失来训练 pix2pix 算法。 为了使生成的图像中的模糊最小化,还包括 *L1* 损失。 + +类似于 pix2pix 的神经网络的主要缺点是训练输入和输出图像必须对齐。“图 7.1.1”是对齐的图像对的示例。 样本目标图像是从源生成的。 在大多数情况下,对齐的图像对不可用或无法从源图像生成,也不昂贵,或者我们不知道如何从给定的源图像生成目标图像。 我们拥有的是来自源域和目标域的样本数据。“图 7.1.2”是来自同一向日葵主题上源域(真实照片)和目标域(范高的艺术风格)的数据示例。 源图像和目标图像不一定对齐。 + +与 pix2pix 不同,CycleGAN 会学习图像翻译,只要源数据和目标数据之间有足够的数量和差异即可。 无需对齐。 CycleGAN 学习源和目标分布,以及如何从给定的样本数据中将源分布转换为目标分布。 无需监督。 在“图 7.1.2”的上下文中,我们只需要数千张真实向日葵的照片和数千张梵高向日葵画的照片。 在训练了 CycleGAN 之后,我们可以将向日葵的照片转换成梵高的画作: + +![A close up of a flower Description automatically generated](img/B14853_07_02.png) + +图 7.1.2:未对齐的图像对示例:左侧为菲律宾大学沿着大学大道的真实向日葵照片,右侧为伦敦国家美术馆的梵高的向日葵, 英国。 原始照片由作者拍摄。 + +下一个问题是:我们如何建立可以从未配对数据中学习的模型? 
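A minimal sketch may help preview the key mechanism before the formal treatment. CycleGAN replaces the need for paired data with a cycle-consistency constraint: translating a source image to the target domain and back should recover the original image. Assuming two generator callables, `G` (source to target) and `F` (target to source), the forward reconstruction penalty can be written as below; this is an illustration of the idea (the same quantity appears later as Equation 7.1.3), not the book's code:

```py
import tensorflow as tf

def forward_cycle_loss(x, G, F):
    """L1 (MAE) distance between real source images x
    and their reconstruction F(G(x))."""
    y_fake = G(x)       # source -> fake target
    x_reco = F(y_fake)  # fake target -> reconstructed source
    return tf.reduce_mean(tf.abs(x - x_reco))
```
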
在下一部分中,我们将构建一个使用正向和反向循环 GAN 的 CycleGAN,以及一个循环一致性检查,以消除对配对输入数据的需求。 + +## CycleGAN 模型 + +“图 7.1.3”显示了 CycleGAN 的网络模型: + +![A close up of a logo Description automatically generated](img/B14853_07_03.png) + +图 7.1.3:CycleGAN 模型包含四个网络:生成器`G`,生成器`F`,判别器`D[y]`和判别器`D[x]` + +让我们逐个讨论“图 7.1.3”。 让我们首先关注上层网络,即转发周期 GAN。 如下图“图 7.1.4”所示,正向循环 CycleGAN 的目标是学习以下函数: + +![](img/B14853_07_001.png) (Equation 7.1.1) + +![A close up of a logo Description automatically generated](img/B14853_07_04.png) + +图 7.1.4:伪造`y`的 CycleGAN 生成器`G` + +“公式 7.1.1”只是假目标数据`y'`的生成器`G`。 它将数据从源域`x`转换为目标域`y`。 + +要训​​练生成器,我们必须构建 GAN。 这是正向循环 GAN,如图“图 7.1.5”所示。 该图表明,它类似于“第 4 章”,“生成对抗网络(GANs)”中的典型 GAN,由生成器`G`和判别器`D[y]`组成,它可以以相同的对抗方式进行训练。通过仅利用源域中的可用实际图像`x`和目标域中的实际图像`y`,进行无监督学习。 + +![A close up of a logo Description automatically generated](img/B14853_07_05.png) + +图 7.1.5:CycleGAN 正向循环 GAN + +与常规 GAN 不同,CycleGAN 施加了周期一致性约束,如图“图 7.1.6”所示。 前向循环一致性网络可确保可以从伪造的目标数据中重建真实的源数据: + +![](img/B14853_07_004.png) (Equation 7.1.2) + +![](img/B14853_07_06.png) + +图 7.1.6:CycleGAN 循环一致性检查 + +通过最小化正向循环一致性 *L1* 损失来完成: + +![](img/B14853_07_005.png) (Equation 7.1.3) + +周期一致性损失使用 *L1* 或**平均绝对误差**(**MAE**),因为与 *L2* 或**均方误差**(**MSE**)相比,它通常导致较少的模糊图像重建。 + +循环一致性检查表明,尽管我们已将源数据`x`转换为域`y`,但`x`的原始特征仍应保留在`y`中并且可恢复。 网络`F`只是我们将从反向循环 GAN 借用的另一个生成器,如下所述。 + +CycleGAN 是对称的。 如图“图 7.1.7”所示,后向循环 GAN 与前向循环 GAN 相同,但将源数据`x`和目标数据`y`的作用逆转。 现在,源数据为`y`,目标数据为`x`。 生成器`G`和`F`的作用也相反。`F`现在是生成器,而`G`恢复输入。 在正向循环 GAN 中,生成器`F`是用于恢复源数据的网络,而`G`是生成器。 + +Backward Cycle GAN 生成器的目标是合成: + +![](img/B14853_07_006.png) (Equation 7.1.2) + +![A close up of a logo Description automatically generated](img/B14853_07_07.png) + +图 7.1.7:CycleGAN 向后循环 GAN + +这可以通过对抗性训练反向循环 GAN 来完成。 目的是让生成器`F`学习如何欺骗判别器`D[x]`。 + +此外,还具有类似的向后循环一致性,以恢复原始源`y`: + +![](img/B14853_07_008.png) (Equation 7.1.4) + +这是通过最小化后向循环一致性 *L1* 损失来完成的: + +![](img/B14853_07_009.png) (Equation 7.1.5) + +总而言之,CycleGAN 的最终目标是使生成器`G`学习如何合成伪造的目标数据`y'`,该伪造的目标数据`y'`会在正向循环中欺骗识别器`D[y]`。 由于网络是对称的,因此 CycleGAN 还希望生成器`F`学习如何合成伪造的源数据`x'`,该伪造的源数据可以使判别器`D[x]`在反向循环中蒙蔽。 考虑到这一点,我们现在可以将所有损失函数放在一起。 + +让我们从 GAN 部分开始。 受到*最小二乘 GAN(LSGAN)* [5]更好的感知质量的启发,如“第 5 章”,“改进的 GAN” 中所述,CycleGAN 还使用 MSE 作为判别器和生成器损失。 回想一下,LSGAN 与原始 GAN 之间的差异需要使用 MSE 损失,而不是二进制交叉熵损失。 + +CycleGAN 将生成器-标识符损失函数表示为: + +![](img/B14853_07_014.png) (Equation 7.1.6) + +![](img/B14853_07_015.png) (Equation 7.1.7) + +![](img/B14853_07_016.png) (Equation 7.1.8) + +![](img/B14853_07_017.png) (Equation 7.1.9) + +![](img/B14853_07_018.png) (Equation 7.1.10) + +![](img/B14853_07_019.png) (Equation 7.1.11) + +损失函数的第二组是周期一致性损失,可以通过汇总前向和后向 GAN 的贡献来得出: + +![](img/B14853_07_020.png) + +![](img/B14853_07_021.png) (Equation 7.1.12) + +CycleGAN 的总损失为: + +![](img/B14853_07_022.png) (Equation 7.1.13) + +CycleGAN 建议使用以下权重值`λ1 = 1.0`和`λ2 = 10.0`,以更加重视循环一致性检查。 + +训练策略类似于原始 GAN。 “算法 7.1.1”总结了 CycleGAN 训练过程。 + +“算法 7.1.1”:CycleGAN 训练 + +对`n`训练步骤重复上述步骤: + +1. 通过使用真实的源数据和目标数据训练前向循环判别器,将`L_forward_GAN^(D)`降至最低。 实际目标数据的小批量`y`标记为 1.0。 伪造的目标数据`y' = G(x)`的小批量标记为 0.0。 +2. 通过使用真实的源数据和目标数据训练反向循环判别器,将`L_backward_GAN^(D)`最小化。 实际源数据的小批量`x`标记为 1.0。 一小部分伪造的源数据`x' = F(y)`被标记为 0.0。 + +1. 
通过训练对抗网络中的前向周期和后向周期生成器,将`L_GAN^(D)`和`L_cyc`最小化。 伪造目标数据的一个小批量`y' = G(x)`被标记为 1.0。 一小部分伪造的源数据`x' = F(y)`被标记为 1.0。 判别器的权重被冻结。 + +在神经样式迁移问题中,颜色组合可能无法成功地从源图像迁移到伪造目标图像。 此问题显示在“图 7.1.8”中: + +![](img/B14853_07_08.png) + +图 7.1.8:在样式迁移过程中,颜色组合可能无法成功迁移。 为了解决此问题,将恒等损失添加到总损失函数中 + +为了解决这个问题,CycleGAN 建议包括正向和反向循环身份损失函数: + +![](img/B14853_07_033.png) (Equation 7.1.14) + +CycleGAN 的总损失变为: + +![](img/B14853_07_034.png) (Equation 7.1.15) + +其中`λ3 = 0.5`。 在对抗训练中,身份损失也得到了优化。“图 7.1.9”重点介绍了实现身份正则器的 CycleGAN 辅助网络: + +![A screenshot of a cell phone Description automatically generated](img/B14853_07_09.png) + +图 7.1.9:具有身份正则化网络的 CycleGAN 模型,图像左侧突出显示 + +在下一个部分,我们将在`tf.keras`中实现 CycleGAN。 + +## 使用 Keras 实现 CycleGAN + +我们来解决,这是 CycleGAN 可以解决的简单问题。 在“第 3 章”,“自编码器”中,我们使用了自编码器为 CIFAR10 数据集中的灰度图像着色。 我们可以记得,CIFAR10 数据集包含 50,000 个训练过的数据项和 10,000 个测试数据样本,这些样本属于 10 个类别的`32 x 32` RGB 图像。 我们可以使用`rgb2gray`(RGB)将所有彩色图像转换为灰度图像,如“第 3 章”,“自编码器”中所述。 + +接下来,我们可以将灰度训练图像用作源域图像,将原始彩色图像用作目标域图像。 值得注意的是,尽管数据集是对齐的,但我们 CycleGAN 的输入是彩色图像的随机样本和灰度图像的随机样本。 因此,我们的 CycleGAN 将看不到训练数据对齐。 训练后,我们将使用测试的灰度图像来观察 CycleGAN 的表现。 + +如前几节所述,要实现 CycleGAN,我们需要构建两个生成器和两个判别器。 CycleGAN 的生成器学习源输入分布的潜在表示,并将该表示转换为目标输出分布。 这正是自编码器的功能。 但是,类似于“第 3 章”,“自编码器”中讨论的典型自编码器,使用的编码器会对输入进行下采样,直到瓶颈层为止,此时解码器中的处理过程相反。 + +由于在编码器和解码器层之间共享许多低级特征,因此该结构不适用于某些图像转换问题。 例如,在着色问题中,灰度图像的形式,结构和边缘与彩色图像中的相同。 为了解决这个问题,CycleGAN 生成器使用 *U-Net* [7]结构,如图“图 7.1.10”所示: + +![](img/B14853_07_10.png) + +图 7.1.10:在 Keras 中实现正向循环生成器`G`。 产生器是包括编码器和解码器的 U 网络[7]。 + +在 U-Net 结构中,编码器层的输出`e[ni]`与解码器层的输出`d[i]`,其中`n = 4`是编码器/解码器的层数,`i = 1, 2, 3`是共享信息的层号。 + +我们应该注意,尽管该示例使用`n = 4`,但输入/输出尺寸较大的问题可能需要更深的编码器/解码器层。 通过 U-Net 结构,可以在编码器和解码器之间自由迁移特征级别的信息。 + +编码器层由`Instance Normalization(IN)-LeakyReLU-Conv2D`组成,而解码器层由`IN-ReLU-Conv2D`组成。 编码器/解码器层的实现如清单 7.1.1 所示,而生成器的实现如列表 7.1.2 所示。 + +[完整的代码可在 GitHub 上找到](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)。 + +**实例规范化**(**IN**)是每个数据(即 IN 是图像或每个特征的 BN)。 在样式迁移中,重要的是标准化每个样本而不是每个批量的对比度。 IN 等于,相当于对比度归一化。 同时,BN 打破了对比度标准化。 + +记住在使用 IN 之前先安装`tensorflow-addons`: + +```py +$ pip install tensorflow-addons +``` + +“列表 7.1.1”:`cyclegan-7.1.1.py` + +```py +def encoder_layer(inputs, + filters=16, + kernel_size=3, + strides=2, + activation='relu', + instance_norm=True): + """Builds a generic encoder layer made of Conv2D-IN-LeakyReLU + IN is optional, LeakyReLU may be replaced by ReLU + """ +``` + +```py + conv = Conv2D(filters=filters, + kernel_size=kernel_size, + strides=strides, + padding='same') +``` + +```py + x = inputs + if instance_norm: + x = InstanceNormalization(axis=3)(x) + if activation == 'relu': + x = Activation('relu')(x) + else: + x = LeakyReLU(alpha=0.2)(x) + x = conv(x) + return x +``` + +```py +def decoder_layer(inputs, + paired_inputs, + filters=16, + kernel_size=3, + strides=2, + activation='relu', + instance_norm=True): + """Builds a generic decoder layer made of Conv2D-IN-LeakyReLU + IN is optional, LeakyReLU may be replaced by ReLU + Arguments: (partial) + inputs (tensor): the decoder layer input + paired_inputs (tensor): the encoder layer output + provided by U-Net skip connection & + concatenated to inputs. 
+ """ +``` + +```py + conv = Conv2DTranspose(filters=filters, + kernel_size=kernel_size, + strides=strides, + padding='same') +``` + +```py + x = inputs + if instance_norm: + x = InstanceNormalization(axis=3)(x) + if activation == 'relu': + x = Activation('relu')(x) + else: + x = LeakyReLU(alpha=0.2)(x) + x = conv(x) + x = concatenate([x, paired_inputs]) + return x +``` + +将移至生成器实现中: + +“列表 7.1.2”:`cyclegan-7.1.1.py` + +Keras 中的生成器实现: + +```py +def build_generator(input_shape, + output_shape=None, + kernel_size=3, + name=None): + """The generator is a U-Network made of a 4-layer encoder + and a 4-layer decoder. Layer n-i is connected to layer i. +``` + +```py + Arguments: + input_shape (tuple): input shape + output_shape (tuple): output shape + kernel_size (int): kernel size of encoder & decoder layers + name (string): name assigned to generator model +``` + +```py + Returns: + generator (Model): + """ +``` + +```py + inputs = Input(shape=input_shape) + channels = int(output_shape[-1]) + e1 = encoder_layer(inputs, + 32, + kernel_size=kernel_size, + activation='leaky_relu', + strides=1) + e2 = encoder_layer(e1, + 64, + activation='leaky_relu', + kernel_size=kernel_size) + e3 = encoder_layer(e2, + 128, + activation='leaky_relu', + kernel_size=kernel_size) + e4 = encoder_layer(e3, + 256, + activation='leaky_relu', + kernel_size=kernel_size) +``` + +```py + d1 = decoder_layer(e4, + e3, + 128, + kernel_size=kernel_size) + d2 = decoder_layer(d1, + e2, + 64, + kernel_size=kernel_size) + d3 = decoder_layer(d2, + e1, + 32, + kernel_size=kernel_size) + outputs = Conv2DTranspose(channels, + kernel_size=kernel_size, + strides=1, + activation='sigmoid', + padding='same')(d3) +``` + +```py + generator = Model(inputs, outputs, name=name) +``` + +```py + return generator +``` + +CycleGAN 的判别器类似于原始 GAN 判别器。 输入图像被下采样数次(在此示例中为 3 次)。 最后一层是`Dense`(1)层,它预测输入为实数的可能性。 除了不使用 IN 之外,每个层都类似于生成器的编码器层。 然而,在大图像中,用一个数字将图像计算为真实图像或伪图像会导致参数效率低下,并导致生成器的图像质量较差。 + +解决方案是使用 PatchGAN [6],该方法将图像划分为补丁网格,并使用标量值网格来预测补丁是真实概率。“图 7.1.11”显示了原始 GAN 判别器和`2 x 2` PatchGAN 判别器之间的比较: + +![](img/B14853_07_11.png) + +图 7.1.11:GAN 与 PatchGAN 判别器的比较 + +在此示例中,面片不重叠且在其边界处相遇。 但是,通常,补丁可能会重叠。 + +我们应该注意,PatchGAN 并没有在 CycleGAN 中引入一种新型的 GAN。 为了提高生成的图像质量,如果使用`2 x 2` PatchGAN,则没有四个输出可以区分,而没有一个输出可以区分。 损失函数没有变化。 从直觉上讲,这是有道理的,因为如果图像的每个面片或部分看起来都是真实的,则整个图像看起来会更加真实。 + +“图 7.1.12”显示了`tf.keras`中实现的判别器网络。 下图显示了判别器确定输入图像或色块为彩色 CIFAR10 图像的可能性: + +![](img/B14853_07_12.png) + +图 7.1.12:目标标识符`D[y]`在`tf.keras`中的实现。 PatchGAN 判别器显示在右侧 + +由于输出图像只有`32 x 32` RGB 时较小,因此表示该图像是真实的单个标量就足够了。 但是,当使用 PatchGAN 时,我们也会评估结果。“列表 7.1.3”显示了判别器的函数构建器: + +“列表 7.1.3”:`cyclegan-7.1.1.py` + +`tf.keras`中的判别器实现: + +```py +def build_discriminator(input_shape, + kernel_size=3, + patchgan=True, + name=None): + """The discriminator is a 4-layer encoder that outputs either + a 1-dim or a n x n-dim patch of probability that input is real +``` + +```py + Arguments: + input_shape (tuple): input shape + kernel_size (int): kernel size of decoder layers + patchgan (bool): whether the output is a patch + or just a 1-dim + name (string): name assigned to discriminator model +``` + +```py + Returns: + discriminator (Model): + """ +``` + +```py + inputs = Input(shape=input_shape) + x = encoder_layer(inputs, + 32, + kernel_size=kernel_size, + activation='leaky_relu', + instance_norm=False) + x = encoder_layer(x, + 64, + kernel_size=kernel_size, + activation='leaky_relu', + instance_norm=False) + x = encoder_layer(x, + 128, + kernel_size=kernel_size, + activation='leaky_relu', + instance_norm=False) + x = 
encoder_layer(x, + 256, + kernel_size=kernel_size, + strides=1, + activation='leaky_relu', + instance_norm=False) +``` + +```py + # if patchgan=True use nxn-dim output of probability + # else use 1-dim output of probability + if patchgan: + x = LeakyReLU(alpha=0.2)(x) + outputs = Conv2D(1, + kernel_size=kernel_size, + strides=2, + padding='same')(x) + else: + x = Flatten()(x) + x = Dense(1)(x) + outputs = Activation('linear')(x) +``` + +```py + discriminator = Model(inputs, outputs, name=name) +``` + +```py + return discriminator +``` + +使用生成器和判别器生成器,我们现在可以构建 CycleGAN。“列表 7.1.4”显示了构建器函数。 与上一节中的讨论一致,实例化了两个生成器`g_source = F`和`g_target = G`以及两个判别器`d_source = D[x]`和`d_target = D[y]`。 正向循环为`x' = F(G(x)) = reco_source = g_source(g_target(source_input))`。反向循环为`y' = G(F(y)) = reco_target = g_target(g_source (target_input))`。 + +对抗模型的输入是源数据和目标数据,而输出是`D[x]`和`D[y]`的输出以及重构的输入`x'`和`y'`。 在本示例中,由于由于灰度图像和彩色图像中通道数之间的差异,因此未使用身份网络。 对于 GAN 和循环一致性损失,我们分别使用建议的`λ1 = 1.0`和`λ2 = 10.0`损失权重。 与前几章中的 GAN 相似,我们使用 RMSprop 作为判别器的优化器,其学习率为`2e-4`,衰减率为`6e-8`。 对抗的学习率和衰退率是判别器的一半。 + +“列表 7.1.4”:`cyclegan-7.1.1.py` + +`tf.keras`中的 CycleGAN 构建器: + +```py +def build_cyclegan(shapes, + source_name='source', + target_name='target', + kernel_size=3, + patchgan=False, + identity=False + ): + """Build the CycleGAN +``` + +```py + 1) Build target and source discriminators + 2) Build target and source generators + 3) Build the adversarial network +``` + +```py + Arguments: + shapes (tuple): source and target shapes + source_name (string): string to be appended on dis/gen models + target_name (string): string to be appended on dis/gen models + kernel_size (int): kernel size for the encoder/decoder + or dis/gen models + patchgan (bool): whether to use patchgan on discriminator + identity (bool): whether to use identity loss +``` + +```py + Returns: + (list): 2 generator, 2 discriminator, + and 1 adversarial models + """ +``` + +```py + source_shape, target_shape = shapes + lr = 2e-4 + decay = 6e-8 + gt_name = "gen_" + target_name + gs_name = "gen_" + source_name + dt_name = "dis_" + target_name + ds_name = "dis_" + source_name +``` + +```py + # build target and source generators + g_target = build_generator(source_shape, + target_shape, + kernel_size=kernel_size, + name=gt_name) + g_source = build_generator(target_shape, + source_shape, + kernel_size=kernel_size, + name=gs_name) + print('---- TARGET GENERATOR ----') + g_target.summary() + print('---- SOURCE GENERATOR ----') + g_source.summary() +``` + +```py + # build target and source discriminators + d_target = build_discriminator(target_shape, + patchgan=patchgan, + kernel_size=kernel_size, + name=dt_name) + d_source = build_discriminator(source_shape, + patchgan=patchgan, + kernel_size=kernel_size, + name=ds_name) + print('---- TARGET DISCRIMINATOR ----') + d_target.summary() + print('---- SOURCE DISCRIMINATOR ----') + d_source.summary() +``` + +```py + optimizer = RMSprop(lr=lr, decay=decay) + d_target.compile(loss='mse', + optimizer=optimizer, + metrics=['accuracy']) + d_source.compile(loss='mse', + optimizer=optimizer, + metrics=['accuracy']) +``` + +```py + d_target.trainable = False + d_source.trainable = False +``` + +```py + # build the computational graph for the adversarial model + # forward cycle network and target discriminator + source_input = Input(shape=source_shape) + fake_target = g_target(source_input) + preal_target = d_target(fake_target) + reco_source = g_source(fake_target) +``` + +```py + # backward cycle network and source discriminator + target_input = 
Input(shape=target_shape) + fake_source = g_source(target_input) + preal_source = d_source(fake_source) + reco_target = g_target(fake_source) +``` + +```py + # if we use identity loss, add 2 extra loss terms + # and outputs + if identity: + iden_source = g_source(source_input) + iden_target = g_target(target_input) + loss = ['mse', 'mse', 'mae', 'mae', 'mae', 'mae'] + loss_weights = [1., 1., 10., 10., 0.5, 0.5] + inputs = [source_input, target_input] + outputs = [preal_source, + preal_target, + reco_source, + reco_target, + iden_source, + iden_target] + else: + loss = ['mse', 'mse', 'mae', 'mae'] + loss_weights = [1., 1., 10., 10.] + inputs = [source_input, target_input] + outputs = [preal_source, + preal_target, + reco_source, + reco_target] +``` + +```py + # build adversarial model + adv = Model(inputs, outputs, name='adversarial') + optimizer = RMSprop(lr=lr*0.5, decay=decay*0.5) + adv.compile(loss=loss, + loss_weights=loss_weights, + optimizer=optimizer, + metrics=['accuracy']) + print('---- ADVERSARIAL NETWORK ----') + adv.summary() +``` + +```py + return g_source, g_target, d_source, d_target, adv +``` + +我们遵循训练过程,我们可以从上一节中的“算法 7.1.1”中调用。“列表 7.1.5”显示了 CycleGAN 训练。 此训练与原始 GAN 之间的次要区别是有两个要优化的判别器。 但是,只有一种对抗模型需要优化。 对于每 2,000 步,生成器将保存预测的源图像和目标图像。 我们将的批量大小设为 32。我们也尝试了 1 的批量大小,但是输出质量几乎相同,并且需要花费更长的时间进行训练(批量为每个图像 43 ms,在 NVIDIA GTX 1060 上批量大小为 32 时,最大大小为每个图像 1 vs 3.6 ms) + +“列表 7.1.5”:`cyclegan-7.1.1.py` + +`tf.keras`中的 CycleGAN 训练例程: + +```py +def train_cyclegan(models, + data, + params, + test_params, + test_generator): + """ Trains the CycleGAN. + + 1) Train the target discriminator + 2) Train the source discriminator + 3) Train the forward and backward cyles of + adversarial networks +``` + +```py + Arguments: + models (Models): Source/Target Discriminator/Generator, + Adversarial Model + data (tuple): source and target training data + params (tuple): network parameters + test_params (tuple): test parameters + test_generator (function): used for generating + predicted target and source images + """ +``` + +```py + # the models + g_source, g_target, d_source, d_target, adv = models + # network parameters + batch_size, train_steps, patch, model_name = params + # train dataset + source_data, target_data, test_source_data, test_target_data\ + = data +``` + +```py + titles, dirs = test_params +``` + +```py + # the generator image is saved every 2000 steps + save_interval = 2000 + target_size = target_data.shape[0] + source_size = source_data.shape[0] +``` + +```py + # whether to use patchgan or not + if patch > 1: + d_patch = (patch, patch, 1) + valid = np.ones((batch_size,) + d_patch) + fake = np.zeros((batch_size,) + d_patch) + else: + valid = np.ones([batch_size, 1]) + fake = np.zeros([batch_size, 1]) +``` + +```py + valid_fake = np.concatenate((valid, fake)) + start_time = datetime.datetime.now() +``` + +```py + for step in range(train_steps): + # sample a batch of real target data + rand_indexes = np.random.randint(0, + target_size, + size=batch_size) + real_target = target_data[rand_indexes] +``` + +```py + # sample a batch of real source data + rand_indexes = np.random.randint(0, + source_size, + size=batch_size) + real_source = source_data[rand_indexes] + # generate a batch of fake target data fr real source data + fake_target = g_target.predict(real_source) +``` + +```py + # combine real and fake into one batch + x = np.concatenate((real_target, fake_target)) + # train the target discriminator using fake/real data + metrics = d_target.train_on_batch(x, valid_fake) + log = "%d: 
[d_target loss: %f]" % (step, metrics[0]) +``` + +```py + # generate a batch of fake source data fr real target data + fake_source = g_source.predict(real_target) + x = np.concatenate((real_source, fake_source)) + # train the source discriminator using fake/real data + metrics = d_source.train_on_batch(x, valid_fake) + log = "%s [d_source loss: %f]" % (log, metrics[0]) +``` + +```py + # train the adversarial network using forward and backward + # cycles. the generated fake source and target + # data attempts to trick the discriminators + x = [real_source, real_target] + y = [valid, valid, real_source, real_target] + metrics = adv.train_on_batch(x, y) + elapsed_time = datetime.datetime.now() - start_time + fmt = "%s [adv loss: %f] [time: %s]" + log = fmt % (log, metrics[0], elapsed_time) + print(log) + if (step + 1) % save_interval == 0: + test_generator((g_source, g_target), + (test_source_data, test_target_data), + step=step+1, + titles=titles, + dirs=dirs, + show=False) +``` + +```py + # save the models after training the generators + g_source.save(model_name + "-g_source.h5") + g_target.save(model_name + "-g_target.h5") +``` + +最后,在使用 CycleGAN 构建和训练函数之前,我们必须执行一些数据准备。 模块`cifar10_utils.py`和`other_ utils.py`加载`CIFAR10`训练和测试数据。 有关这两个文件的详细信息,请参考源代码。 加载后,将训练图像和测试图像转换为灰度,以生成源数据和测试源数据。 + +“列表 7.1.6”显示了 CycleGAN 如何用于构建和训练用于灰度图像着色的生成器网络(`g_target`)。 由于 CycleGAN 是对称的,因此我们还构建并训练了第二个生成器网络(`g_source`),该网络可以将颜色转换为灰度。 训练了两个 CycleGAN 着色网络。 第一种使用标量输出类似于原始 GAN 的判别器,第二种使用`2 x 2` PatchGAN。 + +“列表 7.1.6”:`cyclegan-7.1.1.py` + +CycleGAN 用于着色: + +```py +def graycifar10_cross_colorcifar10(g_models=None): + """Build and train a CycleGAN that can do + grayscale <--> color cifar10 images + """ +``` + +```py + model_name = 'cyclegan_cifar10' + batch_size = 32 + train_steps = 100000 + patchgan = True + kernel_size = 3 + postfix = ('%dp' % kernel_size) \ + if patchgan else ('%d' % kernel_size) +``` + +```py + data, shapes = cifar10_utils.load_data() + source_data, _, test_source_data, test_target_data = data + titles = ('CIFAR10 predicted source images.', + 'CIFAR10 predicted target images.', + 'CIFAR10 reconstructed source images.', + 'CIFAR10 reconstructed target images.') + dirs = ('cifar10_source-%s' % postfix, \ + 'cifar10_target-%s' % postfix) +``` + +```py + # generate predicted target(color) and source(gray) images + if g_models is not None: + g_source, g_target = g_models + other_utils.test_generator((g_source, g_target), + (test_source_data, \ + test_target_data), + step=0, + titles=titles, + dirs=dirs, + show=True) + return +``` + +```py + # build the cyclegan for cifar10 colorization + models = build_cyclegan(shapes, + "gray-%s" % postfix, + "color-%s" % postfix, + kernel_size=kernel_size, + patchgan=patchgan) + # patch size is divided by 2^n since we downscaled the input + # in the discriminator by 2^n (ie. 
we use strides=2 n times) + patch = int(source_data.shape[1] / 2**4) if patchgan else 1 + params = (batch_size, train_steps, patch, model_name) + test_params = (titles, dirs) + # train the cyclegan + train_cyclegan(models, + data, + params, + test_params, + other_utils.test_generator) +``` + +在的下一部分中,我们将检查 CycleGAN 的生成器输出以进行着色。 + +## CycleGAN 的生成器输出 + +“图 7.1.13”显示 CycleGAN 的着色结果。 源图像来自测试数据集: + +![](img/B14853_07_13.png) + +图 7.1.13:使用不同技术进行着色。 显示的是基本事实,使用自编码器的着色(第 3 章,自编码器),使用带有原始 GAN 判别器的 CycleGAN 进行着色,以及使用带有 PatchGAN 判别器的 CycleGAN 进行着色。 彩色效果最佳。 原始彩色照片可以在该书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/blob/master/chapter7-cross-domain-gan/README.md)中找到。 + +为了进行比较,我们使用第 3 章,“自编码器”中描述的普通自编码器显示了地面真实情况和着色结果。 通常,所有彩色图像在感觉上都是可接受的。 总体而言,似乎每种着色技术都有自己的优点和缺点。 所有着色方法与天空和车辆的正确颜色不一致。 + +例如,平面背景(第三行,第二列)中的天空为白色。 自编码器没错,但是 CycleGAN 认为它是浅棕色或蓝色。 + +对于第六行第六列,暗海上的船天空阴沉,但自编码器将其涂成蓝色和蓝色,而 CycleGAN 将其涂成蓝色和白色,而没有 PatchGAN。 两种预测在现实世界中都是有意义的。 同时,使用 PatchGAN 对 CycleGAN 的预测与基本事实相似。 在倒数第二行和第二列上,没有方法能够预测汽车的红色。 在动物身上,CycleGAN 的两种口味都具有接近真实情况的颜色。 + +由于 CycleGAN 是对称的,因此它还能在给定彩色图像的情况下预测灰度图像。“图 7.1.14”显示了两个 CycleGAN 变体执行的颜色到灰度转换。 目标图像来自测试数据集。 除了某些图像的灰度阴影存在细微差异外,这些预测通常是准确的。 + +![](img/B14853_07_14.png) + +图 7.1.14:颜色(来自图 7.1.9)到 CycleGAN 的灰度转换 + +要训​​练 CycleGAN 进行着色,命令是: + +```py +python3 cyclegan-7.1.1.py -c +``` + +读者可以使用带有 PatchGAN 的 CycleGAN 预训练模型来运行图像转换: + +```py +python3 cyclegan-7.1.1.py --cifar10_g_source=cyclegan_cifar10-g_source.h5 +--cifar10_g_target=cyclegan_cifar10-g_target.h5 +``` + +在本节中,我们演示了 CycleGAN 在着色上的一种实际应用。 在下一部分中,我们将在更具挑战性的数据集上训练 CycleGAN。 源域 MNIST 与目标域 SVHN 数据集有很大的不同[1]。 + +## MNIST 和 SVHN 数据集上的 CycleGAN + +我们现在要解决一个更具挑战性的问题。 假设我们使用 MNIST 灰度数字作为源数据,并且我们想从 *SVHN* [1]中借鉴样式,这是我们的目标数据。 每个域中的样本数据显示在“图 7.1.15”中: + +![](img/B14853_07_15.png) + +图 7.1.15:两个未对齐数据的不同域。 原始彩色照片可以在该书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/blob/master/chapter7-cross-domain-gan/README.md)中找到。 + +我们可以重用上一节中讨论的 CycleGAN 的所有构建和训练函数,以执行样式迁移。 唯一的区别是,我们必须添加用于加载 MNIST 和 SVHN 数据的例程。 SVHN 数据集可在[这个页面](http://ufldl.stanford.edu/housenumbers/)中找到。 + +我们介绍`mnist_svhn_utils.py`模块来帮助我们完成此任务。“列表 7.1.7”显示了针对跨域迁移的 CycleGAN 的初始化和训练。 + +CycleGAN 结构与上一部分相同,不同之处在于我们使用的核大小为 5,因为两个域完全不同。 + +“列表 7.1.7”:`cyclegan-7.1.1.py` + +CycleGAN 用于 MNIST 和 SVHN 之间的跨域样式迁移: + +```py +def mnist_cross_svhn(g_models=None): + """Build and train a CycleGAN that can do mnist <--> svhn + """ +``` + +```py + model_name = 'cyclegan_mnist_svhn' + batch_size = 32 + train_steps = 100000 + patchgan = True + kernel_size = 5 + postfix = ('%dp' % kernel_size) \ + if patchgan else ('%d' % kernel_size) +``` + +```py + data, shapes = mnist_svhn_utils.load_data() + source_data, _, test_source_data, test_target_data = data + titles = ('MNIST predicted source images.', + 'SVHN predicted target images.', + 'MNIST reconstructed source images.', + 'SVHN reconstructed target images.') + dirs = ('mnist_source-%s' \ + % postfix, 'svhn_target-%s' % postfix) +``` + +```py + # generate predicted target(svhn) and source(mnist) images + if g_models is not None: + g_source, g_target = g_models + other_utils.test_generator((g_source, g_target), + (test_source_data, \ + test_target_data), + step=0, + titles=titles, + dirs=dirs, + show=True) + return +``` + +```py + # build the cyclegan for mnist cross svhn + models = build_cyclegan(shapes, + "mnist-%s" % postfix, + "svhn-%s" % postfix, + kernel_size=kernel_size, + patchgan=patchgan) + # patch size is divided by 2^n since we downscaled the input + # in the discriminator by 
2^n (ie. we use strides=2 n times) + patch = int(source_data.shape[1] / 2**4) if patchgan else 1 + params = (batch_size, train_steps, patch, model_name) + test_params = (titles, dirs) + # train the cyclegan + train_cyclegan(models, + data, + params, + test_params, + other_utils.test_generator) +``` + +将 MNIST 从测试数据集迁移到 SVHN 的结果显示在“图 7.1.16”中。 生成的图像具有样式的 SVHN,但是数字未完全传送。 例如,在第四行上,数字 3、1 和 3 由 CycleGAN 进行样式化。 + +但是,在第三行中,不带有和带有 PatchGAN 的 CycleGAN 的数字 9、6 和 6 分别设置为 0、6、01、0、65 和 68: + +![](img/B14853_07_16.png) + +图 7.1.16:测试数据从 MNIST 域到 SVHN 的样式迁移。 原始彩色照片可以在该书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/blob/master/chapter7-cross-domain-gan/README.md)中找到。 + +向后循环的结果为“图 7.1.17”中所示的。 在这种情况下,目标图像来自 SVHN 测试数据集。 生成的图像具有 MNIST 的样式,但是数字没有正确翻译。 例如,在第一行中,对于不带和带有 PatchGAN 的 CycleGAN,数字 5、2 和 210 分别被样式化为 7、7、8、3、3 和 1: + +![](img/B14853_07_17.png) + +图 7.1.17:测试数据从 SVHN 域到 MNIST 的样式迁移。 原始彩色照片可以在该书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/blob/master/chapter7-cross-domain-gan/README.md)中找到。 + +在 PatchGAN 的情况下,假设预测的 MNIST 数字被限制为一位,则输出 1 是可以理解的。 有以某种方式正确的预测,例如在第二行中,不使用 PatchGAN 的 CycleGAN 将 SVHN 数字的最后三列 6、3 和 4 转换为 6、3 和 6。 但是,CycleGAN 两种版本的输出始终是个位数且可识别。 + +从 MNIST 到 SVHN 的转换中出现的问题称为“标签翻转”[8],其中源域中的数字转换为目标域中的另一个数字。 尽管 CycleGAN 的预测是周期一致的,但它们不一定是语义一致的。 在翻译过程中数字的含义会丢失。 + +为了解决这个问题, *Hoffman* [8]引入了一种改进的 CycleGAN,称为**循环一致性对抗域自适应**(**CyCADA**)。 不同之处在于,附加的语义损失项可确保预测不仅周期一致,而且语义一致。 + +“图 7.1.18”显示 CycleGAN 在正向循环中重建 MNIST 数字。 重建的 MNIST 数字几乎与源 MNIST 数字相同: + +![](img/B14853_07_18.png) + +图 7.1.18:带有 MNIST 上的 PatchGAN 的 CycleGAN(源)到 SVHN(目标)的前向周期。 重建的源类似于原始源。 原始彩色照片可以在该书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/blob/master/chapter7-cross-domain-gan/README.md)中找到。 + +“图 7.1.19”显示了 CycleGAN 在向后周期中重构 SVHN 数字的过程: + +![](img/B14853_07_19.png) + +图 7.1.19:带有 MNIST 上的 PatchGAN 的 CycleGAN 与 SVHN(目标)的反向循环。 重建的目标与原始目标并不完全相似。 原始彩色照片可以在该书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/blob/master/chapter7-cross-domain-gan/README.md)中找到。 + +在“图 7.1.3”中,CycleGAN 被描述为具有周期一致性。 换句话说,给定源`x`,CycleGAN 将正向循环中的源重构为`x'`。 另外,在给定目标`y`的情况下,CycleGAN 在反向循环中将目标重构为`y'`。 + +重建了许多目标图像。 有些数字显然是相同的,例如最后两列(3 和 4)中的第二行,而有些数字却是相同的但是模糊的,例如前两列列中的第一行(5 和 2)。 尽管样式仍像第二行一样,但在前两列(从 33 和 6 到 1 以及无法识别的数字)中,有些数字会转换为另一数字。 + +要将 MNIST 的 CycleGAN 训练为 SVHN,命令为: + +```py +python3 cyclegan-7.1.1.py -m +``` + +鼓励读者使用带有 PatchGAN 的 CycleGAN 预训练模型来运行图像翻译: + +```py +python3 cyclegan-7.1.1.py --mnist_svhn_g_source=cyclegan_mnist_svhn-g_ source.h5 --mnist_svhn_g_target=cyclegan_mnist_svhn-g_target.h5 +``` + +到目前为止,我们只看到了 CycleGAN 的两个实际应用。 两者都在小型数据集上进行了演示,以强调可重复性的概念。 如本章前面所述,CycleGAN 还有许多其他实际应用。 我们在这里介绍的 CycleGAN 可以作为分辨率更高的图像转换的基础。 + +# 2\. 总结 + +在本章中,我们讨论了 CycleGAN 作为可用于图像翻译的算法。 在 CycleGAN 中,源数据和目标数据不一定要对齐。 我们展示了两个示例,*灰度 ↔ 颜色*和 *MNIST ↔ SVHN* ,尽管 CycleGAN 可以执行许多其他可能的图像转换 。 + +在下一章中,我们将着手另一种生成模型,即**变分自编码器**(**VAE**)。 VAE 具有类似的学习目标–如何生成新图像(数据)。 他们专注于学习建模为高斯分布的潜在向量。 我们将以有条件的 VAE 和解开 VAE 中的潜在表示形式来证明 GAN 解决的问题中的其他相似之处。 + +# 3\. 参考 + +1. `Yuval Netzer et al.: Reading Digits in Natural Images with Unsupervised Feature Learning. NIPS workshop on deep learning and unsupervised feature learning. Vol. 2011. No. 2. 2011 (https://www-cs.stanford.edu/~twangcat/papers/nips2011_housenumbers.pdf).` +1. `Zhu-Jun-Yan et al.: Unpaired Image-to-Image Translation using Cycle-Consistent Adversarial Networks. 2017 IEEE International Conference on Computer Vision (ICCV). 
IEEE, 2017 (http://openaccess.thecvf.com/content_ICCV_2017/papers/Zhu_Unpaired_Image-To-Image_Translation_ICCV_2017_paper.pdf).` +1. `Phillip Isola et al.: Image-to-Image Translation with Conditional Adversarial Networks. 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). IEEE, 2017 (http://openaccess.thecvf.com/content_cvpr_2017/papers/Isola_Image-To-Image_Translation_With_CVPR_2017_paper.pdf).` +1. `Mehdi Mirza and Simon Osindero. Conditional Generative Adversarial Nets. arXiv preprint arXiv:1411.1784, 2014 (https://arxiv.org/pdf/1411.1784.pdf).` +1. `Xudong Mao et al.: Least Squares Generative Adversarial Networks. 2017 IEEE International Conference on Computer Vision (ICCV). IEEE, 2017 (http://openaccess.thecvf.com/content_ICCV_2017/papers/Mao_Least_Squares_Generative_ICCV_2017_paper.pdf).` +1. `Chuan Li and Michael Wand. Precomputed Real-Time Texture Synthesis with Markovian Generative Adversarial Networks. European Conference on Computer Vision. Springer, Cham, 2016 (https://arxiv.org/pdf/1604.04382.pdf).` +1. `Olaf Ronneberger, Philipp Fischer, and Thomas Brox. U-Net: Convolutional Networks for Biomedical Image Segmentation. International Conference on Medical image computing and computer-assisted intervention. Springer, Cham, 2015 (https://arxiv.org/pdf/1505.04597.pdf).` +1. `Judy Hoffman et al.: CyCADA: Cycle-Consistent Adversarial Domain Adaptation. arXiv preprint arXiv:1711.03213, 2017 (https://arxiv.org/pdf/1711.03213.pdf).` \ No newline at end of file diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/08.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/08.md new file mode 100644 index 00000000..c45a273c --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/08.md @@ -0,0 +1,699 @@ +# 八、变分自编码器(VAE) + +与我们在之前的章节中讨论过的**生成对抗网络**(**GAN**)类似,**变分自编码器**(**VAE**)[1] 属于生成模型家族。 VAE 的生成器能够在导航其连续潜在空间的同时产生有意义的输出。 通过潜向量探索解码器输出的可能属性。 + +在 GAN 中,重点在于如何得出近似输入分布的模型。 VAE 尝试对可解码的连续潜在空间中的输入分布进行建模。 这是 GAN 与 VAE 相比能够生成更真实信号的可能的潜在原因之一。 例如,在图像生成中,GAN 可以生成看起来更逼真的图像,而相比之下,VAE 生成的图像清晰度较差。 + +在 VAE 中,重点在于潜在代码的变分推理。 因此,VAE 为潜在变量的学习和有效贝叶斯推理提供了合适的框架。 例如,带有解缠结表示的 VAE 可以将潜在代码重用于迁移学习。 + +在结构上,VAE 与自编码器相似。 它也由编码器(也称为识别或推理模型)和解码器(也称为生成模型)组成。 VAE 和自编码器都试图在学习潜向量的同时重建输入数据。 + +但是,与自编码器不同,VAE 的潜在空间是连续的,并且解码器本身被用作生成模型。 + +在前面各章中讨论的 GAN 讨论中,也可以对 VAE 的解码器进行调整。 例如,在 MNIST 数据集中,我们能够指定一个给定的单热向量产生的数字。 这种有条件的 VAE 类别称为 CVAE [2]。 也可以通过在损失函数中包含正则化超参数来解开 VAE 潜向量。 这称为 β-VAE [5]。 例如,在 MNIST 中,我们能够隔离确定每个数字的粗细或倾斜角度的潜向量。 本章的目的是介绍: + +* VAE 的原理 +* 了解重新参数化技巧,有助于在 VAE 优化中使用随机梯度下降 +* 有条件的 VAE(CVAE)和 β-VAE 的原理 +* 了解如何使用`tf.keras`实现 VAE + +我们将从谈论 VAE 的基本原理开始。 + +# 1\. 
VAE 原理 + +在生成模型中,我们经常对使用神经网络来逼近输入的真实分布感兴趣: + +![](img/B14853_08_003.png) (Equation 8.1.1) + +在前面的等式中,`θ`表示训练期间确定的参数。 例如,在名人面孔数据集的上下文中,这等效于找到可以绘制面孔的分布。 同样,在 MNIST 数据集中,此分布可以生成可识别的手写数字。 + +在机器学习中,为了执行特定级别的推理,我们有兴趣寻找`P[θ](x, z)`,这是输入`x`和潜在变量`z`之间的联合分布。 潜在变量不是数据集的一部分,而是对可从输入中观察到的某些属性进行编码。 在名人面孔的背景下,这些可能是面部表情,发型,头发颜色,性别等。 在 MNIST 数据集中,潜在变量可以表示数字和书写样式。 + +`P[θ](x, z)`实际上是输入数据点及其属性的分布。 `P[θ](x)`可以从边际分布计算得出: + +![](img/B14853_08_010.png) (Equation 8.1.2) + +换句话说,考虑所有可能的属性,我们最终得到描述输入的分布。 在名人面孔中,如果考虑所有面部表情,发型,头发颜色和性别,将恢复描述名人面孔的分布。 在 MNIST 数据集中,如果考虑所有可能的数字,书写风格等,我们以手写数字的分布来结束。 + +问题在于“公式 8.1.2”很难处理。 该方程式没有解析形式或有效的估计量。 它的参数无法微分。 因此,通过神经网络进行优化是不可行的。 + +使用贝叶斯定理,我们可以找到“公式 8.1.2”的替代表达式: + +![](img/B14853_08_011.png) (Equation 8.1.3) + +`P(z)`是`z`的先验分布。 它不以任何观察为条件。 如果`z`是离散的,而`P[θ](x | z)`是高斯分布,则`P[θ](x)`是高斯的混合。 如果`z`是连续的,则`P[θ](x)`是高斯的无限混合。 + +实际上,如果我们尝试在没有合适的损失函数的情况下建立一个近似`P[θ](x | z)`的神经网络,它将忽略`z`得出一个简单的解`P[θ](x | z) = P[θ](x)`。 因此,“公式 8.1.3”无法为我们提供`P[θ](x)`的良好估计。 或者,“公式 8.1.2”也可以表示为: + +![](img/B14853_08_024.png) (Equation 8.1.4) + +但是,`P[θ](z | x)`也很棘手。 VAE 的目标是在给定输入的情况下,找到一种可预测的分布,该分布易于估计`P[θ](z | x)`,即潜在属性`z`的条件分布的估计。 + +## 变分推理 + +为了使易于处理,VAE 引入了变化推理模型(编码器): + +![](img/B14853_08_030.png) (Equation 8.1.5) + +`Q[φ](z | x)`提供了`P[θ](z | x)`的良好估计。 它既参数化又易于处理。 `Q[φ](z | x)`可以通过优化参数`φ`由深度神经网络近似。 通常,`Q[φ](z | x)`被选择为多元高斯: + +![](img/B14853_08_036.png) (Equation 8.1.6) + +均值`μ(x)`和标准差`σ(x)`均由编码器神经网络使用输入数据点计算得出。 对角线矩阵表示`z`的元素是独立的。 + +在下一节中,我们将求解 VAE 的核心方程。 核心方程式将引导我们找到一种优化算法,该算法将帮助我们确定推理模型的参数。 + +## 核心方程 + +推理模型`Q[φ](z | x)`从输入`x`生成潜向量`z`。 `Q[φ](z | x)`似于自编码器模型中的编码器。 另一方面,从潜在代码`z`重构输入。 `P[θ](x | z)`的作用类似于自编码器模型中的解码器。 要估计`P[θ](x)`,我们必须确定其与`Q[φ](z | x)`和`P[θ](x | z)`的关系。 + +如果`Q[φ](z | x)`是`P[θ](z | x)`的估计值,则 **Kullback-Leibler**(**KL**)的差异决定了这两个条件密度之间的距离: + +![](img/B14853_08_052.png) (Equation 8.1.7) + +使用贝叶斯定理: + +![](img/B14853_08_053.png) (Equation 8.1.8) + +在“公式 8.1.7”中: + +![](img/B14853_08_054.png) (Equation 8.1.9) + +由于`log P[θ](x)`不依赖于`z ~ Q`,因此可能会超出预期。 重新排列“公式 8.1.9”并认识到: + +![](img/B14853_08_057.png),其结果是: + +![](img/B14853_08_058.png) (Equation 8.1.10) + +“公式 8.1.10”是 VAE 的核心。 左侧是项`P[θ](x)`,由于`Q[φ](z | x)`与真实`P[θ](z | x)`的距离,我们使误差最小化。 我们可以记得,的对数不会更改最大值(或最小值)的位置。 给定提供`P[θ](z | x)`良好估计的推断模型,`D[KL](Q[φ](z | x) || P[θ](z | x)`大约为零。 + +右边的第一项`P[θ](x | z)`类似于解码器,该解码器从推理模型中抽取样本以重建输入。 + +第二个项是另一个距离。 这次是在`Q[φ](z | x)`和先前的`P[θ](z)`之间。 “公式 8.1.10”的左侧也称为**变异下界**或**证据下界**(**ELBO**)。 由于 KL 始终为正,因此 ELBO 是`log P[θ](x)`的下限。 通过优化神经网络的参数`φ`和`θ`来最大化 ELBO 意味着: + +* 在将`z`中的`x`属性编码时,`D[KL](Q[φ](z | x) || P[θ](z | x) -> 0`或推理模型变得更好。 +* 右边的`log P[θ](x | z)`最大化了“公式 8.1.10”或解码器模型在从潜在向量`z`重构`x`方面变得更好。 +* 在下一节中,我们将利用“公式 8.1.10”的结构来确定推理模型(编码器)和解码器的损失函数。 + +## 优化 + +“公式 8.1.10”的右侧具有有关 VAE 的`loss`函数的两个重要信息。 解码器项`E[z~Q] [log P[θ](x | z)]`表示生成器从推理模型的输出中提取`z`个样本,以重建输入。 使最大化是指我们将**重构损失**和`L_R`降到最低。 如果假设图像(数据)分布为高斯分布,则可以使用 MSE。 + +如果每个像素(数据)都被认为是伯努利分布,那么损失函数就是二进制互熵。 + +第二项`-D[KL](Q[φ](z | x) || P[θ](z))`易于评估。 根据“公式 8.1.6”,`Q[φ]`是高斯分布。 通常,`P[θ](z) = P(z) = N(0, 1)`也是平均值为零且标准差等于 1.0 的高斯。 在“公式 8.1.11”中,我们看到 KL 项简化为: + +![](img/B14853_08_082.png) (Equation 8.1.11) + +其中`J`是`z`的维。 `μ[j]`和`σ[j]`都是通过推理模型计算的`x`的函数。 要最大化:`-D[KL]`,`σ[j] -> 1`和`μ[j] -> 9`。 `P(z) = N(0, 1)`的选择源于各向同性单位高斯的性质,在具有适当函数的情况下,它可以变形为任意分布[6]。 + +根据“公式 8.1.11”,KL 损失`L_KL`简称为`D[KL]`。 + +总之,在“公式 8.1.12”中将 VAE `loss`函数定义为: + +![](img/B14853_08_094.png) (Equation 8.1.12) + +在给定编码器和解码器模型的情况下,在我们可以构建和训练 VAE(随机采样块,生成潜在属性)之前,还需要解决一个问题。 在下一节中,我们将讨论此问题以及如何使用重新参数化技巧解决它。 + +## 重新参数化技巧 + +“图 8.1.1”的左侧显示了 VAE 网络。 编码器获取输入`x`,并估计潜向量`z`的多元高斯分布的平均值`μ`和标准差`σ`。 
解码器从潜向量`z`中提取样本,以将输入重构为`x_tilde`。 这似乎很简单,直到在反向传播期间发生梯度更新为止: + +![](img/B14853_08_01.png) + +图 8.1.1:带有和不带有重新参数化技巧的 VAE 网络 + +反向传播梯度将不会通过随机**采样**块。 尽管具有用于神经网络的随机输入是可以的,但梯度不可能穿过随机层。 + +解决此问题的方法是将**采样**处理作为输入,如“图 8.1.1”右侧所示。 然后,将样本计算为: + +![](img/B14853_08_101.png) (Equation 8.1.13) + +如果`ε`和`σ`以向量格式表示,则`εσ`是逐元素乘法。 使用“公式 8.1.13”,看起来好像采样直接来自潜在空间一样。 这项技术被称为*重新参数化技巧*。 + +现在,在输入端发生*采样*时,可以使用熟悉的优化算法(例如 SGD,Adam 或 RMSProp)来训练 VAE 网络。 + +在讨论如何在`tf.keras`中实现 VAE 之前,让我们首先展示如何测试经过训练的解码器。 + +## 解码器测试 + +在训练了 VAE 网络之后,可以丢弃推理模型,包括加法和乘法运算符。 为了生成新的有意义的输出,请从用于生成`ε`的高斯分布中抽取样本。“图 8.1.2”向我们展示了解码器的测试设置: + +![](img/B14853_08_02.png) + +图 8.1.2:解码器测试设置 + +通过重新参数化技巧解决了 VAE 上的最后一个问题,我们现在可以在`tf.keras`中实现和训练变分自编码器。 + +## ALAS 与 Keras + +VAE 的结构类似于典型的自编码器。 区别主要在于重新参数化技巧中的高斯随机变量的采样。“列表 8.1.1”显示了使用 MLP 实现的编码器,解码器和 VAE。 + +[此代码也已添加到官方 Keras GitHub 存储库中](https://github.com/keras-team/keras/blob/master/examples/variational_autoencoder.py)。 + +为便于显示潜在代码,将`z`的维设置为 2。编码器仅是两层 MLP,第二层生成均值和对数方差。 对数方差的使用是为了简化 KL 损失和重新参数化技巧的计算。 编码器的第三个输出是使用重新参数化技巧进行的`z`采样。 我们应该注意,在采样函数`exp(0.5 log σ²) = sqrt(σ²) = σ`中,因为`σ > 0`假定它是高斯分布的标准差。 + +解码器也是两层 MLP,它采用`z`的样本来近似输入。 编码器和解码器均使用大小为 512 的中间尺寸。 + +VAE 网络只是将编码器和解码器连接在一起。 `loss`函数是*重建损失*和 *KL 损失*的总和。 在默认的 Adam 优化器上,VAE 网络具有良好的效果。 VAE 网络中的参数总数为 807,700。 + +VAE MLP 的 Keras 代码具有预训练的权重。 要测试,我们需要运行: + +```py +python3 vae-mlp-mnist-8.1.1.py --weights=vae_mlp_mnist.tf +``` + +[完整的代码可以在以下链接中找到](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)。 + +“列表 8.1.1”:`vae-mlp-mnist-8.1.1.py` + +```py +# reparameterization trick +# instead of sampling from Q(z|X), sample eps = N(0,I) +# z = z_mean + sqrt(var)*eps +def sampling(args): + """Reparameterization trick by sampling + fr an isotropic unit Gaussian. +``` + +```py + # Arguments: + args (tensor): mean and log of variance of Q(z|X) +``` + +```py + # Returns: + z (tensor): sampled latent vector + """ +``` + +```py + z_mean, z_log_var = args + # K is the keras backend + batch = K.shape(z_mean)[0] + dim = K.int_shape(z_mean)[1] + # by default, random_normal has mean=0 and std=1.0 + epsilon = K.random_normal(shape=(batch, dim)) + return z_mean + K.exp(0.5 * z_log_var) * epsilon +``` + +```py +# MNIST dataset +(x_train, y_train), (x_test, y_test) = mnist.load_data() +``` + +```py +image_size = x_train.shape[1] +original_dim = image_size * image_size +x_train = np.reshape(x_train, [-1, original_dim]) +x_test = np.reshape(x_test, [-1, original_dim]) +x_train = x_train.astype('float32') / 255 +x_test = x_test.astype('float32') / 255 +``` + +```py +# network parameters +input_shape = (original_dim, ) +intermediate_dim = 512 +batch_size = 128 +latent_dim = 2 +epochs = 50 +``` + +```py +# VAE model = encoder + decoder +# build encoder model +inputs = Input(shape=input_shape, name='encoder_input') +x = Dense(intermediate_dim, activation='relu')(inputs) +z_mean = Dense(latent_dim, name='z_mean')(x) +z_log_var = Dense(latent_dim, name='z_log_var')(x) +``` + +```py +# use reparameterization trick to push the sampling out as input +# note that "output_shape" isn't necessary +# with the TensorFlow backend +z = Lambda(sampling, + output_shape=(latent_dim,), + name='z')([z_mean, z_log_var]) +``` + +```py +# instantiate encoder model +encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder') +``` + +```py +# build decoder model +latent_inputs = Input(shape=(latent_dim,), name='z_sampling') +x = Dense(intermediate_dim, activation='relu')(latent_inputs) +outputs = Dense(original_dim, activation='sigmoid')(x) +``` + +```py +# instantiate decoder model 
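# (descriptive note) the decoder maps the 2-dim latent sample z
# back to the 784-dim (28 x 28) flattened MNIST image through a
# single 512-unit hidden layer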
+decoder = Model(latent_inputs, outputs, name='decoder') +# instantiate VAE model +outputs = decoder(encoder(inputs)[2]) +vae = Model(inputs, outputs, name='vae_mlp') +``` + +```py +if __name__ == '__main__': + parser = argparse.ArgumentParser() + help_ = "Load tf model trained weights" + parser.add_argument("-w", "--weights", help=help_) + help_ = "Use binary cross entropy instead of mse (default)" + parser.add_argument("--bce", help=help_, action='store_true') + args = parser.parse_args() + models = (encoder, decoder) + data = (x_test, y_test) +``` + +```py + # VAE loss = mse_loss or xent_loss + kl_loss + if args.bce: + reconstruction_loss = binary_crossentropy(inputs, + outputs) + else: + reconstruction_loss = mse(inputs, outputs) + + reconstruction_loss *= original_dim + kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var) + kl_loss = K.sum(kl_loss, axis=-1) + kl_loss *= -0.5 + vae_loss = K.mean(reconstruction_loss + kl_loss) + vae.add_loss(vae_loss) + vae.compile(optimizer='adam') +``` + +“图 8.1.3”显示了编码器模型,它是一个 MLP,具有两个输出,即潜向量的均值和方差。 lambda 函数实现了重新参数化技巧,将随机潜在代码的采样推送到 VAE 网络之外: + +![A screenshot of a cell phone Description automatically generated](img/B14853_08_03.png) + +图 8.1.3:VAE MLP 的编码器模型 + +“图 8.1.4”显示了解码器模型。 2 维输入来自 lambda 函数。 输出是重构的 MNIST 数字: + +![A screenshot of a cell phone Description automatically generated](img/B14853_08_04.png) + +图 8.1.4:VAE MLP 的解码器模型 + +“图 8.1.5”显示了完整的 VAE 模型。 通过将编码器和解码器模型结合在一起制成: + +![](img/B14853_08_05.png) + +图 8.1.5:使用 MLP 的 VAE 模型 + +“图 8.1.6”显示了使用`plot_results()`在 50 个周期后潜向量的连续空间。 为简单起见,此函数未在此处显示,但可以在`vae-mlp-mnist-8.1.1.py`的其余代码中找到。 该函数绘制两个图像,即测试数据集标签(“图 8.1.6”)和样本生成的数字(“图 8.1.7”),这两个图像都是`z`的函数。 这两个图都说明了潜在向量如何确定所生成数字的属性: + +![A close up of a plant Description automatically generated](img/B14853_08_06.png) + +图 8.1.6:MNIST 数字标签作为测试数据集(VAE MLP)的潜在向量平均值的函数。 原始图像可以在该书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/tree/master/chapter8-vae)中找到。 + +浏览时,连续空格始终会产生与 MNIST 数字相似的输出。 例如,数字 9 的区域接近数字 7 的区域。从中心附近的 9 移动到左下角会将数字变形为 7。从中心向上移动会将生成的数字从 3 更改为 5,最后变为 0.数字的变形在“图 8.1.7”中更明显,这是解释“图 8.1.6”的另一种方式。 + +在“图 8.1.7”中,显示生成器输出。 显示了潜在空间中数字的分布。 可以观察到所有数字都被表示。 由于中心附近分布密集,因此变化在中间迅速,在平均值较高的区域则缓慢。 我们需要记住,“图 8.1.7”是“图 8.1.6”的反映。 例如,数字 0 在两个图的左上象限中,而数字 1 在右下象限中。 + +“图 8.1.7”中存在一些无法识别的数字,尤其是在右上象限中。 从“图 8.1.6”可以看出,该区域大部分是空的,并且远离中心: + +![](img/B14853_08_07.png) + +图 8.1.7:根据潜在向量平均值(VAE MLP)生成的数字。 为了便于解释,均值的范围类似于图 8.1.6 + +在本节中,我们演示了如何在 MLP 中实现 VAE。 我们还解释了导航潜在空间的结果。 在的下一部分中,我们将使用 CNN 实现相同的 VAE。 + +## 带有 CNN 的 AE + +在原始论文《自编码变分贝叶斯》[1]中,使用 MLP 来实现 VAE 网络,这与我们在上一节中介绍的类似。 在本节中,我们将证明使用 CNN 将显着提高所产生数字的质量,并将参数数量显着减少至 134,165。 + +“列表 8.1.3”显示了编码器,解码器和 VAE 网络。 [该代码也被添加到了官方的 Keras GitHub 存储库中](https://github.com/keras-team/keras/blob/master/examples/variational_autoencoder_deconv.py)。 + +为简洁起见,不再显示与 MLP VAE 类似的某些代码行。 编码器由两层 CNN 和两层 MLP 组成,以生成潜在代码。 编码器的输出结构与上一节中看到的 MLP 实现类似。 解码器由一层`Dense`和三层转置的 CNN 组成。 + +VAE CNN 的 Keras 代码具有预训练的权重。 要测试,我们需要运行: + +```py +python3 vae-cnn-mnist-8.1.2.py --weights=vae_cnn_mnist.tf +``` + +“列表 8.1.3”:`vae-cnn-mnist-8.1.2.py` + +使用 CNN 层的`tf.keras`中的 VAE: + +```py +# network parameters +input_shape = (image_size, image_size, 1) +batch_size = 128 +kernel_size = 3 +filters = 16 +latent_dim = 2 +epochs = 30 +``` + +```py +# VAE model = encoder + decoder +# build encoder model +inputs = Input(shape=input_shape, name='encoder_input') +x = inputs +for i in range(2): + filters *= 2 + x = Conv2D(filters=filters, + kernel_size=kernel_size, + activation='relu', + strides=2, + padding='same')(x) +``` + +```py +# shape 
info needed to build decoder model +shape = K.int_shape(x) +``` + +```py +# generate latent vector Q(z|X) +x = Flatten()(x) +x = Dense(16, activation='relu')(x) +z_mean = Dense(latent_dim, name='z_mean')(x) +z_log_var = Dense(latent_dim, name='z_log_var')(x) +``` + +```py +# use reparameterization trick to push the sampling out as input +# note that "output_shape" isn't necessary +# with the TensorFlow backend +z = Lambda(sampling, + output_shape=(latent_dim,), + name='z')([z_mean, z_log_var]) +``` + +```py +# instantiate encoder model +encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder') +``` + +```py +# build decoder model +latent_inputs = Input(shape=(latent_dim,), name='z_sampling') +x = Dense(shape[1] * shape[2] * shape[3], + activation='relu')(latent_inputs) +x = Reshape((shape[1], shape[2], shape[3]))(x) +``` + +```py +for i in range(2): + x = Conv2DTranspose(filters=filters, + kernel_size=kernel_size, + activation='relu', + strides=2, + padding='same')(x) + filters //= 2 +``` + +```py +outputs = Conv2DTranspose(filters=1, + kernel_size=kernel_size, + activation='sigmoid', + padding='same', + name='decoder_output')(x) +``` + +```py +# instantiate decoder model +decoder = Model(latent_inputs, outputs, name='decoder') +``` + +```py +# instantiate VAE model +outputs = decoder(encoder(inputs)[2]) +vae = Model(inputs, outputs, name='vae') +``` + +“图 8.1.8”显示了 CNN 编码器模型的两个输出,即潜向量的均值和方差。 lambda 函数实现了重新参数化技巧,将随机潜码的采样推送到 VAE 网络之外: + +![A screenshot of a cell phone Description automatically generated](img/B14853_08_08.png) + +图 8.1.8:VAE CNN 的编码器 + +“图 8.1.9”显示了 CNN 解码器模型。 2 维输入来自 lambda 函数。 输出是重构的 MNIST 数字: + +![A screenshot of a cell phone Description automatically generated](img/B14853_08_09.png) + +图 8.1.9:VAE CNN 的解码器 + +“图 8.1.10”显示完整的 CNN VAE 模型。 通过将编码器和解码器模型结合在一起制成: + +![](img/B14853_08_10.png) + +图 8.1.10:使用 CNN 的 VAE 模型 + +对 VAE 进行了 30 个周期的训练。“图 8.1.11”显示了在导航 VAE 的连续潜在空间时数字的分布。 例如,从中间到右边从 2 变为 0: + +![A picture containing tree Description automatically generated](img/B14853_08_11.png) + +图 8.1.11:MNIST 数字标签作为测试数据集(VAE CNN)的潜在向量平均值的函数。 原始图像可以在该书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/tree/master/chapter8-vae)中找到。 + +“图 8.1.12”向我们展示了生成模型的输出。 从质量上讲,与“图 8.1.7”(具有 MLP 实现)相比,模棱两可的位数更少: + +![](img/B14853_08_12.png) + +图 8.1.12:根据潜在向量平均值(VAE CNN)生成的数字。 为了便于解释,均值的范围类似于图 8.1.11 + +前的两节讨论了使用 MLP 或 CNN 的 VAE 的实现。 我们分析了两种实现方式的结果,结果表明 CNN 可以减少参数数量并提高感知质量。 在下一节中,我们将演示如何在 VAE 中实现条件,以便我们可以控制要生成的数字。 + +# 2\. 
条件 VAE(CVAE) + +有条件的 VAE [2]与 CGAN 相似。 在 MNIST 数据集的上下文中,如果随机采样潜在空间,则 VAE 无法控制将生成哪个数字。 CVAE 可以通过包含要产生的数字的条件(单标签)来解决此问题。 该条件同时施加在编码器和解码器输入上。 + +正式地,将“公式 8.1.10”中 VAE 的核心公式修改为包括条件`c`: + +![](img/B14853_08_113.png) (Equation 8.2.1) + +与 VAE 相似,“公式 8.2.1”表示如果要最大化输出条件`c`和`P[θ](x | c)`,则必须最小化两个损失项: + +* 给定潜在向量和条件,解码器的重建损失。 +* 给定潜在向量和条件的编码器之间的 KL 损失以及给定条件的先验分布。 与 VAE 相似,我们通常选择`P[θ](x | c) = P(x | c) = N(0, 1)`。 + +实现 CVAE 需要对 VAE 的代码进行一些修改。 对于 CVAE,使用 VAE CNN 实现是因为它可以形成一个较小的网络,并产生感知上更好的数字。 + +“列表 8.2.1”突出显示了针对 MNIST 数字的 VAE 原始代码所做的更改。 编码器输入现在是原始输入图像及其单标签的连接。 解码器输入现在是潜在空间采样与其应生成的图像的一键热标签的组合。 参数总数为 174,437。 与 β-VAE 相关的代码将在本章下一节中讨论。 + +损失函数没有改变。 但是,在训练,测试和结果绘制过程中会提供单热标签。 + +“列表 8.2.1”:`cvae-cnn-mnist-8.2.1.py` + +`tf.keras`中使用 CNN 层的 CVAE。 重点介绍了为支持 CVAE 而进行的更改: + +```py +# compute the number of labels +num_labels = len(np.unique(y_train)) +``` + +```py +# network parameters +input_shape = (image_size, image_size, 1) +label_shape = (num_labels, ) +batch_size = 128 +kernel_size = 3 +filters = 16 +latent_dim = 2 +epochs = 30 +``` + +```py +# VAE model = encoder + decoder +# build encoder model +inputs = Input(shape=input_shape, name='encoder_input') +y_labels = Input(shape=label_shape, name='class_labels') +x = Dense(image_size * image_size)(y_labels) +x = Reshape((image_size, image_size, 1))(x) +x = keras.layers.concatenate([inputs, x]) +for i in range(2): + filters *= 2 + x = Conv2D(filters=filters, + kernel_size=kernel_size, + activation='relu', + strides=2, + padding='same')(x) +``` + +```py +# shape info needed to build decoder model +shape = K.int_shape(x) +``` + +```py +# generate latent vector Q(z|X) +x = Flatten()(x) +x = Dense(16, activation='relu')(x) +z_mean = Dense(latent_dim, name='z_mean')(x) +z_log_var = Dense(latent_dim, name='z_log_var')(x) +``` + +```py +# use reparameterization trick to push the sampling out as input +# note that "output_shape" isn't necessary +# with the TensorFlow backend +z = Lambda(sampling, + output_shape=(latent_dim,), + name='z')([z_mean, z_log_var]) +``` + +```py +# instantiate encoder model +encoder = Model([inputs, y_labels], + [z_mean, z_log_var, z], + name='encoder') +``` + +```py +# build decoder model +latent_inputs = Input(shape=(latent_dim,), name='z_sampling') +x = concatenate([latent_inputs, y_labels]) +x = Dense(shape[1]*shape[2]*shape[3], activation='relu')(x) +x = Reshape((shape[1], shape[2], shape[3]))(x) +``` + +```py +for i in range(2): + x = Conv2DTranspose(filters=filters, + kernel_size=kernel_size, + activation='relu', + strides=2, + padding='same')(x) + filters //= 2 +``` + +```py +outputs = Conv2DTranspose(filters=1, + kernel_size=kernel_size, + activation='sigmoid', + padding='same', + name='decoder_output')(x) +``` + +```py +# instantiate decoder model +decoder = Model([latent_inputs, y_labels], + outputs, + name='decoder') +# instantiate vae model +outputs = decoder([encoder([inputs, y_labels])[2], y_labels]) +cvae = Model([inputs, y_labels], outputs, name='cvae') +``` + +“图 8.2.1”显示了 CVAE 模型的编码器。 附加输入,即单热向量`class_labels`形式的条件标签表示: + +![A screenshot of a cell phone Description automatically generated](img/B14853_08_13.png) + +图 8.2.1:CVAE CNN 中的编码器。 输入现在包括 VAE 输入和条件标签的连接 + +“图 8.2.2”显示了 CVAE 模型的解码器。 附加输入,即单热向量`class_labels`形式的条件标签表示: + +![A screenshot of a cell phone Description automatically generated](img/B14853_08_14.png) + +图 8.2.2:CVAE CNN 中的解码器。 输入现在包括 z 采样和条件标签的连接 + +“图 8.2.3”显示了完整的 CVAE 模型,该模型是编码器和解码器结合在一起的。 附加输入,即单热向量`class_labels`形式的条件标签: + +![](img/B14853_08_15.png) + +图 8.2.3:使用 CNN 的 CVAE 模型。输入现在包含一个 VAE 输入和一个条件标签 + +在“图 
8.2.4”中,每个标记的平均值分布在 30 个周期后显示。 与前面章节中的“图 8.1.6”和“图 8.1.11”不同,每个标签不是集中在一个区域上,而是分布在整个图上。 这是预期的,因为潜在空间中的每个采样都应生成一个特定的数字。 浏览潜在空间会更改该特定数字的属性。 例如,如果指定的数字为 0,则在潜伏空间中导航仍将产生 0,但是诸如倾斜角度,厚度和其他书写样式方面的属性将有所不同。 + +![](img/B14853_08_16.png) + +图 8.2.4:作为测试数据集(CVAE CNN)的潜在向量平均值的函数的 MNIST 数字标签。 原始图像可以在该书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/tree/master/chapter8-vae)中找到。 + +“图 8.2.4”在“图 8.2.5”中更清楚地显示,数字 0 到 5。每个帧都有相同的数字,并且属性在我们浏览时顺畅地变化。 潜在代码: + +![](img/B14853_08_17.png) + +图 8.2.5:根据潜在向量平均值和单热点标签(CVAE CNN)生成的数字 0 至 5。 为了便于解释,均值的范围类似于图 8.2.4。 + +“图 8.2.6”显示“图 8.2.4”,用于数字 6 至 9: + +![](img/B14853_08_18.png) + +图 8.2.6:根据潜在向量平均值和单热点标签(CVAE CNN)生成的数字 6 至 9。 为了便于解释,均值的范围类似于图 8.2.4。 + +为了便于比较,潜向量的值范围与“图 8.2.4”中的相同。 使用预训练的权重,可以通过执行以下命令来生成数字(例如 0): + +```py +python3 cvae-cnn-mnist-8.2.1.py –bce --weights=cvae_cnn_mnist.tf --digit=0 +``` + +在“图 8.2.5”和“图 8.2.6”中,可以注意到,每个数字的宽度和圆度(如果适用)随`z[0]`的变化而变化。 从左到右追踪。 同时,当`z[1]`从上到下导航时,每个数字的倾斜角度和圆度(如果适用)也会发生变化。 随着我们离开分布中心,数字的图像开始退化。 这是可以预期的,因为潜在空间是一个圆形。 + +属性中其他明显的变化可能是数字特定的。 例如,数字 1 的水平笔划(手臂)在左上象限中可见。 数字 7 的水平笔划(纵横线)只能在右象限中看到。 + +在下一节中,我们将发现 CVAE 实际上只是另一种称为 β-VAE 的 VAE 的特例。 + +# 3\. β-VAE – 具有纠缠的潜在表示形式的 VAE + +在“第 6 章”,“非纠缠表示 GAN”中,讨论了潜码非纠缠表示的概念和重要性。 我们可以回想起,一个纠缠的表示是单个潜伏单元对单个生成因子的变化敏感,而相对于其他因子的变化相对不变[3]。 更改潜在代码会导致生成的输出的一个属性发生更改,而其余属性保持不变。 + +在同一章中,InfoGAN [4]向我们展示了对于 MNIST 数据集,可以控制生成哪个数字以及书写样式的倾斜度和粗细。 观察上一节中的结果,可以注意到,VAE 在本质上使潜向量维解开了一定程度。 例如,查看“图 8.2.6”中的数字 8,从上到下导航`z[1]`会减小宽度和圆度,同时顺时针旋转数字。 从左至右增加`z[0]`也会在逆时针旋转数字时减小宽度和圆度。 换句话说,`z[1]`控制顺时针旋转,而`z[0]`影响逆时针旋转,并且两者都改变​​宽度和圆度。 + +在本节中,我们将演示对 VAE 损失函数的简单修改会迫使潜在代码进一步解开纠缠。 修改为正恒重`β > 1`,用作 KL 损失的调节器: + +![](img/B14853_08_121.png) (Equation 8.3.1) + +VAE 的这种变化称为 β-VAE [5]。 `β`的隐含效果是更严格的标准差。 换句话说,`β`强制后验分布中的潜码`Q[φ](z | x)`独立。 + +实现 β-VAE 很简单。 例如,对于上一个示例中的 CVAE,所需的修改是`kl_loss`中的额外`beta`因子: + +```py +kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var) +kl_loss = K.sum(kl_loss, axis=-1) +kl_loss *= -0.5 * beta +``` + +CVAE 是 β-VAE 的特例,其中`β = 1`。 其他一切都一样。 但是,确定的值需要一些反复试验。 为了潜在的代码独立性,在重构误差和正则化之间必须有一个仔细的平衡。 解缠最大在`β = 9`附近。 当中`β = 9`的值时,β-VAE 仅被迫学习一个解纠缠的表示,而忽略另一个潜在维度。 + +“图 8.3.1”和“图 8.3.2”显示 β-VAE 的潜向量平均值,其中`β = 9`和`β = 10`: + +![](img/B14853_08_19.png) + +图 8.3.1:MNIST 数字标签与测试数据集的潜在向量平均值的函数(β-VAE,`β = 9`)。 原始图像可以在该书的 [GitHub 存储库](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/tree/master/chapter8-vae)中找到。 + +`β = 9`时,与 CVAE 相比,分布具有较小的标准差。 在`β = 10`的情况下,仅学习了潜在代码。 分布实际上缩小为一个维度,编码器和解码器忽略了第一潜码`z[0]`。 + +![](img/B14853_08_20.png) + +图 8.3.2:MNIST 数字标签与测试数据集的潜向量平均值的函数(β-VAE 和`β = 10`) + +[原始图像可以在该书的 GitHub 存储库中找到](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/tree/master/chapter8-vae)。 + +这些观察结果反映在“图 8.3.3”中。 具有`β = 9`的 β-VAE 具有两个实际上独立的潜在代码。 `z[0]`确定书写样式的倾斜度,而`z[1]`指定数字的宽度和圆度(如果适用)。 对于中`β = 10`的 β-VAE,`z[0]`被静音。 `z[0]`的增加不会显着改变数字。`z[1]`确定书写样式的倾斜角度和宽度: + +![A picture containing grass, window Description automatically generated](img/B14853_08_21.png) + +图 8.3.3:根据潜在向量平均值和单热点标签(β-VAE,`β = 1, 9, 10`)生成的数字 0 至 3。 为了便于解释,均值的范围类似于图 8.3.1。 + +β-VAE 的`tf.keras`代码具有预训练的权重。 要使用`β = 9`生成数字 0 来测试 β-VAE,我们需要运行以下命令: + +```py +python3 cvae-cnn-mnist-8.2.1.py --beta=9 --bce --weights=beta-cvae_cnn_mnist.tf --digit=0 +``` + +总而言之,我们已经证明与 GAN 相比,在 β-VAE 上更容易实现解缠表示学习。 我们所需要做的就是调整单个超参数。 + +# 4\. 
总结 + +在本章中,我们介绍了 VAE 的原理。 正如我们从 VAE 原理中学到的那样,从两次尝试从潜在空间创建合成输出的角度来看,它们都与 GAN 相似。 但是,可以注意到,与 GAN 相比,VAE 网络更简单,更容易训练。 越来越清楚的是 CVAE 和 β-VAE 在概念上分别类似于条件 GAN 和解缠表示 GAN。 + +VAE 具有消除潜在向量纠缠的内在机制。 因此,构建 β-VAE 很简单。 但是,我们应该注意,可解释和解开的代码对于构建智能体很重要。 + +在下一章中,我们将专注于强化学习。 在没有任何先验数据的情况下,智能体通过与周围的世界进行交互来学习。 我们将讨论如何为智能体的正确行为提供奖励,并为错误的行为提供惩罚。 + +# 5\. 参考 + +1. `Diederik P. Kingma and Max Welling. Auto-encoding Variational Bayes. arXiv preprint arXiv:1312.6114, 2013 (https://arxiv.org/pdf/1312.6114.pdf).` +1. `Kihyuk Sohn, Honglak Lee, and Xinchen Yan. Learning Structured Output Representation Using Deep Conditional Generative Models. Advances in Neural Information Processing Systems, 2015 (http://papers.nips.cc/paper/5775-learning-structured-output-representation-using-deep-conditional-generative-models.pdf).` +1. `Yoshua Bengio, Aaron Courville, and Pascal Vincent. Representation Learning.` +1. `A Review and New Perspectives. IEEE transactions on Pattern Analysis and Machine Intelligence 35.8, 2013: 1798-1828 (https://arxiv.org/pdf/1206.5538.pdf).` +1. `Xi Chen et al.: Infogan: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets. Advances in Neural Information Processing Systems, 2016 (http://papers.nips.cc/paper/6399-infogan-interpretable-representation-learning-by-information-maximizing-generative-adversarial-nets.pdf).` +1. `I. Higgins, L. Matthey, A. Pal, C. Burgess, X. Glorot, M. Botvinick, S. Mohamed, and A. Lerchner. -VAE: Learning Basic Visual Concepts with a Constrained Variational Framework. ICLR, 2017 (https://openreview.net/pdf?id=Sy2fzU9gl).` +1. `Carl Doersch. Tutorial on variational autoencoders. arXiv preprint arXiv:1606.05908, 2016 (https://arxiv.org/pdf/1606.05908.pdf).` \ No newline at end of file diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/09.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/09.md new file mode 100644 index 00000000..6795b85d --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/09.md @@ -0,0 +1,1187 @@ +# 九、深度强化学习 + +**强化学习**(**RL**)是智能体程序用于决策的框架。 智能体不一定是软件实体,例如您在视频游戏中可能看到的那样。 相反,它可以体现在诸如机器人或自动驾驶汽车之类的硬件中。 内在的智能体可能是充分理解和利用 RL 的最佳方法,因为物理实体与现实世界进行交互并接收响应。 + +该智能体位于**环境**中。 环境具有**状态**,可以部分或完全观察到。 该智能体具有一组**操作**,可用于与环境交互。 动作的结果将环境转换为新状态。 执行动作后,会收到相应的标量**奖励**。 + +智能体的目标是通过学习**策略**来最大化累积的未来奖励,该策略将决定在特定状态下应采取的行动。 + +RL 与人类心理学有很强的相似性。 人类通过体验世界来学习。 错误的行为会导致某种形式的惩罚,将来应避免使用,而正确的行为应得到奖励并应予以鼓励。 这种与人类心理学的强相似之处使许多研究人员相信 RL 可以将引向真正的**人工智能**(**AI**)。 + +RL 已经存在了几十年。 但是,除了简单的世界模型之外,RL 还在努力扩展规模。 这是,其中**深度学习**(**DL**)开始发挥作用。 它解决了这个可扩展性问题,从而开启了**深度强化学习**(**DRL**)的时代。 在本章中,我们的重点是 DRL。 DRL 中值得注意的例子之一是 DeepMind 在智能体上的工作,这些智能体能够在不同的视频游戏上超越最佳的人类表现。 + +在本章中,我们将讨论 RL 和 DRL。 + +总之,本章的目的是介绍: + +* RL 的原理 +* RL 技术,Q 学习 +* 高级主题,包括**深度 Q 网络**(**DQN**)和**双重 Q 学习**(**DDQN**) +* 关于如何使用`tf.keras`在 Python 和 DRL 上实现 RL 的说明 + +让我们从 RL 的基本原理开始。 + +# 1\. 
强化学习原理(RL)

“图 9.1.1”显示了用于描述 RL 的感知-动作-学习循环。环境是放在地板上的一个汽水罐。智能体是一个移动机器人,其目标是拾取汽水罐。它观察周围的环境,并通过车载摄像头跟踪汽水罐的位置。观察结果以状态的形式进行汇总,机器人将使用该状态来决定要采取的动作。所采取的动作可能与低级控制有关,例如每个车轮的旋转角度/速度、手臂每个关节的旋转角度/速度,以及抓手是打开还是关闭。

或者,动作也可以是高级控制动作,例如向前/向后移动机器人、以特定角度转向以及抓取/释放。使抓手远离汽水罐的任何动作都会得到负奖励;缩小抓手位置与汽水罐之间距离的任何动作都会获得正奖励;当机械臂成功捡起汽水罐时,它会收到丰厚的奖励。RL 的目标是学习一种最优策略,该策略可帮助机器人决定在给定状态下采取哪种动作,以最大化累积的折扣奖励:

![](img/B14853_09_01.png)

图 9.1.1:RL 中的感知-动作-学习循环

形式上,RL 问题可以描述为**马尔可夫决策过程**(**MDP**)。

为简单起见,我们将假定环境是*确定性*的,即在给定状态下执行某个动作将始终导致已知的下一个状态和奖励。在本章的后面部分,我们将研究如何考虑随机性。在时间步`t`时:

* 环境处于状态空间`S`中的状态`s[t]`,该状态可以是离散的,也可以是连续的。起始状态为`s[0]`,终止状态为`s[T]`。
* 智能体遵循策略`π(a[t] | s[t])`,从动作空间`A`中采取动作`a[t]`。`A`可以是离散的或连续的。
* 环境按照状态转移动态`T(s[t + 1] | s[t], a[t])`转换到新状态`s[t + 1]`。下一个状态仅取决于当前状态和动作。智能体不知道`T`。
* 智能体通过奖励函数收到标量奖励,`r[t + 1] = R(s[t], a[t])`,其中`r: A x S -> R`。奖励仅取决于当前状态和动作。智能体不知道`R`。
* 将来的奖励以`γ^k`折扣,其中`γ ∈ [0, 1]`,`k`是未来的时间步数。
* *地平线*`H`是完成从`s[0]`到`s[T]`的一个剧集所需的时间步数`T`。

该环境可以是完全可观察的,也可以是部分可观察的。后者也称为**部分可观察的 MDP**,即 **POMDP**。在大多数情况下,完全观察环境是不现实的。为了提高可观察性,除当前观测值外,还会考虑过去的观测值。状态由对环境的足够多的观察组成,足以让策略决定采取哪种动作。回顾“图 9.1.1”,状态可以是机器人摄像头估计出的汽水罐相对于机器人抓手的三维位置。

每当环境转换到新状态时,智能体都会收到标量奖励`r[t + 1]`。在“图 9.1.1”中,每当机器人靠近汽水罐时,奖励可能为 +1;当机器人远离汽水罐时,奖励为 -1;当机器人关闭抓手并成功捡起汽水罐时,奖励为 +100。智能体的目标是学习一种最优策略`π*`,该策略可使所有状态的回报最大化:

![](img/14853_09_012.png) (Equation 9.1.1)

回报定义为折扣累积奖励`R[t] = Σ γ^k r[t+k], k = 0, ..., T`。从“公式 9.1.1”可以看出,由于通常`γ^k < 1.0`,与立即获得的奖励相比,未来的奖励权重较低。在极端情况下,当`γ = 0`时,只有立即获得的奖励才重要;当`γ = 1`时,将来的奖励与立即奖励的权重相同。

遵循任意策略`π`时,回报可以解释为给定状态的值的度量:

![](img/14853_09_019.png) (Equation 9.1.2)

换句话说,RL 问题,即智能体的目标,是学习一种最优策略,使所有状态`s`的`V^π`最大化:

![](img/14853_09_021.png) (Equation 9.1.3)

最优策略的值函数就是`V*`。在“图 9.1.1”中,最优策略是生成最短动作序列、使机器人越来越靠近汽水罐直至将其拾起的策略。状态越接近目标状态,其值越高。可以将导向目标(或终止状态)的事件序列建模为策略的*轨迹*(trajectory)或*推演*(rollout):

![](img/14853_09_023.png) (Equation 9.1.4)

如果 MDP 是剧集式(episodic)的,则当智能体到达终止状态`s[T]`时,状态将重置为`s[0]`。如果`T`是有限的,则地平线是有限的;否则,地平线是无限的。在“图 9.1.1”中,如果 MDP 是剧集式的,则在收集汽水罐后,机器人可能会寻找另一个汽水罐来拾取,于是 RL 问题重新开始。

因此,RL 的主要目标是找到一种使每个状态的值最大化的策略。在下一节中,我们将介绍可用于最大化值函数的策略学习算法。

# 2\. Q 值

如果 RL 问题是找到`π*`,那么智能体如何通过与环境交互来学习呢?“公式 9.1.3”并未明确给出要尝试的动作,以及用于计算回报的后续状态。在 RL 中,使用 Q 值来学习`π*`更容易:

![](img/14853_09_026.png) (Equation 9.2.1)

其中:

![](img/14853_09_027.png) (Equation 9.2.2)

换句话说,“公式 9.2.1”不是寻找使所有状态的值最大化的策略,而是寻找使所有状态的质量(Q)值最大化的动作。在找到 Q 值函数之后,`V*`与`π*`分别由“公式 9.2.2”和“公式 9.1.3”确定。

如果对于每个动作,都可以观察到奖励和下一个状态,则可以制定以下迭代式的试错算法来学习 Q 值:

![](img/14853_09_030.png) (Equation 9.2.3)

为了简化符号,`s'`和`a'`分别表示下一个状态和动作。“公式 9.2.3”被称为贝尔曼方程,它是 Q 学习算法的核心。Q 学习尝试根据当前状态和动作来近似回报或值的一阶展开(“公式 9.1.2”)。在对环境动态一无所知的情况下,智能体尝试执行动作`a`,并观察以奖励`r`和下一个状态`s'`的形式发生的情况。`max[a'] Q(s', a')`选择下一个逻辑动作,即为下一个状态提供最大 Q 值的动作。有了“公式 9.2.3”中的所有项,该当前状态-动作对的 Q 值就会得到更新。迭代地执行更新,最终将使智能体学会 Q 值函数。

Q 学习是一种*脱离策略*(off-policy)的 RL 算法:它不直接从所优化的策略中采样经验,就能学习改进该策略。换句话说,Q 值的学习与智能体所使用的基础策略无关。当 Q 值函数收敛时,才使用“公式 9.2.1”确定最优策略。

在给出如何使用 Q 学习的示例之前,请注意,智能体必须在利用到目前为止所学知识的同时,不断探索其环境。这是 RL 中的问题之一:在*探索*和*利用*之间找到适当的平衡。通常,在学习开始时,动作是随机的(探索);随着学习的进行,智能体会利用 Q 值(利用)。例如,一开始,90% 的动作是随机的,10% 的动作来自 Q 值函数;在每个剧集结束时,随机动作的比例逐渐降低;最终,动作中只有 10% 是随机的,90% 来自 Q 值函数。

在下一节中,我们将给出在简单的确定性环境中如何使用 Q 学习的具体示例。

# 3\. 
Q 学习实例 + +为了说明 Q 学习算法,我们需要考虑一个简单的确定性环境,如图“图 9.3.1”所示。 环境具有六个状态。 + +显示允许的过渡的奖励。 在两种情况下,奖励是非零的。 转换为**目标**(`G`)状态可获得 +100 的奖励,同时移至**洞**(`H`)状态具有 -100 奖励。 这两个状态是终端状态,从**开始**状态构成一个剧集的结尾: + +![](img/B14853_09_02.png) + +图 9.3.1:简单确定性世界中的奖励 + +为了使每个状态的身份正式化,我们使用`(行, 列)`标识符,如图“图 9.3.2”所示。 由于智能体尚未了解有关其环境的任何信息,因此“图 9.3.2”中所示的 Q 表的初始值为零。 在此示例中,折扣因子`γ = 0.9`。 回想一下,在当前 Q 值的估计中,折扣因子确定了未来 Q 值的权重,该权重是步数`γ^k`的函数。 在“公式 9.2.3”中,我们仅考虑近期 Q 值`k = 1`。 + +![](img/B14853_09_03.png) + +图 9.3.2:简单确定性环境中的状态和智能体的初始 Q 表 + +最初,智能体采用的策略是 90% 的时间选择随机操作,并 10% 的时间使用 Q 表。 假设第一个动作是随机选择的,并且指示向右移动。“图 9.3.3”说明了向右移动时状态`(0, 0)`的新 Q 值的计算。 下一个状态是`(0, 1)`。 奖励为 0,所有下一个状态的 Q 值的最大值为零。 因此,向右移动的状态`(0, 0)`的 Q 值保持为 0。 + +为了轻松跟踪初始状态和下一个状态,我们在环境和 Q 表上使用不同的灰色阴影-初始状态浅灰色,下一个状态灰色。 + +在为下一个状态选择下一个动作时,候选动作位于较粗的边框中: + +![](img/B14853_09_04.png) + +图 9.3.3:假设智能体采取的行动是向右移动,则显示状态`(0, 0)`的 Q 值的更新 + +假设下一个随机选择的动作是向下移动。“图 9.3.4”显示状态`(0, 1)`的 Q 值沿向下方向的移动没有​​变化: + +![](img/B14853_09_05.png) + +图 9.3.4:假设智能体选择的动作是向下移动,则显示状态`(0, 1)`的 Q 值的更新 + +在“图 9.3.5”中,智能体的第三个随机动作是向右移动。 + +![](img/B14853_09_06.png) + +图 9.3.5:假设智能体选择的动作是向右移动,则显示状态`(1, 1)`的 Q 值的更新 + +它遇到了,`H`状态,并获得了 -100 奖励。 这次,更新不为零。 向右移动时,状态`(1, 1)`的新 Q 值为 -100。 注意,由于这是终端状态,因此没有下一个状态。 一集刚刚结束,**智能体**返回到**开始**状态。 + +假设**智能体**仍处于探索模式,如图“图 9.3.6”所示: + +![](img/B14853_09_07.png) + +图 9.3.6:假设智能体选择的动作是向右连续两次移动,则显示状态`(0, 1)`的 Q 值的更新 + +为第二集采取的第一步是向右移动。 正如预期的那样,更新为 0。但是,它选择的第二个随机动作也是向右移动。 智能体到达`G`状态并获得 +100 的巨额奖励。 向右移动的状态`(0, 1)`的 Q 值变为 100。完成第二集,并且**智能体**返回到**启动**状态。 + +在第三集开始时,智能体采取的随机行动是向右移动。 现在,状态`(0, 0)`的 Q 值将更新为非零值,因为下一个状态的可能动作将最大 Q 值设为 100。“图 9.3.7”显示了所涉及的计算。 下一个状态`(0, 1)`的 Q 值波动回到较早的状态`(0, 0)`。 这就像对帮助找到`G`状态的早期状态表示赞赏。 + +![](img/B14853_09_08.png) + +图 9.3.7:假设智能体选择的动作是向右移动,则显示状态`(0, 0)`的 Q 值的更新 + +Q 表的进步很大。 实际上,在下一集中,如果由于某种原因该策略决定使用 Q 表而不是随机探索环境,则第一个动作是根据“图 9.3.8”中的计算向右移动。 在 Q 表的第一行中,导致最大 Q 值的动作是向右移动。 对于下一个状态`(0, 1)`,Q 表的第二行表明下一个动作仍然是向右移动。 **智能体**已成功实现其目标。 该策略指导智能体采取了正确的措施来实现其目标: + +![](img/B14853_09_09.png) + +图 9.3.8:在这种情况下,智能体的策略决定利用 Q 表来确定状态`(0, 0)`和`(0, 1)`的动作。 Q 表建议两个状态都向右移动 + +如果 Q 学习算法继续无限期运行,则 Q 表将收敛。 收敛的假设是 RL 问题必须是具有有限奖励的确定性 MDP,并且所有状态都将被无限次地访问。 + +在下一节中,我们将使用 Python 模拟环境。 我们还将展示 Q 学习算法的代码实现。 + +## 用 Python 进行 Q 学习 + +上一节中讨论的环境和 Q 学习可以在 Python 中实现。 由于该策略只是一个简单的表,因此在此时,无需使用`tf.keras`库。“列表 9.3.1”显示了`q-learning-9.3.1.py`,它是使用`QWorld`类实现的简单确定性世界(环境,智能体,操作和 Q 表算法)的实现。 为简洁起见,未显示处理用户界面的函数。 + +在此示例中,环境动态由`self.transition_table`表示。 在每个动作中,`self.transition_table`确定下一个状态。 执行动作的奖励存储在`self.reward_table`中。 每次通过`step()`函数执行动作时,都要查阅这两个表。 Q 学习算法由`update_q_table()`函数实现。 每当智能体需要决定要采取的操作时,它都会调用`act()`函数。 策略可以使用 Q 表随机抽取或决定。 所选动作是随机的机会百分比存储在`self.epsilon`变量中,该变量由`update_epsilon()`函数使用固定的`epsilon_decay`更新。 + +在执行“列表 9.3.1”中的代码之前,我们需要运行: + +```py +sudo pip3 install termcolor +``` + +安装`termcolor`包。 该包有助于可视化终端上的文本输出。 + +[完整的代码可以在 GitHub 上找到](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)。 + +“列表 9.3.1”:`q-learning-9.3.1.py` + +具有六个状态的简单确定性 MDP: + +```py +from collections import deque +import numpy as np +import argparse +import os +import time +from termcolor import colored +``` + +```py +class QWorld: + def __init__(self): + """Simulated deterministic world made of 6 states. + Q-Learning by Bellman Equation. 
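        The world is a 2x3 grid of states 0-5:
        state 2 is the terminal Goal (+100 reward)
        and state 5 is the terminal Hole (-100 reward).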
+ """ + # 4 actions + # 0 - Left, 1 - Down, 2 - Right, 3 - Up + self.col = 4 +``` + +```py + # 6 states + self.row = 6 +``` + +```py + # setup the environment + self.q_table = np.zeros([self.row, self.col]) + self.init_transition_table() + self.init_reward_table() +``` + +```py + # discount factor + self.gamma = 0.9 +``` + +```py + # 90% exploration, 10% exploitation + self.epsilon = 0.9 + # exploration decays by this factor every episode + self.epsilon_decay = 0.9 + # in the long run, 10% exploration, 90% exploitation + self.epsilon_min = 0.1 +``` + +```py + # reset the environment + self.reset() + self.is_explore = True +``` + +```py + def reset(self): + """start of episode""" + self.state = 0 + return self.state +``` + +```py + def is_in_win_state(self): + """agent wins when the goal is reached""" + return self.state == 2 +``` + +```py + def init_reward_table(self): + """ + 0 - Left, 1 - Down, 2 - Right, 3 - Up + ---------------- + | 0 | 0 | 100 | + ---------------- + | 0 | 0 | -100 | + ---------------- + """ + self.reward_table = np.zeros([self.row, self.col]) + self.reward_table[1, 2] = 100. + self.reward_table[4, 2] = -100. +``` + +```py + def init_transition_table(self): + """ + 0 - Left, 1 - Down, 2 - Right, 3 - Up + ------------- + | 0 | 1 | 2 | + ------------- + | 3 | 4 | 5 | + ------------- + """ + self.transition_table = np.zeros([self.row, self.col], + dtype=int) + self.transition_table[0, 0] = 0 + self.transition_table[0, 1] = 3 + self.transition_table[0, 2] = 1 + self.transition_table[0, 3] = 0 +``` + +```py + self.transition_table[1, 0] = 0 + self.transition_table[1, 1] = 4 + self.transition_table[1, 2] = 2 + self.transition_table[1, 3] = 1 +``` + +```py + # terminal Goal state + self.transition_table[2, 0] = 2 + self.transition_table[2, 1] = 2 + self.transition_table[2, 2] = 2 + self.transition_table[2, 3] = 2 +``` + +```py + self.transition_table[3, 0] = 3 + self.transition_table[3, 1] = 3 + self.transition_table[3, 2] = 4 + self.transition_table[3, 3] = 0 +``` + +```py + self.transition_table[4, 0] = 3 + self.transition_table[4, 1] = 4 + self.transition_table[4, 2] = 5 + self.transition_table[4, 3] = 1 +``` + +```py + # terminal Hole state + self.transition_table[5, 0] = 5 + self.transition_table[5, 1] = 5 + self.transition_table[5, 2] = 5 + self.transition_table[5, 3] = 5 +``` + +```py + def step(self, action): + """execute the action on the environment + Argument: + action (tensor): An action in Action space + Returns: + next_state (tensor): next env state + reward (float): reward received by the agent + done (Bool): whether the terminal state + is reached + """ + # determine the next_state given state and action + next_state = self.transition_table[self.state, action] + # done is True if next_state is Goal or Hole + done = next_state == 2 or next_state == 5 + # reward given the state and action + reward = self.reward_table[self.state, action] + # the enviroment is now in new state + self.state = next_state + return next_state, reward, done +``` + +```py + def act(self): + """determine the next action + either fr Q Table(exploitation) or + random(exploration) + Return: + action (tensor): action that the agent + must execute + """ + # 0 - Left, 1 - Down, 2 - Right, 3 - Up + # action is from exploration + if np.random.rand() <= self.epsilon: + # explore - do random action + self.is_explore = True + return np.random.choice(4,1)[0] +``` + +```py + # or action is from exploitation + # exploit - choose action with max Q-value + self.is_explore = False + action = 
np.argmax(self.q_table[self.state]) + return action +``` + +```py + def update_q_table(self, state, action, reward, next_state): + """Q-Learning - update the Q Table using Q(s, a) + Arguments: + state (tensor) : agent state + action (tensor): action executed by the agent + reward (float): reward after executing action + for a given state + next_state (tensor): next state after executing + action for a given state + """ + # Q(s, a) = reward + gamma * max_a' Q(s', a') + q_value = self.gamma * np.amax(self.q_table[next_state]) + q_value += reward + self.q_table[state, action] = q_value +``` + +```py + def update_epsilon(self): + """update Exploration-Exploitation mix""" + if self.epsilon > self.epsilon_min: + self.epsilon *= self.epsilon_decay +``` + +感知动作学习循环在“列表 9.3.2”中进行了说明。 在每个剧集中,环境都会重置为**开始**状态。 选择要执行的动作并将其应用于环境。 观察**奖励**和**下一个**状态,并将其用于更新 Q 表。 达到**目标**或**洞**状态后,剧集完成(`done = True`)。 + +对于此示例,Q 学习运行 100 集或 10 获胜,以先到者为准。 由于在每个剧集中变量的值均降低,因此智能体开始倾向于利用 Q 表来确定在给定状态下要执行的动作。 要查看 Q 学习模拟,我们只需要运行以下命令: + +```py +python3 q-learning-9.3.1.py +``` + +“列表 9.3.2”:`q-learning-9.3.1.py` + +主要的 Q 学习循环: + +```py + # state, action, reward, next state iteration + for episode in range(episode_count): + state = q_world.reset() + done = False + print_episode(episode, delay=delay) + while not done: + action = q_world.act() + next_state, reward, done = q_world.step(action) + q_world.update_q_table(state, action, reward, next_state) + print_status(q_world, done, step, delay=delay) + state = next_state + # if episode is done, perform housekeeping + if done: + if q_world.is_in_win_state(): + wins += 1 + scores.append(step) + if wins > maxwins: + print(scores) + exit(0) + # Exploration-Exploitation is updated every episode + q_world.update_epsilon() + step = 1 + else: + step += 1 +``` + +“图 9.3.9”显示了`maxwins = 2000`(达到`2000 x`目标状态)和`delay = 0`时的屏幕截图。 要仅查看最终的 Q 表,请执行: + +```py +python3 q-learning-9.3.1.py --train +``` + +![A screenshot of a cell phone Description automatically generated](img/B14853_09_10.png) + +图 9.3.9:屏幕快照显示智能体在 2,000 次获胜后的 Q 表 + +Q 表已收敛,并显示了智能体可以在给定状态下采取的逻​​辑操作。 例如,在第一行或状态`(0, 0)`中,该策略建议向右移动。 第二行的状态`(0, 1)`也是如此。 第二个动作达到**目标**状态。 `scores`变量转储显示,随着智能体从策略获取正确的操作,所采取的最少步骤数减少了。 + +从“图 9.3.9”,我们可以从“公式 9.2.2”和`V*(s) = max[a] Q(s, a)`计算每个状态的值。 例如,对于状态`(0, 0)`,`V*(s) = max[a](0.0, 72.9, 90.0, 81.0) = 9.0`。 + +“图 9.3.10”显示每种状态的值。 + +![](img/B14853_09_11.png) + +图 9.3.10:图 9.3.9 和公式 9.2.2 中每个状态的值 + +这个简单的示例说明了在简单确定性世界中智能体的 Q 学习的所有元素。 在下一节中,我们将介绍考虑随机性所需的轻微修改。 + +# 4\. 非确定性环境 + +如果环境不确定,则奖励和行动都是概率性的。 新系统是随机的 MDP。 为了反映不确定性报酬,新的值函数为: + +![](img/14853_09_042.png) (Equation 9.4.1) + +贝尔曼方程修改为: + +![](img/14853_09_043.png) (Equation 9.4.2) + +但是,在本章中,我们将重点介绍确定性环境。 在下一节中,我们将提出一种更通用的 Q 学习算法,称为**时差**(**TD**)学习。 + +# 5\. 
时差学习 + +Q 学习是更广义的 TD 学习`TD(λ)`的特例。 更具体地说,这是单步 TD 学习的特殊情况,`TD(0)`: + +![](img/14853_09_045.png) (Equation 9.5.1) + +其中`α`是学习率。 注意,当`α = 1`,“公式 9.5.1”与贝尔曼等式相似。 为简单起见,我们还将“公式 9.5.1”称为 Q 学习或广义 Q 学习。 + +以前,我们将 Q 学习称为一种非策略性 RL 算法,因为它学习 Q 值函数而没有直接使用它尝试优化的策略。 *上策略*一步式 TD 学习算法的示例是 SARSA,类似于“公式 9.5.1”: + +![](img/14853_09_048.png) (Equation 9.5.2) + +主要区别是使用已优化的策略来确定`a'`。 必须知道项`s`,`a`,`r`,`s'`和`a'`(因此名称为 SARSA)才能在每次迭代时更新 Q 值函数。 Q 学习和 SARSA 都在 Q 值迭代中使用现有的估计,该过程称为*自举*。 在引导过程中,我们从奖励中更新当前的 Q 值估计,并随后更新 Q 值估计。 + +在提出另一个示例之前,似乎需要合适的 RL 模拟环境。 否则,我们只能对非常简单的问题(如上一个示例)运行 RL 模拟。 幸运的是,OpenAI 创建了 [Gym](https://gym.openai.com),我们将在下一节中介绍。 + +## 在 OpenAI Gym 上进行 Q 学习 + +OpenAI Gym 是的工具包,用于开发和比较 RL 算法。 它适用于大多数 DL 库,包括`tf.keras`。 可以通过运行以下命令来安装健身房: + +```py +sudo pip3 install gym +``` + +该体育馆有多种可以测试 RL 算法的环境,例如玩具文字,经典控件,算法,Atari 和二维/三维机器人。 例如,`FrozenLake-v0`(“图 9.5.1”)是一个玩具文本环境,类似于在 Python Q 学习示例中使用的简单确定性世界: + +![](img/B14853_09_12.png) + +图 9.5.1:OpenAI Gym 中的 FrozenLake-v0 环境 + +`FrozenLake-v0`具有 12 个状态,标记为`S`的状态为起始状态,`F`的状态为湖泊的冰冻部分,这是安全的,`H`为安全状态。 应当避免的空穴状态,`G`是飞盘所在的目标状态。 转换为目标状态的奖励为 +1。 对于所有其他状态,奖励为**零**。 + +在`FrozenLake-v0`中,还有四个可用动作(左,下,右,上),称为动作空间。 但是,与之前的简单确定性世界不同,实际运动方向仅部分取决于所选的动作。 `FrozenLake-v0`环境有两种变体。 滑和不滑。 不出所料,滑动模式更具挑战性。 + +应用于`FrozenLake-v0`的操作将返回观察结果(等效于下一个状态),奖励,完成(无论剧集是否完成)以及调试信息字典。 返回的观察对象捕获环境的可观察属性,称为观察空间。 + +通用 Q 学习可以应用于`FrozenLake-v0`环境。“表 9.5.1”显示了湿滑和非湿滑环境的表现改进。 衡量策略表现的一种方法是执行的事件达到目标状态的百分比。 百分比越高,效果越好。 从大约 1.5% 的纯探查(随机操作)的基准来看,该策略可以在非光滑环境中达到约 76% 的目标状态,在光滑环境中可以达到约 71% 的目标状态。 不出所料,很难控制湿滑的环境。 + +| **模式** | **运行** | **大约百分比的目标** | +| --- | --- | --- | +| 训练非滑动 | `python3 q-frozenlake-9.5.1.py` | 26 | +| 测试非滑动 | `python3 q-frozenlake-9.5.1.py -d` | 76 | +| 纯随机动作非滑动 | `python3 q-frozenlake-9.5.1.py -e` | 1.5 | +| 训练滑动 | `python3 q-frozenlake-9.5.1.py -s` | 26 | +| 测试滑动 | `python3 q-frozenlake-9.5.1.py -s -d` | 71 | +| 纯随机动作滑动 | `python3 q-frozenlake-9.5.1.py -s -e` | 1.5 | + +表 9.5.1:在 FrozenLake-v0 环境中学习率为 0.5 的广义 Q 学习的基线和表现 + +由于该代码仅需要一个 Q 表,因此仍可以在 Python 和 NumPy 中实现。“列表 9.5.1”显示了`QAgent`类的实现。 除了使用 OpenAI Gym 的`FrozenLake-v0`环境之外,最重要的更改是广义 Q 学习的实现,这由`update_q_table()`函数中的“公式 9.5.1”定义。 + +“列表 9.5.1”:`q-frozenlake-9.5.1.py` + +关于 FrozenLake-v0 环境的 Q 学习: + +```py +from collections import deque +import numpy as np +import argparse +import os +import time +import gym +from gym import wrappers, logger +``` + +```py +class QAgent: + def __init__(self, + observation_space, + action_space, + demo=False, + slippery=False, + episodes=40000): + """Q-Learning agent on FrozenLake-v0 environment +``` + +```py + Arguments: + observation_space (tensor): state space + action_space (tensor): action space + demo (Bool): whether for demo or training + slippery (Bool): 2 versions of FLv0 env + episodes (int): number of episodes to train + """ +``` + +```py + self.action_space = action_space + # number of columns is equal to number of actions + col = action_space.n + # number of rows is equal to number of states + row = observation_space.n + # build Q Table with row x col dims + self.q_table = np.zeros([row, col]) +``` + +```py + # discount factor + self.gamma = 0.9 +``` + +```py + # initially 90% exploration, 10% exploitation + self.epsilon = 0.9 + # iteratively applying decay til + # 10% exploration/90% exploitation + self.epsilon_min = 0.1 + self.epsilon_decay = self.epsilon_min / self.epsilon + self.epsilon_decay = self.epsilon_decay ** \ + (1\. 
/ float(episodes)) +``` + +```py + # learning rate of Q-Learning + self.learning_rate = 0.1 +``` + +```py + # file where Q Table is saved on/restored fr + if slippery: + self.filename = 'q-frozenlake-slippery.npy' + else: + self.filename = 'q-frozenlake.npy' +``` + +```py + # demo or train mode + self.demo = demo + # if demo mode, no exploration + if demo: + self.epsilon = 0 +``` + +```py + def act(self, state, is_explore=False): + """determine the next action + if random, choose from random action space + else use the Q Table + Arguments: + state (tensor): agent's current state + is_explore (Bool): exploration mode or not + Return: + action (tensor): action that the agent + must execute + """ + # 0 - left, 1 - Down, 2 - Right, 3 - Up + if is_explore or np.random.rand() < self.epsilon: + # explore - do random action + return self.action_space.sample() +``` + +```py + # exploit - choose action with max Q-value + action = np.argmax(self.q_table[state]) + return action +``` + +```py + def update_q_table(self, state, action, reward, next_state): + """TD(0) learning (generalized Q-Learning) with learning rate + Arguments: + state (tensor): environment state + action (tensor): action executed by the agent for + the given state + reward (float): reward received by the agent for + executing the action + next_state (tensor): the environment next state + """ + # Q(s, a) += + # alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a)) + q_value = self.gamma * np.amax(self.q_table[next_state]) + q_value += reward + q_value -= self.q_table[state, action] + q_value *= self.learning_rate + q_value += self.q_table[state, action] + self.q_table[state, action] = q_value +``` + +```py + def update_epsilon(self): + """adjust epsilon""" + if self.epsilon > self.epsilon_min: + self.epsilon *= self.epsilon_decay +``` + +“列表 9.5.2”演示了智能体的感知行为学习循环。 在每个剧集中,通过调用`env.reset()`重置环境。 要执行的动作由`agent.act()`选择,并由`env.step(action)`应用于环境。 奖励和下一个状态将被观察并用于更新 Q 表。 + +在每个动作之后,通过`agent.update_q_table()`执行 TD 学习。 由于每次调用`agent.update_epsilon()`时处`self.epsilon`变量的值都会减少,该智能体开始支持利用 Q 表来确定在给定状态下执行的操作。 达到目标或空洞状态后,剧集完成(`done = True`)。 对于此示例,TD 学习运行 4,000 集。 + +“列表 9.5.2”:`q-frozenlake-9.5.1.py`。 + +`FrozenLake-v0`环境的 Q 学习循环: + +```py + # loop for the specified number of episode + for episode in range(episodes): + state = env.reset() + done = False + while not done: + # determine the agent's action given state + action = agent.act(state, is_explore=args.explore) + # get observable data + next_state, reward, done, _ = env.step(action) + # clear the screen before rendering the environment + os.system('clear') + # render the environment for human debugging + env.render() + # training of Q Table + if done: + # update exploration-exploitation ratio + # reward > 0 only when Goal is reached + # otherwise, it is a Hole + if reward > 0: + wins += 1 +``` + +```py + if not args.demo: + agent.update_q_table(state, + action, + reward, + next_state) + agent.update_epsilon() +``` + +```py + state = next_state + percent_wins = 100.0 * wins / (episode + 1) +``` + +`agent`对象可以在湿滑或非湿滑模式下运行。 训练后,智能体可以利用 Q 表选择给定任何策略执行的操作,如“表 9.5.1”的测试模式所示。 如“表 9.5.1”所示,使用学习的策略可显着提高性能。 随着体育馆的使用,不再需要中构建环境的许多代码行。 例如,与上一个示例不同,使用 OpenAI Gym,我们不需要创建状态转换表和奖励表。 + +这将帮助我们专注于构建有效的 RL 算法。 要以慢动作方式运行代码或每个动作延迟 1 秒,请执行以下操作: + +```py +python3 q-frozenlake-9.5.1.py -d -t=1 +``` + +在本节中,我们在更具挑战性的环境中演示了 Q 学习。 我们还介绍了 OpenAI 体育馆。 但是,我们的环境仍然是玩具环境。 如果我们有大量的状态或动作怎么办? 在这种情况下,使用 Q 表不再可行。 在下一节中,我们将使用深度神经网络来学习 Q 表。 + +# 6\. 
深度 Q 网络(DQN) + +在小型离散环境中,使用 Q 表执行 Q 学习是很好的选择。 但是,在大多数情况下,当环境具有许多状态或连续时,Q 表是不可行或不实际的。 例如,如果我们观察由四个连续变量组成的状态,则表的大小是无限的。 即使我们尝试将这四个变量离散化为 1,000 个值,表中的总行数也达到了惊人的`1000^4 = 1e12`。 即使经过训练,该表仍是稀疏的–该表中的大多数单元都是零。 + +这个问题的解决方案称为 DQN [2],它使用深度神经网络来近似 Q 表,如图“图 9.6.1”所示。 有两种构建 Q 网络的方法: + +* 输入是状态-动作对,预测是 Q 值 +* 输入是状态,预测是每个动作的 Q 值 + +第一种选择不是最佳的,因为网络被调用的次数等于操作数。 第二种是首选方法。 Q 网络仅被调用一次。 + +最希望得到的作用就是 Q 值最大的作用。 + +![](img/B14853_09_13.png) + +图 9.6.1:深度 Q 网络 + +训练 Q 网络所需的数据来自智能体的经验:`(s[0]a[0]r[1]s[1], s[1]a[1]r[2]s[2],d ..., s[T-1]a[T-1]r[T]s[T])`。 每个训练样本都是经验单元`s[t]a[t]r[t+1]s[t+1]`。 在时间步`t`,`s = s[t]`的给定状态下,使用类似于前一部分的 Q 学习算法来确定动作`a = a[t]`: + +![](img/14853_09_060.png) (Equation 9.6.1) + +为了简化符号,我们省略了下标和粗体字母的使用。 注意,`Q(s, a)`是 Q 网络。 严格来说,它是`Q(a | s)`,因为动作已移至预测阶段(换句话说,是输出),如“图 9.6.1”的右侧所示。 Q 值最高的动作是应用于环境以获得奖励`r = r[t+1]`,下一状态`s' = s[t+1]`和布尔值`done`的动作,指示下一个状态是否为终端 。 根据关于广义 Q 学习的“公式 9.5.1”,可以通过应用所选的操作来确定 MSE 损失函数: + +![](img/14853_09_065.png) (Equation 9.6.2) + +在前面有关 Q 学习和`Q(a | s) -> Q(s, a)`的讨论中,所有项都很熟悉。 项`max[a'] Q(a' | s') -> max[a'] Q(s', a')`。 换句话说,使用 Q 网络,在给定下一个状态的情况下预测每个动作的 Q 值,并从其中获得最大值。 注意,在终端状态下,`s'`,`max[a'] Q(a' | s') -> max[a'] Q(s', a') = 0`。 + +但是,事实证明训练 Q 网络是不稳定的。 导致不稳定的问题有两个:1)样本之间的相关性高; 2)非平稳目标。 高度相关性是由于采样经验的顺序性质。 DQN 通过创建经验缓冲解决了问题。 训练数据是从该缓冲区中随机采样的。 此过程称为**经验回放**。 + +非固定目标的问题是由于目标网络`Q(s', a')`在每小批训练后都会被修改。 目标网络的微小变化会导致策略,数据分布以及当前 Q 值和目标 Q 值之间的相关性发生重大变化。 这可以通过冻结`C`训练步骤的目标网络的权重来解决。 换句话说,创建了两个相同的 Q 网络。 在每个`C`训练步骤中,从训练中的 Q 网络复制目标 Q 网络参数。 + +“算法 9.6.1”中概述了深度 Q 网络算法。 + +“算法 9.6.1”: **DQN 算法** + +要求:将重播内存`D`初始化为容量`N` + +要求:使用随机权重`θ`初始化动作值函数`Q` + +要求:使用权重`θ- = 0`初始化目标操作值函数`Q_target` + +需要:探索率`ε`和折扣系数`γ` + +1. 对于`episode = 1, ..., M`,执行: +2. 给定初始状态`s` +3. 对于`step = 1, ..., T`,执行: +4. 选择动作 + + ![](img/14853_09_082.png) +5. 执行动作`a`,观察奖励`r`,以及下一个状态`s'` +6. 将转换`(s, a, r, s')`存储在`D`中 +7. 更新状态`s = s'` +8. 经验回放 +9. 从`D`中抽样一小部分经验`(s[j], a[j], r[j+1], s[j+1])` +10. ![](img/14853_09_090.png) +11. 在`(Q_max - Q(s[j], a[j]; θ))²`上相对于参数`θ`执行梯度下降步骤。 +12. 定期更新目标网络 +13. 每`C`个步骤,即`Q_target = Q`,换句话说,设置`θ- = θ` +14. `end` + +1. 
`end` + +“算法 9.6.1”总结了在具有离散动作空间和连续状态空间的环境上实现 Q 学习所需的所有技术。 在下一节中,我们将演示如何在更具挑战性的 OpenAI Gym 环境中使用 DQN。 + +## Keras 中的 DQN + +为了说明 DQN,使用了 OpenAI Gym 的`CartPole-v0`环境。 `CartPole-v0`是极点平衡问题。 目的是防止电杆跌落。 环境是二维的。 动作空间由两个离散的动作(左右移动)组成。 但是,状态空间是连续的,并且包含四个变量: + +* 直线位置 +* 线速度 +* 旋转角度 +* 角速度 + +`CartPole-v0`环境如图 9.6.1 所示: + +![](img/B14853_09_14.png) + +图 9.6.1:CartPole-v0 环境 + +最初,杆是直立的。 杆保持直立的每个时间步长都提供 +1 的奖励。 当极点与垂直方向的夹角超过 15 度或与中心的距离超过 2.4 单位时,剧集结束。 如果在 100 个连续试验中平均奖励为 195.0,则认为`CartPole-v0`问题已解决: + +“列表 9.6.1”向我们展示了`CartPole-v0`的 DQN 实现。 `DQNAgent`类表示使用 DQN 的智能体。 创建了两个 Q 网络: + +* “算法 9.6.1”中的 Q 网络或 Q +* “算法 9.6.1”中的目标 Q 网络或`Q_target` + +两个网络都是 MLP,每个都有 256 个单元的 3 个隐藏层。 这两个网络都是通过`build_model()`方法创建的。 在**经验回放**,`replay()`期间训练 Q 网络。 以`update_weights()`的固定间隔`C = 10`个训练步骤,将 Q 网络参数复制到目标 Q 网络。 在“算法 9.6.1”中,这实现了第 13 行,`Q_target = Q`。 每次发作后,`update_epsilon()`都会降低探索利用的比例,以利用已学习的策略。 + +“列表 9.6.1”:`dqn-cartpole-9.6.1.py` + +`tf.keras`中的 DQN: + +```py +class DQNAgent: + def __init__(self, + state_space, + action_space, + episodes=500): + """DQN Agent on CartPole-v0 environment +``` + +```py + Arguments: + state_space (tensor): state space + action_space (tensor): action space + episodes (int): number of episodes to train + """ + self.action_space = action_space +``` + +```py + # experience buffer + self.memory = [] +``` + +```py + # discount rate + self.gamma = 0.9 +``` + +```py + # initially 90% exploration, 10% exploitation + self.epsilon = 1.0 + # iteratively applying decay til + # 10% exploration/90% exploitation + self.epsilon_min = 0.1 + self.epsilon_decay = self.epsilon_min / self.epsilon + self.epsilon_decay = self.epsilon_decay ** \ + (1\. / float(episodes)) +``` + +```py + # Q Network weights filename + self.weights_file = 'dqn_cartpole.h5' + # Q Network for training + n_inputs = state_space.shape[0] + n_outputs = action_space.n + self.q_model = self.build_model(n_inputs, n_outputs) + self.q_model.compile(loss='mse', optimizer=Adam()) + # target Q Network + self.target_q_model = self.build_model(n_inputs, n_outputs) + # copy Q Network params to target Q Network + self.update_weights() +``` + +```py + self.replay_counter = 0 + self.ddqn = True if args.ddqn else False +``` + +```py + def build_model(self, n_inputs, n_outputs): + """Q Network is 256-256-256 MLP +``` + +```py + Arguments: + n_inputs (int): input dim + n_outputs (int): output dim +``` + +```py + Return: + q_model (Model): DQN + """ + inputs = Input(shape=(n_inputs, ), name='state') + x = Dense(256, activation='relu')(inputs) + x = Dense(256, activation='relu')(x) + x = Dense(256, activation='relu')(x) + x = Dense(n_outputs, + activation='linear', + name='action')(x) + q_model = Model(inputs, x) + q_model.summary() + return q_model +``` + +```py + def act(self, state): + """eps-greedy policy + Return: + action (tensor): action to execute + """ + if np.random.rand() < self.epsilon: + # explore - do random action + return self.action_space.sample() +``` + +```py + # exploit + q_values = self.q_model.predict(state) + # select the action with max Q-value + action = np.argmax(q_values[0]) + return action +``` + +```py + def remember(self, state, action, reward, next_state, done): + """store experiences in the replay buffer + Arguments: + state (tensor): env state + action (tensor): agent action + reward (float): reward received after executing + action on state + next_state (tensor): next state + """ + item = (state, action, reward, next_state, done) + self.memory.append(item) +``` + +```py + def get_target_q_value(self, next_state, reward): 
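        # note: in DDQN mode, action selection is done by the
        # online Q network while action evaluation is done by the
        # target Q network; plain DQN uses the target network for
        # both, which tends to overestimate Q values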
+ """compute Q_max + Use of target Q Network solves the + non-stationarity problem + Arguments: + reward (float): reward received after executing + action on state + next_state (tensor): next state + Return: + q_value (float): max Q-value computed by + DQN or DDQN + """ + # max Q value among next state's actions + if self.ddqn: + # DDQN + # current Q Network selects the action + # a'_max = argmax_a' Q(s', a') + action = np.argmax(self.q_model.predict(next_state)[0]) + # target Q Network evaluates the action + # Q_max = Q_target(s', a'_max) + q_value = self.target_q_model.predict(\ + next_state)[0][action] + else: + # DQN chooses the max Q value among next actions + # selection and evaluation of action is + # on the target Q Network + # Q_max = max_a' Q_target(s', a') + q_value = np.amax(\ + self.target_q_model.predict(next_state)[0]) +``` + +```py + # Q_max = reward + gamma * Q_max + q_value *= self.gamma + q_value += reward + return q_value +``` + +```py + def replay(self, batch_size): + """experience replay addresses the correlation issue + between samples + Arguments: + batch_size (int): replay buffer batch + sample size + """ + # sars = state, action, reward, state' (next_state) + sars_batch = random.sample(self.memory, batch_size) + state_batch, q_values_batch = [], [] +``` + +```py + # fixme: for speedup, this could be done on the tensor level + # but easier to understand using a loop + for state, action, reward, next_state, done in sars_batch: + # policy prediction for a given state + q_values = self.q_model.predict(state) +``` + +```py + # get Q_max + q_value = self.get_target_q_value(next_state, reward) +``` + +```py + # correction on the Q value for the action used + q_values[0][action] = reward if done else q_value +``` + +```py + # collect batch state-q_value mapping + state_batch.append(state[0]) + q_values_batch.append(q_values[0]) +``` + +```py + # train the Q-network + self.q_model.fit(np.array(state_batch), + np.array(q_values_batch), + batch_size=batch_size, + epochs=1, + verbose=0) +``` + +```py + # update exploration-exploitation probability + self.update_epsilon() +``` + +```py + # copy new params on old target after + # every 10 training updates + if self.replay_counter % 10 == 0: + self.update_weights() +``` + +```py + self.replay_counter += 1 +``` + +```py + def update_epsilon(self): + """decrease the exploration, increase exploitation""" + if self.epsilon > self.epsilon_min: + self.epsilon *= self.epsilon_decay +``` + +为了在“算法 9.6.1”**经验回放**`replay()`中实现第 10 行,对于每个体验单元(`s[j]`,`a[j]`,`r[j + 1]`和`s[j + 1]`)将动作`a[j]`的 Q 值设置为`Q_max`。 所有其他动作的 Q 值保持不变。 + +这是通过 DQNAgent `replay()`函数中的以下行实现的: + +```py +# policy prediction for a given state q_values = self.q_model.predict(state) +# get Q_max +q_value = self.get_target_q_value(next_state) +# correction on the Q value for the action used q_values[0][action] = reward if done else q_value +``` + +如“算法 9.6.1”的第 11 行所示,只有动作`a[j]`具有等于`(Q_max - Q(s[j], a[j]; θ))²`的非零损失。 请注意,假设缓冲区中有足够的数据,换句话说,在每个剧集结束后,“列表 9.6.2”中的感知动作学习循环会调用经验回放。 缓冲区的大小大于或等于批量大小)。 在经验回放期间,会随机采样一批体验单元,并将其用于训练 Q 网络。 + +与 Q 表类似,`act()`实现了 ε-贪婪策略,“公式 9.6.1”。 + +体验由`remember()`存储在重播缓冲区中。 Q 通过`get_target_q_value()`函数计算。 + +“列表 9.6.2”总结了智能体的感知-行动-学习循环。 在每个剧集中,通过调用`env.reset()`重置环境。 要执行的动作由`agent.act()`选择,并由`env.step(action)`应用于环境。 奖励和下一状态将被观察并存储在重播缓冲区中。 在执行每个操作之后,智能体会调用`replay()`来训练 DQN 并调整探索利用比率。 + +当极点与垂直方向的夹角超过 15 度或与中心的距离超过 2.4 单位时,剧集完成(`done = True`)。 对于此示例,如果 DQN 智能体无法解决问题,则 Q 学习最多运行 3,000 集。 如果`average mean_score`奖励在 100 次连续试验`win_trials`中为 
195.0,则认为`CartPole-v0`问题已解决。 + +“列表 9.6.2”:`dqn-cartpole-9.6.1.py` + +`tf.keras`中的 DQN 训练循环: + +```py + # Q-Learning sampling and fitting + for episode in range(episode_count): + state = env.reset() + state = np.reshape(state, [1, state_size]) + done = False + total_reward = 0 + while not done: + # in CartPole-v0, action=0 is left and action=1 is right + action = agent.act(state) + next_state, reward, done, _ = env.step(action) + # in CartPole-v0: + # state = [pos, vel, theta, angular speed] + next_state = np.reshape(next_state, [1, state_size]) + # store every experience unit in replay buffer + agent.remember(state, action, reward, next_state, done) + state = next_state + total_reward += reward +``` + +```py + # call experience relay + if len(agent.memory) >= batch_size: + agent.replay(batch_size) +``` + +```py + scores.append(total_reward) + mean_score = np.mean(scores) + if mean_score >= win_reward[args.env_id] \ + and episode >= win_trials: + print("Solved in episode %d: \ + Mean survival = %0.2lf in %d episodes" + % (episode, mean_score, win_trials)) + print("Epsilon: ", agent.epsilon) + agent.save_weights() + break + if (episode + 1) % win_trials == 0: + print("Episode %d: Mean survival = \ + %0.2lf in %d episodes" % + ((episode + 1), mean_score, win_trials)) +``` + +在平均 10 次运行的中,DQN 在 822 集内解决了。 我们需要注意的是,每次训练运行的结果可能会有所不同。 + +自从引入 DQN 以来,连续的论文都提出了对“算法 9.6.1”的改进。 一个很好的例子是**双 DQN(DDQN)**,下面将对其进行讨论。 + +## 双重 Q 学习(DDQN) + +在 DQN 中,目标 Q 网络选择并评估每个动作,从而导致 Q 值过高。 为了解决这个问题,DDQN [3]建议使用 Q 网络选择动作,并使用目标 Q 网络评估动作。 + +在 DQN 中,如“算法 9.6.1”所概述,第 10 行中 Q 值的估计为: + +![](img/14853_09_097.png) + +* `Q_target`选择并评估动作,`a[j+1]`。 + +DDQN 建议将第 10 行更改为: + +![](img/14853_09_100.png) + +项`argmax[a[j+1]] Q(s[j+1], a[j+1]; θ)`使 Q 函数可以选择动作。 然后,该动作由`Q_target`评估。 + +“列表 9.6.3”显示了当我们创建一个新的`DDQNAgent`类时,该类继承自`DQNAgent`类。 只有`get_target_q_value()`方法被覆盖,以实现最大 Q 值计算中的更改。 + +“列表 9.6.3”:`dqn-cartpole-9.6.1.py`: + +```py +class DDQNAgent(DQNAgent): + def __init__(self, + state_space, + action_space, + episodes=500): + super().__init__(state_space, + action_space, + episodes) + """DDQN Agent on CartPole-v0 environment +``` + +```py + Arguments: + state_space (tensor): state space + action_space (tensor): action space + episodes (int): number of episodes to train + """ +``` + +```py + # Q Network weights filename + self.weights_file = 'ddqn_cartpole.h5' +``` + +```py + def get_target_q_value(self, next_state, reward): + """compute Q_max + Use of target Q Network solves the + non-stationarity problem + Arguments: + reward (float): reward received after executing + action on state + next_state (tensor): next state + Returns: + q_value (float): max Q-value computed + """ + # max Q value among next state's actions + # DDQN + # current Q Network selects the action + # a'_max = argmax_a' Q(s', a') + action = np.argmax(self.q_model.predict(next_state)[0]) + # target Q Network evaluates the action + # Q_max = Q_target(s', a'_max) + q_value = self.target_q_model.predict(\ + next_state)[0][action] +``` + +```py + # Q_max = reward + gamma * Q_max + q_value *= self.gamma + q_value += reward + return q_value +``` + +为了进行比较,在平均 10 次运行中,`CartPole-v0`由 DDQN 在 971 个剧集中求解。 要使用 DDQN,请运行以下命令: + +```py +python3 dqn-cartpole-9.6.1.py -d +``` + +DQN 和 DDQN 均表明,借助 DL,Q 学习能够扩展并解决具有连续状态空间和离散动作空间的问题。 在本章中,我们仅在具有连续状态空间和离散动作空间的最简单问题之一上演示了 DQN。 在原始论文中,DQN [2]证明了它可以在许多 Atari 游戏中达到超人的表现水平。 + +# 7\. 
总结 + +在本章中,我们已经介绍了 DRL,DRL 是一种强大的技术,许多研究人员认为它是 AI 的最有希望的领先者。 我们已经超越了 RL 的原则。 RL 能够解决许多玩具问题,但是 Q 表无法扩展到更复杂的现实问题。 解决方案是使用深度神经网络学习 Q 表。 但是,由于样本相关性和目标 Q 网络的非平稳性,在 RL 上训练深度神经网络非常不稳定。 + +DQN 提出了一种使用经验回放并将目标网络与受训 Q 网络分离的解决方案。 DDQN 建议通过将动作选择与动作评估分开来最大程度地降低 Q 值,从而进一步改进算法。 DQN 还提出了其他改进建议。 优先经验回放[6]认为,不应对体验缓冲区进行统一采样。 + +取而代之的是,应更频繁地采样基于 TD 误差的更重要的经验,以完成更有效的训练。 文献[7]提出了一种对决网络架构来估计状态值函数和优势函数。 这两个函数均用于估计 Q 值,以加快学习速度。 + +本章介绍的方法是值迭代/拟合。 通过找到最佳值函数间接学习策略。 在下一章中,方法将是使用称为策略梯度方法的一系列算法直接学习最佳策略。 学习策略有很多好处。 特别地,策略梯度方法可以处理离散和连续的动作空间。 + +# 8\. 参考 + +1. `Sutton and Barto: Reinforcement Learning: An Introduction, 2017 (http://incompleteideas.net/book/bookdraft2017nov5.pdf).` +1. `Volodymyr Mnih et al.: Human-level Control through Deep Reinforcement Learning. Nature 518.7540, 2015: 529 (http://www.davidqiu.com:8888/research/nature14236.pdf).` +1. `Hado Van Hasselt, Arthur Guez, and David Silver: Deep Reinforcement Learning with Double Q-Learning. AAAI. Vol. 16, 2016 (http://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/download/12389/11847).` +1. `Kai Arulkumaran et al.: A Brief Survey of Deep Reinforcement Learning. arXiv preprint arXiv:1708.05866, 2017 (https://arxiv.org/pdf/1708.05866.pdf).` +1. `David Silver: Lecture Notes on Reinforcement Learning (http://www0.cs.ucl.ac.uk/staff/d.silver/web/Teaching.html).` +1. `Tom Schaul et al.: Prioritized experience replay. arXiv preprint arXiv:1511.05952, 2015 (https://arxiv.org/pdf/1511.05952.pdf).` +1. `Ziyu Wang et al.: Dueling Network Architectures for Deep Reinforcement Learning. arXiv preprint arXiv:1511.06581, 2015 (https://arxiv.org/pdf/1511.06581.pdf).` \ No newline at end of file diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/10.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/10.md new file mode 100644 index 00000000..a25c3417 --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/10.md @@ -0,0 +1,1189 @@ +# 十、策略梯度方法 + +在本章中,我们将介绍在强化学习中直接优化策略网络的算法。 这些算法统称为“策略梯度方法”。 由于策略网络是在训练期间直接优化的,因此策略梯度方法属于*基于策略*强化学习算法的族。 就像我们在“第 9 章”,“深度强化学习”中讨论的基于值的方法一样,策略梯度方法也可以实现为深度强化学习算法。 + +研究策略梯度方法的基本动机是解决 Q 学习的局限性。 我们会回想起 Q 学习是关于选择使状态值最大化的动作。 借助 Q 函数,我们能够确定策略,使智能体能够决定对给定状态采取何种操作。 选择的动作只是使智能体最大化的动作。 在这方面,Q 学习仅限于有限数量的离散动作。 它不能处理连续的动作空间环境。 此外,Q 学习不是直接优化策略。 最后,强化学习是要找到智能体能够使用的最佳策略,以便决定应采取何种行动以最大化回报。 + +相反,策略梯度方法适用于具有离散或连续动作空间的环境。 另外,我们将在本章中介绍的四种策略梯度方法是直接优化策略网络的表现度量。 这样就形成了一个经过训练的策略网络,智能体可以使用该网络来最佳地在其环境中采取行动。 + +总之,本章的目的是介绍: + +* 策略梯度定理 +* 四种策略梯度方法: **REINFORCE** , **带基线的 REINFORCE**, **演员评论家**和**优势演员评论家(A2C)** +* 在连续动作空间环境中如何在`tf.keras`中实现策略梯度方法的指南 + +让我们从定理开始。 + +# 1\. 
策略梯度定理

如“第 9 章”,“深度强化学习”中所讨论的,智能体位于环境中,处于状态`s[t]`,它是状态空间`S`的一个元素。状态空间`S`可以是离散的,也可以是连续的。智能体遵循策略`π(a[t] | s[t])`,从动作空间`A`中采取动作`a[t]`。`A`可以是离散的或连续的。作为执行动作`a[t]`的结果,智能体会收到奖励`r[t + 1]`,并且环境转换到新状态`s[t + 1]`。新状态仅取决于当前状态和动作。智能体的目标是学习一种最优策略`π*`,该策略可最大化所有状态的回报:

![](img/B14853_10_009.png) (Equation 9.1.1)

回报`R[t]`定义为从时间`t`直到剧集结束或达到终止状态时的折扣累积奖励:

![](img/B14853_10_010.png) (Equation 9.1.2)

根据“公式 9.1.2”,回报还可以解释为遵循策略`π`时给定状态的值。从“公式 9.1.1”可以看出,由于通常`γ^k < 1.0`,与立即奖励相比,未来奖励的权重较低。

到目前为止,我们仅考虑了通过优化基于值的函数`Q(s, a)`来间接学习策略。

本章的目标是通过参数化`π(a[t] | s[t]) -> π(a[t] | s[t], θ)`来直接学习策略。通过参数化,我们可以使用神经网络来学习策略函数。

学习策略意味着我们将最大化某个目标函数`J(θ)`,它是相对于参数`θ`的一种表现度量。在剧集式(episodic)强化学习中,表现度量是起始状态的值;在连续的情况下,目标函数是平均奖励率。

通过执行梯度上升来最大化目标函数`J(θ)`。在梯度上升中,梯度更新沿着所优化函数的导数方向进行。到目前为止,我们的所有损失函数都是通过最小化来优化的,也就是执行梯度下降。稍后,在`tf.keras`实现中,我们将看到,只需将目标函数取负并执行梯度下降,就可以实现梯度上升。

直接学习策略的好处是,它可以同时应用于离散和连续动作空间。对于离散的动作空间:

![](img/B14853_10_019.png) (Equation 10.1.1)

其中`a[i]`是第`i`个动作。`a[i]`可以是神经网络的预测,或状态-动作特征的线性函数:

![](img/B14853_10_022.png) (Equation 10.1.2)

`φ(s[t], a[i])`是将状态-动作对转换为特征的任意函数,例如编码器。

`π(a[t] | s[t], θ)`确定每个`a[i]`的概率。例如,在上一章的平衡杆问题中,目标是通过沿二维轴向左或向右移动小车来保持杆直立。在这种情况下,`a[0]`和`a[1]`分别是向左和向右移动的概率。通常,智能体采取概率最高的动作,即`a[t] = max[i] π(a[t] | s[t], θ)`。

对于连续动作空间,`π(a[t] | s[t], θ)`根据给定状态的概率分布对动作进行采样。例如,如果连续动作空间的范围是`a[t] ∈ [-1.0, 1.0]`,则`π(a[t] | s[t], θ)`通常是高斯分布,其均值和标准差由策略网络预测。预测的动作是来自该高斯分布的样本。为了确保不会生成任何无效的预测,动作将被裁剪到 -1.0 和 1.0 之间。

形式上,对于连续动作空间,该策略是高斯分布的样本:

![](img/B14853_10_032.png) (Equation 10.1.3)

均值`μ`和标准差`σ`都是状态特征的函数:

![](img/B14853_10_035.png) (Equation 10.1.4)

![](img/B14853_10_036.png) (Equation 10.1.5)

`φ(s[t])`是将状态转换为其特征的任意函数。`ζ(x) = log(1 + e^x)`是确保标准差为正值的`softplus`函数。实现状态特征函数`φ(s[t])`的一种方法是使用自编码器网络的编码器。在本章的最后,我们将训练一个自编码器,并将其编码器部分用作状态特征提取器。因此,训练策略网络就是优化参数`θ = [θ[μ], θ[σ]]`的问题。

给定连续可微的策略函数`π(a[t] | s[t], θ)`,策略梯度可以计算为:

![](img/B14853_10_042.png) (Equation 10.1.6)

“公式 10.1.6”也被称为*策略梯度定理*,它适用于离散和连续动作空间。相对于参数`θ`的梯度,是根据按 Q 值缩放的策略动作样本的自然对数来计算的。“公式 10.1.6”利用了自然对数的性质`ᐁx/x = ᐁlnx`。

策略梯度定理在如下意义上是直观的:表现梯度是根据目标策略的样本估计的,并且与策略梯度成比例。策略梯度按 Q 值缩放,以鼓励对状态值有积极贡献的动作;梯度还与动作概率成反比,以惩罚那些频繁发生却对提高表现没有贡献的动作。

有关策略梯度定理的证明,请参阅[2]和 [David Silver 关于强化学习的讲义](http://www0.cs.ucl.ac.uk/staff/d.silver/web/Teaching_files/pg.pdf)。

策略梯度方法具有一些细微的优势。例如,在某些基于纸牌的游戏中,基于值的方法在处理随机性方面没有直接的程序,而基于策略的方法则不然。在基于策略的方法中,动作概率随参数平滑变化;相比之下,相对于参数的微小变化,基于值的动作可能会发生剧烈变化。最后,基于策略的方法对参数的依赖性,使我们对如何对表现度量执行梯度上升有了不同的表述。这些就是后续部分中介绍的四种策略梯度方法。

基于策略的方法也有其自身的缺点。由于趋向于收敛于局部最优而非全局最优,它们通常更难训练。在本章末尾提出的实验中,智能体很容易变得安于现状,选择不一定提供最高值的动作。策略梯度的另一个特点是高方差。

梯度更新经常被高估。此外,训练基于策略的方法非常耗时,训练需要成千上万个剧集(即采样效率不高),而每个剧集仅提供少量样本。以本章结尾处提供的实现为例,典型的训练在 GTX 1060 GPU 上运行 1,000 个剧集大约需要一个小时。

在以下各节中,我们将讨论四种策略梯度方法。虽然讨论的重点是连续动作空间,但这些概念通常也适用于离散动作空间。

# 2\. 蒙特卡洛策略梯度(REINFORCE)方法

最简单的策略梯度方法是 REINFORCE [4],这是一种蒙特卡洛策略梯度方法:

![](img/B14853_10_045.png) (Equation 10.2.1)

其中`R[t]`是回报,如“公式 9.1.2”所定义。`R[t]`是策略梯度定理中`Q^π(s[t], a[t])`的无偏样本。

“算法 10.2.1”总结了 REINFORCE 算法[2]。REINFORCE 是一种蒙特卡洛算法,它不需要环境动态的知识(换句话说,无需模型),仅需要经验样本`(s[i], a[i], r[i+1], s[i+1])`即可优化策略网络`π(a[t] | s[t])`的参数。折扣因子`γ`考虑到奖励随着步数增加而降低的事实:梯度被`γ^k`折扣,在后续步骤中得到的梯度贡献较小。学习率`α`是梯度更新的比例因子。

通过使用折扣梯度和学习率执行梯度上升来更新参数。作为蒙特卡洛算法,REINFORCE 要求智能体在处理梯度更新之前先完成一个剧集。同样由于其蒙特卡洛性质,REINFORCE 的梯度更新具有高方差的特点。

**算法 10.2.1 REINFORCE**

*要求*:可微的参数化目标策略网络`π(a[t] | s[t], θ)`。

*要求*:折扣因子`γ ∈ [0, 1]`和学习率`α`。例如,`γ = 0.99`和`α = 1e-3`。

*要求*:`θ[0]`,初始策略网络参数(例如,`θ[0] -> 0`)。

1. 重复。
2. 通过遵循`π(a[t] | s[t], θ)`来生成剧集`(s[0]a[0]r[1]s[1], s[1]a[1]r[2]s[2], ..., s[T-1]a[T-1]r[T]s[T])`。
3\. 
对于步骤`t = 0, ..., T - 1`,执行: +4. 计算返回值`R[t] = Σ γ^t r[t+k], k = 0, ..., T`。 +5. 计算折扣的表现梯度`ᐁJ(θ) = r^t R[t] ᐁ[θ] ln π(a[t] | s[t], θ)`。 + +1. 执行梯度上升`θ = θ + αᐁJ(θ)`。 + +在 REINFORCE 中,可以通过神经网络对参数化策略进行建模,如图“图 10.2.1”所示: + +![](img/B14853_10_01.png) + +图 10.2.1:策略网络 + +如上一节中讨论的,在连续动作空间的情况下,状态输入被转换为特征。 状态特征是策略网络的输入。 代表策略函数的高斯分布具有均值和标准差,均是状态特征的函数。 根据状态输入的性质,策略网络`π(θ)`可以是 MLP,CNN 或 RNN。 预测的动作只是策略函数的样本。 + +“列表 10.2.1”显示了`REINFORCEAgent` 类,该类在`tf.keras`中实现了“算法 10.2.1”。 `train_by_episode()`在剧集完成后调用,以计算每个步骤的回报。 `train()`通过针对目标函数`logp_model`优化网络来执行“算法 10.2.1”的第 5 行和第 6 行。 父类`PolicyAgent`在本章介绍的四种策略梯度方法的算法中实现了的通用代码。 在讨论所有策略梯度方法之后,将介绍`PolicyAgent`。 + +“列表 10.2.1”:`policygradient-car-10.1.1.py` + +```py +class REINFORCEAgent(PolicyAgent): + def __init__(self, env): + """Implements the models and training of + REINFORCE policy gradient method + Arguments: + env (Object): OpenAI gym environment + """ + super().__init__(env) +``` + +```py + def train_by_episode(self): + """Train by episode + Prepare the dataset before the step by step training + """ + # only REINFORCE and REINFORCE with baseline + # use the ff code + # convert the rewards to returns + rewards = [] + gamma = 0.99 + for item in self.memory: + [_, _, _, reward, _] = item + rewards.append(reward) + + # compute return per step + # return is the sum of rewards from t til end of episode + # return replaces reward in the list + for i in range(len(rewards)): + reward = rewards[i:] + horizon = len(reward) + discount = [math.pow(gamma, t) for t in range(horizon)] + return_ = np.dot(reward, discount) + self.memory[i][3] = return_ +``` + +```py + # train every step + for item in self.memory: + self.train(item, gamma=gamma) +``` + +```py + def train(self, item, gamma=1.0): + """Main routine for training + Arguments: + item (list) : one experience unit + gamma (float) : discount factor [0,1] + """ + [step, state, next_state, reward, done] = item +``` + +```py + # must save state for entropy computation + self.state = state +``` + +```py + discount_factor = gamma**step + delta = reward +``` + +```py + # apply the discount factor as shown in Algorithms + # 10\. 2.1, 10.3.1 and 10.4.1 + discounted_delta = delta * discount_factor + discounted_delta = np.reshape(discounted_delta, [-1, 1]) + verbose = 1 if done else 0 +``` + +```py + # train the logp model (implies training of actor model + # as well) since they share exactly the same set of + # parameters + self.logp_model.fit(np.array(state), + discounted_delta, + batch_size=1, + epochs=1, + verbose=verbose) +``` + +以下部分提出了对 REINFORCE 方法的改进。 + +# 3\. 带基线方法的 REINFORCE + +REINFORCE 算法可以通过从收益`δ = R[t] - B(s[t])`中减去基线来概括。 基线函数`B(s[t])`可以是任何函数,只要它不依赖`a[t]`即可。 基线不会改变表现梯度的期望: + +![](img/B14853_10_069.png) (Equation 10.3.1) + +“公式 10.3.1”隐含`E[π] [B(s[t]) ᐁ[θ] ln π(a[t] | s[t], θ)] = 0`,因为`B(s[t])`不是`a[t]`的函数。 尽管引入基准不会改变期望值,但会减小梯度更新的方差。 方差的减少通常会加速学习。 + +在大多数情况下,我们使用值函数`B(s[t]) = V(s[t])`作为基准。 如果收益被高估,则比例系数将通过值函数成比例地减小,从而导致较低的方差。 值函数也已参数化`V(s[t]) = V(s[t]; θ[v])`,并与策略网络一起进行了训练。 在连续动作空间中,状态值可以是状态特征的线性函数: + +![](img/B14853_10_075.png) (Equation 10.3.2) + +“算法 10.3.1”用基线方法[1]总结了 REINFORCE。 这与 REINFORCE 相似,只不过将返回值替换为`δ`。 区别在于我们现在正在训练两个神经网络。 + +算法 10.3.1 带基线的 REINFORCE + +*要求*:可微分的参数化目标策略网络`π(a[t] | s[t], θ)`。 + +*要求*:可微分的参数化值网络`V(s[t], θ[v])`。 + +*要求*:折扣因子`γ ∈ [0, 1]`,表现梯度的学习率`α`和值梯度`α[v]`的学习率。 + +*要求*:`θ[0]`,初始策略网络参数(例如,`θ[0] -> 0`)。 `θ[v0]`,初始值网络参数(例如`θ[v0] -> 0`)。 + +1. 重复。 +2. 通过跟随`π(a[t] | s[t], θ)`来生成剧集`(s[0]a[0]r[1]s[1], s[1]a[1]r[2]s[2], ..., a[T-1]a[T-1]r[T]s[T])`。 +3. 对于步骤`t = 0, ..., T - 1`,执行: +4. 
计算返回值: + + ![](img/B14853_10_062.png) +5. 减去基线: + + ![](img/B14853_10_090.png) +6. 计算折扣值梯度: + + ![](img/B14853_10_091.png) +7. 执行梯度上升: + + ![](img/B14853_10_092.png) +8. 计算折扣的表现梯度: + + ![](img/B14853_10_093.png) + +1. 执行梯度上升: + + ![](img/B14853_10_064.png) + +如图“图 10.3.1”所示,除了策略网络`π(θ)`之外,值网络`V(θ)`也同时受到训练。 通过表现梯度`ᐁJ(θ)`更新策略网络参数,而通过梯度`ᐁV(θ[v])`调整值网络参数。 由于 REINFORCE 是蒙特卡罗算法,因此值函数训练也是蒙特卡罗算法。 + +学习率不一定相同。 请注意,值网络也在执行梯度上升。 + +![](img/B14853_10_02.png) + +图 10.3.1:策略和值网络。 具有基线的 REINFORCE 具有一个计算基线的值网络 + +“列表 10.3.1”显示了`REINFORCEBaselineAgent`类,该类在`tf.keras`中实现了“算法 10.3.1”。 它继承自`REINFORCEAgent`,因为这两种算法仅在和`train()`方法上有所不同。 “算法 10.3.1”的第 5 行由`delta = reward - self.value(state)[0]`计算。 然后,通过调用各自模型的`fit()`方法来优化第 7 行和第 9 行中用于目标和值函数的网络`logp_model`和`value_model`。 + +“列表 10.3.1”:`policygradient-car-10.1.1.py` + +```py +class REINFORCEBaselineAgent(REINFORCEAgent): + def __init__(self, env): + """Implements the models and training of + REINFORCE w/ baseline policy + gradient method + Arguments: + env (Object): OpenAI gym environment + """ + super().__init__(env) +``` + +```py + def train(self, item, gamma=1.0): + """Main routine for training + Arguments: + item (list) : one experience unit + gamma (float) : discount factor [0,1] + """ + [step, state, next_state, reward, done] = item +``` + +```py + # must save state for entropy computation + self.state = state +``` + +```py + discount_factor = gamma**step +``` + +```py + # reinforce-baseline: delta = return - value + delta = reward - self.value(state)[0] +``` + +```py + # apply the discount factor as shown in Algorithms + # 10\. 2.1, 10.3.1 and 10.4.1 + discounted_delta = delta * discount_factor + discounted_delta = np.reshape(discounted_delta, [-1, 1]) + verbose = 1 if done else 0 +``` + +```py + # train the logp model (implies training of actor model + # as well) since they share exactly the same set of + # parameters + self.logp_model.fit(np.array(state), + discounted_delta, + batch_size=1, + epochs=1, + verbose=verbose) +``` + +```py + # train the value network (critic) + self.value_model.fit(np.array(state), + discounted_delta, + batch_size=1, + epochs=1, + verbose=verbose) +``` + +在的下一部分中,我们将介绍使用基准线方法对 REINFORCE 的改进。 + +# 4\. 演员评论家方法 + +在带有基线的 REINFORCE 方法中,该值用作基线。 它不用于训练值函数。 在本节中,我们介绍 REINFORCE 与基线的变化,称为演员评论家方法。 策略和值网络扮演着参与者和批评者网络的角色。 策略网络是参与者决定给定状态时要采取的操作。 同时,值网络评估参与者或策略网络做出的决策。 + +值网络充当批评者的角色,可以量化参与者所选择的行动的好坏。 值网络通过将状态值`V(s, θ[v]`与收到的奖励`r`和观察到的下一个状态`γV(s', θ[v])`的折扣值之和来评估状态值。 差异`δ`表示为: + +![](img/B14853_10_103.png) (Equation 10.4.1) + +为了简单起见,我们在中删除了`r`和`s`的下标。“公式 10.4.1”类似于“第 9 章”,“深度强化学习”中讨论的 Q 学习中的时间差异。 下一个状态值被`γ = [0.0, 1.0]`折扣。估计遥远的未来奖励很困难。 因此,我们的估计仅基于近期`r + γV(s', θ[v])`。 这就是*自举*技术。 + +自举技术和“公式 10.4.1”中状态表示的依赖性通常会加速学习并减少差异。 从“公式 10.4.1”,我们注意到值网络评估了当前状态`s = s[t]`,这是由于策略网络的上一个操作`a[t-1]`。 同时,策略梯度基于当前动作`a[t]`。 从某种意义上说,评估延迟了一步。 + +“算法 10.4.1”总结了演员评论家方法[1]。 除了评估用于训练策略和值网络的状态值评估外,还可以在线进行训练。 在每个步骤中,两个网络都经过训练。 这与 REINFORCE 和带有基线的 REINFORCE 不同,在基线之前,智能体完成了一个剧集。 首先,在当前状态的值估计期间向值网络查询两次,其次,为下一个状态的值查询。 这两个值都用于梯度计算中。 + +**算法 10.4.1 演员评论家** + +*要求*:可微分的参数化目标策略网络`π(a | s, θ)`。 + +*要求*:可微分的参数化值网络`V(s, θ[v])`。 + +*要求*:折扣因子`γ ∈ [0, 1]`,表现梯度的学习率`α`和值梯度`α[v]`的学习率。 + +*要求*:`θ[0]`,初始策略网络参数(例如,`θ[0] -> 0`)。 `θ[v0]`,初始值网络参数(例如`θ[v0] -> 0`)。 + +1. 重复。 +2. 对于步骤`t = 0, ..., T - 1`,执行: +3. 对动作`a ~ π(a | s, θ)`进行采样。 +4. 执行动作并观察奖励`r`和下一个状态`s'`。 +5. 评估状态值估计: + + ![](img/B14853_10_121.png) +6. 计算折扣值梯度: + + ![](img/B14853_10_122.png) +7. 执行梯度上升: + + ![](img/B14853_10_092.png) +8. 计算折扣表现梯度: + + ![](img/B14853_10_124.png) +9. 执行梯度上升: + + ![](img/B14853_10_064.png) + +1. 
`s = s'` + +“图 10.4.1”显示了演员评论家网络: + +![](img/B14853_10_03.png) + +图 10.4.1:演员评论家网络。 通过对值`V'`的第二次评估,演员评论家与 REINFORCE 的基线有所不同 + +“列表 10.4.1”显示了`ActorCriticAgent`类,该类在`tf.keras`中实现了“算法 10.4.1”。 与两种 REINFORCE 方法不同,演员评论家不等待剧集完成。 因此,它没有实现`train_by_episode()`。 在每个体验单元,通过调用各自模型的`fit()`方法,优化第 7 行和第 9 行中用于目标和值函数`logp_model`和`value_model`的网络。 `delta`变量存储第 5 行的结果。 + +“列表 10.4.1”:`policygradient-car-10.1.1.py` + +```py +class ActorCriticAgent(PolicyAgent): + def __init__(self, env): + """Implements the models and training of + Actor Critic policy gradient method + Arguments: + env (Object): OpenAI gym environment + """ + super().__init__(env) +``` + +```py + def train(self, item, gamma=1.0): + """Main routine for training + Arguments: + item (list) : one experience unit + gamma (float) : discount factor [0,1] + """ + [step, state, next_state, reward, done] = item +``` + +```py + # must save state for entropy computation + self.state = state +``` + +```py + discount_factor = gamma**step +``` + +```py + # actor-critic: delta = reward - value + # + discounted_next_value + delta = reward - self.value(state)[0] +``` + +```py + # since this function is called by Actor-Critic + # directly, evaluate the value function here + if not done: + next_value = self.value(next_state)[0] + # add the discounted next value + delta += gamma*next_value +``` + +```py + # apply the discount factor as shown in Algortihms + # 10\. 2.1, 10.3.1 and 10.4.1 + discounted_delta = delta * discount_factor + discounted_delta = np.reshape(discounted_delta, [-1, 1]) + verbose = 1 if done else 0 +``` + +```py + # train the logp model (implies training of actor model + # as well) since they share exactly the same set of + # parameters + self.logp_model.fit(np.array(state), + discounted_delta, + batch_size=1, + epochs=1, + verbose=verbose) +``` + +最终的策略梯度方法是 A2C。 + +# 5\. 优势演员评论家(A2C)方法 + +在上一节的演员评论家方法中,目标是使的值函数正确评估状态值。 还有其他用于训练值网络的技术。 一种明显的方法是在值函数优化中使用**均方误差**(**MSE**),类似于 Q 学习中的算法。 新值梯度等于返回值`R[t]`与状态值之间的 MSE 偏导数: + +![](img/B14853_10_127.png) (Equation 10.5.1) + +作为`(R[t] - V(s, θ[v])) -> 0`,值网络预测在预测给定状态的收益时变得更加准确。 我们将演员评论家算法的这种变化称为“优势演员评论家(A2C)”。 A2C 是[3]提出的“异步优势参与者关键(A3C)”的单线程或同步版本。 数量`R[t] - V(s, θ[v])`被称为*优势*。 + +“算法 10.5.1”总结了 A2C 方法。 A2C 和演员评论家之间存在一些差异。演员评论家在线上或根据经验样本进行训练。 A2C 类似于带基线的蒙特卡洛算法,REINFORCE 和 REINFORCE。 一集完成后,将对其进行训练。 从第一个状态到最后一个状态都对演员评论家进行了训练。 A2C 训练从最后一个状态开始,并在第一个状态结束。 此外,`γ^t`不再打折 A2C 策略和值梯度。 + +A2C 的相应网络类似于“图 10.4.1”,因为我们仅更改了梯度计算方法。 为了鼓励训练过程中的探员探索,A3C 算法[3]建议将策略函数的加权熵值的梯度添加到到梯度函数`β ᐁ[θ] H(π(a[t] | s[t], θ))`中。 回想一下,熵是对信息或事件不确定性的度量。 + +**算法 10.5.1 优势演员评论家(A2C)** + +*要求*:可微分的参数化目标策略网络`π(a[t] | s[t], θ)`。 + +*要求*:可微分的参数化值网络`V(s[t], θ[v])`。 + +*要求*:折扣因子`γ ∈ [0, 1]`,表现梯度的学习率`α`,值梯度的学习率`α[v]`和熵权`β`。 + +*要求*:`θ[0]`,初始策略网络参数(例如,`θ[0] -> 0`)。 `θ[v0]`,初始值网络参数(例如`θ[v0] -> 0`)。 + +1. 重复。 +2. 通过跟随`π(a[t] | s[t], θ)`来生成剧集`(s[0]a[0]r[1]s[1], s[1]a[1]r[2]s[2], ..., a[T-1]a[T-1]r[T]s[T])`。 +3. ![](img/B14853_10_144.png) +4. 对于步骤`t = 0, ..., T - 1`,执行: +5. 计算返回值: + + ![](img/B14853_10_146.png) +6. 计算值梯度: + + ![](img/B14853_10_147.png) +7. 累积梯度: + + ![](img/B14853_10_092.png) +8. 计算表现梯度: + + ![](img/B14853_10_149.png) + +1. 
执行梯度上升: + + ![](img/B14853_10_064.png) + +“列表 10.5.1”显示了`A2CAgent`类,该类在`tf.keras`中实现了“算法 10.5.1”。 与两个 REINFORCE 方法不同,返回值是从最后一个体验单元或状态到第一个体验单元或状态的计算得出的。 在每个体验单元,通过调用各自模型的`fit()`方法,优化第 7 行和第 9 行中用于目标和值函数`logp_model`和`value_model`的网络。 注意,在对象实例化期间,熵损失的`beta`或权重设置为`0.9`,以指示将使用熵损失函数。 此外,使用 MSE 损失函数训练`value_model`。 + +“列表 10.5.1”:`policygradient-car-10.1.1.py` + +```py +class A2CAgent(PolicyAgent): + def __init__(self, env): + """Implements the models and training of + A2C policy gradient method + Arguments: + env (Object): OpenAI gym environment + """ + super().__init__(env) + # beta of entropy used in A2C + self.beta = 0.9 + # loss function of A2C value_model is mse + self.loss = 'mse' +``` + +```py + def train_by_episode(self, last_value=0): + """Train by episode + Prepare the dataset before the step by step training + Arguments: + last_value (float): previous prediction of value net + """ + # implements A2C training from the last state + # to the first state + # discount factor + gamma = 0.95 + r = last_value + # the memory is visited in reverse as shown + # in Algorithm 10.5.1 + for item in self.memory[::-1]: + [step, state, next_state, reward, done] = item + # compute the return + r = reward + gamma*r + item = [step, state, next_state, r, done] + # train per step + # a2c reward has been discounted + self.train(item) +``` + +```py + def train(self, item, gamma=1.0): + """Main routine for training + Arguments: + item (list) : one experience unit + gamma (float) : discount factor [0,1] + """ + [step, state, next_state, reward, done] = item +``` + +```py + # must save state for entropy computation + self.state = state +``` + +```py + discount_factor = gamma**step +``` + +```py + # a2c: delta = discounted_reward - value + delta = reward - self.value(state)[0] +``` + +```py + verbose = 1 if done else 0 +``` + +```py + # train the logp model (implies training of actor model + # as well) since they share exactly the same set of + # parameters + self.logp_model.fit(np.array(state), + discounted_delta, + batch_size=1, + epochs=1, + verbose=verbose) +``` + +```py + # in A2C, the target value is the return (reward + # replaced by return in the train_by_episode function) + discounted_delta = reward + discounted_delta = np.reshape(discounted_delta, [-1, 1]) +``` + +```py + # train the value network (critic) + self.value_model.fit(np.array(state), + discounted_delta, + batch_size=1, + epochs=1, + verbose=verbose) +``` + +在介绍的四种算法中,它们仅在目标函数和值(如果适用)优化方面有所不同。 在下一节中,我们将介绍四种算法的统一代码。 + +# 6\. 
使用 Keras 的策略梯度方法 + +上一节中讨论的策略梯度方法(“算法 10.2.1”至“算法 10.5.1”)使用相同的策略和值网络模型。“图 10.2.1”至“图 10.4.1”中的策略和值网络具有相同的配置。 四种策略梯度方法的不同之处仅在于: + +* 表现和值梯度公式 +* 训练策略 + +在本节中,我们将以一个代码讨论`tf.keras`算法 10.2.1 至“算法 10.5.1”的通用例程在`tf.keras`中的实现。 + +完整的代码可以在[这个页面](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras)中找到。 + +但是在讨论实现之前,让我们简要探讨训练环境。 + +与 Q 学习不同,策略梯度方法适用于离散和连续动作空间。 在我们的示例中,我们将在连续动作空间案例示例中演示四种策略梯度方法,例如 OpenAI 健身房的[`MountainCarContinuous-v0`](https://gym.openai.com)。 如果您不熟悉 OpenAI Gym,请参阅“第 9 章”,“深度强化学习”。 + +“图 10.6.1”中显示了`MountainCarContinuous-v0`二维环境的快照。在此二维环境中,一辆功率不太强的汽车停在两座山之间: + +![](img/B14853_10_04.png) + +图 10.6.1:`MountainCarContinuous-v0` OpenAI Gym 环境 + +为了到达右侧山顶的黄旗,它必须来回行驶以获得足够的动力。 应用于汽车的能量越多(即动作的绝对值越大),则奖励越小(或负作用越大)。 + +奖励始终为负,到达标志时仅为正。 在这种情况下,汽车将获得 +100 的奖励。 但是,每个操作都会受到以下代码的惩罚: + +```py +reward-= math.pow(action[0],2)*0.1 +``` + +有效动作值的连续范围是`[-1.0, 1.0]`。 超出范围时,动作将被剪裁为其最小值或最大值。 因此,应用大于 1.0 或小于 -1.0 的操作值是没有意义的。 + +`MountainCarContinuous-v0`环境状态包含两个元素: + +* 车厢位置 +* 车速 + +通过编码器将状态转换为状态特征。 像动作空间一样,状态空间也是连续的。 预测的动作是给定状态的策略模型的输出。 值函数的输出是状态的预测值。 + +如图“图 10.2.1”到“图 10.4.1”所示,在建立策略和值网络之前,我们必须首先创建一个将状态转换为特征的函数。 该函数由自编码器的编码器实现,类似于在“第 3 章”,“自编码器”中实现的编码器。 + +“图 10.6.2”显示了包括编码器和解码器的自编码器: + +![A screenshot of a cell phone Description automatically generated](img/B14853_10_05.png) + +图 10.6.2:自编码器模型 + +在“图 10.6.3”中,编码器是由`Input(2)-Dense(256, activation='relu')-Dense(128, activation='relu')-Dense(32)`制成的 MLP。 每个状态都转换为 32 维特征向量: + +![A screenshot of a cell phone Description automatically generated](img/B14853_10_06.png) + +图 10.6.3:编码器模型 + +在“图 10.6.4”中,解码器也是 MLP,但由`Input(32)-Dense(128, activation='relu')-Dense(256, activation='relu')-Dense(2)`制成: + +![A screenshot of a cell phone Description automatically generated](img/B14853_10_07.png) + +图 10.6.4:解码器模型 + +自编码器使用 MSE,损失函数和`tf.keras`默认的 Adam 优化器训练了 10 个周期。 我们为训练和测试数据集采样了 220,000 个随机状态,并应用了 200,000:20,000 个训练测试拆分。 训练后,将保存编码器权重,以备将来在策略和值网络的训练中使用。“列表 10.6.1”显示了构建和训练自编码器的方法。 + +在`tf.keras`实现中,除非另有说明,否则我们将在本节中提及的所有例程均作为`PolicyAgent`类中的方法实现。 `PolicyAgent`的作用是代表策略梯度方法的常用功能,包括建立和训练自编码器网络模型以及预测动作,对数概率,熵和状态值。 这是“列表 10.2.1”至“列表 10.5.1”中介绍的每个策略梯度方法智能体类的超类。 + +“列表 10.6.1”:`policygradient-car-10.1.1.py` + +构建和训练特征自编码器的方法: + +```py +class PolicyAgent: + def __init__(self, env): + """Implements the models and training of + Policy Gradient Methods + Argument: + env (Object): OpenAI gym environment + """ +``` + +```py + self.env = env + # entropy loss weight + self.beta = 0.0 + # value loss for all policy gradients except A2C + self.loss = self.value_loss +``` + +```py + # s,a,r,s' are stored in memory + self.memory = [] +``` + +```py + # for computation of input size + self.state = env.reset() + self.state_dim = env.observation_space.shape[0] + self.state = np.reshape(self.state, [1, self.state_dim]) + self.build_autoencoder() +``` + +```py + def build_autoencoder(self): + """autoencoder to convert states into features + """ + # first build the encoder model + inputs = Input(shape=(self.state_dim, ), name='state') + feature_size = 32 + x = Dense(256, activation='relu')(inputs) + x = Dense(128, activation='relu')(x) + feature = Dense(feature_size, name='feature_vector')(x) +``` + +```py + # instantiate encoder model + self.encoder = Model(inputs, feature, name='encoder') + self.encoder.summary() + plot_model(self.encoder, + to_file='encoder.png', + show_shapes=True) +``` + +```py + # build the decoder model + feature_inputs = Input(shape=(feature_size,), + name='decoder_input') + x = Dense(128, activation='relu')(feature_inputs) + x = Dense(256, 
activation='relu')(x) + outputs = Dense(self.state_dim, activation='linear')(x) +``` + +```py + # instantiate decoder model + self.decoder = Model(feature_inputs, + outputs, + name='decoder') + self.decoder.summary() + plot_model(self.decoder, + to_file='decoder.png', + show_shapes=True) +``` + +```py + # autoencoder = encoder + decoder + # instantiate autoencoder model + self.autoencoder = Model(inputs, + self.decoder(self.encoder(inputs)), + name='autoencoder') + self.autoencoder.summary() + plot_model(self.autoencoder, + to_file='autoencoder.png', + show_shapes=True) +``` + +```py + # Mean Square Error (MSE) loss function, Adam optimizer + self.autoencoder.compile(loss='mse', optimizer='adam') +``` + +```py + def train_autoencoder(self, x_train, x_test): + """Training the autoencoder using randomly sampled + states from the environment + Arguments: + x_train (tensor): autoencoder train dataset + x_test (tensor): autoencoder test dataset + """ +``` + +```py + # train the autoencoder + batch_size = 32 + self.autoencoder.fit(x_train, + x_train, + validation_data=(x_test, x_test), + epochs=10, + batch_size=batch_size) +``` + +在给定`MountainCarContinuous-v0`环境的情况下,策略(或参与者)模型会预测必须应用于汽车的操作。 如本章第一部分中有关策略梯度方法的讨论所述,对于连续动作空间,策略模型从高斯分布`π(a[t] | s[t], θ) = a[t] ~ N(μ(s[t]), σ²(s[t]))`中采样一个动作。 在`tf.` `keras`中,实现为: + +```py +import tensorflow_probability as tfp + def action(self, args): + """Given mean and stddev, sample an action, clip + and return + We assume Gaussian distribution of probability + of selecting an action given a state + Arguments: + args (list) : mean, stddev list + """ + mean, stddev = args + dist = tfp.distributions.Normal(loc=mean, scale=stddev) + action = dist.sample(1) + action = K.clip(action, + self.env.action_space.low[0], + self.env.action_space.high[0]) + return action +``` + +动作被限制在其最小和最大可能值之间。 在这种方法中,我们使用`TensorFlow probability`包。 可以通过以下方式单独安装: + +```py +pip3 install --upgrade tensorflow-probability +``` + +策略网络的作用是预测高斯分布的均值和标准差。“图 10.6.5”显示了为`π(a[t] | s[t], θ)`建模的策略网络。 + +![A close up of text on a white background Description automatically generated](img/B14853_10_08.png) + +图 10.6.5:策略模型(参与者模型) + +请注意,编码器模型具有冻结的预训练权重。 仅平均值和标准差权重会收到表现梯度更新。 策略网络基本上是“公式 10.1.4”和“公式 10.1.5”的实现,为方便起见在此重复: + +![](img/B14853_10_153.png) (Equation 10.1.4) + +![](img/B14853_10_036.png) (Equation 10.1.5) + +其中`φ(s[t])`是编码器,`θ[μ]`是平均值`Dense(1)`层的权重,`θ[σ]`是标准差`Dense(1)`层的权重。 我们使用修改后的`softplus`函数`ζ(·)`来避免标准差为零: + +```py +def softplusk(x): + """Some implementations use a modified softplus + to ensure that the stddev is never zero + Argument: + x (tensor): activation input + """ + return K.softplus(x) + 1e-10 +``` + +策略模型构建器显示在“列表 10.6.2”中。 对数概率,熵和值模型也包含在此清单中,我们将在下面讨论。 + +“列表 10.6.2”:`policygradient-car-10.1.1.py` + +根据编码后的状态特征构建策略(角色),`logp`,熵和值模型的方法: + +```py + def build_actor_critic(self): + """4 models are built but 3 models share the + same parameters. hence training one, trains the rest. + The 3 models that share the same parameters + are action, logp, and entropy models. + Entropy model is used by A2C only. + Each model has the same MLP structure: + Input(2)-Encoder-Output(1). + The output activation depends on the nature + of the output. 
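        (In this design, the mean and value outputs use linear
        activation, while the stddev output passes through a
        softplus variant to keep it positive.)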
+ """ + inputs = Input(shape=(self.state_dim, ), name='state') + self.encoder.trainable = False + x = self.encoder(inputs) + mean = Dense(1, + activation='linear', + kernel_initializer='zero', + name='mean')(x) + stddev = Dense(1, + kernel_initializer='zero', + name='stddev')(x) + # use of softplusk avoids stddev = 0 + stddev = Activation('softplusk', name='softplus')(stddev) + action = Lambda(self.action, + output_shape=(1,), + name='action')([mean, stddev]) + self.actor_model = Model(inputs, action, name='action') + self.actor_model.summary() + plot_model(self.actor_model, + to_file='actor_model.png', + show_shapes=True) +``` + +```py + logp = Lambda(self.logp, + output_shape=(1,), + name='logp')([mean, stddev, action]) + self.logp_model = Model(inputs, logp, name='logp') + self.logp_model.summary() + plot_model(self.logp_model, + to_file='logp_model.png', + show_shapes=True) +``` + +```py + entropy = Lambda(self.entropy, + output_shape=(1,), + name='entropy')([mean, stddev]) + self.entropy_model = Model(inputs, entropy, name='entropy') + self.entropy_model.summary() + plot_model(self.entropy_model, + to_file='entropy_model.png', + show_shapes=True) +``` + +```py + value = Dense(1, + activation='linear', + kernel_initializer='zero', + name='value')(x) + self.value_model = Model(inputs, value, name='value') + self.value_model.summary() + plot_model(self.value_model, + to_file='value_model.png', + show_shapes=True) +``` + +```py + # logp loss of policy network + loss = self.logp_loss(self.get_entropy(self.state), + beta=self.beta) + optimizer = RMSprop(lr=1e-3) + self.logp_model.compile(loss=loss, optimizer=optimizer) +``` + +```py + optimizer = Adam(lr=1e-3) + self.value_model.compile(loss=self.loss, optimizer=optimizer) +``` + +![](img/B14853_10_09.png) + +图 10.6.6:策略的高斯对数概率模型 + +除了策略网络`π(a[t] | s[t], θ)`之外,我们还必须具有操作日志概率(`logp`)网络`ln π(a[t] | s[t], θ)`,因为该实际上是计算梯度的系统。 如图“图 10.6.6”所示,`logp`网络只是一个策略网络,其中附加的 Lambda(1)层在给定了作用,均值和标准差的情况下计算了高斯分布的对数概率。 + +`logp`网络和参与者(策略)模型共享同一组参数。 Lambda 层没有任何参数。 它是通过以下函数实现的: + +```py + def logp(self, args): + """Given mean, stddev, and action compute + the log probability of the Gaussian distribution + Arguments: + args (list) : mean, stddev action, list + """ + mean, stddev, action = args + dist = tfp.distributions.Normal(loc=mean, scale=stddev) + logp = dist.log_prob(action) + return logp +``` + +训练`logp`网络也可以训练角色模型。 在本节中讨论的训练方法中,仅训练`logp`网络。 + +如图“图 10.6.7”所示,熵模型还与策略网络共享参数: + +![](img/B14853_10_10.png) + +图 10.6.7:熵模型 + +给定平均值和标准差,使用以下函数,输出`Lambda(1)`层计算高斯分布的熵: + +```py + def entropy(self, args): + """Given the mean and stddev compute + the Gaussian dist entropy + Arguments: + args (list) : mean, stddev list + """ + mean, stddev = args + dist = tfp.distributions.Normal(loc=mean, scale=stddev) + entropy = dist.entropy() + return entropy +``` + +熵模型仅用于 A2C 方法。 + +“图 10.6.8”显示了值模型: + +![](img/B14853_10_11.png) + +图 10.6.8:值模型 + +该模型还使用具有权重的预训练编码器来实现以下公式“公式 10.3.2”,为方便起见,在此重复: + +![](img/B14853_10_161.png) (Equation 10.3.2) + +`θ[v]`是`Dense(1)`层的权重,该层是唯一接收值梯度更新的层。“图 10.6.8”表示“算法 10.3.1”至“算法 10.5.1”中的`V(s[t], θ[v])`。 值模型可以建立在以下几行中: + +```py +inputs = Input(shape=(self.state_dim, ), name='state') +self.encoder.trainable = False +x = self.encoder(inputs) +value = Dense(1, + activation='linear', + kernel_initializer='zero', + name='value')(x) +self.value_model = Model(inputs, value, name='value') +``` + +这些行也用`build_actor_critic()`方法实现,如清单 10.6.2 所示。 + +建立网络模型后,下一步就是训练。 在“算法 10.2.1”至“算法 10.5.1”中,我们通过梯度上升执行目标函数最大化。 在`tf.keras`中,我们通过梯度下降执行损失函数最小化。 损失函数只是目标函数最大化的负数。 
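下面给出一个极简的示意性草图(这是为说明符号取反而添加的假设示例,并非书中清单的原始代码):要在`tf.keras`中最大化目标函数`J(θ)`,只需把`-J(θ)`作为损失来最小化:

```py
from tensorflow.keras import backend as K

# minimal sketch: gradient ascent on the objective J is implemented
# as gradient descent on loss = -J
def negated_objective_loss(y_true, y_pred):
    # y_pred: log pi(a|s, theta); y_true: a scaling factor (e.g. R_t)
    # minimizing -E[y_true * y_pred] maximizes E[y_true * y_pred]
    return -K.mean(y_true * y_pred, axis=-1)
```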
梯度下降是梯度上升的负值。“列表 10.6.3”显示了`logp`和值损失函数。 + +我们可以利用损失函数的通用结构来统一“算法 10.2.1”至“算法 10.5.1”中的损失函数。 表现和值梯度仅在其恒定因子上有所不同。 所有表现梯度都有一个通用项`ᐁ[θ] ln π(a[t] | s[t], θ)`。 这由策略日志概率损失函数`logp_loss()`中的`y_pred`表示。 通用项`ᐁ[θ] ln π(a[t] | s[t], θ)`的因素取决于哪种算法,并实现为`y_true`。“表 10.6.1”显示`y_true`的值。 其余项是熵的加权梯度`β ᐁ[θ] H(π(a[t] | s[t], θ))`。 这是通过`logp_loss()`函数中`beta`和`entropy`的乘积实现的。 仅 A2C 使用此项,因此默认为`self.beta=0.0`。 对于 A2C,`self.beta=0.9`。 + +“列表 10.6.3”:`policygradient-car-10.1.1.py` + +`logp`和值网络的损失函数: + +```py + def logp_loss(self, entropy, beta=0.0): + """logp loss, the 3rd and 4th variables + (entropy and beta) are needed by A2C + so we have a different loss function structure + Arguments: + entropy (tensor): Entropy loss + beta (float): Entropy loss weight + """ + def loss(y_true, y_pred): + return -K.mean((y_pred * y_true) \ + + (beta * entropy), axis=-1) +``` + +```py + return loss +``` + +```py + def value_loss(self, y_true, y_pred): + """Typical loss function structure that accepts + 2 arguments only + this will be used by value loss of all methods + except A2C + Arguments: + y_true (tensor): value ground truth + y_pred (tensor): value prediction + """ + return -K.mean(y_pred * y_true, axis=-1) +``` + +| **算法** | `logp_loss`的`y_true` | `value_loss`的`y_true` | +| --- | --- | --- | +| 10.2.1 REINFORCE | `γ^t R[t]` | 不适用 | +| 10.3.1 使用基线的 REINFORCE | `γ^t δ` | `γ^t δ` | +| 10.4.1 演员评论家 | `γ^t δ` | `γ^t δ` | +| 10.5.1 A2C | `R[t] - V(s, θ[v])` | `R[t]` | + +表 10.6.1:`logp_loss`的`y_true`值和`value_loss` + +“表 10.6.2”中显示了用于计算“表 10.6.1”中的`y_true`的代码实现: + +| **算法** | `y_true`公式 | Keras 中的`y_true` | +| --- | --- | --- | +| 10.2.1 REINFORCE | `γ^t R[t]` | `reward * discount_factor` | +| 10.3.1 使用基线的 REINFORCE | `γ^t δ` | `(reward - self.value(state)[0]) * discount_factor` | +| 10.4.1 演员评论家 | `γ^t δ` | `(reward - self.value(state)[0] + gamma * next_value) * discount_factor` | +| 10.5.1 A2C | `R[t] - V(s, θ[v])`和`R[t]` | (`reward - self.value(state)[0]`)和`reward` | + +表 10.6.2:表 10.6.1 中的`y_true`值 + +类似地,“算法 10.3.1”和“算法 10.4.1”的值损失函数具有相同的结构。 值损失函数在`tf.keras`中实现为`value_loss()`,如“列表 10.6.3”所示。 公共梯度因子`ᐁ[θ[v]] V(s[t], θ[v])`由张量`y_pred`表示。 剩余因子由`y_true`表示。 `y_true`值也显示在“表 10.6.1”中。 REINFORCE 不使用值函数。 A2C 使用 MSE 损失函数来学习值函数。 在 A2C 中,`y_true`代表目标值或基本情况。 + +有了所有网络模型和损失函数,最后一部分是训练策略,每种算法都不同。 每个策略梯度方法的训练算法已在“列表 10.2.1”至“列表 10.5.1”中进行了讨论。 “算法 10.2.1”,“算法 10.3.1”和“算法 10.5.1”等待完整的剧集在训练之前完成,因此它同时运行`train_by_episode()` 和`train()`。 完整剧集保存在`self.memory`中。 演员评论家“算法 10.4.1”每步训练一次,仅运行`train()`。 + +“列表 10.6.4”显示了当智能体执行并训练策略和值模型时,一个剧集如何展开。 `for`循环执行 1,000 集。 当达到 1,000 步或汽车触及旗帜时,剧集终止。 智能体在每个步骤执行策略预测的操作。 在每个剧集或步骤之后,将调用训练例程。 + +“列表 10.6.4”:`policygradient-car-10.1.1.py` + +```py + # sampling and fitting + for episode in range(episode_count): + state = env.reset() + # state is car [position, speed] + state = np.reshape(state, [1, state_dim]) + # reset all variables and memory before the start of + # every episode + step = 0 + total_reward = 0 + done = False + agent.reset_memory() + while not done: + # [min, max] action = [-1.0, 1.0] + # for baseline, random choice of action will not move + # the car pass the flag pole + if args.random: + action = env.action_space.sample() + else: + action = agent.act(state) + env.render() + # after executing the action, get s', r, done + next_state, reward, done, _ = env.step(action) + next_state = np.reshape(next_state, [1, state_dim]) + # save the experience unit in memory for training + # Actor-Critic does not need this but we keep it anyway. 
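            # note: each experience unit is the list
            # [step, state, next_state, reward, done], matching the
            # unpacking at the start of each agent's train() method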
+ item = [step, state, next_state, reward, done] + agent.remember(item) +``` + +```py + if args.actor_critic and train: + # only actor-critic performs online training + # train at every step as it happens + agent.train(item, gamma=0.99) + elif not args.random and done and train: + # for REINFORCE, REINFORCE with baseline, and A2C + # we wait for the completion of the episode before + # training the network(s) + # last value as used by A2C + if args.a2c: + v = 0 if reward > 0 else agent.value(next_state)[0] + agent.train_by_episode(last_value=v) + else: + agent.train_by_episode() +``` + +```py + # accumulate reward + total_reward += reward + # next state is the new state + state = next_state + step += 1 +``` + +在训练期间,我们收集了数据以确定每个策略梯度算法的表现。 在下一部分中,我们总结了结果。 + +# 7\. 策略梯度方法的表现评估 + +通过训练智能体 1000 次剧集,评估了 4 种策略梯度方法。 我们将 1 次训练定义为 1,000 次训练。 第一表现度量标准是通过累计汽车在 1,000 集内达到标志的次数来衡量的。 + +在此指标中,A2C 达到该标志的次数最多,其次是 REINFORCE(具有基线,演员评论家和 REINFORCE)。 使用基线或批判者可以加速学习。 请注意,这些是训练会话,智能体会在其中不断提高其表现。 在实验中,有些情况下智能体的表现没有随时间改善。 + +第二个表现指标基于以下要求:如果每集的总奖励至少为 90.0,则认为`MountainCarContinuous-v0`已解决。 从每种方法的 5 个训练会话中,我们选择了最近 100 个剧集(第 900 至 999 集)中最高总奖励的 1 个训练会话。 + +“图 10.7.1”至“图 10.7.4”显示了在执行 1000 集时山地车到达标志的次数。 + +![](img/B14853_10_12.png) + +图 10.7.1:山车使用 REINFORCE 方法到达标志的次数 + +![](img/B14853_10_13.png) + +图 10.7.2:使用基线方法使用 REINFORCE,山地车到达标志的次数 + +![](img/B14853_10_14.png) + +图 10.7.3:使用演员评论家方法山地车到达旗帜的次数 + +![](img/B14853_10_15.png) + +图 10.7.4:山地车使用 A2C 方法到达标志的次数 + +“图 10.7.5”至“图 10.7.8”显示 1,000 集的总奖励。 + +![](img/B14853_10_16.png) + +图 10.7.5:使用 REINFORCE 方法获得的每集总奖励 + +![](img/B14853_10_17.png) + +图 10.7.6:使用带有基线方法的 REINFORCE,每集获得的总奖励。 + +![](img/B14853_10_18.png) + +图 10.7.7:使用演员评论家方法获得的每集总奖励 + +![](img/B14853_10_19.png) + +图 10.7.8:使用 A2C 方法获得的每集总奖励 + +以为基准的 REINFORCE 是唯一能够在 1,000 次训练中始终获得约 90 的总奖励的方法。 A2C 的表现仅次于第二名,但无法始终达到至少 90 分的总奖励。 + +在进行的实验中,我们使用相同的学习率`1e-3`进行对数概率和值网络优化。 折扣系数设置为 0.99(A2C 除外),以 0.95 的折扣系数更容易训练。 + +鼓励阅读器通过执行以下操作来运行受过训练的网络: + +```py +python3 policygradient-car-10.1.1.py +--encoder_weights=encoder_weights.h5 --actor_weights=actor_weights.h5 +``` + +“表 10.7.1”显示了其他运行`policygradient-car-10.1.1.py`的模式。 权重文件(即`*.h5`)可以替换为您自己的预训练权重文件。 请查阅代码以查看其他可能的选项。 + +| **目的** | **运行** | +| --- | --- | +| 从零开始训练 REINFORCE | `python3 policygradient-car-10.1.1.py` | +| 从头开始使用基线训练 REINFORCE | `python3 policygradient-car-10.1.1.py -b` | +| 从零开始训练演员评论家 | `python3 policygradient-car-10.1.1.py -a` | +| 从头开始训练 A2C | `python3 policygradient-car-10.1.1.py -c` | +| 从先前保存的权重中训练 REINFORCE | `python3 policygradient-car-10.1.1.py``--encoder-weights=encoder_weights.h5``--actor-weights=actor_weights.h5 --train` | +| 使用先前保存的权重使用基线训练 REINFORCE | `python3 policygradient-car-10.1.1.py``--encoder-weights=encoder_weights.h5``--actor-weights=actor_weights.h5``--value-weights=value_weights.h5 -b --train` | +| 使用先前保存的权重训练演员评论家 | `python3 policygradient-car-10.1.1.py``--encoder-weights=encoder_weights.h5``--actor-weights=actor_weights.h5``--value-weights=value_weights.h5 -a --train` | +| 使用先前保存的权重训练 A2C | `python3 policygradient-car-10.1.1.py``--encoder-weights=encoder_weights.h5``--actor-weights=actor_weights.h5``--value-weights=value_weights.h5 -c --train` | + +表 10.7.1:运行 policygradient-car-10.1.1.py 时的不同选项 + +最后一点,我们在`tf.keras`中对策略梯度方法的实现存在一些局限性。 例如,训练演员模型需要对动作进行重新采样。 首先对动作进行采样并将其应用于环境,以观察奖励和下一个状态。 然后,采取另一个样本来训练对数概率模型。 第二个样本不一定与第一个样本相同,但是用于训练的奖励来自第一个采样动作,这可能会在梯度计算中引入随机误差。 + +# 8\. 
总结 + +在本章中,我们介绍了策略梯度方法。 从策略梯度定理开始,我们制定了四种方法来训练策略网络。 详细讨论了四种方法:REINFORCE,带有基线的 REINFORCE,演员评论家和 A2C 算法。 我们探讨了如何在 Keras 中实现这四种方法。 然后,我们通过检查智能体成功达到目标的次数以及每集获得的总奖励来验证算法。 + +与上一章中讨论的深度 Q 网络[2]相似,基本策略梯度算法可以进行一些改进。 例如,最突出的一个是 A3C [3],它是 A2C 的多线程版本。 这使智能体可以同时接触不同的经验,并异步优化策略和值网络。 但是,在 [OpenAI](https://blog.openai.com/baselines-acktr-a2c/) 进行的实验中,与 A2C 相比,A3C 没有强大的优势,因为前者无法利用当今提供强大的 GPU 的优势。 + +在接下来的两章中,我们将着手于另一个领域-对象检测和语义分割。 对象检测使智能体能够识别和定位给定图像中的对象。 语义分割基于对象类别识别给定图像中的像素区域。 + +# 9\. 参考 + +1. `Richard Sutton and Andrew Barto: Reinforcement Learning: An Introduction: http://incompleteideas.net/book/bookdraft2017nov5.pdf (2017)` +1. `Volodymyr Mnih et al.: Human-level control through deep reinforcement learning, Nature 518.7540 (2015): 529` +1. `Volodymyr Mnih et al.: Asynchronous Methods for Deep Reinforcement Learning, International conference on machine learning, 2016` +1. `Ronald Williams: Simple statistical gradient-following algorithms for connectionist reinforcement learning, Machine learning 8.3-4 (1992): 229-256` \ No newline at end of file diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/11.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/11.md new file mode 100644 index 00000000..73ee0810 --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/11.md @@ -0,0 +1,1672 @@ +# 十一、对象检测 + +目标检测是计算机视觉最重要的应用之一。 对象检测是同时定位和识别图像中存在的对象的任务。 为了使自动驾驶汽车安全地在街道上行驶,该算法必须检测到行人,道路,车辆,交通信号灯,标志和意外障碍物的存在。 在安全方面,入侵者的存在可以用来触发警报或通知适当的当局。 + +尽管很重要,但是对象检测一直是计算机视觉中的一个长期存在的问题。 已经提出了许多算法,但是通常很慢,并且精度和召回率很低。 与 AlexNet [1]在 ImageNet 大规模图像分类问题中所取得的成就类似,深度学习显着提高了对象检测领域。 最新的对象检测方法现在可以实时运行,并且具有更高的精度和召回率。 + +在本章中,我们重点介绍实时对象检测。 特别是,我们讨论了`tf.keras`中**单发检测**(**SSD**)[2]的概念和实现。 与其他深度学习检测算法相比,SSD 可在现代 GPU 上实现实时检测速度,而表现不会显着下降。 SSD 还易于端到端训练。 + +总之,本章的目的是介绍: + +* 对象检测的概念 +* 多尺度目标检测的概念 +* SSD 作为多尺度目标检测算法 +* `tf.keras`中 SSD 的实现 + +我们将从介绍对象检测的概念开始。 + +# 1\. 对象检测 + +在对象检测中,目标是在图像中定位和识别物体。“图 11.1.1”显示了目标**汽水罐**的目标物检测。 本地化意味着必须估计对象的边界框。 使用左上角像素坐标和右下角像素坐标是用于描述边界框的通用约定。 在“图 11.1.1”中,左上角像素具有坐标`(x_min, y_min)`,而右下角像素的坐标为`(x_max, y_max)`。像素坐标系的原点`(0, 0)`位于整个图像的左上角像素。 + +在执行定位时,检测还必须识别对象。 识别是计算机视觉中的经典识别或分类任务。 至少,对象检测必须确定边界框是属于已知对象还是背景。 可以训练对象检测网络以仅检测一个特定对象,例如“图 11.1.1”中的**汽水罐**。 其他所有内容均视为背景,因此无需显示其边界框。 同一对象的多个实例,例如两个或多个**汽水罐**,也可以通过同一网络检测到,如图“图 11.1.2”所示。 + +![](img/B14853_11_01.png) + +图 11.1.1 说明了对象检测是在图像中定位和识别对象的过程。 + +![](img/B14853_11_02.png) + +图 11.1.2 被训练为检测一个对象实例的同一网络可以检测到同一对象的多个实例。 + +如果场景中存在多个对象,例如在“图 11.1.3”中,则对象检测方法只能识别在其上训练的一个对象。 其他两个对象将被分类为背景,并且不会分配边界框。 + +![](img/B14853_11_03.png) + +图 11.1.3 如果仅在检测汽水罐方面训练了对象检测,它将忽略图像中的其他两个对象。 + +但是,如果重新训练了网络以检测三个对象:1)**汽水罐**,2)**果汁罐**和 3)**水瓶**会同时定位和识别,如图“图 11.1.4”所示。 + +![](img/B14853_11_04.png) + +图 11.1.4 即使背景杂乱或照明发生变化,也可以重新训练对象检测网络以检测所有三个对象。 + +一个好的对象检测器必须在现实环境中具有鲁棒性。“图 11.1.4”显示了一个好的对象检测网络,即使背景杂乱甚至在弱光条件下,也可以定位和识别已知对象。 对象检测器必须具有鲁棒性的其他因素是物体变换(旋转和/或平移),表面反射,纹理变化和噪声。 + +总之,对象检测的目标是针对图像中每个**可识别的**对象同时预测以下内容: + +* `y_cls`或单热向量形式的类别或类 +* `y_box = ((x_min, y_min), (x_max, y_max))`或像素坐标形式的边界框坐标 + +通过解释了对象检测的基本概念,我们可以开始讨论对象检测的某些特定机制。 我们将从介绍锚框开始。 + +# 2\. 锚框 + +从上一节的讨论中,我们了解到,对象检测必须预测边界框区域以及其中的对象类别。 假设与此同时,我们的重点是边界框坐标估计。 + +网络如何预测坐标`(x_min, y_min)`和`(x_max, y_max)`? 
网络可以做出与图像的左上角像素坐标和右下角像素坐标相对应的初始猜测,例如`(0, 0)`和`(w, h)`。`w`是图像宽度,而`h`是图像高度。 然后,网络通过对地面真实边界框坐标执行回归来迭代地校正估计。 + +由于可能的像素值存在较大差异,因此使用原始像素估计边界框坐标不是最佳方法。 SSD 代替原始像素,将地面真值边界框和预测边界框坐标之间的像素误差值最小化。 对于此示例,像素的误差值为`(x_min, y_min)`和`(x_max - w, y_max - h)`。 这些值称为`offsets`。 + +为了帮助网络找出正确的边界框坐标,将图像划分为多个区域。 每个区域称为**定位框**。 然后,网络估计每个锚框的**偏移**。 这样得出的预测更接近于基本事实。 + +例如,如图“图 11.2.1”所示,将普通图像尺寸`640 x 480`分为`2 x 1`个区域,从而产生两个锚框。 与`2 x 2`的大小不同,`2 x 1`的划分创建了近似方形的锚框。 在第一个锚点框中,新的偏移量是`(x_min, y_min)`和`{x_max - w/2, y_max - h}`,它们比没有锚框的像素误差值更小。 第二个锚框的偏移量也较小。 + +在“图 11.2.2”中,图像被进一步分割。 这次,锚框为`3 x 2`。第二个锚框偏移为`{x_min - w/3, y_min}`和`{x_max - 2w/3, y_max - h/2}`,这是迄今为止最小的。 但是,如果将图像进一步分为`5 x 4`,则偏移量开始再次增加。 主要思想是,在创建各种尺寸的区域的过程中,将出现最接近地面真值边界框的最佳锚框大小。 使用多尺度锚框有效地检测不同大小的对象将巩固**多尺度对象检测**算法的概念。 + +找到一个最佳的锚框并不是零成本。 尤其是,有些外部锚框的偏移量比使用整个图像还要差。 在这种情况下,SSD 建议这些锚定框不应对整个优化过程有所帮助,而应予以抑制。 在以下各节中,将更详细地讨论排除非最佳锚框的算法。 + +到目前为止,我们已经有三套锚框。 + +第一个创建一个`2 x 1`的锚框网格,每个锚框的尺寸为`(w/2, h)`。 + +第二个创建一个`3 x 2`的锚框网格,每个锚框的尺寸为`(w/3, h/2)`。 + +第三个创建一个`5 x 4`的锚框网格,每个锚框的尺寸为`(w/5, h/4)`。 + +我们还需要多少套锚盒? 它取决于图像的尺寸和对象最小边框的尺寸。 对于此示例中使用的`640 x 480`图像,其他锚点框为: + +`10 x 8`格的锚框,每个框的尺寸为`(w/10, h/8)` + +`20 x 15`格的锚框,每个锚框的尺寸为`(w/20, h/15)` + +`40 x 30`格的锚框,每个框的尺寸为`(w/40, h/30)` + +对于具有`40 x 30`网格的锚框的`640 x 480`图像,最小的锚框覆盖输入图像的`16 x 16`像素斑块,也称为**接收域**。 到目前为止,包围盒的总数为 1608。对于所有尺寸,最小的**缩放因子**可以总结为: + +![](img/B14853_11_010.png) (Equation 11.2.1) + +锚框如何进一步改进? 如果我们允许锚框具有不同的纵横比,则可以减少偏移量。 每个调整大小的锚点框的质心与原始锚点框相同。 除宽高比 1 外,SSD [2]包括其他宽高比: + +![](img/B14853_11_011.png) (Equation 11.2.2) + +对于每个纵横比`a[i]`,对应的锚框尺寸为: + +![](img/B14853_11_013.png) (Equation 11.2.3) + +`(s[xj], s[yj])`是“公式 11.2.1”中的第`j`个比例因子。 + +使用每个锚框五个不同的长宽比,锚框的总数将增加到`1,608 x 5 = 8,040`。“图 11.2.3”显示了`(s[x4], s[y4]) = (1/3, 1/2)`和`a[i ∈ {0, 1, 3}] = 1, 2, 1/2`情况下的锚框。 + +请注意,为了达到一定的纵横比,我们不会使锚框变形。 而是调整锚框的宽度和高度。 + +对于`a[0] = 1`,SSD 建议使用其他尺寸的锚框: + +![](img/B14853_11_018.png) (Equation 11.2.4) + +现在每个区域有六个锚定框。 有五个是由于五个纵横比,另外还有一个纵横比为 1。新的锚框总数增加到 9,648。 + +![](img/B14853_11_05.png) + +图 11.2.1 将图像划分为多个区域(也称为锚框),使网络可以进行更接近地面真实情况的预测。 + +![](img/B14853_11_06.png) + +图 11.2.2 使用较小的锚框可以进一步减少偏移。 + +![](img/B14853_11_07.png) + +图 11.2.3 具有比例因子`(s[x4], s[y4]) = (1/3, 1/2)`和纵横比`a[i ∈ {0, 1, 3}] = 1, 2, 1/2`的一个区域的锚框。 + +下面的“列表 11.2.1”显示了锚框生成函数`anchor_boxes()`。 给定输入的图像形状(`image_shape`),纵横比(`aspect_ratios`)和缩放因子(`sizes`),将计算不同的锚框大小并将其存储在名为`width_height`的列表中。 从给定的特征映射形状(`feature_shape`或`(h_fmap, w_fmap)`和`width_height`, 生成具有尺寸`(h_fmap, w_fmap, n_boxes, 4)`。`n_boxes`或每个特征映射点的锚点框数是基于纵横比和等于 1 的纵横比的一个附加大小计算的。 + +“列表 11.2.1”:锚框生成函数的`layer_utils.py`函数: + +```py +def anchor_boxes(feature_shape, + image_shape, + index=0, + n_layers=4, + aspect_ratios=(1, 2, 0.5)): + """ Compute the anchor boxes for a given feature map. 
+ Anchor boxes are in minmax format +``` + +```py + Arguments: + feature_shape (list): Feature map shape + image_shape (list): Image size shape + index (int): Indicates which of ssd head layers + are we referring to + n_layers (int): Number of ssd head layers +``` + +```py + Returns: + boxes (tensor): Anchor boxes per feature map + """ +``` + +```py + # anchor box sizes given an index of layer in ssd head + sizes = anchor_sizes(n_layers)[index] + # number of anchor boxes per feature map pt + n_boxes = len(aspect_ratios) + 1 + # ignore number of channels (last) + image_height, image_width, _ = image_shape + # ignore number of feature maps (last) + feature_height, feature_width, _ = feature_shape +``` + +```py + # normalized width and height + # sizes[0] is scale size, sizes[1] is sqrt(scale*(scale+1)) + norm_height = image_height * sizes[0] + norm_width = image_width * sizes[0] +``` + +```py + # list of anchor boxes (width, height) + width_height = [] + # anchor box by aspect ratio on resized image dims + # Equation 11.2.3 + for ar in aspect_ratios: + box_width = norm_width * np.sqrt(ar) + box_height = norm_height / np.sqrt(ar) + width_height.append((box_width, box_height)) + # multiply anchor box dim by size[1] for aspect_ratio = 1 + # Equation 11.2.4 + box_width = image_width * sizes[1] + box_height = image_height * sizes[1] + width_height.append((box_width, box_height)) +``` + +```py + # now an array of (width, height) + width_height = np.array(width_height) +``` + +```py + # dimensions of each receptive field in pixels + grid_width = image_width / feature_width + grid_height = image_height / feature_height +``` + +```py + # compute center of receptive field per feature pt + # (cx, cy) format + # starting at midpoint of 1st receptive field + start = grid_width * 0.5 + # ending at midpoint of last receptive field + end = (feature_width - 0.5) * grid_width + cx = np.linspace(start, end, feature_width) +``` + +```py + start = grid_height * 0.5 + end = (feature_height - 0.5) * grid_height + cy = np.linspace(start, end, feature_height) +``` + +```py + # grid of box centers + cx_grid, cy_grid = np.meshgrid(cx, cy) +``` + +```py + # for np.tile() + cx_grid = np.expand_dims(cx_grid, -1) + cy_grid = np.expand_dims(cy_grid, -1) +``` + +```py + # tensor = (feature_map_height, feature_map_width, n_boxes, 4) + # aligned with image tensor (height, width, channels) + # last dimension = (cx, cy, w, h) + boxes = np.zeros((feature_height, feature_width, n_boxes, 4)) +``` + +```py + # (cx, cy) + boxes[..., 0] = np.tile(cx_grid, (1, 1, n_boxes)) + boxes[..., 1] = np.tile(cy_grid, (1, 1, n_boxes)) +``` + +```py + # (w, h) + boxes[..., 2] = width_height[:, 0] + boxes[..., 3] = width_height[:, 1] +``` + +```py + # convert (cx, cy, w, h) to (xmin, xmax, ymin, ymax) + # prepend one dimension to boxes + # to account for the batch size = 1 + boxes = centroid2minmax(boxes) + boxes = np.expand_dims(boxes, axis=0) + return boxes +``` + +```py +def centroid2minmax(boxes): + """Centroid to minmax format + (cx, cy, w, h) to (xmin, xmax, ymin, ymax) +``` + +```py + Arguments: + boxes (tensor): Batch of boxes in centroid format +``` + +```py + Returns: + minmax (tensor): Batch of boxes in minmax format + """ + minmax= np.copy(boxes).astype(np.float) + minmax[..., 0] = boxes[..., 0] - (0.5 * boxes[..., 2]) + minmax[..., 1] = boxes[..., 0] + (0.5 * boxes[..., 2]) + minmax[..., 2] = boxes[..., 1] - (0.5 * boxes[..., 3]) + minmax[..., 3] = boxes[..., 1] + (0.5 * boxes[..., 3]) + return minmax +``` + 
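在继续之前,下面给出一个假设性的用法草图(本文添加的示例,并非书中代码),演示如何针对正文中 `640 x 480` 的示例图像调用 `anchor_boxes()`。这里假设 `layer_utils` 模块及其辅助函数(如 `anchor_sizes()`)可以导入,并假设骨干网之后的特征映射为 `40 x 30`、过滤器数量为 512:

```py
from layer_utils import anchor_boxes

# example follows the 640 x 480 image discussed in the text
image_shape = (480, 640, 3)    # (height, width, channels)
# 40 x 30 feature map after the backbone; 512 filters is an assumption
feature_shape = (30, 40, 512)
boxes = anchor_boxes(feature_shape,
                     image_shape,
                     index=0,
                     n_layers=4)
# (1, 30, 40, 4, 4): batch, feature map height and width,
# n_boxes per feature point (3 aspect ratios + 1 extra size),
# and (xmin, xmax, ymin, ymax) per anchor box
print(boxes.shape)
```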
+我们已经介绍了锚框如何协助对象检测以及如何生成它们。 在下一节中,我们将介绍一种特殊的锚点框:真实情况锚点框。 给定图像中的对象,必须将其分配给多个锚点框之一。 这就是,称为真实情况锚定框。 + +# 3\. 真实情况锚框 + +从“图 11.2.3”看来,给定一个对象边界框,有许多可以分配给对象的真实情况锚定框。 实际上,仅出于“图 11.2.3”中的说明,已经有 3 个锚定框。 如果考虑每个区域的所有锚框,则仅针对`(s[x4], s[y4]) = (1/3, 1/2)`就有`6 x 6 = 36`个地面真实框。 使用所有 9,648 个锚点框显然过多。 所有锚定框中只有一个应与地面真值边界框相关联。 所有其他锚点框都是背景锚点框。 选择哪个对象应被视为图像中对象的真实情况锚定框的标准是什么? + +选择锚框的基础称为**交并比**(**IoU**)。 IoU 也称为 *Jaccard 指数*。 在“图 11.3.1”中说明了 IoU。 给定 2 个区域,对象边界框`B[0]`和锚定框`A[1]`,IoU 等于重叠除以合并区域的面积: + +![](img/B14853_11_022.png) (Equation 11.3.1) + +![](img/B14853_11_08.png) + +图 11.3.1 IoU 等于(左)候选锚点框`A[1]`与(右)对象边界框`B[0]`之间的相交面积除以并集面积。 + +我们删除了该等式的下标。 对于给定的对象边界框`B[i]`,对于所有锚点框`A[j]`,地面真值锚点框`A[j(gt)]`是具有最大 IoU 的一个: + +![](img/B14853_11_023.png) (Equation 11.3.2) + +请注意,对于每个对象,只有一个基于“公式 11.3.2”的地面真值锚定框。 此外,必须在所有比例因子和尺寸(长宽比和附加尺寸)中对所有锚框进行最大化。 在“图 11.3.1”中,在 9,648 个锚框中仅显示了一个比例因子大小。 + +为了说明“公式 11.3.2”,假设考虑了“图 11.3.1”中纵横比为 1 的锚框。 对于每个锚框,估计的 IoU 均显示在“表 11.3.1”中。 由于边界框`B[0]`的最大 IoU 为 0.32,因此带有锚框`A[1]`,`A[1]`被分配为地面真值边界框`B[0]`。`A[1]`也被称为**正锚框**。 + +正锚定框的类别和偏移量是相对于其地面真值边界框确定的。 正锚定框的类别与其地面真值边界框相同。 同时,可以将正锚框偏移量计算为等于地面真实边界框坐标减去其自身的边界框坐标。 + +其余锚框发生了什么,`A[0]`,`A[2]`,`A[3]`,`A[4]`,和`A[5]`? 我们可以通过找到他们的 IoU 大于某个阈值的边界框来给他们第二次机会。 + +例如,如果阈值为 0.5,则没有可分配给它们的地面真理边界框。 如果阈值降低到 0.25,则`A[4]`也分配有地面真值边界框`B[0]`,因为其 IoU 为 0.30 。 将`A[4]`添加到肯定锚框列表中。 在这本书中,`A[4]`被称为额外的正面锚盒。 没有地面边界框的其余锚框称为**负锚框**。 + +在以下有关损失函数的部分中,负锚框不构成偏移损失函数。 + +| |`B[0]`| +| --- | --- | +|`A[0]`| 0 | +|`A[1]`| 0.32 | +|`A[2]`| 0 | +|`A[3]`| 0 | +|`A[4]`| 0.30 | +|`A[5]`| 0 | + +“表 11.3.1”每个锚框`A[j ∈ 0 .. 5]`的 IoU,带有对象边界框`B[0]`,如“图 11.3.1”所示。 + +如果加载了另一个带有 2 个要检测的对象的图像,我们将寻找 2 个正 IoU,最大 IoU,并带有边界框`B[0]`和`B[1]`。 然后,我们使用边界框`B[0]`和`B[1]`寻找满足最小 IoU 准则的额外正锚框。 + +为了简化讨论,我们只考虑每个区域一个锚框。 实际上,应该考虑代表不同缩放比例,大小和纵横比的所有锚框。 在下一节中,我们讨论如何制定损失函数,这些损失函数将通过 SSD 网络进行优化。 + +“列表 11.3.1”显示了`get_gt_data()`的实现,该实现计算锚定框的真实情况标签。 + +“列表 11.3.1”:`layer_utils.py` + +```py +def get_gt_data(iou, + n_classes=4, + anchors=None, + labels=None, + normalize=False, + threshold=0.6): + """Retrieve ground truth class, bbox offset, and mask + + Arguments: + iou (tensor): IoU of each bounding box wrt each anchor box + n_classes (int): Number of object classes + anchors (tensor): Anchor boxes per feature layer + labels (list): Ground truth labels + normalize (bool): If normalization should be applied + threshold (float): If less than 1.0, anchor boxes>threshold + are also part of positive anchor boxes +``` + +```py + Returns: + gt_class, gt_offset, gt_mask (tensor): Ground truth classes, + offsets, and masks + """ + # each maxiou_per_get is index of anchor w/ max iou + # for the given ground truth bounding box + maxiou_per_gt = np.argmax(iou, axis=0) +``` + +```py + # get extra anchor boxes based on IoU + if threshold < 1.0: + iou_gt_thresh = np.argwhere(iou>threshold) + if iou_gt_thresh.size > 0: + extra_anchors = iou_gt_thresh[:,0] + extra_classes = iou_gt_thresh[:,1] + extra_labels = labels[extra_classes] + indexes = [maxiou_per_gt, extra_anchors] + maxiou_per_gt = np.concatenate(indexes, + axis=0) + labels = np.concatenate([labels, extra_labels], + axis=0) +``` + +```py + # mask generation + gt_mask = np.zeros((iou.shape[0], 4)) + # only indexes maxiou_per_gt are valid bounding boxes + gt_mask[maxiou_per_gt] = 1.0 +``` + +```py + # class generation + gt_class = np.zeros((iou.shape[0], n_classes)) + # by default all are background (index 0) + gt_class[:, 0] = 1 + # but those that belong to maxiou_per_gt are not + gt_class[maxiou_per_gt, 0] = 0 + # we have to find those column indexes (classes) + maxiou_col = 
np.reshape(maxiou_per_gt, + (maxiou_per_gt.shape[0], 1)) + label_col = np.reshape(labels[:,4], + (labels.shape[0], 1)).astype(int) + row_col = np.append(maxiou_col, label_col, axis=1) + # the label of object in maxio_per_gt + gt_class[row_col[:,0], row_col[:,1]] = 1.0 +``` + +```py + # offsets generation + gt_offset = np.zeros((iou.shape[0], 4)) +``` + +```py + #(cx, cy, w, h) format + if normalize: + anchors = minmax2centroid(anchors) + labels = minmax2centroid(labels) + # bbox = bounding box + # ((bbox xcenter - anchor box xcenter)/anchor box width)/.1 + # ((bbox ycenter - anchor box ycenter)/anchor box height)/.1 + # Equation 11.4.8 Chapter 11 + offsets1 = labels[:, 0:2] - anchors[maxiou_per_gt, 0:2] + offsets1 /= anchors[maxiou_per_gt, 2:4] + offsets1 /= 0.1 +``` + +```py + # log(bbox width / anchor box width) / 0.2 + # log(bbox height / anchor box height) / 0.2 + # Equation 11.4.8 Chapter 11 + offsets2 = np.log(labels[:, 2:4]/anchors[maxiou_per_gt, 2:4]) + offsets2 /= 0.2 +``` + +```py + offsets = np.concatenate([offsets1, offsets2], axis=-1) +``` + +```py + # (xmin, xmax, ymin, ymax) format + else: + offsets = labels[:, 0:4] - anchors[maxiou_per_gt] +``` + +```py + gt_offset[maxiou_per_gt] = offsets +``` + +```py + return gt_class, gt_offset, gt_mask +``` + +```py +def minmax2centroid(boxes): + """Minmax to centroid format + (xmin, xmax, ymin, ymax) to (cx, cy, w, h) +``` + +```py + Arguments: + boxes (tensor): Batch of boxes in minmax format +``` + +```py + Returns: + centroid (tensor): Batch of boxes in centroid format + """ + centroid = np.copy(boxes).astype(np.float) + centroid[..., 0] = 0.5 * (boxes[..., 1] - boxes[..., 0]) + centroid[..., 0] += boxes[..., 0] + centroid[..., 1] = 0.5 * (boxes[..., 3] - boxes[..., 2]) + centroid[..., 1] += boxes[..., 2] + centroid[..., 2] = boxes[..., 1] - boxes[..., 0] + centroid[..., 3] = boxes[..., 3] - boxes[..., 2] + return centroid +``` + +`maxiou_per_gt = np.argmax(iou, axis=0)`实现了“公式 11.3.2”。 额外的阳性锚框是基于由`iou_gt_thresh = np.argwhere(iou>threshold)`实现的用户定义的阈值确定的。 + +仅当阈值小于 1.0 时,才会查找额外的正锚框。 所有带有地面真值边界框的锚框(即组合的正锚框和额外的正锚框)的索引成为真实情况掩码的基础: + +`gt_mask[maxiou_per_gt] = 1.0`。 + +所有其他锚定框(负锚定框)的掩码为 0.0,并且不影响偏移损失函数的优化。 + +每个锚定框的类别`gt_class`被分配为其地面实况边界框的类别。 最初,为所有锚框分配背景类: + +```py + # class generation + gt_class = np.zeros((iou.shape[0], n_classes)) + # by default all are background (index 0) + gt_class[:, 0] = 1 +``` + +然后,将每个正面锚点框的类分配给其非背景对象类: + +```py + # but those that belong to maxiou_per_gt are not + gt_class[maxiou_per_gt, 0] = 0 + # we have to find those column indexes (classes) + maxiou_col = np.reshape(maxiou_per_gt, + (maxiou_per_gt.shape[0], 1)) + label_col = np.reshape(labels[:,4], + (labels.shape[0], 1)).astype(int) + row_col = np.append(maxiou_col, label_col, axis=1) + # the label of object in maxio_per_gt + gt_class[row_col[:,0], row_col[:,1]] = 1.0 +``` + +`row_col[:,0]`是正锚框的索引,而`row_col[:,1]`是它们的非背景对象类的索引。 请注意,`gt_class`是单热点向量的数组。 这些值都为零,除了锚点框对象的索引处。 索引 0 是背景,索引 1 是第一个非背景对象,依此类推。 最后一个非背景对象的索引等于`n_classes-1`。 + +例如,如果锚点框 0 是负锚点框,并且有 4 个对象类别(包括背景),则: + +```py +gt_class[0] = [1.0, 0.0, 0.0, 0.0] +``` + +如果锚定框 1 是正锚定框,并且其地面真值边界框包含带有标签 2 的**汽水罐**,则: + +```py +gt_class[1] = [0.0, 0.0, 1.0, 0.0] +``` + +最后,偏移量只是地面真实边界框坐标减去锚框坐标: + +```py + # (xmin, xmax, ymin, ymax) format + else: + offsets = labels[:, 0:4] - anchors[maxiou_per_gt] +``` + +注意,我们仅计算正锚框的偏移量。 + +如果选择了该选项,则可以将偏移量标准化。 下一部分将讨论偏移量归一化。 我们将看到: + +```py + #(cx, cy, w, h) format + if normalize: +``` + +```py + anchors = minmax2centroid(anchors) + labels = minmax2centroid(labels) + # 
bbox = bounding box + # ((bbox xcenter - anchor box xcenter)/anchor box width)/.1 + # ((bbox ycenter - anchor box ycenter)/anchor box height)/.1 + # Equation 11.4.8 + offsets1 = labels[:, 0:2] - anchors[maxiou_per_gt, 0:2] + offsets1 /= anchors[maxiou_per_gt, 2:4] + offsets1 /= 0.1 +``` + +```py + # log(bbox width / anchor box width) / 0.2 + # log(bbox height / anchor box height) / 0.2 + # Equation 11.4.8 + offsets2 = np.log(labels[:, 2:4]/anchors[maxiou_per_gt, 2:4]) + offsets2 /= 0.2 +``` + +```py + offsets = np.concatenate([offsets1, offsets2], axis=-1) +``` + +只是“公式 11.4.8”的实现,下一节将进行讨论,为方便起见,在此处显示: + +![](img/B14853_11_025.png) (Equation 11.4.8) + +现在我们已经了解了地面真锚框的作用,我们将继续研究对象检测中的另一个关键组件:损失函数。 + +# 4\. 损失函数 + +在 SSD 中,有数千个锚定框。 如本章前面所述,对象检测的目的是预测每个锚框的类别和偏移量。 我们可以对每个预测使用以下损失函数: + +* `L_cls` - `y_cls`的分类交叉熵损失 +* `L_off` - L1 或 L2,用于`y_cls`。 请注意,只有正锚框有助于`L_off` L1,也称为**平均绝对误差**(**MAE**)损失,而 L2 也称为**均方误差**(**MSE**)损失。 + +总的损失函数为: + +![](img/B14853_11_081.png) (Equation 11.4.1) + +对于每个定位框,网络都会预测以下内容: + +* `y_cls`或单热向量形式的类别或类 +* `y_off = ((x_omin, y_omin), (x_omax, y_omax))`或相对于锚框的像素坐标形式的偏移。 + +为了方便计算,可以将偏移量更好地表示为以下形式: + +`y_off = ((x_omin, y_omin), (x_omax, y_omax))` (Equation 11.4.2) + +SSD 是一种监督对象检测算法。 可以使用以下基本真值: + +* `y_label`或要检测的每个对象的类标签 +* `y_gt = (x_gmin, x_gmax, y_gmin, y_gmax)`或地面真实偏差,其计算公式如下: + +`y_gt = (x_bmin – x_amin, x_bmax – x_amax, y_bmin – y_amin, y_bmax – y_amax)` (Equation 11.4.3) + +换句话说,将地面真实偏移量计算为对象包围盒相对于锚定框的地面真实偏移量。 为了清楚起见,`y_box`下标中的细微调整。 如上一节所述,基本真值是通过`get_gt_data()`函数计算的。 + +但是,SSD 不建议直接从预测原始像素误差值`y_off`。 而是使用归一化的偏移值。 地面真值边界框和锚点框坐标首先以质心尺寸格式表示: + +![](img/B14853_11_026.png) + +![](img/B14853_11_027.png) + +(Equation 11.4.4) + +哪里: + +![](img/B14853_11_028.png) (Equation 11.4.5) + +是边界框中心的坐标,并且: + +`(w[b], h[b]) = (x_max – x_min, y_max - y_min)` (Equation 11.4.6) + +分别对应于宽度和高度。 锚框遵循相同的约定。 归一化的真实情况偏移量表示为: + +![](img/B14853_11_029.png) (Equation 11.4.7) + +通常,`y_gt`的元素值很小,`||y_gt|| << 1.0`。 较小的梯度会使网络训练更加难以收敛。 + +为了缓解该问题,将每个元素除以其估计的标准差。 由此产生的基本事实抵消了: + +![](img/B14853_11_025.png) (Equation 11.4.8) + +推荐值为:`σ[x] = σ[y] = 0.1`和`σ[w] = σ[h] = 0.2`。 换句话说,沿着`x`和`y`轴的像素误差的预期范围是`± 10%`,而对于宽度和高度,则是`± 20%。 这些值纯粹是任意的。 + +“列表 11.4.1”:`loss.py` L1 和平滑 L1 损失函数 + +```py +from tensorflow.keras.losses import Huber +def mask_offset(y_true, y_pred): + """Pre-process ground truth and prediction data""" + # 1st 4 are offsets + offset = y_true[..., 0:4] + # last 4 are mask + mask = y_true[..., 4:8] + # pred is actually duplicated for alignment + # either we get the 1st or last 4 offset pred + # and apply the mask + pred = y_pred[..., 0:4] + offset *= mask + pred *= mask + return offset, pred + +def l1_loss(y_true, y_pred): + """MAE or L1 loss + """ + offset, pred = mask_offset(y_true, y_pred) + # we can use L1 + return K.mean(K.abs(pred - offset), axis=-1) + +def smooth_l1_loss(y_true, y_pred): + """Smooth L1 loss using tensorflow Huber loss + """ + offset, pred = mask_offset(y_true, y_pred) + # Huber loss as approx of smooth L1 + return Huber()(offset, pred) +``` + +此外,代替`y_cls`的 L1 损失,SSD 受 Fast-RCNN [3]启发,使用平滑 L1: + +![](img/B14853_11_035.png) (Equation 11.4.9) + +其中`u`代表地面真实情况与预测之间的误差中的每个元素: + +![](img/B14853_11_037.png) (Equation 11.4.10) + +与 L1 相比,平滑 L1 更健壮,并且对异常值的敏感性较低。 在 SSD 中,`σ = 1`。 作为`σ -> ∞`,平滑 L1 接近 L1。 L1 和平滑 L1 损失函数都在“列表 11.4.1”中显示。 `mask_offset()`方法可确保仅在具有地面真实边界框的预测上计算偏移量。 平滑的 L1 函数与`σ = 1`[8]时的 Huber 损失相同。 + +作为对损失函数的进一步改进,RetinaNet [3]建议将 CE`y_cls`的分类交叉熵函数替换为焦点损失 FL: + +![](img/B14853_11_041.png) (Equation 11.4.11) + +![](img/B14853_11_042.png) (Equation 11.4.12) + 
+区别在于额外因素`α(1 - p[i])^γ`。 在 RetinaNet 中,当`γ = 2`和`α = 0.25`时,对象检测效果最好。 焦点损失在“列表 11.4.2”中实现。 + +“列表 11.4.2”:`loss.py`焦点损失 + +```py +def focal_loss_categorical(y_true, y_pred): + """Categorical cross-entropy focal loss""" + gamma = 2.0 + alpha = 0.25 +``` + +```py + # scale to ensure sum of prob is 1.0 + y_pred /= K.sum(y_pred, axis=-1, keepdims=True) +``` + +```py + # clip the prediction value to prevent NaN and Inf + epsilon = K.epsilon() + y_pred = K.clip(y_pred, epsilon, 1\. - epsilon) + # calculate cross entropy + cross_entropy = -y_true * K.log(y_pred) +``` + +```py + # calculate focal loss + weight = alpha * K.pow(1 - y_pred, gamma) + cross_entropy *= weight +``` + +```py + return K.sum(cross_entropy, axis=-1) +``` + +聚焦损失的动机是,如果我们检查图像,则大多数锚框应分类为背景或负锚框。 只有很少的正锚框是代表目标对象的良好候选对象。 负熵损失是造成交叉熵损失的主要因素。 因此,负锚框的贡献使优化过程中正锚框的贡献无法实现。 这种现象也称为**类不平衡**,其中一个或几个类占主导地位。 有关其他详细信息,Lin 等。 文献[4]讨论了对象检测中的类不平衡问题。 + +有了**焦点损失**,我们在优化过程的早期就确信负锚框属于背景。 因此,由于`p[i] -> 1.0`,项`(1 - p[i])^γ`减少了负锚框的贡献。 对于正锚框,其贡献仍然很大,因为`p[i]`远非 1.0。 + +既然我们已经讨论了锚定框,地面真值锚定框和损失函数的概念,我们现在准备介绍实现多尺度目标检测算法的 SSD 模型架构。 + +# 5\. SSD 模型架构 + +“图 11.5.1”显示了 SSD 的模型架构,该模型实现了多尺度单发目标检测的概念框架。 网络接受 RGB 图像,并输出几个预测级别。 基本或**骨干**网络提取用于分类和偏移量预测的下游任务的特征。 ResNet50 是骨干网络的一个很好的例子,它类似于“第 2 章”,“深度神经网络”中讨论,实现和评估的内容。 在骨干网络之后,对象检测任务由执行其余的网络,我们将其称为 **SSD 头**。 + +骨干网络可以是具有冻结权重的预训练网络(例如,以前为 ImageNet 分类而训练),也可以是与对象检测一起训练的网络。 如果使用预先训练的基础网络,则可以利用重用以前从大型数据集中学习的特征提取过滤器的优势。 此外,由于冻结了骨干网参数,因此可以加快学习速度。 仅训练对象检测中的顶层。 在这本书中,骨干网是与对象检测联合训练的,因为我们假设我们不一定需要访问预先训练的骨干网。 + +骨干网网络通常使用跨步 2 或通过最大池化实现几轮下采样。 对于 ResNet50,这是 4 倍。 基本网络变为`(w/2^4, h/2^4) = (w/16, h/16)`之后,特征映射的结果尺寸。 如果图像的宽度和高度均可以被 16 整除,则尺寸是精确的。 + +例如,对于`640 x 480`的图像,生成的特征映射的尺寸为`40 x 30 = 1200`。 如前几节所述,这是基础网络之后长宽比等于 1 的锚点框的数量。 此数字乘以每个锚定框的大小数。 在前面的部分中,由于长宽比,有 6 种不同的尺寸,而长宽比为 1 时,还有一个其他尺寸。 + +在本书中,我们将纵横比限制为`a[i ∈ {0, 1, 3}] = 1, 2, 1/2`。 因此,将只有 4 种不同的大小。 对于`640 x 480`图像,第一组锚框的锚框总数为`n[1] = 4,800`。 + +在“图 11.5.1”中,指示密集网格以表明对于第一组预测,存在大量预测(例如:`40 x 30 x 4`),从而导致大量补丁 。 尽管每个锚点框有 4 种尺寸,但为清楚起见,仅显示了与宽高比 1 对应的`16 x 16`锚点框。 + +此锚框也是`40 x 30 x n_filter`特征映射中每个元素的接受字段大小。`n_filter`是骨干网最后卷积层中过滤器的数量。 对于每个锚框,都将预测类别和偏移量。 + +总共有`n[1]`类和`n[1]`偏移量预测。 单热类预测的维数等于要检测的对象类别的数量,背景为 1。 每个偏移量变量预测的尺寸为 4,对应于`(x, y)`到预测边界框的 2 个角的偏移量。 + +类预测器由卷积层组成,该卷积层由使用 *softmax* 进行分类交叉熵损失的激活层终止。 偏移量预测值是具有线性激活的独立卷积层。 + +在基础网络之后可以应用其他特征提取模块。 每个特征提取器块都是`Conv2D(strides=2)-BN-ELU`的形式。 在特征提取块之后,特征映射的大小减半,并且过滤器的数量增加一倍。 例如,基本网络之后的第一个特征提取器块具有`20 x 15 x 2 n_filter`特征映射。 根据该特征映射,使用卷积层进行`n[2]`类和`n[2]`偏移量预测。`n[2] = 20 x 15 x 4 = 1,200` + +可以继续添加具有类和偏移量预测变量的特征提取块的过程。 在前面的部分中,对于`640 x 480`的图像,最大可达`2 x 1 x 2^5 n_filter`特征映射产生`n[6]`类和`n[6]`抵消了其中`n[6] = 2 x 1 x 4 = 8`的预测。 到 6 层特征提取和预测块。 在第 6 个块之后,一个`640 x 480`图像的锚点映射预测总数为 9,648。 + +在前面的部分中,锚定框的比例因子大小按降序排列: + +![](img/B14853_11_050.png) Equation 11.5.1) + +这样做是为了使讨论清晰。 在本节中,应该意识到比例因子的大小实际上是从骨干网之后的特征映射大小开始的。 实际上,缩放因子应按升序排列: + +![](img/B14853_11_051.png) (Equation 11.5.2) + +这意味着如果将特征提取块的数量减少到 4,则缩放因子为: + +![](img/B14853_11_052.png) (Equation 11.5.3) + +如果特征映射的宽度或高度不能被 2 整除(例如:15),则将应用天花板函数(例如:`ceil(15/2) = 8`)。 但是,在原始的 SSD [2]实现中,所使用的缩放因子被简化为`[0.2, 0.9]`范围,该范围通过缩放因子的数量或特征提取块的数量`n_layers`进行线性缩放: + +```py +s = np.linspace(0.2, 0.9, n_layers + 1) +``` + +![](img/B14853_11_09.png) + +图 11.5.1 SSD 模型架构。请注意,对于`w/16 x h/16`网格,锚框的数量可能不准确。 网格显示了锚框的紧密包装。 + +讨论了 SSD 模型架构之后,现在让我们看一下如何在 Keras 中实现 SSD 模型架构。 + +# 6\. 
Keras 中的 SSD 模型架构 + +与前面章节中的代码示例不同,SSD 的`tf.keras`实现更加复杂。 与 SSD 的其他`tf.keras`实现相比,本章中提供的代码示例重点介绍多尺度目标检测的关键概念。 可以进一步优化代码实现的某些部分,例如缓存地面真锚框类,偏移量和掩码。 在我们的示例中,每次从文件系统加载图像时,线程都会计算出地面真实值。 + +“图 11.6.1”显示了包含 SSD 的`tf.keras`实现的代码块的概述。 `ssd-11.6.1.py`中的 SSD 对象可以构建,训练和评估 SSD 模型。 它借助`model.py`和`resnet.py`以及`data_generator.py`中的多线程数据生成器,位于 SSD 模型创建器的顶部。 SSD 模型实现了“图 11.5.1”中所示的 SSD 架构。 每个主要模块的实现将在后续部分中详细讨论。 + +SSD 模型使用 ResNet 作为其骨干网络。 它在`resnet.py`中调用 ResNet V1 或 V2 模型创建者。 与前几章中的示例不同,SSD 使用的数据集由数千个高分辨率图像组成。 多线程数据生成器将加载文件,并且将这些文件从文件系统排队。 它还计算锚点箱的地面真值标签。 如果没有多线程数据生成器,则在训练期间图像的加载和排队以及地面真值的计算将非常缓慢。 + +有许多小的但重要的例程在后台运行。 这些都集中存储在工具块中。 这些例程创建锚框,计算 IoU,建立真实情况标签,运行非最大抑制,绘制标签和框,在视频帧上显示检测到的对象,提供损失函数等。 + +![](img/B14853_11_10.png) + +图 11.6.1 实现 SSD 的代码块。 + +# 7\. Keras 中的 SSD 对象 + +“列表 11.7.1”(很快显示)显示了 SSD 类。 说明了两个主要例程: + +1. 使用`build_model()`创建 SSD 模型 + +1. 通过`build_generator()`实例化数据生成器 + +`build_model`首先根据训练标签创建数据字典。 字典存储图像文件名以及每个图像中每个对象的地面真实边界框坐标和类。 之后,构建骨干网和 SSD 网络模型。 模型创建的最重要产品是`self.ssd` – SSD 的网络模型。 + +标签存储在 csv 文件中。 对于本书中使用的示例训练图像,标签以以下格式保存在`dataset/drinks/labels_train.csv`中: + +```py +frame,xmin,xmax,ymin,ymax,class_id +0001000.jpg,310,445,104,443,1 +0000999.jpg,194,354,96,478,1 +0000998.jpg,105,383,134,244,1 +0000997.jpg,157,493,89,194,1 +0000996.jpg,51,435,207,347,1 +0000995.jpg,183,536,156,283,1 +0000994.jpg,156,392,178,266,2 +0000993.jpg,207,449,119,213,2 +0000992.jpg,47,348,213,346,2 +… +``` + +“列表 11.7.1”:`ssd-11.6.1.py` + +```py +class SSD: + """Made of an ssd network model and a dataset generator. + SSD defines functions to train and validate + an ssd network model. +``` + +```py + Arguments: + args: User-defined configurations +``` + +```py + Attributes: + ssd (model): SSD network model + train_generator: Multi-threaded data generator for training + """ + def __init__(self, args): + """Copy user-defined configs. + Build backbone and ssd network models. + """ + self.args = args + self.ssd = None + self.train_generator = None + self.build_model() +``` + +```py + def build_model(self): + """Build backbone and SSD models.""" + # store in a dictionary the list of image files and labels + self.build_dictionary() + # input shape is (480, 640, 3) by default + self.input_shape = (self.args.height, + self.args.width, + self.args.channels) +``` + +```py + # build the backbone network (eg ResNet50) + # the number of feature layers is equal to n_layers + # feature layers are inputs to SSD network heads + # for class and offsets predictions + self.backbone = self.args.backbone(self.input_shape, + n_layers=self.args.layers) +``` + +```py + # using the backbone, build ssd network + # outputs of ssd are class and offsets predictions + anchors, features, ssd = build_ssd(self.input_shape, + self.backbone, + n_layers=self.args.layers, + n_classes=self.n_classes) + # n_anchors = num of anchors per feature point (eg 4) + self.n_anchors = anchors + # feature_shapes is a list of feature map shapes + # per output layer - used for computing anchor boxes sizes + self.feature_shapes = features + # ssd network model + self.ssd = ssd +``` + +```py + def build_dictionary(self): + """Read input image filenames and obj detection labels + from a csv file and store in a dictionary. 
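        The csv rows are assumed to follow the format shown
        earlier: frame,xmin,xmax,ymin,ymax,class_id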
+ """ + # train dataset path + path = os.path.join(self.args.data_path, + self.args.train_labels) +``` + +```py + # build dictionary: + # key=image filaname, value=box coords + class label + # self.classes is a list of class labels + self.dictionary, self.classes = build_label_dictionary(path) + self.n_classes = len(self.classes) + self.keys = np.array(list(self.dictionary.keys())) +``` + +```py + def build_generator(self): + """Build a multi-thread train data generator.""" +``` + +```py + self.train_generator = \ + DataGenerator(args=self.args, + dictionary=self.dictionary, + n_classes=self.n_classes, + feature_shapes=self.feature_shapes, + n_anchors=self.n_anchors, + shuffle=True) +``` + +“列表 11.7.2”显示了 SSD 对象中的另一种重要方法`train()`。 指示了使用默认损失函数或改进的损失函数的选项,如先前部分所述。 还有一个选项可以选择仅平滑 L1。 + +`self.ssd.fit_generator()`是此函数中最重要的调用。 它借助多线程数据生成器启动有监督的训练。 在每个周期,都会执行两个回调函数。 首先,将模型权重保存到文件中。 然后,对于 ResNet 模型,以与“第 2 章”,“深度神经网络”相同的方式使用的改进的学习率调度器称为: + +“列表 11.7.2”:`ssd-11.6.1.py` + +```py + def train(self): + """Train an ssd network.""" + # build the train data generator + if self.train_generator is None: + self.build_generator() +``` + +```py + optimizer = Adam(lr=1e-3) + # choice of loss functions via args + if self.args.improved_loss: + print_log("Focal loss and smooth L1", self.args.verbose) + loss = [focal_loss_categorical, smooth_l1_loss] + elif self.args.smooth_l1: + print_log("Smooth L1", self.args.verbose) + loss = ['categorical_crossentropy', smooth_l1_loss] + else: + print_log("Cross-entropy and L1", self.args.verbose) + loss = ['categorical_crossentropy', l1_loss] +``` + +```py + self.ssd.compile(optimizer=optimizer, loss=loss) +``` + +```py + # prepare callbacks for saving model weights + # and learning rate scheduler + # learning rate decreases by 50% every 20 epochs + # after 60th epoch + checkpoint = ModelCheckpoint(filepath=filepath, + verbose=1, + save_weights_only=True) + scheduler = LearningRateScheduler(lr_scheduler) +``` + +```py + callbacks = [checkpoint, scheduler] + # train the ssd network + self.ssd.fit_generator(generator=self.train_generator, + use_multiprocessing=True, + callbacks=callbacks, + epochs=self.args.epochs, + workers=self.args.workers) +``` + +在下一部分中,我们将讨论 Keras 中 SSD 架构实现的其他详细信息。 特别是 SSD 模型和多线程数据生成器的实现。 + +# 8\. 
Keras 中的 SSD 模型 + +“列表 11.8.1”显示了 SSD 模型创建函数`build_ssd()`。 该模型在“图 11.5.1”中进行了说明。 该函数通过调用`base_outputs = backbone(inputs)`从骨干网或基础网络检索输出特征的`n_layers`。 + +在本书中,`backbone()`是`build_resnet()`。 `build_resnet()`可以生成的 ResNet 模型类似于“第 2 章”,“深度神经网络”中讨论的残差网络。 `build_resnet()`函数可以由构建基础网络的任何函数名称代替。 + +如图“图 11.5.1”所示,返回值`base_outputs`是输出特征的列表,这些特征将作为类别和偏移预测层的输入。 例如,第一输出`base_outputs[0]`用于生成`n[1]`类预测和`n[1]`偏移量预测。 + +在`build_ssd()`的`for`循环中,类别预测是`classes`变量,而偏移量预测是`offsets`变量。 在`for`循环迭代之后,将类别预测连接,并最终合并为一个具有以下尺寸的`classes`变量: + +![](img/B14853_11_055.png) + +对`offsets`变量执行相同的过程。 结果尺寸为: + +![](img/B14853_11_056.png) + +其中`n_mini_batch`是迷你批量大小,`n_anchor_box`是锚定框的数量。 `for`循环迭代的次数等于`n_layers`。 该数目也等于锚定框缩放因子的所需数目或 SSD 头的特征提取块的数目。 + +函数`build_ssd()`返回每个特征点或区域的锚框数量,每个前类的特征形状,偏移量预测层以及 SSD 模型本身。 + +“列表 11.8.1”:`model.py` + +```py +def build_ssd(input_shape, + backbone, + n_layers=4, + n_classes=4, + aspect_ratios=(1, 2, 0.5)): + """Build SSD model given a backbone + + Arguments: + input_shape (list): input image shape + backbone (model): Keras backbone model + n_layers (int): Number of layers of ssd head + n_classes (int): Number of obj classes + aspect_ratios (list): annchor box aspect ratios + + Returns: + n_anchors (int): Number of anchor boxes per feature pt + feature_shape (tensor): SSD head feature maps + model (Keras model): SSD model + """ + # number of anchor boxes per feature map pt + n_anchors = len(aspect_ratios) + 1 +``` + +```py + inputs = Input(shape=input_shape) + # no. of base_outputs depends on n_layers + base_outputs = backbone(inputs) + + outputs = [] + feature_shapes = [] + out_cls = [] + out_off = [] +``` + +```py + for i in range(n_layers): + # each conv layer from backbone is used + # as feature maps for class and offset predictions + # also known as multi-scale predictions + conv = base_outputs if n_layers==1 else base_outputs[i] + name = "cls" + str(i+1) + classes = conv2d(conv, + n_anchors*n_classes, + kernel_size=3, + name=name) +``` + +```py + # offsets: (batch, height, width, n_anchors * 4) + name = "off" + str(i+1) + offsets = conv2d(conv, + n_anchors*4, + kernel_size=3, + name=name) +``` + +```py + shape = np.array(K.int_shape(offsets))[1:] + feature_shapes.append(shape) + # reshape the class predictions, yielding 3D tensors of + # shape (batch, height * width * n_anchors, n_classes) + # last axis to perform softmax on them + name = "cls_res" + str(i+1) + classes = Reshape((-1, n_classes), + name=name)(classes) +``` + +```py + # reshape the offset predictions, yielding 3D tensors of + # shape (batch, height * width * n_anchors, 4) + # last axis to compute the (smooth) L1 or L2 loss + name = "off_res" + str(i+1) + offsets = Reshape((-1, 4), + name=name)(offsets) + # concat for alignment with ground truth size + # made of ground truth offsets and mask of same dim + # needed during loss computation + offsets = [offsets, offsets] + name = "off_cat" + str(i+1) + offsets = Concatenate(axis=-1, + name=name)(offsets) +``` + +```py + # collect offset prediction per scale + out_off.append(offsets) +``` + +```py + name = "cls_out" + str(i+1) +``` + +```py + #activation = 'sigmoid' if n_classes==1 else 'softmax' + #print("Activation:", activation) +``` + +```py + classes = Activation('softmax', + name=name)(classes) +``` + +```py + # collect class prediction per scale + out_cls.append(classes) +``` + +```py + if n_layers > 1: + # concat all class and offset from each scale + name = "offsets" + offsets = Concatenate(axis=1, + name=name)(out_off) + name = "classes" + classes = Concatenate(axis=1, + 
name=name)(out_cls) + else: + offsets = out_off[0] + classes = out_cls[0] +``` + +```py + outputs = [classes, offsets] + model = Model(inputs=inputs, + outputs=outputs, + name='ssd_head') +``` + +```py + return n_anchors, feature_shapes, model +``` + +如前面所述,与 MNIST 和 CIFAR-10 等小型数据集不同,SSD 中使用的映像很大。 因此,不可能将图像加载到张量变量中。 在下一节中,我们将介绍一个多线程数据生成器,该生成器将使我们能够从文件系统并发加载图像,并避免内存瓶颈。 + +# 9\. Keras 中的数据生成器模型 + +SSD 需要大量带标签的高分辨率图像来进行对象检测。 与之前的章节中使用的数据集可以加载到到内存中以训练模型不同,SSD 实现了多线程数据生成器。 多线程生成器的任务是加载图像的多个迷你批量及其相应的标签。 由于具有多线程,GPU 可以保持繁忙,因为一个线程向其提供数据,而其余的 CPU 线程处于队列中,准备从文件系统中馈入另一批数据或加载一批图像并计算基本真值 。“列表 11.9.1”显示了 Keras 中的数据生成器模型。 + +`DataGenerator`类继承自 Keras 的`Sequence`类,以确保它支持多处理。 `DataGenerator`保证在一个周期内使用整个数据集。 + +给定批量大小的整个周期的长度由`__len__()`方法返回。 对小批量数据的每个请求都可以通过`__getitem__()`方法来满足。 在每个周期之后,如果`self.shuffle`为`True`,则调用`on_epoch_end()`方法以随机播放整个批量。 + +“列表 11.9.1”:`data_generator.py` + +```py +class DataGenerator(Sequence): + """Multi-threaded data generator. + Each thread reads a batch of images and their object labels +``` + +```py + Arguments: + args: User-defined configuration + dictionary: Dictionary of image filenames and object labels + n_classes (int): Number of object classes + feature_shapes (tensor): Shapes of ssd head feature maps + n_anchors (int): Number of anchor boxes per feature map pt + shuffle (Bool): If dataset should be shuffled bef sampling + """ + def __init__(self, + args, + dictionary, + n_classes, + feature_shapes=[], + n_anchors=4, + shuffle=True): + self.args = args + self.dictionary = dictionary + self.n_classes = n_classes + self.keys = np.array(list(self.dictionary.keys())) + self.input_shape = (args.height, + args.width, + args.channels) + self.feature_shapes = feature_shapes + self.n_anchors = n_anchors + self.shuffle = shuffle + self.on_epoch_end() + self.get_n_boxes() +``` + +```py + def __len__(self): + """Number of batches per epoch""" + blen = np.floor(len(self.dictionary) / self.args.batch_size) + return int(blen) +``` + +```py + def __getitem__(self, index): + """Get a batch of data""" + start_index = index * self.args.batch_size + end_index = (index+1) * self.args.batch_size + keys = self.keys[start_index: end_index] + x, y = self.__data_generation(keys) + return x, y +``` + +```py + def on_epoch_end(self): + """Shuffle after each epoch""" + if self.shuffle == True: + np.random.shuffle(self.keys) +``` + +```py + def get_n_boxes(self): + """Total number of bounding boxes""" + self.n_boxes = 0 + for shape in self.feature_shapes: + self.n_boxes += np.prod(shape) // self.n_anchors + return self.n_boxes +``` + +数据生成器的大部分工作都是通过`__data_generation()`方法完成的,如“列表 11.9.2”所示。 给定一个小批量,该方法执行: + +* `imread()`从文件系统读取图像。 +* `labels = self.dictionary[key]`访问词典中存储的边界框和类标签。 前四个项目是边界框偏移量。 最后一个是类标签。 +* `anchor_boxes()`生成锚框。 +* `iou()`计算相对于地面真值边界框的每个锚点框的 IoU。 +* `get_gt_data()`为每个锚框分配地面真实等级和偏移量。 + +样本数据扩充函数也包括在内,但此处不再讨论,例如添加随机噪声,强度重新缩放和曝光调整。 `__data_generation()`返回输入`x`和输出`y`对,其中张量`x`存储输入图像,而张量`y`捆绑类,偏移量 ,和面具一起。 + +“列表 11.9.2”:`data_generator.py` + +```py +import layer_utils +``` + +```py +from skimage.io import imread + def __data_generation(self, keys): + """Generate train data: images and + object detection ground truth labels +``` + +```py + Arguments: + keys (array): Randomly sampled keys + (key is image filename) +``` + +```py + Returns: + x (tensor): Batch images + y (tensor): Batch classes, offsets, and masks + """ + # train input data + x = np.zeros((self.args.batch_size, *self.input_shape)) + dim = (self.args.batch_size, self.n_boxes, self.n_classes) + # class ground truth + gt_class = 
np.zeros(dim) + dim = (self.args.batch_size, self.n_boxes, 4) + # offsets ground truth + gt_offset = np.zeros(dim) + # masks of valid bounding boxes + gt_mask = np.zeros(dim) +``` + +```py + for i, key in enumerate(keys): + # images are assumed to be stored in self.args.data_path + # key is the image filename + image_path = os.path.join(self.args.data_path, key) + image = skimage.img_as_float(imread(image_path)) + # assign image to a batch index + x[i] = image + # a label entry is made of 4-dim bounding box coords + # and 1-dim class label + labels = self.dictionary[key] + labels = np.array(labels) + # 4 bounding box coords are 1st four items of labels + # last item is object class label + boxes = labels[:,0:-1] + for index, feature_shape in enumerate(self.feature_shapes): + # generate anchor boxes + anchors = anchor_boxes(feature_shape, + image.shape, + index=index, + n_layers=self.args.layers) + # each feature layer has a row of anchor boxes + anchors = np.reshape(anchors, [-1, 4]) + # compute IoU of each anchor box + # with respect to each bounding boxes + iou = layer_utils.iou(anchors, boxes) +``` + +```py + # generate ground truth class, offsets & mask + gt = get_gt_data(iou, + n_classes=self.n_classes, + anchors=anchors, + labels=labels, + normalize=self.args.normalize, + threshold=self.args.threshold) + gt_cls, gt_off, gt_msk = gt + if index == 0: + cls = np.array(gt_cls) + off = np.array(gt_off) + msk = np.array(gt_msk) + else: + cls = np.append(cls, gt_cls, axis=0) + off = np.append(off, gt_off, axis=0) + msk = np.append(msk, gt_msk, axis=0) +``` + +```py + gt_class[i] = cls + gt_offset[i] = off + gt_mask[i] = msk +``` + +```py + y = [gt_class, np.concatenate((gt_offset, gt_mask), axis=-1)] +``` + +```py + return x, y +``` + +现在我们有一个多线程生成器,我们可以用它来从文件系统加载图像。 在下一节中,我们将演示如何通过拍摄目标对象的图像并对其进行标记来构建自定义数据集。 + +# 10\. 示例数据集 + +使用便宜的 USB 相机(A4TECH PK-635G)收集了一个由 1,000 `640 X 480` RGB 训练图像和 50 `640 X 480` RGB 测试图像组成的小型数据集。 使用 **VGG 图像标注器**(**VIA**)[5]标记数据集图像,以检测三个对象:1)**汽水罐**,2)**果汁罐**和 3)**水瓶**。“图 11.10.1”显示了标记过程的示例 UI。 + +可以在`GitHub`存储库的`utils/video_capture.py`中找到用于收集图像的工具脚本。 该脚本每 5 秒自动捕获一次图像,因此可以加快数据收集过程。 + +![](img/B14853_11_11.png) + +图 11.10.1 使用 VGG 图像标注器(VIA)进行数据集标记的过程 + +数据收集和标记是一项耗时的活动。 在行业中,通常将其外包给第三方标注公司。 使用自动数据标记软件是加快数据标记任务的另一种选择。 + +有了这个示例数据集,我们现在可以训练我们的对象检测网络。 + +# 11\. SSD 模型训练 + +[可以从以下链接下载包含 csv 格式标签的 train 和测试数据集](https://bit.ly/adl2-ssd)。 + +在顶层文件夹(即“第 11 章”,“对象检测”)中,创建数据集文件夹,将下载的文件复制到此处,然后运行以下命令将其解压缩: + +```py +mkdir dataset +cp drinks.tar.gz dataset +cd dataset +tar zxvf drinks.tar.gz +cd.. +``` + +通过执行以下步骤,将 SSD 模型训练 200 个周期: + +```py +python3 ssd-11.6.1.py --train +``` + +可以根据 GPU 内存调整默认的批量大小`--batch-size=4`。 在 1080Ti 上,批量大小为 2。在 32GB V100 上,每个 GPU 可以为 4 或 8。 `--train`代表模型训练选项。 + +为了支持边界框偏移量的归一化,包含`--normalize`选项。 为了使用改进的损失函数,添加了`--improved_loss`选项。 如果仅需要平滑的 L1(无焦点损失),请使用`–smooth-l1`。 为了显示: + +* L1,无规范化: + * `python3 ssd-11.1.1.py –-train` +* 改进的损失函数,无规范化: + * `python3 ssd-11.1.1.py –-train --improved-loss` +* 改进的损失函数,具有规范化: + * `python3 ssd-11.1.1.py –-train –improved-loss --normalize` +* 平滑 L1,具有规范化: + * `python3 ssd-11.1.1.py –-train –-smooth-l1 --normalize` + +训练完 SSD 网络之后,我们需要解决另一个问题。 我们如何处理给定对象的多个预测? 在测试训练好的模型之前,我们将首先讨论**非最大抑制**(**NMS**)算法。 + +# 12\. 
非最大抑制(NMS)算法

模型训练完成后,网络会预测边界框偏移量和相应的类别。在某些情况下,两个或更多边界框指向同一对象,从而产生冗余预测。“图 11.12.1”中的**汽水罐**就属于这种情况。为了删除冗余预测,需要调用 NMS 算法。本书涵盖经典 NMS 和软 NMS [6],如“算法 11.12.1”所示。两种算法都假定边界框及其相应的置信度得分或概率是已知的。

![](img/B14853_11_12.png)

图 11.12.1 网络预测了汽水罐对象的两个重叠边界框。只选择一个有效的边界框,即得分为 0.99 的那个。

在经典 NMS 中,基于概率选择最终边界框,将其存储在列表`D`中,相应的分数存储在`S`中。所有边界框及其概率最初存储在列表`B`和`P`中。在第 3 行和第 4 行中,将具有最高分数`p[m]`的边界框`b[m]`用作参考。

如第 5 行所示,参考边界框被添加到最终所选边界框的列表`D`中,并从列表`B`中删除;其分数被添加到`S`中,并从`P`中删除。对于其余的每个边界框,如果它与`b[m]`的 *IoU* 大于或等于设定的阈值`N[t]`,就将其从`B`中删除,其相应的分数也从`P`中删除。

这些步骤显示在第 6 行和第 9-11 行中,它们会删除所有分数较小的冗余边界框。检查完所有其余边界框之后,从第 3 行开始重复该过程,直到边界框列表`B`为空。该算法返回所选的边界框`D`和相应的分数`S`。

经典 NMS 的问题在于:即使某个边界框包含的是另一个对象,只要它与`b[m]`的 *IoU* 达到阈值,也会被从列表中删除。Soft NMS [6] 提出,与其将这样的边界框从列表中彻底删除,不如按其与`b[m]`的 *IoU* 的平方,以指数方式衰减其分数,如第 8 行所示。

这样,重叠的边界框就有了第二次机会。IoU 较小的边界框在后续迭代中有更高的生存机会,并且在之后的选择中,它可能被证明确实包含一个与`b[m]`不同的对象。如“算法 11.12.1”所示,Soft NMS 可以方便地替代经典 NMS,且无需重新训练 SSD 网络。与经典 NMS 相比,Soft NMS 具有更高的平均精度。

“列表 11.12.1”实现了经典 NMS 和 Soft NMS。除了最终的边界框和相应的分数外,还会返回相应的对象类别。当其余边界框的最大分数小于某个阈值(默认:0.8)时,该代码会提前终止 NMS。

“算法 11.12.1”**NMS 和软 NMS**

**要求**:边界框预测:`B = {b[1], b[2], …, b[n]}`

**要求**:边界框类别的置信度或分数:`P = {p[1], p[2], …, p[n]}`

**要求**:最小 NMS *IoU* 阈值:`N[t]`

1. `D <- {}`;`S <- {}`
2. 当`B ≠ empty`时,执行
3. `m <- argmax P`
4. `M <- b[m]`;`N <- p[m]`
5. `D <- D ∪ M`;`B <- B - M`;`S <- S ∪ N`;`P <- P - N`
6. 对于每个`b[i] ∈ B`,执行
7. 如果`soft_NMS = True`,那么
8. `p[i] = p[i] exp(-IOU(M, b[i])^2 / σ)`
9. 否则,如果`IOU(M, b[i]) >= N[t]`,那么
10. `B = B - b[i]`;`P = P - p[i]`
11. 结束
12. 结束
13. 结束

14. 返回`D, S`

“列表 11.12.1”:`boxes.py`

```py
def nms(args, classes, offsets, anchors):
    """Perform NMS (Algorithm 11.12.1).
```

```py
    Arguments:
        args: User-defined configurations
        classes (tensor): Predicted classes
        offsets (tensor): Predicted offsets
        anchors (tensor): Anchor boxes

    Returns:
        objects (tensor): class predictions per anchor
        indexes (tensor): indexes of detected objects
            filtered by NMS
        scores (tensor): array of detected objects scores
            filtered by NMS
    """
```

```py
    # get all non-zero (non-background) objects
    objects = np.argmax(classes, axis=1)
    # non-zero indexes are not background
    nonbg = np.nonzero(objects)[0]
```

```py
    # D and S indexes in Line 1
    indexes = []
    while True:
        # list of zero probability values
        scores = np.zeros((classes.shape[0],))
        # set probability values of non-background
        scores[nonbg] = np.amax(classes[nonbg], axis=1)
```

```py
        # max probability given the list
        # Lines 3 and 4
        score_idx = np.argmax(scores, axis=0)
        score_max = scores[score_idx]
```

```py
        # get all non max probability & set it as new nonbg
        # Line 5
        nonbg = nonbg[nonbg != score_idx]
```

```py
        # if max obj probability is less than threshold (def 0.8)
        if score_max < args.class_threshold:
            # we are done
            break

        # Line 5
        indexes.append(score_idx)
        score_anc = anchors[score_idx]
        score_off = offsets[score_idx][0:4]
        score_box = score_anc + score_off
        score_box = np.expand_dims(score_box, axis=0)
        nonbg_copy = np.copy(nonbg)
```

```py
        # get all overlapping predictions (Line 6)
        # perform Non-Max Suppression (NMS)
        for idx in nonbg_copy:
            anchor = anchors[idx]
            offset = offsets[idx][0:4]
            box = anchor + offset
            box = np.expand_dims(box, axis=0)
            iou = layer_utils.iou(box, score_box)[0][0]
            # if soft NMS is chosen (Line 7)
            if args.soft_nms:
                # adjust score: Line 8
                iou = -2 * iou * iou
                classes[idx] *= math.exp(iou)
            # else NMS (Line 9), (iou threshold def 0.2)
            elif iou >= args.iou_threshold:
                # remove overlapping 
predictions with iou>threshold + # Line 10 + nonbg = nonbg[nonbg != idx] +``` + +```py + # Line 2, nothing else to process + if nonbg.size == 0: + break +``` + +```py + # get the array of object scores + scores = np.zeros((classes.shape[0],)) + scores[indexes] = np.amax(classes[indexes], axis=1) +``` + +```py + return objects, indexes, scores +``` + +假设我们具有训练有素的 SSD 网络和一种抑制冗余预测的方法,则下一节将讨论对测试数据集的验证。 基本上,我们想知道我们的 SSD 是否可以对从未见过的图像执行对象检测。 + +# 13\. SSD 模型验证 + +在对 SSD 模型进行 200 个周期的训练之后,可以验证表现。 用于评估的三个可能指标:1)**IoU**,2)**精度**和 3)**召回**。 + +第一个指标是**平均 IoU**(**mIoU**)。 给定真实情况测试数据集,计算真实情况边界框和预测边界框之间的 IoU。 在执行 NMS 之后,对所有真实情况和预测的边界框执行此操作。 所有 IoU 的平均值计算为 mIoU: + +![](img/B14853_11_074.png) (Equation 11.13.1) + +其中`n_box`是地面真值边界框`b[i]`的数量和`n_pred`是预测边界框`d[j]`的数量。 请注意,该度量标准无法验证两个重叠的边界框是否属于同一类。 如果需要,则可以轻松修改代码。“列表 11.13.1”显示了代码实现。 + +第二个度量是**精度**,如“公式 11.3.2”所示。 它是正确预测的对象类别的数量(真阳性或 TP)除以正确预测的对象类别的数量(真阳性或 TP)与错误预测的对象类别的数量(假阳性或 FP)之和。 精度是衡量 SSD 正确识别图像中对象的表现的指标。 精度越接近 1.0 越好。 + +![](img/B14853_11_075.png) (Equation 11.3.2) + +第三个度量是**召回**,如“公式 11.3.3”所示。 它是正确预测的对象类别的数量(真阳性或 TP)除以正确预测的对象类别的数量(真阳性或 TP)加上错过的对象数量(假阴性或 FN)之和。 召回率是衡量 SSD 在不对图像中的对象进行错误分类方面有多出色的度量。 召回率越接近 1.0,则越好。 + +![](img/B14853_11_076.png) (Equation 11.3.3) + +如果我们对测试数据集中的所有图像取均值,则它们称为平均精度和平均召回率。 在目标检测中,使用不同 mIoU 的精度和召回曲线来衡量表现。 为了简单起见,我们仅针对特定类别阈值(默认值为 0.5)计算这些指标的值。 感兴趣的读者可以参考 Pascal VOC [7]文章,以获取有关对象检测指标的更多详细信息。 + +评价结果示于“表 11.13.1”。 结果可以通过运行: + +* 无规范化: + * `python3 ssd-11.6.1.py --restore-weights=ResNet56v2-4layer-extra_anchors-drinks-200.h5 --evaluate` +* 无规范化,平滑 L1: + * `python3 ssd-11.6.1.py --restore-weights=ResNet56v2-4layer-smooth_l1-extra_anchors-drinks-200.h5 --evaluate` +* 具有规范化: + * `python3 ssd-11.6.1.py --restore-weights=ResNet56v2-4layer-norm-extra_anchors-drinks-200.h5 --evaluate --normalize` +* 具有规范化,平滑 L1: + * `python3 ssd-11.6.1.py --restore-weights=ResNet56v2-4layer-norm-smooth_l1-extra_anchors-drinks-200.h5 --evaluate --normalize` +* 具有规范化,平滑 L1,焦点损失: + * `python3 ssd-11.6.1.py --restore-weights=ResNet56v2-4layer-norm-improved_loss-extra_anchors-drinks-200.h5 --evaluate --normalize` + +权重在 GitHub 上可用。 + +在 mIoU 上,最佳表现是非归一化偏移选项,而归一化偏移设置具有最高的平均精度和召回率。 考虑到训练数据集中只有 1,000 张图像,表现并不是最新技术。 也没有应用数据扩充。 + +从结果来看,使用损失函数的改进会降低表现。 使用平滑 L1 或焦距损失函数或同时使用两者时,会发生这种情况。“图 11.13.1”至“图 11.13.5”显示了样本预测。 可以通过执行以下操作获得图像上的对象检测: + +```py +python3 ssd-11.6.1.py –-restore-weights= +--image-file= --evaluate +``` + +例如,要在`dataset/drinks/0010050.jpg`上运行对象检测: + +```py +python3 ssd-11.6.1.py --restore-weights=ResNet56v2-4layer-extra_anchors-drinks-200.h5 --image-file=dataset/drinks/0010050.jpg --evaluate +``` + +如果模型权重文件名中包含单词`norm`,请附加`--normalize option`。 + +“列表 11.13.1”:`ssd-11.6.1.py` + +```py + def evaluate_test(self): + # test labels csv path + path = os.path.join(self.args.data_path, + self.args.test_labels) + # test dictionary + dictionary, _ = build_label_dictionary(path) + keys = np.array(list(dictionary.keys())) + # sum of precision + s_precision = 0 + # sum of recall + s_recall = 0 + # sum of IoUs + s_iou = 0 + # evaluate per image + for key in keys: + # ground truth labels + labels = np.array(dictionary[key]) + # 4 boxes coords are 1st four items of labels + gt_boxes = labels[:, 0:-1] + # last one is class + gt_class_ids = labels[:, -1] + # load image id by key + image_file = os.path.join(self.args.data_path, key) + image = skimage.img_as_float(imread(image_file)) + image, classes, offsets = self.detect_objects(image) + # perform nms + _, _, class_ids, boxes = show_boxes(args, + image, + classes, + offsets, + self.feature_shapes, + show=False) 
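            # 注:如上面的注释所示,show_boxes() 在内部执行 NMS
            # (参见“列表 11.12.1”的 nms()),这里取其返回的类别 id
            # 和边界框,在下面与真实标签计算 IoU、精度和召回率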
+``` + +```py + boxes = np.reshape(np.array(boxes), (-1,4)) + # compute IoUs + iou = layer_utils.iou(gt_boxes, boxes) + # skip empty IoUs + if iou.size ==0: + continue + # the class of predicted box w/ max iou + maxiou_class = np.argmax(iou, axis=1) +``` + +```py + # true positive + tp = 0 + # false positiove + fp = 0 + # sum of objects iou per image + s_image_iou = [] + for n in range(iou.shape[0]): + # ground truth bbox has a label + if iou[n, maxiou_class[n]] > 0: + s_image_iou.append(iou[n, maxiou_class[n]]) + # true positive has the same class and gt + if gt_class_ids[n] == class_ids[maxiou_class[n]]: + tp += 1 + else: + fp += 1 +``` + +```py + # objects that we missed (false negative) + fn = abs(len(gt_class_ids) - tp) + s_iou += (np.sum(s_image_iou) / iou.shape[0]) + s_precision += (tp/(tp + fp)) + s_recall += (tp/(tp + fn)) +``` + +```py + n_test = len(keys) + print_log("mIoU: %f" % (s_iou/n_test), + self.args.verbose) + print_log("Precision: %f" % (s_precision/n_test), + self.args.verbose) + print_log("Recall: %f" % (s_recall/n_test), + self.args.verbose) +``` + +结果如下,在“表 11.13.1”中: + +| | **未归一化的偏移** | **未归一化的偏移,平滑 L1** | **归一化的偏移** | **归一化偏移,平滑 L1** | **归一化偏移,平滑 L1,焦点损失** | +| --- | --- | --- | --- | --- | --- | +| IoU | 0.64 | 0.61 | 0.53 | 0.50 | 0.51 | +| 平均精度 | 0.87 | 0.86 | 0.90 | 0.85 | 0.85 | +| 平均召回率 | 0.87 | 0.85 | 0.87 | 0.83 | 0.83 | + +表 11.13.1 测试数据集上 SSD 的表现基准。 + +![](img/B14853_11_13.png) + +图 11.13.1 来自测试数据集的图像上的示例预测示例(未归一化的偏移量)。 + +![](img/B14853_11_14.png) + +图 11.13.2 来自测试数据集的图像上的示例预测示例(未归一化的偏移量,平滑 L1)。 + +![](img/B14853_11_15.png) + +图 11.13.3 来自测试数据集的图像预测示例(标准化偏移)。 + +![](img/B14853_11_16.png) + +图 11.13.4 对来自测试数据集的图像进行的预测示例(标准化偏移,平滑 L1)。 + +![](img/B14853_11_17.png) + +图 11.13.5 对来自测试数据集的图像进行的预测示例(归一化偏移,平滑 L1,聚焦损失)。 + +本节中的结果验证了我们的 SSD 模型。 一个重要的经验教训是,只要我们理解了问题,无论问题多么复杂,我们都可以逐步构建一个可行的解决方案。 SSD 是迄今为止我们在本书中介绍过的最复杂的模型。 它需要许多工具,模块以及大量数据准备和管理才能工作。 + +# 14\. 总结 + +在本章中,讨论了多尺度单发对象检测的概念。 使用以接收场斑块的质心为中心的锚框,可以计算地面真值边界框偏移量。 代替原始像素误差,归一化像素误差会鼓励更适合优化的有限范围。 + +每个锚框都分配有地面实况类别标签。 如果锚点框不与对象重叠,则为其分配背景类,并且其偏移量不包括在偏移量损失计算中。 已经提出了焦点损失以改善类别损失函数。 可以使用平滑的 L1 损失函数代替默认的 L1 偏置损失函数。 + +对测试数据集的评估表明,使用默认损失函数的归一化偏移可实现平均精度和召回率方面的最佳表现,而当消除偏移归一化时,mIoU 会得到改善。 通过增加训练图像的数量和变化可以提高性能。 + +在“第 12 章”中,“语义分割”建立在本章中开发的概念的基础上。 特别是,我们重用 ResNet 骨干网络来构建分段网络和 IoU 指标进行验证。 + +# 15\. 参考 + +1. `Krizhevsky Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet classification with deep convolutional neural networks." Advances in neural information processing systems. 2012.` +1. `Liu Wei, et al. "SSD: Single Shot MultiBox Detector." European conference on computer vision. Springer, Cham, 2016.` +1. `Girshick Ross. "Fast R-CNN." Proceedings of the IEEE international conference on computer vision. 2015.` +1. `Lin Tsung-Yi, et al. "Focal loss for Dense Object Detection. "Proceedings of the IEEE international conference on computer vision. 2017.` +1. `Dutta, et al. VGG Image Annotator http://www.robots.ox.ac.uk/~vgg/software/via/` +1. `Bodla Navaneeth, et al. "Soft-NMS--Improving Object Detection With One Line of Code." Proceedings of the IEEE international conference on computer vision. 2017.` +1. `Everingham Mark, et al. "The Pascal Visual Object Classes (VOC) challenge." International journal of computer vision 88.2 (2010): 303-338.` +1. `"Huber Loss." 
https://en.wikipedia.org/wiki/Huber_loss`
\ No newline at end of file
diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/12.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/12.md
new file mode 100644
index 00000000..00f0bb79
--- /dev/null
+++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/12.md
@@ -0,0 +1,420 @@
# 十二、语义分割

在“第 11 章”,“对象检测”中,我们讨论了对象检测这一重要的计算机视觉算法,它具有多种实际应用。在本章中,我们将讨论另一种相关算法:语义分割。对象检测的目的是对图像中的每个对象同时进行定位和识别,而语义分割的目的则是根据每个像素所属的对象类别对其进行分类。

进一步延伸这个类比:在对象检测中,我们使用边界框显示结果;在语义分割中,同一对象的所有像素都属于同一类别。在视觉上,同一对象的所有像素将具有相同的颜色。例如,属于**汽水罐**类别的所有像素均为蓝色,非汽水罐对象的像素则具有不同的颜色。

类似于对象检测,语义分割有许多实际应用。在医学成像中,它可用于分离和测量正常细胞与异常细胞的区域。在卫星成像中,语义分割可用于测量森林覆盖率或灾难期间的洪水范围。通常,语义分割用于识别属于同一类对象的像素,而无需识别每个对象的各个实例。

好奇的读者可能想知道:一般的分割算法与语义分割算法之间有什么区别?在下一节中,我们将对不同的分割算法加以区分。

总而言之,本章的目的是介绍:

* 不同类型的分割算法
* **全卷积网络**(**FCN**)作为语义分割算法的实现
* `tf.keras`中 FCN 的实现和评估

我们将从讨论不同的分割算法开始。

# 1\. 分割

分割算法将图像划分为像素集或区域集,其目的是更好地理解图像所表示的内容。像素组可以表示图像中特定应用感兴趣的对象。不同分割算法的区别就在于划分的方式。

在某些应用中,我们对给定图像中特定的可数对象感兴趣。例如,在自主导航中,我们对车辆、交通标志、行人和道路上其他物体的实例感兴趣。这些可数对象统称为**事物**(things)。所有其他像素都归并为背景。这种类型的分割称为**实例分割**。

在其他应用中,我们对可数对象不感兴趣,而是对无定形的不可数区域感兴趣,例如天空、森林、植被、道路、草地、建筑物和水体。这些区域统称为**填充物**(stuff)。这种类型的分割称为**语义分割**。

大致上,**事物**和**填充物**共同构成了整个图像。如果算法既能识别事物像素,又能识别填充物像素,则称其为**全景分割**,如 Kirillov 等人所定义 [1]。

但是,事物与填充物之间的区别并不严格。应用可能将可数对象统称为填充物。例如,在百货商店中,很难识别货架上每一件服装的实例,它们可以统一归为布料这一填充物。

“图 12.1.1”显示了不同类型分割之间的区别。输入图像的桌面上有两个汽水罐和两个果汁罐,背景杂乱。假设我们只对汽水罐和果汁罐感兴趣:在实例分割中,我们为每个对象实例分配唯一的颜色,以分别区分四个对象;在语义分割中,所有汽水罐被归为一类填充物,果汁罐是另一类填充物,背景则是最后一类填充物,我们为每类填充物分配唯一的颜色;最后,在全景分割中,只有背景被视为填充物,而我们只关心作为事物的汽水罐和果汁罐。

在本书中,我们仅探讨语义分割。按照“图 12.1.1”中的示例,我们为“第 11 章”,“对象检测”中使用的对象分配唯一的填充物类别:1)**水瓶**,2)**汽水罐**和 3)**果汁罐**。第四个也是最后一个类别是背景。

![A close up of a bottle Description automatically generated](img/B14853_12_01.png)

![A picture containing indoor Description automatically generated](img/B14853_12_02.png)

![A close up of a logo Description automatically generated](img/B14853_12_03.png)

![A screenshot of a cell phone Description automatically generated](img/B14853_12_04.png)

图 12.1.1:显示不同分割算法的四幅图像。彩色效果最佳。原始图像可以在[这个页面](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/tree/master/chapter12-segmentation)中找到。

# 2\. 
语义分割网络 + +从上一节中,我们了解到语义分割网络是一个像素级分类器。 网络框图显示在“图 12.2.1”中。 但是,与简单分类器不同(例如,“第 1 章”,“Keras 深度神经网络”和“第 2 章”,“MNIST 分类器简介”) 其中只有一个分类器生成`one-hot vector`作为输出,在语义分段中,我们有并行运行的并行分类器。 每个人都在生成自己的单热点向量预测。 分类器的数量等于输入图像中的像素数量或图像宽度与高度的乘积。 每个`one-hot vector`预测的维数等于感兴趣的填充对象类别的数量。 + +![A screenshot of a cell phone Description automatically generated](img/B14853_12_05.png) + +图 12.2.1:可以将语义分割网络视为按像素分类器。 彩色效果最佳。 原始图像可以在[这个页面](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/tree/master/chapter12-segmentation)中找到 + +例如,假设我们对以下四个类别感兴趣:0)**背景**,1)**水瓶**,2)**汽水罐**和 3)**果汁罐**,我们可以在“图 12.2.2”中看到,每个对象类别有四个像素。 + +相应地,使用 4 维`one-hot vector`对每个像素进行分类。 我们使用阴影表示像素的类别。 利用这一知识,我们可以想象一个语义分割网络预测`image_width x image_height` 4 维一热向量作为输出,每个像素一个 4 维一热向量: + +![A bottle of water on a table Description automatically generated](img/B14853_12_06.png) + +图 12.2.2:四个不同的样本像素。 使用 4 维一热向量,每个像素根据其类别进行分类。 彩色效果最佳。 原始图像可以在[这个页面](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/tree/master/chapter12-segmentation)中找到 + +了解了语义分割的概念后,我们现在可以介绍神经网络像素级分类器。 Long 等人的《全卷积网络(FCN)》启发了我们的语义分段网络架构 [2]。FCN 的关键思想是在生成最终预测时使用多个比例的特征映射。 + +我们的语义分段网络显示在“图 12.2.3”中。 它的输入是 RGB 图像(例如`640 x 480 x 3`),并且输出具有类似尺寸的张量,但最后一个尺寸是填充类别的数量(例如,对于 4 种填充类别而言是`640 x 480 x 4`)。 出于可视化目的,我们通过为每种类别分配颜色来将输出映射到 RGB: + +![A screenshot of a cell phone Description automatically generated](img/B14853_12_07.png) + +图 12.2.3:语义分割的网络架构。 除非另有说明,否则核大小为 3。 除非另有说明,否则跨步为 1。 彩色效果最佳。 原始图像可以在[这个页面](https://github.com/PacktPublishing/Advanced-Deep-Learning-with-Keras/tree/master/chapter12-segmentation)中找到 + +类似于“第 11 章”,“对象检测”中讨论的 SSD,我们采用骨干网作为特征提取器。 我们在 SSD 中使用类似的 ResNetv2 网络。 ResNet 主干网执行两次最大池化,以到达第一组特征映射,其尺寸为输入图像的 1/4。 通过使用连续的`Conv2D(strides=2)-BN-ReLU`层生成其他特征映射集,从而生成具有输入图像尺寸`(1/8, 1/16, 1/32)`的特征映射。 + +Zhao 等人的《金字塔场景解析网络(PSPNet)》进行了改进,进一步增强了我们的语义分割网络架构 [3]。 在 PSPNet 中,每个特征映射由另一个卷积层进一步处理。 此外,还使用了第一组特征映射。 + +FCN 和 PSPNet 都对特征金字塔进行了上采样,以达到与第一组特征映射相同的大小。 之后,使用`Concatenate`层将所有上采样特征融合在一起。 然后级联层通过步长等于 2 的转置卷积处理两次,以恢复原始图像的宽度和高度。 最后,使用核大小为 1 且过滤器等于 4(换句话说,类别数)和`Softmax`层的转置卷积生成按像素分类预测。 + +在下一节中,我们将讨论细分网络的`tf.keras`实现。 我们可以重用“第 11 章”,“对象检测”中的 SSD 中的某些网络块,以加快实现速度。 + +# 3\. Keras 中的语义分割网络 + +如图“图 12.2.3”所示,我们已经有了语义细分网络的一些关键构建块。 我们可以重用“第 2 章”,“深度神经网络”中介绍的 ResNet 模型。 我们只需要构建特征的金字塔以及上采样和预测层。 + +借用我们在“第 2 章”,“深度神经网络”中开发的 ResNet 模型,并在“第 11 章”,“对象检测”中重用了该模型, 我们提取具有四个级别的特征金字塔。“列表 12.3.1”显示了从 ResNet 提取特征的金字塔。 `conv_layer()`只是创建`Conv2D(strides=2)-BN-ReLU`层的辅助函数。 + +“列表 12.3.1”:`resnet.py`: + +特征的金字塔函数: + +```py +def features_pyramid(x, n_layers): + """Generate features pyramid from the output of the + last layer of a backbone network (e.g. 
ResNetv1 or v2) +``` + +```py + Arguments: + x (tensor): Output feature maps of a backbone network + n_layers (int): Number of additional pyramid layers +``` + +```py + Return: + outputs (list): Features pyramid + """ + outputs = [x] + conv = AveragePooling2D(pool_size=2, name='pool1')(x) + outputs.append(conv) + prev_conv = conv + n_filters = 512 +``` + +```py + # additional feature map layers + for i in range(n_layers - 1): + postfix = "_layer" + str(i+2) + conv = conv_layer(prev_conv, + n_filters, + kernel_size=3, + strides=2, + use_maxpool=False, + postfix=postfix) + outputs.append(conv) + prev_conv = conv +``` + +```py + return outputs +``` + +“列表 12.3.1”只是特征金字塔的一半。 剩下的一半是每组特征之后的卷积。 另一半显示在“列表 12.3.2”中,以及金字塔各层的上采样。 例如,图像尺寸为 1/8 的特征会被上采样 2 倍,以使其尺寸与图像尺寸为 1/4 的第一组特征相匹配。 在同一清单中,我们还建立了完整的分割模型,从骨干网络到特征金字塔,再连接上采样特征金字塔,最后进一步进行特征提取,上采样和预测。 我们在输出层使用`n`维(例如 4 维)`Softmax`层执行逐像素分类。 + +“列表 12.3.2”:`model.py`: + +构建语义分割网络: + +```py +def build_fcn(input_shape, + backbone, + n_classes=4): + """Helper function to build an FCN model. + + Arguments: + backbone (Model): A backbone network + such as ResNetv2 or v1 + n_classes (int): Number of object classes + including background. + """ +``` + +```py + inputs = Input(shape=input_shape) + features = backbone(inputs) +``` + +```py + main_feature = features[0] + features = features[1:] + out_features = [main_feature] + feature_size = 8 + size = 2 + # other half of the features pyramid + # including upsampling to restore the + # feature maps to the dimensions + # equal to 1/4 the image size + for feature in features: + postfix = "fcn_" + str(feature_size) + feature = conv_layer(feature, + filters=256, + use_maxpool=False, + postfix=postfix) + postfix = postfix + "_up2d" + feature = UpSampling2D(size=size, + interpolation='bilinear', + name=postfix)(feature) + size = size * 2 + feature_size = feature_size * 2 + out_features.append(feature) +``` + +```py + # concatenate all upsampled features + x = Concatenate()(out_features) + # perform 2 additional feature extraction + # and upsampling + x = tconv_layer(x, 256, postfix="up_x2") + x = tconv_layer(x, 256, postfix="up_x4") + # generate the pixel-wise classifier + x = Conv2DTranspose(filters=n_classes, + kernel_size=1, + strides=1, + padding='same', + kernel_initializer='he_normal', + name="pre_activation")(x) + x = Softmax(name="segmentation")(x) +``` + +```py + model = Model(inputs, x, name="fcn") +``` + +```py + return model +``` + +给定分割网络模型,我们使用学习速度为`1e-3`的 Adam 优化器和分类交叉熵损失函数来训练网络。“列表 12.3.3”显示了模型构建和训练函数调用。 在 40 个周期之后,学习率每 20 个周期减半。 我们使用`AccuracyCallback`监视网络表现,类似于“第 11 章”,“对象检测”中的 SSD 网络。 回调使用类似于对象检测平均 IoU 的**平均 IoU**(**mIoU**)指标计算表现。 表现最佳的平均值 IoU 的权重保存在文件中。 通过调用`fit_generator()`将网络训练 100 个周期。 + +“列表 12.3.3”:`fcn-12.3.1.py`: + +语义分割网络的初始化和训练: + +```py + def build_model(self): + """Build a backbone network and use it to + create a semantic segmentation + network based on FCN. 
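        The resulting self.fcn model (see build_fcn) is assumed
        to map an input image of shape (args.height, args.width,
        args.channels) to a per-pixel softmax over n_classes
        categories, i.e. an output of shape (height, width, n_classes).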
+ """ +``` + +```py + # input shape is (480, 640, 3) by default + self.input_shape = (self.args.height, + self.args.width, + self.args.channels) +``` + +```py + # build the backbone network (eg ResNet50) + # the backbone is used for 1st set of features + # of the features pyramid + self.backbone = self.args.backbone(self.input_shape, + n_layers=self.args.layers) +``` + +```py + # using the backbone, build fcn network + # output layer is a pixel-wise classifier + self.n_classes = self.train_generator.n_classes + self.fcn = build_fcn(self.input_shape, + self.backbone, + self.n_classes) +``` + +```py + def train(self): + """Train an FCN""" + optimizer = Adam(lr=1e-3) + loss = 'categorical_crossentropy' + self.fcn.compile(optimizer=optimizer, loss=loss) +``` + +```py + log = "# of classes %d" % self.n_classes + print_log(log, self.args.verbose) + log = "Batch size: %d" % self.args.batch_size + print_log(log, self.args.verbose) +``` + +```py + # prepare callbacks for saving model weights + # and learning rate scheduler + # model weights are saved when test iou is highest + # learning rate decreases by 50% every 20 epochs + # after 40th epoch + accuracy = AccuracyCallback(self) + scheduler = LearningRateScheduler(lr_scheduler) +``` + +```py + callbacks = [accuracy, scheduler] + # train the fcn network + self.fcn.fit_generator(generator=self.train_generator, + use_multiprocessing=True, + callbacks=callbacks, + epochs=self.args.epochs, + workers=self.args.workers) +``` + +多线程数据生成器类`DataGenerator`与“第 11 章”,“对象检测”中使用的类类似。 如“列表 12.3.4”所示,对`__data_generation(self, keys)`签名方法进行了修改,以生成一对图像张量及其相应的按像素方向的真实情况标签或分割蒙版 。 在下一节中,我们将讨论如何生成基本事实标签。 + +“列表 12.3.4”:`data_generator.py`: + +`DataGenerator`类用于语义分割的数据生成方法: + +```py + def __data_generation(self, keys): + """Generate train data: images and + segmentation ground truth labels +``` + +```py + Arguments: + keys (array): Randomly sampled keys + (key is image filename) +``` + +```py + Returns: + x (tensor): Batch of images + y (tensor): Batch of pixel-wise categories + """ + # a batch of images + x = [] + # and their corresponding segmentation masks + y = [] +``` + +```py + for i, key in enumerate(keys): + # images are assumed to be stored + # in self.args.data_path + # key is the image filename + image_path = os.path.join(self.args.data_path, key) + image = skimage.img_as_float(imread(image_path)) + # append image to the list + x.append(image) + # and its corresponding label (segmentation mask) + labels = self.dictionary[key] + y.append(labels) +``` + +```py + return np.array(x), np.array(y) +``` + +语义分割网络现已完成。 使用`tf.keras`,我们讨论了其架构实现,初始化和训练。 + +在运行训练程序之前,我们需要训练和测试带有地面真实性标签的数据集。 在的下一部分中,我们将讨论将在本章中使用的语义分割数据集。 + +# 4\. 
示例数据集 + +我们可以使用在“第 11 章”,“对象检测”中使用的数据集。 回想一下,我们使用了一个小型数据集,其中包含使用便宜的 USB 相机(A4TECH PK-635G)收集的 1,000 `640 x 480` RGB 训练图像和 50 `640 x 480` RGB 测试图像。 但是,我们没有使用边界框和类别进行标记,而是使用多边形形状跟踪了每个对象类别的边缘。 我们使用相同的数据集标注器 **VGG 图像标注器**(**VIA**)[4]手动跟踪边缘并分配以下标签:1)**水瓶**,2)**汽水罐**和 3)**果汁罐**。 + +“图 12.4.1”显示了标记过程的示例 UI。 + +![A picture containing indoor, bottle, appliance, wall Description automatically generated](img/B14853_12_08.png) + +图 12.4.1:使用 VGG 图像标注器(VIA)进行语义分割的数据集标记过程 + +威盛标签软件将标签保存在 JSON 文件中。 对于训练和测试数据集,这些是: + +```py +segmentation_train.json +segmentation_test.json +``` + +无法原样使用存储在 JSON 文件中的多边形区域。 每个区域都必须转换成分割蒙版,即张量,其尺寸为`img_w x img_h x px – wise_category`。 在此数据集中,分割蒙版的尺寸为`640 x 480 x 4`。类别 0 为背景,其余为 1)对于**水瓶**,2)对于**苏打罐**,以及 3)表示**果汁罐**。 在`utils`文件夹中,我们创建了一个`generate_gt_segmentation.py`工具,用于将 JSON 文件转换为分段掩码。 为了方便起见,用于训练和测试的地面真实数据存储在压缩数据集中,该数据集是从[上一章](https://bit.ly/adl2-ssd)下载的: + +```py +segmentation_train.npy +segmentation_test.npy +``` + +每个文件都包含`image filename: segmentation mask`格式的真实情况数据字典,该字典在训练和验证期间加载。“图 12.4.2”显示了使用彩色像素可视化的“图 12.4.1”中图像的分割蒙版的示例。 + +![A screenshot of a cell phone Description automatically generated](img/B14853_12_09.png) + +图 12.4.2:可视化图 12.4.1 中所做标注的分段蒙版 + +现在,我们准备训练和验证语义分割网络。 在下一节中,我们将显示在本节中标注的数据集上语义分割的结果。 + +# 5\. 语义分割验证 + +要训​​练语义分段网络,请运行以下命令: + +```py +python3 fcn-12.3.1.py --train +``` + +在每个周期,也会执行验证以确定表现最佳的参数。 对于语义分割,可以使用两个度量。 首先是平均 IOU。 这类似于上一章中目标检测中的平均 IoU。 区别在于针对每个填充类别在真实情况分割掩码和预测的分割掩码之间计算 IoU。 这包括背景。 平均 IoU 只是测试数据集所有 IoU 的平均值。 + +“图 12.5.1”显示了在每个周期使用 mIoU 的语义分割网络的表现。 最大 mIoU 为 0.91。 这个比较高。 但是,我们的数据集只有四个对象类别: + +![A screenshot of a cell phone Description automatically generated](img/B14853_12_10.png) + +图 12.5.1:使用 mIoU 进行测试数据集训练期间的语义分割表现 + +第二个指标是平均像素精度。 这类似于在分类器预测上计算准确率的方式。 不同之处在于,分割网络具有的预测数量等于图像中的像素数量,而不是具有一个预测。 对于每个测试输入图像,计算平均像素精度。 然后,计算所有测试图像的平均值。 + +“图 12.5.2”显示了在每个周期使用平均像素精度的语义分割网络的表现。 最大平均像素精度为 97.9%。 我们可以看到平均像素精度与 mIoU 之间的相关性: + +![A screenshot of a cell phone Description automatically generated](img/B14853_12_11.png) + +图 12.5.2:使用测试数据集的平均像素精度在训练期间的语义分割表现 + +“图 12.5.3”显示了输入图像,地面实况语义分割掩码和预测的语义分割掩码的样本: + +![A picture containing indoor, table, bottle Description automatically generated](img/B14853_12_12.png) + +![](img/B14853_12_13.png) + +![](img/B14853_12_14.png) + +图 12.5.3:样本输入,基本事实和语义细分的预测。 我们将黑色分配为背景类,而不是紫色,如先前所用 + +总体而言,我们基于 FCN 并经过 PSPNet 的思想改进的语义分割网络的表现相对较好。 我们的语义分割网络绝不是最优化的。 可以减少特征金字塔中的过滤器数量,以最大程度地减少参数的数量,该参数约为 1110 万。 探索增加特征金字塔中的级别数也很有趣。 读者可以通过执行以下命令来运行验证: + +```py +python3 fcn-12.3.1.py --evaluate +--restore-weights=ResNet56v2-3layer-drinks-best-iou.h5 +``` + +在下一章中,我们将介绍无监督的学习算法。 考虑到监督学习中所需的昂贵且费时的标签,强烈地开发了无监督学习技术。 例如,在本章的语义分割数据集中,一个人花了大约 4 天的手工标签。 如果深度学习始终需要人工标记,那么它就不会前进。 + +# 6\. 总结 + +在本章中,讨论了分割的概念。 我们了解到细分有不同类别。 每个都有自己的目标应用。 本章重点介绍语义分段的网络设计,实现和验证。 + +我们的语义分割网络受到 FCN 的启发,FCN 已成为许多现代,最先进的分割算法(例如 Mask-R-CNN [5])的基础。 PSPNet 的构想进一步增强了我们的网络,该构想在 ImageNet 2016 解析挑战赛中获得第一名。 + +使用 VIA 标记工具,使用与“第 11 章”,“对象检测”中使用的相同图像集生成用于语义分割的新数据集标签。 分割蒙版标记属于同一对象类的所有像素。 + +我们使用平均 IoU 和平均像素准确率指标对语义分割网络进行了训练和验证。 测试数据集上的表现表明,它可以有效地对测试图像中的像素进行分类。 + +如本章最后一部分所述,由于所涉及的成本和时间,深度学习领域正在意识到监督学习的局限性。 下一章重点介绍无监督学习。 它利用了通信领域信息理论中使用的互信息概念。 + +# 7\. 参考 + +1. `Kirillov, Alexander, et al.: Panoptic Segmentation. Proceedings of the IEEE conference on computer vision and pattern recognition. 2019.` +1. `Long, Jonathan, Evan Shelhamer, and Trevor Darrell: Fully Convolutional Networks for Semantic Segmentation. Proceedings of the IEEE conference on computer vision and pattern recognition. 2015.` +1. `Zhao, Hengshuang, et al.: Pyramid Scene Parsing Network. 
Proceedings of the IEEE conference on computer vision and pattern recognition. 2017.` +1. `Dutta, et al.: VGG Image Annotator http://www.robots.ox.ac.uk/~vgg/software/via/` +1. `He Kaiming, et al.: Mask R-CNN. Proceedings of the IEEE international conference on computer vision. 2017.` \ No newline at end of file diff --git a/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/13.md b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/13.md new file mode 100644 index 00000000..7836612f --- /dev/null +++ b/机器学习/ApacheCN/apachecn-dl-zh/adv-dl-tf2-keras/13.md @@ -0,0 +1,1240 @@ +# 十三、使用互信息的无监督学习 + +许多机器学习任务(例如分类,检测和分段)都依赖于标记的数据。 网络在这些任务上的表现直接受到标记质量和数据量的影响。 问题在于产生足够数量的高质量标注数据既昂贵又费时。 + +为了继续机器学习的发展,新算法应减少对人类标签的依赖。 理想情况下,网络应该从无标签数据中学习,由于互联网的发展以及诸如智能手机和**物联网**(**IoT**)。 从未标记的数据中学习是无监督学习的领域。 在某些情况下,无监督学习也称为自我监督学习,以强调使用纯净的未标记数据进行训练和缺乏人工监督。 在本文中,我们将使用术语无监督学习。 + +在机器学习中,有一些方法可以从未标记的数据中学习。 可以使用深度神经网络和无监督学习中的新思想来改善这些方法的表现。 当处理高度非结构化的数据(例如文本,图像,音频和视频)时,尤其如此。 + +在无监督学习中成功的方法之一是最大化给定神经网络中两个随机变量之间的互信息。 在信息论领域,**互信息**(**MI**)是两个随机变量之间依存性的量度。 + +MI 最近已成功地从未标记的数据中提取了有用的信息,可以帮助学习下游任务。 例如,MI 能够对潜在代码向量进行聚类,从而使分类任务成为简单的线性分离问题。 + +总之,本章的目的是介绍: + +* 互信息的概念 +* 使用神经网络估计 MI +* 下游任务的离散和连续随机变量上的 MI 最大化 +* Keras 中 MI 估计网络的实现 + +我们将从介绍互信息的概念开始。 + +# 1\. 互信息 + +互信息是对两个随机变量`X`和`Y`之间依赖性的度量。 有时,MI 也定义为通过观察`Y`得出的有关`X`的信息量。 MI 也被称为信息获取或观察`Y`时`X`不确定性的降低。 + +与相关性相反,MI 可以测量`X`和`Y`之间的非线性统计依赖性。 在深度学习中,MI 是一种合适的方法,因为大多数现实世界中的数据都是非结构化的,并且输入和输出之间的依赖关系通常是非线性的。 在深度学习中,最终目标是对输入数据和预先训练的模型执行特定任务,例如分类,翻译,回归或检测。 这些任务也称为下游任务。 + +由于 MI 可以发现输入,中间特征,表示和输出中的相关性的重要方面,这些方面本身就是随机变量,因此共享信息通常可以提高下游任务中模型的表现。 + +在数学上,两个随机变量`X`和`Y`之间的 MI 可以定义为: + +![](img/B14853_13_001.png) (Equation 13.1.1) + +哪里: + +* `P`(`X`,`Y`)是 X 和 Y 在样本空间`X`x`Y`上的联合分布 。 +* `P`(`X`)`P`(`Y`)是边际分布`P`(`X`)和`P`(`Y`)分别位于样本空间`X`和`Y`上。 + +换句话说,MI 是联合分布与边际分布乘积之间的 **Kullback-Leibler**(**KL**)散度。 回顾“第 5 章”,“改进的 GAN” ,KL 是两个分布之间距离的度量。 在 MI 的上下文中,KL 距离越大,两个随机变量`X`和`Y`之间的 MI 越高。 通过扩展,MI 越高,`X`对`Y`的依赖性越高。 + +由于 MI 等于边际分布的联合与乘积之间的 KL 散度,因此它暗示它大于或等于零:`I(X; Y) > 0`。 当`X`和`Y`是独立随机变量时,MI 完全等于零。 当`X`和`Y`是独立的时,观察一个随机变量(例如`Y`)不会提供关于另一个随机变量的信息(例如`X`)。 因此,MI 是`X`和`Y`独立程度的度量。 + +如果`X`和`Y`是**离散随机变量**,则通过扩展 KL 散度,MI 可以计算为: + +![](img/B14853_13_003.png) (Equation 13.1.2) + +哪里: + +* `P`(`X`,`Y`)是联合**概率质量函数**(**PMF**)。 +* `P`(`X`)和`P`(`Y`)是边际 PMF。 + +如果联合和边际分布已知,则 MI 可以进行精确计算。 + +如果`X`和`Y`是**连续随机变量**,则通过扩展 KL 散度,MI 可以表示为: + +![](img/B14853_13_004.png) (Equation 13.1.3) + +哪里: + +* `p`(`x`,`y`)是联合**概率密度函数**(**PDF**)。 +* `p`(`x`)和`p`(`y`)是边缘 PDF。 + +连续随机变量的 MI 通常很难处理,并且可以通过变分方法进行估计。 在本章中,我们将讨论估计两个连续随机变量之间的 MI 的技术。 + +在讨论用于计算互信息的技术之前,让我们首先解释一下 MI 与熵之间的关系。 熵在“第 6 章”,“纠缠表示 GAN”中非正式引入,并在 InfoGAN 中得到了应用。 + +# 2\. 
互信息和熵

MI 也可以用熵来解释。回想一下“第 6 章”,“纠缠表示 GAN”,熵`H(X)`是随机变量`X`的预期信息量的度量:

![](img/B14853_13_005.png) (Equation 13.2.1)

“公式 13.2.1”表明,熵也是不确定性的量度。越不确定的事件发生时,带给我们的惊喜或信息就越多。例如,员工意外晋升的消息带有大量的信息或熵。

使用“公式 13.2.1”,MI 可以表示为:

![](img/B14853_13_006.png)

![](img/B14853_13_007.png)

![](img/B14853_13_008.png)

![](img/B14853_13_009.png) (Equation 13.2.2)

“公式 13.2.2”表明,MI 随边际熵的增加而增大,但随联合熵的增加而减小。就熵而言,MI 更常见的表达式如下:

![](img/B14853_13_010.png)

![](img/B14853_13_011.png)

![](img/B14853_13_012.png) (Equation 13.2.3)

“公式 13.2.3”告诉我们,MI 随一个随机变量的熵的增加而增大,但随给定另一个随机变量时的条件熵的增加而减小。换句话说,MI 是在知道`Y`之后,关于`X`的信息的增加量,或者说`X`的不确定性的减少量。

等效地,

![](img/B14853_13_013.png)

![](img/B14853_13_014.png) (Equation 13.2.4)

“公式 13.2.4”表明 MI 是对称的:

![](img/B14853_13_015.png) (Equation 13.2.5)

MI 也可以用`X`和`Y`的条件熵表示:

![](img/B14853_13_016.png) (Equation 13.2.6)

使用贝叶斯定理:

![](img/B14853_13_017.png)

![](img/B14853_13_018.png)

![](img/B14853_13_019.png)

![](img/B14853_13_020.png) (Equation 13.2.7)

“图 13.2.1”总结了到目前为止我们讨论的 MI 与条件熵和边际熵之间的所有关系:

![MI_Venn_Diagram.png](img/B14853_13_01.png)

图 13.2.1 维恩图显示了 MI 与条件熵和边际熵之间的关系

根据“公式 13.2.3”,MI 还有另一种有趣的解释,可以将其重写为:

![](img/B14853_13_021.png) (Equation 13.2.8)

由于`H(X | Y)`是观察到`Y`之后`X`的不确定性,“公式 13.2.8”告诉我们,如果最大化 MI,我们就可以在给定`Y`的情况下对`X`更加确定。在“图 13.2.1”中,随着代表 MI 的两个圆的交集增大,新月形`H(X | Y)`的面积会减小。

再举一个具体的例子:假设`X`是一个随机变量,表示在给定的随机字节中观察到 0 到 255 之间的某个数字。假设分布均匀,则概率为`P(X) = 1/256`。以 2 为底,`X`的熵为:

![](img/B14853_13_023.png)

假设随机变量`Y`代表该随机字节的 4 个最高有效位。如果我们观察到这 4 个最高有效位全为零,则数字 0 到 15 的概率为`P(X) = 1/16`,其余数字的概率为`P(X) = 0`。以 2 为底的条件熵为:

![](img/B14853_13_025.png)

由此得到 MI 为`I(X; Y) = 8 - 4 = 4`。注意,在知道`Y`之后,随机变量`X`的不确定性或预期信息量降低了。`X`和`Y`共享的互信息为 4,这也等于两个随机变量共享的位数。“图 13.2.2”比较了两种情况:所有位都未知,以及 4 个最高有效位已知为 0。

![A close up of a logo Description automatically generated](img/B14853_13_02.png)

图 13.2.2 当所有位未知时与某些位已知时的熵

鉴于我们已经对 MI 和熵有了很好的了解,现在可以将此概念用作无监督学习的一种方法。

# 3\. 
通过最大化离散随机变量的互信息来进行无监督学习

深度学习中的经典问题是监督分类。在“第 1 章”,“Keras 简介”和“第 2 章”,“深度神经网络”中,我们了解到,在监督分类下,我们需要带标签的输入图像。我们对 MNIST 和 CIFAR10 数据集都进行了分类。对于 MNIST,三层 CNN 和密集层可实现高达 99.3% 的精度。对于使用 ResNet 或 DenseNet 的 CIFAR10,我们可以实现大约 93% 至 94% 的精度。MNIST 和 CIFAR10 都是带标签的数据集。

与监督学习不同,本章的目标是执行无监督学习,重点是无标签的分类。其思想是:如果我们学会了对所有训练数据的潜在代码向量进行聚类,那么线性分离算法就可以对每个测试输入数据的潜在向量进行分类。

为了学习无标签的潜在代码向量聚类,我们的训练目标是最大化输入图像`X`与其潜在代码`Y`之间的 MI。`X`和`Y`都是随机变量。其思想是,外观*相似*的图像的潜在向量会聚集到同一区域。线性分配问题可以很容易地将彼此远离的区域分开,因此可以以无监督的方式完成分类问题。数学上,目标是最大化:

![](img/B14853_13_027.png) (Equation 13.2.3)

直观上,一旦观察到`Y`,我们就对`X`更有把握。“公式 13.2.3”的问题在于,我们难以可靠地估计测量`H(X | Y)`所需的密度`P(X | Y)`。

Ji 等人的**不变信息聚类**(**IIC**)[1] 建议从联合分布和边际分布直接测量`I(X; Y)`。其目的是使用“公式 13.1.2”测量引用同一输入的两个潜在代码随机变量之间的 MI。假设输入`X`编码为`Z`:

![](img/B14853_13_029.png)

将相同的输入`X`变换为`X_bar = G(X)`,使得`X_bar`仍可清晰地归类为与`X`相同的类别。在图像处理中,`G`可以是常见的操作,例如小角度旋转、随机裁剪和剪切。有时,只要结果图像的含义不变,对比度和亮度调整、边缘检测、少量噪声添加以及归一化之类的操作也是可以接受的。例如,如果`X`是狗的图像,则在`G`之后,`X_bar`显然仍是狗。

使用相同编码器网络得到的潜在代码向量为:

![](img/B14853_13_035.png)

因此,我们可以用两个随机变量`Z`和`Z_bar`将“公式 13.1.2”重写为:

![](img/B14853_13_037.png) (Equation 13.3.1)

其中`P(Z)`和`P(Z_bar)`可以解释为`Z`和`Z_bar`的边际分布。对于离散随机变量`Z`和`Z_bar`,`P(Z)`和`P(Z_bar)`都是分类分布。我们可以想象,编码器输出是 *softmax*,其维数等于训练和测试数据分布中的类别数`N`。例如,对于 MNIST,编码器输出是与训练和测试数据集中 10 个数字相对应的 10 维一热向量。

为了确定“公式 13.3.1”中的每一项,我们首先估计`P(Z, Z_bar)`。IIC 假设`Z`和`Z_bar`是独立的,因此联合分布可以估计为:

![](img/B14853_13_045.png) (Equation 13.3.2)

这将创建一个`N x N`矩阵`P(Z, Z_bar)`,其中每个元素`Z[ij]`对应于同时观察到两个随机变量`(Z[i], Z_bar[j])`的概率。如果对大批量样本进行此估计,则大样本均值可以估计联合概率。

由于我们将使用 MI 来估计密度函数,因此 IIC 将采样限制为`(Z[i], Z_bar[i])`。本质上,对于每个样本`x[i]`,我们计算其潜在代码`P(Z[i]) = E(X[i])`。然后,我们对`x[i]`进行变换,并计算其潜在代码`P(Z_bar[i]) = E(X_bar[i])`。联合分布计算如下:

![](img/B14853_13_051.png) (Equation 13.3.3)

其中`M`是批量大小。由于我们对`x[i]`和`x_bar[i]`使用相同的编码器`E`,因此联合分布应该是对称的。我们通过下式强制对称性:

![](img/B14853_13_054.png) (Equation 13.3.4)

给定`P(Z, Z_bar)`,边际分布可以计算为:

![](img/B14853_13_056.png) (Equation 13.3.5)

即按行对矩阵条目求和。类似地:

![](img/B14853_13_057.png) (Equation 13.3.6)

即按列对矩阵条目求和。

给定“公式 13.3.1”中的所有项,我们可以训练神经网络编码器`E`,其损失函数最大化 MI,即最小化负 MI:

![](img/B14853_13_059.png) (Equation 13.3.7)

在实现无监督聚类之前,让我们再次回顾目标:最大化`I(Z; Z_bar)`。由于`X`和`X_bar = G(X)`及其对应的潜在代码向量`Z`和`Z_bar`共享相同的信息,神经网络编码器`E`应该学会将`X`和`X_bar`映射为取值几乎相同的潜在向量`Z`和`Z_bar`,以最大化它们的 MI。在 MNIST 的背景下,外观相似的数字的潜在代码向量会聚集在空间的同一区域中。

如果潜在代码向量是 *softmax* 的输出,则表明我们正在执行无监督聚类,并且可以使用线性分配算法将其转换为分类器。在本章中,我们将介绍两种可能的线性分配算法,它们可用于将无监督聚类转换为无监督分类。

在下一节中,我们将讨论可用于实现无监督聚类的编码器网络模型。特别是,我们将介绍可用于估计`P(Z)`和`P(Z_bar)`的编码器网络。

# 4\. 
用于无监督聚类的编码器网络 + +图 13.4.1 中显示了用于无监督聚类的编码器网络实现。 它是一种编码器,具有类似 VGG 的[2]主干和`Dense`层,并具有 *softmax* 输出。 最简单的 VGG-11 具有主干,如“图 13.4.2”所示。 + +对于 MNIST,使用最简单的 VGG-11 骨干将特征映射大小从`MaxPooling2D`操作的 5 倍减至零。 因此,当在 Keras 中实现时,将使用按比例缩小的 VGG-11 主干版本,如图“图 13.4.3”所示。 使用同一组过滤器。 + +![A close up of a logo Description automatically generated](img/B14853_13_03.png) + +图 13.4.1 IIC 编码器网络`E`的网络实现。 输入的 MNIST 图像被中心裁剪为`24 x 24`像素。 在此示例中,`X_bar = G(X)`是随机的`24 x 24`像素裁剪操作。 + +![](img/B14853_13_04.png) + +图 13.4.2 VGG-11 分类器主干 + +在“图 13.4.3”中,有 4 个`Conv2D-BN-ReLU Activation-MaxPooling2D`层,其过滤器大小为`(64, 128, 256, 512)`。 最后的`Conv2D`层不使用`MaxPooling2D`。 因此,最后的`Conv2D`层针对`24 x 24 x 1`裁剪的 MNIST 输入输出`(3, 3, 512)`特征映射。 + +![A screenshot of a cell phone Description automatically generated](img/B14853_13_05.png) + +图 13.4.3 缩小的 VGG 用作编码器主干 + +“图 13.4.4”显示了“图 13.4.1”的 Keras 模型图。 为了提高性能,IIC 执行了超集群。 两个或更多编码器用于生成两个或更多个边际分布`P(Z)`和`P(Z_bar)`。 生成相应的联合分布。 就网络模型的而言,这是由具有两个或更多头的编码器实现的。 + +![A screenshot of a cell phone Description automatically generated](img/B14853_13_06.png) + +图 13.4.4 Keras 中 IIC 编码器`E`的网络实现 + +“图 13.4.4”是单头编码器,而“图 13.4.5”是双头编码器。 请注意,两个头共享相同的 VGG 主干。 + +![A screenshot of a cell phone Description automatically generated](img/B14853_13_07.png) + +图 13.4.5 Keras 中的两头编码器网络`E` + +在以下两个部分的中,我们将研究[II]网络模型是如何实现,训练和评估的。 我们还将研究线性分配问题,作为为每个聚类指定标签的工具。 + +# 5\. Keras 中的无监督聚类实现 + +Keras 中用于无监督聚类的网络模型实现在“列表 13.5.1”中显示。 仅显示初始化。 网络超参数存储在`args`中。 VGG 主干对象在初始化期间提供。 给定骨干,模型实际上只是具有 *softmax* 激活的`Dense`层,如`build_model()`方法所示。 有创建多个头的选项。 + +与“第 11 章”,“对象检测”相似,我们实现了`DataGenerator`类以多线程方式有效地提供输入数据。 `DataGenerator`对象生成由输入图像`X`及其变换后的图像`X_bar`组成的所需配对训练输入数据(即,连体输入图像)。 `DataGenerator`类中最关键的方法`__data_generation()`显示在“列表 13.5.2”中。 输入图像`X`从原始输入图像中央裁切。 对于 MNIST,这是`24 x 24`像素中心裁剪。 变换后的输入图像`X_bar`可以随机旋转`±20`范围内的某个角度,也可以从图像的任何部分随机裁剪`16 x 16`、`18 x 18`或`20 x 20`像素,然后将其调整为`24 x 24`像素。 作物尺寸存储在`crop_sizes`列表中。 + +注意,仅输入图像和变换图像在`DataGenerator`对象生成的数据中很重要。 同样,损失函数所需的配对数据沿批量轴连接。 这将使我们能够在单批配对数据中计算损失函数。 + +“列表 13.5.1”:`iic-13.5.1.py`。 显示初始化和模型创建的 IIC 类:IIC 类: + +```py + def __init__(self, + args, + backbone): + """Contains the encoder model, the loss function, + loading of datasets, train and evaluation routines + to implement IIC unsupervised clustering via mutual + information maximization +``` + +```py + Arguments: + args : Command line arguments to indicate choice + of batch size, number of heads, folder to save + weights file, weights file name, etc + backbone (Model): IIC Encoder backbone (eg VGG) + """ + self.args = args + self.backbone = backbone + self._model = None + self.train_gen = DataGenerator(args, siamese=True) + self.n_labels = self.train_gen.n_labels + self.build_model() + self.load_eval_dataset() + self.accuracy = 0 +``` + +```py + def build_model(self): + """Build the n_heads of the IIC model + """ + inputs = Input(shape=self.train_gen.input_shape, name='x') + x = self.backbone(inputs) + x = Flatten()(x) + # number of output heads + outputs = [] + for i in range(self.args.heads): + name = "z_head%d" % i + outputs.append(Dense(self.n_labels, + activation='softmax', + name=name)(x)) + self._model = Model(inputs, outputs, name='encoder') + optimizer = Adam(lr=1e-3) + self._model.compile(optimizer=optimizer, loss=self.mi_loss) +``` + +“列表 13.5.2”:`data_generator.py`。 用于生成成对的输入数据以训练 IIC 编码器的`DataGenerator`类方法: + +```py + def __data_generation(self, start_index, end_index): + """Data generation algorithm. The method generates + a batch of pair of images (original image X and + transformed imaged Xbar). 
The batch of Siamese + images is used to trained MI-based algorithms: + 1) IIC and 2) MINE (Section 7) +``` + +```py + Arguments: + start_index (int): Given an array of images, + this is the start index to retrieve a batch + end_index (int): Given an array of images, + this is the end index to retrieve a batch + """ +``` + +```py + d = self.crop_size // 2 + crop_sizes = [self.crop_size*2 + i for i in range(0,5,2)] + image_size = self.data.shape[1] - self.crop_size + x = self.data[self.indexes[start_index : end_index]] + y1 = self.label[self.indexes[start_index : end_index]] +``` + +```py + target_shape = (x.shape[0], *self.input_shape) + x1 = np.zeros(target_shape) + if self.siamese: + y2 = y1 + x2 = np.zeros(target_shape) +``` + +```py + for i in range(x1.shape[0]): + image = x[i] + x1[i] = image[d: image_size + d, d: image_size + d] + if self.siamese: + rotate = np.random.randint(0, 2) + # 50-50% chance of crop or rotate + if rotate == 1: + shape = target_shape[1:] + x2[i] = self.random_rotate(image, + target_shape=shape) + else: + x2[i] = self.random_crop(image, + target_shape[1:], + crop_sizes) +``` + +```py + # for IIC, we are mostly interested in paired images + # X and Xbar = G(X) + if self.siamese: + # If MINE Algorithm is chosen, use this to generate + # the training data (see Section 9) + if self.mine: + y = np.concatenate([y1, y2], axis=0) + m1 = np.copy(x1) + m2 = np.copy(x2) + np.random.shuffle(m2) + x1 = np.concatenate((x1, m1), axis=0) + x2 = np.concatenate((x2, m2), axis=0) + x = (x1, x2) + return x, y +``` + +```py + x_train = np.concatenate([x1, x2], axis=0) + y_train = np.concatenate([y1, y2], axis=0) + y = [] + for i in range(self.args.heads): + y.append(y_train) + return x_train, y +``` + +```py + return x1, y1 +``` + +为了实现 VGG 骨干,在 Keras 中实现了`VGG`类,如“列表 13.5.3”所示。 `VGG`类的灵活性在于可以用不同的方式(或 VGG 的不同样式)进行配置。 显示了用于 IIC VGG 主干配置`cfg`的选项'F'。 我们使用一个辅助函数来生成`Conv2D-BN-ReLU-MaxPooling2D`层。 + +“列表 13.5.3”:`vgg.py`。 + +Keras 中的`VGG backbone`类方法: + +```py +cfg = { + 'F': [64, 'M', 128, 'M', 256, 'M', 512], +} +``` + +```py +class VGG: + def __init__(self, cfg, input_shape=(24, 24, 1)): + """VGG network model creator to be used as backbone + feature extractor +``` + +```py + Arguments: + cfg (dict): Summarizes the network configuration + input_shape (list): Input image dims + """ + self.cfg = cfg + self.input_shape = input_shape + self._model = None + self.build_model() +``` + +```py + def build_model(self): + """Model builder uses a helper function + make_layers to read the config dict and + create a VGG network model + """ + inputs = Input(shape=self.input_shape, name='x') + x = VGG.make_layers(self.cfg, inputs) + self._model = Model(inputs, x, name='VGG') +``` + +```py + @property + def model(self): + return self._model +``` + +```py + @staticmethod + def make_layers(cfg, + inputs, + batch_norm=True, + in_channels=1): + """Helper function to ease the creation of VGG + network model +``` + +```py + Arguments: + cfg (dict): Summarizes the network layer + configuration + inputs (tensor): Input from previous layer + batch_norm (Bool): Whether to use batch norm + between Conv2D and ReLU + in_channel (int): Number of input channels + """ + x = inputs + for layer in cfg: + if layer == 'M': + x = MaxPooling2D()(x) + elif layer == 'A': + x = AveragePooling2D(pool_size=3)(x) + else: + x = Conv2D(layer, + kernel_size=3, + padding='same', + kernel_initializer='he_normal' + )(x) + if batch_norm: + x = BatchNormalization()(x) + x = Activation('relu')(x) +``` + +```py + return x +``` + 
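在继续之前,下面给出一个最小的用法示意,展示如何实例化“列表 13.5.3”中的缩小版 VGG 主干并检查其输出特征图形状。这只是一个示意,并非原书代码:假设该类保存在`vgg.py`中,并使用上面所示的配置`'F'`。

```py
# 最小用法示意(假设 vgg.py 与“列表 13.5.3”一致)
from vgg import VGG, cfg

# 用配置 'F' 构建缩小版 VGG 主干,输入为 24x24x1 的裁剪图像
backbone = VGG(cfg['F'], input_shape=(24, 24, 1)).model

# 配置 'F' 包含三次 MaxPooling2D:24 -> 12 -> 6 -> 3
# 因此输出特征图形状应为 (None, 3, 3, 512),
# 与“图 13.4.3”中描述的 (3, 3, 512) 特征映射一致
print(backbone.output_shape)
```

该`(3, 3, 512)`特征图随后由`Flatten`和带 *softmax* 的`Dense`头处理,见前面`IIC`类的`build_model()`。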
+回到`IIC`类,`IIC`的关键算法是使负 MI 最小的损失函数。 此方法显示在“列表 13.5.4”中。 为了评估单个批量中的损失,我们研究了`y_pred`,并将其分为上下两半,分别对应于输入图像`X`及其变换后的图像`X_bar`的编码器输出的。 回想一下,配对数据是通过将一批图像`X`和一批其变换后的图像`X_bar`连接在一起而制成的。 + +`y_pred`的下半部分为`Z`,而上半部分为`Z_bar`遵循“公式 10.3.2”至“公式 10.3.7”,联合分布`P(Z, Z_bar)`和边际分布被计算。 最后,返回负数 MI。 注意,每个头对总损失函数的贡献均等。 因此,损失是根据头部的数量来缩放的。 + +“列表 13.5.4”:`iic-13.5.1.py`。 + +Keras 中的`IIC`类损失函数。 损失函数使负 MI 最小化(即,使 MI 最大化): + +```py + def mi_loss(self, y_true, y_pred): + """Mutual information loss computed from the joint + distribution matrix and the marginals +``` + +```py + Arguments: + y_true (tensor): Not used since this is + unsupervised learning + y_pred (tensor): stack of softmax predictions for + the Siamese latent vectors (Z and Zbar) + """ + size = self.args.batch_size + n_labels = y_pred.shape[-1] + # lower half is Z + Z = y_pred[0: size, :] + Z = K.expand_dims(Z, axis=2) + # upper half is Zbar + Zbar = y_pred[size: y_pred.shape[0], :] + Zbar = K.expand_dims(Zbar, axis=1) + # compute joint distribution (Eq 10.3.2 & .3) + P = K.batch_dot(Z, Zbar) + P = K.sum(P, axis=0) + # enforce symmetric joint distribution (Eq 10.3.4) + P = (P + K.transpose(P)) / 2.0 + # normalization of total probability to 1.0 + P = P / K.sum(P) + # marginal distributions (Eq 10.3.5 & .6) + Pi = K.expand_dims(K.sum(P, axis=1), axis=1) + Pj = K.expand_dims(K.sum(P, axis=0), axis=0) + Pi = K.repeat_elements(Pi, rep=n_labels, axis=1) + Pj = K.repeat_elements(Pj, rep=n_labels, axis=0) + P = K.clip(P, K.epsilon(), np.finfo(float).max) + Pi = K.clip(Pi, K.epsilon(), np.finfo(float).max) + Pj = K.clip(Pj, K.epsilon(), np.finfo(float).max) + # negative MI loss (Eq 10.3.7) + neg_mi = K.sum((P * (K.log(Pi) + K.log(Pj) - K.log(P)))) + # each head contribute 1/n_heads to the total loss + return neg_mi/self.args.heads +``` + +IIC 网络训练方法显示在“列表 13.5.5”中。 由于我们使用的是从`Sequence`类派生的`DataGenerator`对象,因此可以使用 Keras `fit_generator()`方法来训练模型。 + +我们使用学习率调度器,每 400 个周期将学习率降低 80%。 `AccuracyCallback`调用`eval()`方法,因此我们可以在每个周期之后记录网络的表现。 + +可以选择保存表现最佳的模型的权重。 在`eval()`方法中,我们使用线性分类器为每个聚类分配标签。 线性分类器`unsupervised_labels()`是一种匈牙利算法,它以最小的成本将标签分配给群集。 + +最后一步将无监督的聚类转换为无监督的分类。 `unsupervised_labels()`函数在“列表 13.5.6”中显示。 + +“列表 13.5.5”:`iic-13.5.1.py`。 + +IIC 网络训练和评估: + +```py + def train(self): + """Train function uses the data generator, + accuracy computation, and learning rate + scheduler callbacks + """ + accuracy = AccuracyCallback(self) + lr_scheduler = LearningRateScheduler(lr_schedule, + verbose=1) + callbacks = [accuracy, lr_scheduler] + self._model.fit_generator(generator=self.train_gen, + use_multiprocessing=True, + epochs=self.args.epochs, + callbacks=callbacks, + workers=4, + shuffle=True) +``` + +```py + def eval(self): + """Evaluate the accuracy of the current model weights + """ + y_pred = self._model.predict(self.x_test) + print("") + # accuracy per head + for head in range(self.args.heads): + if self.args.heads == 1: + y_head = y_pred + else: + y_head = y_pred[head] + y_head = np.argmax(y_head, axis=1) + accuracy = unsupervised_labels(list(self.y_test), + list(y_head), + self.n_labels, + self.n_labels) + info = "Head %d accuracy: %0.2f%%" + if self.accuracy > 0: + info += ", Old best accuracy: %0.2f%%" + data = (head, accuracy, self.accuracy) + else: + data = (head, accuracy) + print(info % data) + # if accuracy improves during training, + # save the model weights on a file + if accuracy > self.accuracy \ + and self.args.save_weights is not None: + self.accuracy = accuracy + folder = self.args.save_dir + os.makedirs(folder, exist_ok=True) + path = 
os.path.join(folder, self.args.save_weights) + print("Saving weights... ", path) + self._model.save_weights(path) +``` + +“列表 13.5.6”:`utils.py`。 + +匈牙利语算法将标签分配给具有最低成本的集群: + +```py +from scipy.optimize import linear_sum_assignment +def unsupervised_labels(y, yp, n_classes, n_clusters): + """Linear assignment algorithm + + Arguments: + y (tensor): Ground truth labels + yp (tensor): Predicted clusters + n_classes (int): Number of classes + n_clusters (int): Number of clusters + """ + assert n_classes == n_clusters + + # initialize count matrix + C = np.zeros([n_clusters, n_classes]) + + # populate count matrix + for i in range(len(y)): + C[int(yp[i]), int(y[i])] += 1 + + # optimal permutation using Hungarian Algo + # the higher the count, the lower the cost + # so we use -C for linear assignment + row, col = linear_sum_assignment(-C) + + # compute accuracy + accuracy = C[row, col].sum() / C.sum() + + return accuracy * 100 +``` + +![A close up of a logo Description automatically generated](img/B14853_13_08.png) + +图 13.5.1 在三个群集的简单场景中说明的线性分配算法,可以将其最佳地分配给三个类别 + +如图“图 13.5.1”所示,线性分配问题最好使用将三个群集分配给三个类别的简化方案来解释。 线性分配问题找到了类对类的一对一分配,从而使总成本最小。 在“图 13.5.1*”的左侧,显示了聚类结果和真实情况标签。 + +线性分配问题可以找到每个群集的类或类别,或者如何为每个群集分配标签。 还显示了成本矩阵`C`。 对于每个聚类-真实情况对,成本矩阵像元递减 1。该像元的行-列索引是聚类编号-真实情况标签索引。 使用成本矩阵,线性分配问题的工作是找到导致总成本最小的最优矩阵`X`: + +![](img/B14853_13_080.png) (Equation 13.5.1) + +其中`c[ij]`和`x[ij]`分别是矩阵`C`和`X`的元素 。`i`和`j`是索引。`X`的元素受的以下约束: + +`x[ij] ∈ {0, 1}` + +`Σ[j] x[ij] = 1`对于`i = 1, 2, ..., N` + +`Σ[i] x[ij] = 1`对于`j = 1, 2, ..., N` + +`X`是一个二进制矩阵。 每行仅分配给一列。 因此,线性分配问题是组合问题。 最佳解决方案的详细信息超出了本书的范围,此处不再讨论。 + +最佳权重矩阵`X`显示在“图 13.5.1”中。 群集 0 被分配了标签 1。群集 1 被分配了标签 2。群集 2 被分配了标签 0。这可以从成本矩阵中直观地进行验证,因为这导致最低成本为 -4,同时确保每行仅分配给一列。 + +使用此矩阵,群集类的分配显示在最右边的表中。 使用群集类分配时,第四行上只有一个错误。 结果精度为五分之四,即 80%。 + +我们可以将的线性分配问题扩展到为 10 个 MNIST 集群分配标签的问题。 我们在`scipy`包中使用`linear_sum_assignment()`函数。 该函数基于匈牙利算法。“列表 13.5.6”显示了群集标记过程的实现。 有关`linear_sum_assignment()`函数的更多详细信息,请参见[这里](https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.optimize.linear_sum_assignment.html)。 + +要训​​练 1 头情况下的 IIC 模型,请执行: + +```py +python3 iic-13.5.1.py --heads=1 --train --save-weights=head1.h5 +``` + +对于其他数量的打印头,应相应地修改选项`--heads`和`--save-weights`。 在下一部分中,我们将检查 IIC 作为 MNIST 分类器的表现。 + +# 6\. 将 MNIST 用于验证 + +在本节中,我们将研究使用 MNIST 测试数据集对 IIC 进行验证之后的结果。 在测试数据集上运行聚类预测后,线性分配问题为每个聚类分配标签,从本质上将聚类转换为分类。 我们计算了分类精度,如“表 13.6.1”所示。 IIC 的准确率高于论文中报告的 99.3%。 但是,应该注意的是,并非每次训练都会导致高精度分类。 + +有时,由于优化似乎停留在局部最小值中,我们不得不多次运行训练。 此外,在多头 IIC 模型中,对于所有头部,我们都无法获得相同水平的表现。“表 13.6.1”报告了最佳表现的头部。 + +| **头部数** |`1`|`2`|`3`|`4`|`5`| +| --- | --- | --- | --- | --- | --- | +| **精度,%** | 99.49 | 99.47 | 99.54 | 99.52 | 99.53 | + +表 13.6.1 不同头数的 IIC 精度 + +权重在 GitHub 上可用。 例如,要在单头 IIC 上运行验证: + +```py +python3 iic-13.5.1.py --heads=1 --eval --restore-weights=head1-best.h5 +``` + +总之,我们可以看到可以执行无监督分类。 结果实际上比我们在“第 2 章”,“深度神经网络”中检查的监督分类更好。 在以下各节中,我们将把注意力转向对连续随机变量的无监督学习。 + +# 7\. 通过最大化连续随机变量的互信息进行无监督学习 + +在前面的章节中,我们了解到可以很好地估计离散随机变量的 MI。 我们还证明了借助线性分配算法,通过最大化 MI 来执行聚类的网络可以得出准确的分类器。 + +如果 IIC 是离散随机变量 MI 的良好估计者,那么连续随机变量又如何呢? 
在本节的中,我们讨论 Belghazi 等人的**互信息网络估计器**(**MINE**)。 [3]作为连续随机变量 MI 的估计量。 + +MINE 在“公式 13.1.1”中提出了 KL 散度的另一种表示形式,以使用神经网络实现 MI 估计器。 在 MINE 中,使用 KL 散度的 **Donsker-Varadhan**(**DV**)表示: + +![](img/B14853_13_085.png) (Equation 13.7.1) + +在函数`T`的整个空间中占据最高位的位置。`T`是从输入空间(例如图像)映射到实数的任意函数。 回想一下,最高被粗略地解释为最大值。 对于`T`,我们可以从`θ ∈ Θ`参数化的函数`T[θ] = X x Y -> R`系列中进行选择。 因此,我们可以用估计 KL 散度的深度神经网络表示`T[θ]`,因此代表`T`。 + +给定作为 MI 的精确(但难处理)表示`I(X; Y)`及其参数化的估计值`I[θ](X; Y)`作为易于处理的下限,我们可以安全地说: + +![](img/B14853_13_091.png) (Equation 13.7.2) + +其中参数化的 MI 估计为: + +![](img/B14853_13_092.png) (Equation 13.7.3) + +`I[θ](X; Y)`也称为神经信息测度。 在第一个期望中,样本`(x, y) ~ P(X, Y)`从联合分布`P`(`X`,`Y`)中获取。 在第二个期望中,样本`x ~ P(X), y ~ P(Y)`来自边际分布`P(X)`和`P(Y)`。 + +“算法 13.7.1”:`MINE`。 + +初始化所有网络参数`θ`。 + +`θ`尚未收敛时,请执行: + +1. 从联合分布`{(x^(1), y^(1)), (x^(2), y^(2)), ..., (x^(b), y^(b))} ~ P(X, Y)`中抽取一个小批量的`b` +2. 从边际分布`{x^(1), x^(2), ..., x^(b)} ~ P(X)`和`{y^(1), y^(2), ..., y^(b)} ~ P(Y)`中抽取一个小批量的`b`。 +3. 评估下界: + + ![](img/B14853_13_101.png) +4. 评估偏差校正后的梯度: + + ![](img/B14853_13_103.png) + +1. 更新网络参数: + + ![](img/B14853_13_102.png) + + 其中`ε`是学习率。 + +“算法 13.7.1”总结了 MINE 算法。 来自边际分布的样本是来自联合分布的样本,另一个变量已删除。 例如,样本`x`只是简单的样本`(x, y)`,变量`y`被丢弃。 在降为变量`y`的值之后,将`x`的样本进行混洗。 对`y`执行相同的采样方法。 为了清楚起见,我们使用符号`x_bar`和`y_bar`从边际分布中识别样本。 + +在下一部分中,在双变量高斯分布的情况下,我们将使用 MINE 算法估计 MI。 我们将展示使用解析方法估计的 MI 和使用 MINE 估计 MI 的方法。 + +# 8\. 估计二元高斯的互信息 + +在本节中,我们将验证 MINE 的二元高斯分布。“图 13.8.1”显示具有均值和协方差的双变量高斯分布: + +![](img/B14853_13_107.png) (Equation 13.8.1) + +![](img/B14853_13_108.png) (Equation 13.8.2) + +![A picture containing text Description automatically generated](img/B14853_13_09.png) + +图 13.8.1 具有均值和协方差的二维高斯分布,如公式 13.8.1 和公式 13.8.2 所示 + +我们的目标是通过近似“公式 13.1.3”来估计 MI。 可以通过获得大量样本(例如 1 百万个)并创建具有大量箱子(例如 100 个箱子)的直方图来进行近似。“列表 13.8.1”显示了使用装仓对二元高斯分布的 MI 进行的手动计算。 + +“列表 13.8.1”:`mine-13.8.1.py`: + +```py +def sample(joint=True, + mean=[0, 0], + cov=[[1, 0.5], [0.5, 1]], + n_data=1000000): + """Helper function to obtain samples + fr a bivariate Gaussian distribution +``` + +```py + Arguments: + joint (Bool): If joint distribution is desired + mean (list): The mean values of the 2D Gaussian + cov (list): The covariance matrix of the 2D Gaussian + n_data (int): Number of samples fr 2D Gaussian + """ + xy = np.random.multivariate_normal(mean=mean, + cov=cov, + size=n_data) +``` + +```py + # samples fr joint distribution + if joint: + return xy + y = np.random.multivariate_normal(mean=mean, + cov=cov, + size=n_data) +``` + +```py + # samples fr marginal distribution + x = xy[:,0].reshape(-1,1) + y = y[:,1].reshape(-1,1) + xy = np.concatenate([x, y], axis=1) + return xy +``` + +```py +def compute_mi(cov_xy=0.5, n_bins=100): + """Analytic computation of MI using binned + 2D Gaussian +``` + +```py + Arguments: + cov_xy (list): Off-diagonal elements of covariance + matrix + n_bins (int): Number of bins to "quantize" the + continuous 2D Gaussian + """ + cov=[[1, cov_xy], [cov_xy, 1]] + data = sample(cov=cov) + # get joint distribution samples + # perform histogram binning + joint, edge = np.histogramdd(data, bins=n_bins) + joint /= joint.sum() + eps = np.finfo(float).eps + joint[joint