From 71a4306ee3c109dbdb24d6b3b455a2fd55dfdf41 Mon Sep 17 00:00:00 2001 From: estomm Date: Thu, 22 Apr 2021 21:09:22 +0800 Subject: [PATCH] =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=8A=A0=E8=BD=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/c_cpp_properties.json | 1 - .vscode/settings.json | 3 +- Tensorflow/TensorFlow2.0/0 概述.md | 27 + Tensorflow/TensorFlow2.0/5 数据流水线.ipynb | 236 +++++ ...据.ipynb => 5.1 加载numpy&pandas数据.ipynb} | 0 ...rd数据.ipynb => 5.2 加载generator数据.ipynb} | 0 ....ipynb => 5.3 加载make_csv_dataset数据.ipynb} | 0 .../TensorFlow2.0/5.4 加载tf.Record数据.ipynb | 0 .../5.5 加载tf.TextLineReader数据.ipynb | 0 Tensorflow/TensorFlow2.0/7 数据流水线.ipynb | 85 -- .../{1.ipynb => 1 基础概念.ipynb} | 0 .../{6.ipynb => 6 图像分类.ipynb} | 0 .../{7.ipynb => 7 Torch重构神经网络.ipynb} | 0 .../{8.ipynb => 8 图像分类.ipynb} | 0 pytorch/殷康龙的笔记/8nn_tutorial.ipynb | 914 ------------------ pytorch/殷康龙的笔记/9 TensorBoard.ipynb | 0 16 files changed, 265 insertions(+), 1001 deletions(-) create mode 100644 Tensorflow/TensorFlow2.0/0 概述.md create mode 100644 Tensorflow/TensorFlow2.0/5 数据流水线.ipynb rename Tensorflow/TensorFlow2.0/{5 加载numpy&pandas数据.ipynb => 5.1 加载numpy&pandas数据.ipynb} (100%) rename Tensorflow/TensorFlow2.0/{8 加载tf.Record数据.ipynb => 5.2 加载generator数据.ipynb} (100%) rename Tensorflow/TensorFlow2.0/{6 加载tf.data.dataset数据.ipynb => 5.3 加载make_csv_dataset数据.ipynb} (100%) rename pytorch/殷康龙的笔记/9.ipynb => Tensorflow/TensorFlow2.0/5.4 加载tf.Record数据.ipynb (100%) create mode 100644 Tensorflow/TensorFlow2.0/5.5 加载tf.TextLineReader数据.ipynb delete mode 100644 Tensorflow/TensorFlow2.0/7 数据流水线.ipynb rename pytorch/殷康龙的笔记/{1.ipynb => 1 基础概念.ipynb} (100%) rename pytorch/殷康龙的笔记/{6.ipynb => 6 图像分类.ipynb} (100%) rename pytorch/殷康龙的笔记/{7.ipynb => 7 Torch重构神经网络.ipynb} (100%) rename pytorch/殷康龙的笔记/{8.ipynb => 8 图像分类.ipynb} (100%) delete mode 100644 pytorch/殷康龙的笔记/8nn_tutorial.ipynb create mode 100644 pytorch/殷康龙的笔记/9 TensorBoard.ipynb diff --git 
a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json index 1be3b2d4..6b778b10 100644 --- a/.vscode/c_cpp_properties.json +++ b/.vscode/c_cpp_properties.json @@ -2,7 +2,6 @@ "configurations": [ { "name": "Win32", - // 用来设置头文件 "includePath": [ "${workspaceFolder}/**" ], diff --git a/.vscode/settings.json b/.vscode/settings.json index 1710b354..86c44650 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -78,5 +78,6 @@ "xstddef": "cpp", "xtr1common": "cpp", "xutility": "cpp" - } + }, + "python.pythonPath": "D:\\anaconda\\envs\\ml\\python.exe" } \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/0 概述.md b/Tensorflow/TensorFlow2.0/0 概述.md new file mode 100644 index 00000000..cff90c56 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/0 概述.md @@ -0,0 +1,27 @@ +# TensorFlow + +> 只需要知道加载数据的方法,以及使用keras进行训练的方法。 + +## 概述 + +* tf.data + * experimental + * Dataset + * Iterator + * FixedLengthRecordDataset + * TFRecordDataset + * TextLineDataset +* tf.keras + * layers + * activations + * datasets + * preprocessing + * experimental + * models + * losses + * optimizers + * metrics + * utils + * class Model: Model groups layers into an object with training and inference features. + * class Sequential: Sequential groups a linear stack of layers into a tf.keras.Model. 
+## 其他 \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/5 数据流水线.ipynb b/Tensorflow/TensorFlow2.0/5 数据流水线.ipynb new file mode 100644 index 00000000..f8730c32 --- /dev/null +++ b/Tensorflow/TensorFlow2.0/5 数据流水线.ipynb @@ -0,0 +1,236 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd05ef0042cb263260037aa2928643ae94e240dd3afaec7872ebebe4f07619ddd0c", + "display_name": "Python 3.8.8 64-bit ('ml': conda)" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# 1 Pipeline Structure的结构\n", + "\n", + "我们可以将典型的 TensorFlow 训练输入流水线视为 ETL 流程:\n", + "\n", + "1. Extract:从永久性存储(可以是 HDD 或 SSD 等本地存储或 GCS 或 HDFS 等远程存储)读取数据。\n", + "2. Transform:使用CPU核心解析数据并对其执行预处理操作,例如图像解压缩、数据增强转换(例如随机裁剪、翻转和颜色失真)、重排和批处理。\n", + "3. 
Load:将转换后的数据加载到执行机器学习模型的加速器设备(例如,GPU 或 TPU)上。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# 2 tf.data.dataset API说明\n", + "\n", + "tf.data API 围绕可组合转换而设计,旨在为用户提供灵活性。虽然这些转换中有很多都是可以交替的,但某些转换的顺序会对性能产生影响。\n", + "\n", + "## 1 map映射和batch批次\n", + "调用传递给 map 转换的用户定义函数具有与调度和执行用户定义函数相关的开销。通常,与函数执行的计算量相比,这种开销很小。但是,如果 map 执行的工作很少,那么这种开销可能会占总成本的很大一部分。在这种情况下,建议向量化用户定义的函数(即,让该函数一次对一批输入进行操作),并在 map 转换之前先应用 batch 转换。\n", + "\n", + "## 2 map映射和cache缓存\n", + "\n", + "tf.data.Dataset.cache 转换可以在内存或本地存储中缓存数据集。如果传递给 map 转换的用户定义函数代价很高,则只要内存或本地存储仍可以容纳生成的数据集,就可以在映射转换后应用缓存转换。如果用户定义的函数会增加存储数据集所需的空间,并超出缓存容量,请考虑在训练作业之前预处理数据以减少资源消耗量。\n", + "\n", + "## 3 map映射和interleave交错/prefetch预取/shuffle重排\n", + "许多转换(包括 map、interleave、prefetch 和 shuffle)都维持一个内部元素缓冲区。如果传递给 map 转换的用户定义函数改变了元素的大小,那么映射转换的顺序和缓冲元素的转换会影响内存使用量。通常,我们建议选择可以减少内存占用的顺序,除非为了提高性能而需要采用不同的顺序(例如,为了混合映射和批次转换)。\n", + "\n", + "## 4 repeat重复和shuffle重排\n", + "tf.data.Dataset.repeat 转换会将输入数据重复有限(或无限)次;每次数据重复通常称为一个周期。tf.data.Dataset.shuffle 转换会随机化数据集样本的顺序。\n", + "\n", + "如果在 shuffle 转换之前应用 repeat 转换,则系统会对周期边界进行模糊处理。也就是说,某些元素可以在其他元素出现之前重复出现。另一方面,如果在重复转换之前应用 shuffle 转换,那么在每个周期开始时性能可能会降低,因为需要初始化 shuffle 转换的内部状态。换言之,前者(repeat 在 shuffle 之前)可提供更好的性能,而后者(shuffle 在 repeat 之前)可提供更强的排序保证。\n", + "\n", + "如果可能,建议您使用 tf.data.experimental.shuffle_and_repeat(TF 1.x 中为 tf.contrib.data.shuffle_and_repeat)混合转换,这样可以达到两全其美的效果(良好的性能和强大的排序保证)。否则,我们建议在repeat重复之前进行shuffle重排。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# 3 tf.data.dataset API 实例\n", + "\n", + "### .map\n", + "使用map可以对数据进行预处理,原理和python自带的map一样\n", + "```\n", + "def prepare_mnist_fea(x, y):\n", + " x = tf.cast(x, tf.float32) / 255.0\n", + " y = tf.cast(y, tf.float32)\n", + " return x, y\n", + "\n", + "ds.map(prepare_mnist_fea)\n", + "```\n", + "\n", + "### .shuffle\n", + "打乱顺序\n", + "```\n", + "ds.shuffle(10000)\n", + "```\n", + "\n", + "### .batch\n", + "使用某个batch进行迭代\n", + "\n", + "```\n", + "ds.batch(32)\n", + "```\n", + "\n", + "### .repeat\n", + "重复执行整个数据多少次,也就是epoch的意思\n", + "```\n", + 
"ds.repeat(10)\n", + "```" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# 4 tf.data加载csv\n", + "\n", + "## 加载数据的方式(csv)\n", + "### 从内存中加载数据\n", + "* 例如使用numpy.load()或者pandas.read_csv()将数据加载到内存中。然后使用tf.data.dataset方法将数据加载到tensorflow中。_\n", + "```\n", + "tf.data.Dataset.from_tensors() \n", + "tf.data.Dataset.from_tensor_slices()\n", + "```\n", + "### 从生成器中读取数据\n", + "```\n", + "ds_counter = tf.data.Dataset.from_generator(python_generator, args=[25], output_types=tf.int32, output_shapes = (), )\n", + "```\n", + "### 直接读取csv数据\n", + "```\n", + "tf.data.experimental.make_csv_dataset()\n", + "```\n", + "### 从文件中加载数据\n", + "```\n", + "tf.data.TFRecordDataset() \n", + "tf.data.TextLineDataset()\n", + "tf.data.FixedLengthRecordDataset\n", + "```\n", + "### 从generator中加载数据\n", + "当有多个文件的时候,可以使用pandas生成读取文件的生成器。然后通过from_generator逐步加载数据。\n", + "```\n", + "ds_counter = tf.data.Dataset.from_generator(count, args=[25], output_types=tf.int32, output_shapes = (), )\n", + "```\n", + "## 数据流水线的多层含义\n", + "\n", + "1. 加载数据处理过程形成的流水线。(处理过程的流水线)\n", + "2. 
多个文件,按顺序加载形成的流水线。(多个文件的流水线)" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "# 5 tf.data.dataset常用方法说明" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "import pathlib\n", + "import os\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "np.set_printoptions(precision=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = tf.data.Dataset.from_tensor_slices(([8, 3, 0, 1],[1,2,1,2]))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "8\n3\n0\n1\n" + ] + } + ], + "source": [ + "\n", + "# 当做可迭代对象\n", + "for elem,lebel in dataset:\n", + " print(elem.numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[12 6]\n" + ] + } + ], + "source": [ + "# 使用reduce对数据内容进行合并\n", + "print(dataset.reduce(0, lambda state, value: state + value).numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(TensorSpec(shape=(), dtype=tf.int32, name=None), TensorSpec(shape=(), dtype=tf.int32, name=None))\n" + ] + } + ], + "source": [ + "# dataset对象可以包含各种数据结构。包括TensorFlow提供的tf.Tensor,tf.sparse.SparseTensor, tf.RaggedTensor,tf.TensorArray,或tf.data.Dataset。和Python原生的数据结构tuple,dict,NamedTuple\n", + "\n", + "print(dataset.element_spec)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/Tensorflow/TensorFlow2.0/5 加载numpy&pandas数据.ipynb 
b/Tensorflow/TensorFlow2.0/5.1 加载numpy&pandas数据.ipynb similarity index 100% rename from Tensorflow/TensorFlow2.0/5 加载numpy&pandas数据.ipynb rename to Tensorflow/TensorFlow2.0/5.1 加载numpy&pandas数据.ipynb diff --git a/Tensorflow/TensorFlow2.0/8 加载tf.Record数据.ipynb b/Tensorflow/TensorFlow2.0/5.2 加载generator数据.ipynb similarity index 100% rename from Tensorflow/TensorFlow2.0/8 加载tf.Record数据.ipynb rename to Tensorflow/TensorFlow2.0/5.2 加载generator数据.ipynb diff --git a/Tensorflow/TensorFlow2.0/6 加载tf.data.dataset数据.ipynb b/Tensorflow/TensorFlow2.0/5.3 加载make_csv_dataset数据.ipynb similarity index 100% rename from Tensorflow/TensorFlow2.0/6 加载tf.data.dataset数据.ipynb rename to Tensorflow/TensorFlow2.0/5.3 加载make_csv_dataset数据.ipynb diff --git a/pytorch/殷康龙的笔记/9.ipynb b/Tensorflow/TensorFlow2.0/5.4 加载tf.Record数据.ipynb similarity index 100% rename from pytorch/殷康龙的笔记/9.ipynb rename to Tensorflow/TensorFlow2.0/5.4 加载tf.Record数据.ipynb diff --git a/Tensorflow/TensorFlow2.0/5.5 加载tf.TextLineReader数据.ipynb b/Tensorflow/TensorFlow2.0/5.5 加载tf.TextLineReader数据.ipynb new file mode 100644 index 00000000..e69de29b diff --git a/Tensorflow/TensorFlow2.0/7 数据流水线.ipynb b/Tensorflow/TensorFlow2.0/7 数据流水线.ipynb deleted file mode 100644 index 8d17b11b..00000000 --- a/Tensorflow/TensorFlow2.0/7 数据流水线.ipynb +++ /dev/null @@ -1,85 +0,0 @@ -{ - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": 3 - }, - "orig_nbformat": 2 - }, - "nbformat": 4, - "nbformat_minor": 2, - "cells": [ - { - "source": [ - "# tf.data.dataset常用的API\n", - "\n", - "tf.data API 围绕可组合转换而设计,旨在为用户提供灵活性。虽然这些转换中有很多都是可以交替的,但某些转换的顺序会对性能产生影响。\n", - "\n", - "## 1 map映射和batch批次\n", - "调用传递给 map 转换的用户定义函数具有与调度和执行用户定义函数相关的开销。通常,与函数执行的计算量相比,这种开销很小。但是,如果 map 几乎不起作用,那么这种开销可能会占总成本的很大一部分。在这种情况下,建议向量化用户定义的函数(即,让该函数一次对一批输入进行操作),并在 map 转换之前先应用 
batch 转换。\n", - "\n", - "## 2 map映射和cache缓存\n", - "\n", - "tf.data.Dataset.cache 转换可以在内存或本地存储中缓存数据集。如果传递给 map 转换的用户定义函数代价很高,则只要内存或本地存储仍可以容纳生成的数据集,就可以在映射转换后应用缓存转换。如果用户定义的函数会增加存储数据集所需的空间,并超出缓存容量,请考虑在训练作业之前预处理数据以减少资源消耗量。\n", - "\n", - "## 3 map映射和interleave交错/prefetch预取/shuffle重排\n", - "许多转换(包括map interleave、prefetch 和 shuffle)都维持一个内部元素缓冲区。如果传递给 map 转换的用户定义函数改变了元素的大小,那么映射转换的顺序和缓冲元素的转换会影响内存使用量。通常,我们建议选择可以减少内存占用的顺序,除非为了提高性能而需要采用不同的顺序(例如,为了混合映射和批次转换)。\n", - "\n", - "## 4 repeat重复和shuffle重排\n", - "tf.data.Dataset.repeat 转换会将输入数据重复有限(或无限)次;每次数据重复通常称为一个周期。tf.data.Dataset.shuffle 转换会随机化数据集样本的顺序。\n", - "\n", - "如果在 shuffle 转换之前应用 repeat 转换,则系统会对周期边界进行模糊处理。也就是说,某些元素可以在其他元素出现之前重复出现。另一方面,如果在重复转换之前应用 shuffle 转换,那么在每个周期开始时性能可能会降低,因为需要初始化 shuffle 转换的内部状态。换言之,前者(repeat 在 shuffle 之前)可提供更好的性能,而后者(repeat 在 shuffle 之前)可提供更强的排序保证。\n", - "\n", - "如果可能,建议您使用 tf.contrib.data.shuffle_and_repeat 混合转换,这样可以达到两全其美的效果(良好的性能和强大的排序保证)。否则,我们建议在repeat重复之前进行shuffle重排。" - ], - "cell_type": "markdown", - "metadata": {} - }, - { - "source": [ - "# Pipeline Structure的结构\n", - "\n", - "我们可以将典型的 TensorFlow 训练输入流水线视为 ETL 流程:\n", - "\n", - "1. Extract:从永久性存储(可以是 HDD 或 SSD 等本地存储或 GCS 或 HDFS 等远程存储)读取数据。\n", - "2. Transform:使用CPU核心解析数据并对其执行预处理操作,例如图像解压缩、数据增强转换(例如随机裁剪、翻转和颜色失真)、重排和批处理。\n", - "3. 
Load:将转换后的数据加载到执行机器学习模型的加速器设备(例如,GPU 或 TPU)上。" - ], - "cell_type": "markdown", - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# tf.data说明\n", - "\n", - "## 加载数据的方式\n", - "### 从内存中加载数据\n", - "例如使用numpy.load()或者pandas.read_csv()将数据加载到内存中。然后使用tf.data.dataset方法将数据加载到tensorflow中。_\n", - "tf.data.Dataset.from_tensors() or tf.data.Dataset.from_tensor_slices()\n", - "### 从文件中加载数据\n", - "tf.data.TFRecordDataset()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# 数据流水线读取多个文件实例(并非广义上的数据流水线)\n" - ] - } - ] -} \ No newline at end of file diff --git a/pytorch/殷康龙的笔记/1.ipynb b/pytorch/殷康龙的笔记/1 基础概念.ipynb similarity index 100% rename from pytorch/殷康龙的笔记/1.ipynb rename to pytorch/殷康龙的笔记/1 基础概念.ipynb diff --git a/pytorch/殷康龙的笔记/6.ipynb b/pytorch/殷康龙的笔记/6 图像分类.ipynb similarity index 100% rename from pytorch/殷康龙的笔记/6.ipynb rename to pytorch/殷康龙的笔记/6 图像分类.ipynb diff --git a/pytorch/殷康龙的笔记/7.ipynb b/pytorch/殷康龙的笔记/7 Torch重构神经网络.ipynb similarity index 100% rename from pytorch/殷康龙的笔记/7.ipynb rename to pytorch/殷康龙的笔记/7 Torch重构神经网络.ipynb diff --git a/pytorch/殷康龙的笔记/8.ipynb b/pytorch/殷康龙的笔记/8 图像分类.ipynb similarity index 100% rename from pytorch/殷康龙的笔记/8.ipynb rename to pytorch/殷康龙的笔记/8 图像分类.ipynb diff --git a/pytorch/殷康龙的笔记/8nn_tutorial.ipynb b/pytorch/殷康龙的笔记/8nn_tutorial.ipynb deleted file mode 100644 index df9a0ebb..00000000 --- a/pytorch/殷康龙的笔记/8nn_tutorial.ipynb +++ /dev/null @@ -1,914 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\nWhat is `torch.nn` *really*?\n============================\nby Jeremy Howard, `fast.ai `_. 
Thanks to Rachel Thomas and Francisco Ingham.\n\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We recommend running this tutorial as a notebook, not a script. To download the notebook (.ipynb) file,\nclick the link at the top of the page.\n\nPyTorch provides the elegantly designed modules and classes `torch.nn `_ ,\n`torch.optim `_ ,\n`Dataset `_ ,\nand `DataLoader `_\nto help you create and train neural networks.\nIn order to fully utilize their power and customize\nthem for your problem, you need to really understand exactly what they're\ndoing. To develop this understanding, we will first train basic neural net\non the MNIST data set without using any features from these models; we will\ninitially only use the most basic PyTorch tensor functionality. Then, we will\nincrementally add one feature from ``torch.nn``, ``torch.optim``, ``Dataset``, or\n``DataLoader`` at a time, showing exactly what each piece does, and how it\nworks to make the code either more concise, or more flexible.\n\n**This tutorial assumes you already have PyTorch installed, and are familiar\nwith the basics of tensor operations.** (If you're familiar with Numpy array\noperations, you'll find the PyTorch tensor operations used here nearly identical).\n\nMNIST data setup\n----------------\n\nWe will use the classic `MNIST `_ dataset,\nwhich consists of black-and-white images of hand-drawn digits (between 0 and 9).\n\nWe will use `pathlib `_\nfor dealing with paths (part of the Python 3 standard library), and will\ndownload the dataset using\n`requests `_. 
We will only\nimport modules when we use them, so you can see exactly what's being\nused at each point.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from pathlib import Path\nimport requests\n\nDATA_PATH = Path(\"data\")\nPATH = DATA_PATH / \"mnist\"\n\nPATH.mkdir(parents=True, exist_ok=True)\n\nURL = \"https://github.com/pytorch/tutorials/raw/master/_static/\"\nFILENAME = \"mnist.pkl.gz\"\n\nif not (PATH / FILENAME).exists():\n content = requests.get(URL + FILENAME).content\n (PATH / FILENAME).open(\"wb\").write(content)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This dataset is in numpy array format, and has been stored using pickle,\na python-specific format for serializing data.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pickle\nimport gzip\n\nwith gzip.open((PATH / FILENAME).as_posix(), \"rb\") as f:\n ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding=\"latin-1\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Each image is 28 x 28, and is being stored as a flattened row of length\n784 (=28x28). 
Let's take a look at one; we need to reshape it to 2d\nfirst.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from matplotlib import pyplot\nimport numpy as np\n\npyplot.imshow(x_train[0].reshape((28, 28)), cmap=\"gray\")\nprint(x_train.shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "PyTorch uses ``torch.tensor``, rather than numpy arrays, so we need to\nconvert our data.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch\n\nx_train, y_train, x_valid, y_valid = map(\n torch.tensor, (x_train, y_train, x_valid, y_valid)\n)\nn, c = x_train.shape\nx_train, x_train.shape, y_train.min(), y_train.max()\nprint(x_train, y_train)\nprint(x_train.shape)\nprint(y_train.min(), y_train.max())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Neural net from scratch (no torch.nn)\n---------------------------------------------\n\nLet's first create a model using nothing but PyTorch tensor operations. We're assuming\nyou're already familiar with the basics of neural networks. (If you're not, you can\nlearn them at `course.fast.ai `_).\n\nPyTorch provides methods to create random or zero-filled tensors, which we will\nuse to create our weights and bias for a simple linear model. These are just regular\ntensors, with one very special addition: we tell PyTorch that they require a\ngradient. This causes PyTorch to record all of the operations done on the tensor,\nso that it can calculate the gradient during back-propagation *automatically*!\n\nFor the weights, we set ``requires_grad`` **after** the initialization, since we\ndon't want that step included in the gradient. (Note that a trailing ``_`` in\nPyTorch signifies that the operation is performed in-place.)\n\n

Note

We are initializing the weights here with\n `Xavier initialisation `_\n (by multiplying with 1/sqrt(n)).

\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import math\n\nweights = torch.randn(784, 10) / math.sqrt(784)\nweights.requires_grad_()\nbias = torch.zeros(10, requires_grad=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Thanks to PyTorch's ability to calculate gradients automatically, we can\nuse any standard Python function (or callable object) as a model! So\nlet's just write a plain matrix multiplication and broadcasted addition\nto create a simple linear model. We also need an activation function, so\nwe'll write `log_softmax` and use it. Remember: although PyTorch\nprovides lots of pre-written loss functions, activation functions, and\nso forth, you can easily write your own using plain python. PyTorch will\neven create fast GPU or vectorized CPU code for your function\nautomatically.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def log_softmax(x):\n return x - x.exp().sum(-1).log().unsqueeze(-1)\n\ndef model(xb):\n return log_softmax(xb @ weights + bias)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the above, the ``@`` stands for the dot product operation. We will call\nour function on one batch of data (in this case, 64 images). This is\none *forward pass*. 
Note that our predictions won't be any better than\nrandom at this stage, since we start with random weights.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "bs = 64 # batch size\n\nxb = x_train[0:bs] # a mini-batch from x\npreds = model(xb) # predictions\npreds[0], preds.shape\nprint(preds[0], preds.shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As you see, the ``preds`` tensor contains not only the tensor values, but also a\ngradient function. We'll use this later to do backprop.\n\nLet's implement negative log-likelihood to use as the loss function\n(again, we can just use standard Python):\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def nll(input, target):\n return -input[range(target.shape[0]), target].mean()\n\nloss_func = nll" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's check our loss with our random model, so we can see if we improve\nafter a backprop pass later.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "yb = y_train[0:bs]\nprint(loss_func(preds, yb))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's also implement a function to calculate the accuracy of our model.\nFor each prediction, if the index with the largest value matches the\ntarget value, then the prediction was correct.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def accuracy(out, yb):\n preds = torch.argmax(out, dim=1)\n return (preds == yb).float().mean()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's check the accuracy of our random model, so we can see if 
our\naccuracy improves as our loss improves.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "print(accuracy(preds, yb))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can now run a training loop. For each iteration, we will:\n\n- select a mini-batch of data (of size ``bs``)\n- use the model to make predictions\n- calculate the loss\n- ``loss.backward()`` updates the gradients of the model, in this case, ``weights``\n and ``bias``.\n\nWe now use these gradients to update the weights and bias. We do this\nwithin the ``torch.no_grad()`` context manager, because we do not want these\nactions to be recorded for our next calculation of the gradient. You can read\nmore about how PyTorch's Autograd records operations\n`here `_.\n\nWe then set the\ngradients to zero, so that we are ready for the next loop.\nOtherwise, our gradients would record a running tally of all the operations\nthat had happened (i.e. ``loss.backward()`` *adds* the gradients to whatever is\nalready stored, rather than replacing them).\n\n.. 
tip:: You can use the standard python debugger to step through PyTorch\n code, allowing you to check the various variable values at each step.\n Uncomment ``set_trace()`` below to try it out.\n\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from IPython.core.debugger import set_trace\n\nlr = 0.5 # learning rate\nepochs = 2 # how many epochs to train for\n\nfor epoch in range(epochs):\n for i in range((n - 1) // bs + 1):\n # set_trace()\n start_i = i * bs\n end_i = start_i + bs\n xb = x_train[start_i:end_i]\n yb = y_train[start_i:end_i]\n pred = model(xb)\n loss = loss_func(pred, yb)\n\n loss.backward()\n with torch.no_grad():\n weights -= weights.grad * lr\n bias -= bias.grad * lr\n weights.grad.zero_()\n bias.grad.zero_()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "That's it: we've created and trained a minimal neural network (in this case, a\nlogistic regression, since we have no hidden layers) entirely from scratch!\n\nLet's check the loss and accuracy and compare those to what we got\nearlier. We expect that the loss will have decreased and accuracy to\nhave increased, and they have.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "print(loss_func(model(xb), yb), accuracy(model(xb), yb))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using torch.nn.functional\n------------------------------\n\nWe will now refactor our code, so that it does the same thing as before, only\nwe'll start taking advantage of PyTorch's ``nn`` classes to make it more concise\nand flexible. 
At each step from here, we should be making our code one or more\nof: shorter, more understandable, and/or more flexible.\n\nThe first and easiest step is to make our code shorter by replacing our\nhand-written activation and loss functions with those from ``torch.nn.functional``\n(which is generally imported into the namespace ``F`` by convention). This module\ncontains all the functions in the ``torch.nn`` library (whereas other parts of the\nlibrary contain classes). As well as a wide range of loss and activation\nfunctions, you'll also find here some convenient functions for creating neural\nnets, such as pooling functions. (There are also functions for doing convolutions,\nlinear layers, etc, but as we'll see, these are usually better handled using\nother parts of the library.)\n\nIf you're using negative log likelihood loss and log softmax activation,\nthen Pytorch provides a single function ``F.cross_entropy`` that combines\nthe two. So we can even remove the activation function from our model.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import torch.nn.functional as F\n\nloss_func = F.cross_entropy\n\ndef model(xb):\n return xb @ weights + bias" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that we no longer call ``log_softmax`` in the ``model`` function. Let's\nconfirm that our loss and accuracy are the same as before:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "print(loss_func(model(xb), yb), accuracy(model(xb), yb))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Refactor using nn.Module\n-----------------------------\nNext up, we'll use ``nn.Module`` and ``nn.Parameter``, for a clearer and more\nconcise training loop. 
We subclass ``nn.Module`` (which itself is a class and\nable to keep track of state). In this case, we want to create a class that\nholds our weights, bias, and method for the forward step. ``nn.Module`` has a\nnumber of attributes and methods (such as ``.parameters()`` and ``.zero_grad()``)\nwhich we will be using.\n\n

Note

``nn.Module`` (uppercase M) is a PyTorch specific concept, and is a\n class we'll be using a lot. ``nn.Module`` is not to be confused with the Python\n concept of a (lowercase ``m``) `module `_,\n which is a file of Python code that can be imported.

\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from torch import nn\n\nclass Mnist_Logistic(nn.Module):\n def __init__(self):\n super().__init__()\n self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))\n self.bias = nn.Parameter(torch.zeros(10))\n\n def forward(self, xb):\n return xb @ self.weights + self.bias" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Since we're now using an object instead of just using a function, we\nfirst have to instantiate our model:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "model = Mnist_Logistic()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we can calculate the loss in the same way as before. Note that\n``nn.Module`` objects are used as if they are functions (i.e they are\n*callable*), but behind the scenes Pytorch will call our ``forward``\nmethod automatically.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "print(loss_func(model(xb), yb))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Previously for our training loop we had to update the values for each parameter\nby name, and manually zero out the grads for each parameter separately, like this:\n::\n with torch.no_grad():\n weights -= weights.grad * lr\n bias -= bias.grad * lr\n weights.grad.zero_()\n bias.grad.zero_()\n\n\nNow we can take advantage of model.parameters() and model.zero_grad() (which\nare both defined by PyTorch for ``nn.Module``) to make those steps more concise\nand less prone to the error of forgetting some of our parameters, particularly\nif we had a more complicated model:\n::\n with torch.no_grad():\n for p in model.parameters(): p -= p.grad * lr\n 
model.zero_grad()\n\n\nWe'll wrap our little training loop in a ``fit`` function so we can run it\nagain later.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def fit():\n for epoch in range(epochs):\n for i in range((n - 1) // bs + 1):\n start_i = i * bs\n end_i = start_i + bs\n xb = x_train[start_i:end_i]\n yb = y_train[start_i:end_i]\n pred = model(xb)\n loss = loss_func(pred, yb)\n\n loss.backward()\n with torch.no_grad():\n for p in model.parameters():\n p -= p.grad * lr\n model.zero_grad()\n\nfit()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's double-check that our loss has gone down:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "print(loss_func(model(xb), yb))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Refactor using nn.Linear\n-------------------------\n\nWe continue to refactor our code. Instead of manually defining and\ninitializing ``self.weights`` and ``self.bias``, and calculating ``xb @\nself.weights + self.bias``, we will instead use the Pytorch class\n`nn.Linear `_ for a\nlinear layer, which does all that for us. 
Pytorch has many types of\npredefined layers that can greatly simplify our code, and often makes it\nfaster too.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "class Mnist_Logistic(nn.Module):\n def __init__(self):\n super().__init__()\n self.lin = nn.Linear(784, 10)\n\n def forward(self, xb):\n return self.lin(xb)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We instantiate our model and calculate the loss in the same way as before:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "model = Mnist_Logistic()\nprint(loss_func(model(xb), yb))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We are still able to use our same ``fit`` method as before.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fit()\n\nprint(loss_func(model(xb), yb))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Refactor using optim\n------------------------------\n\nPytorch also has a package with various optimization algorithms, ``torch.optim``.\nWe can use the ``step`` method from our optimizer to take a forward step, instead\nof manually updating each parameter.\n\nThis will let us replace our previous manually coded optimization step:\n::\n with torch.no_grad():\n for p in model.parameters(): p -= p.grad * lr\n model.zero_grad()\n\nand instead use just:\n::\n opt.step()\n opt.zero_grad()\n\n(``optim.zero_grad()`` resets the gradient to 0 and we need to call it before\ncomputing the gradient for the next minibatch.)\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from torch import optim" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "We'll define a little function to create our model and optimizer so we\ncan reuse it in the future.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def get_model():\n model = Mnist_Logistic()\n return model, optim.SGD(model.parameters(), lr=lr)\n\nmodel, opt = get_model()\nprint(loss_func(model(xb), yb))\n\nfor epoch in range(epochs):\n for i in range((n - 1) // bs + 1):\n start_i = i * bs\n end_i = start_i + bs\n xb = x_train[start_i:end_i]\n yb = y_train[start_i:end_i]\n pred = model(xb)\n loss = loss_func(pred, yb)\n\n loss.backward()\n opt.step()\n opt.zero_grad()\n\nprint(loss_func(model(xb), yb))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Refactor using Dataset\n------------------------------\n\nPyTorch has an abstract Dataset class. A Dataset can be anything that has\na ``__len__`` function (called by Python's standard ``len`` function) and\na ``__getitem__`` function as a way of indexing into it.\n`This tutorial `_\nwalks through a nice example of creating a custom ``FacialLandmarkDataset`` class\nas a subclass of ``Dataset``.\n\nPyTorch's `TensorDataset `_\nis a Dataset wrapping tensors. By defining a length and way of indexing,\nthis also gives us a way to iterate, index, and slice along the first\ndimension of a tensor. 
This will make it easier to access both the\nindependent and dependent variables in the same line as we train.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from torch.utils.data import TensorDataset" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Both ``x_train`` and ``y_train`` can be combined in a single ``TensorDataset``,\nwhich will be easier to iterate over and slice.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "train_ds = TensorDataset(x_train, y_train)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Previously, we had to iterate through minibatches of x and y values separately:\n::\n xb = x_train[start_i:end_i]\n yb = y_train[start_i:end_i]\n\n\nNow, we can do these two steps together:\n::\n xb,yb = train_ds[i*bs : i*bs+bs]\n\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "model, opt = get_model()\n\nfor epoch in range(epochs):\n for i in range((n - 1) // bs + 1):\n xb, yb = train_ds[i * bs: i * bs + bs]\n pred = model(xb)\n loss = loss_func(pred, yb)\n\n loss.backward()\n opt.step()\n opt.zero_grad()\n\nprint(loss_func(model(xb), yb))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Refactor using DataLoader\n------------------------------\n\nPytorch's ``DataLoader`` is responsible for managing batches. You can\ncreate a ``DataLoader`` from any ``Dataset``. ``DataLoader`` makes it easier\nto iterate over batches. 
Rather than having to use ``train_ds[i*bs : i*bs+bs]``,\nthe DataLoader gives us each minibatch automatically.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from torch.utils.data import DataLoader\n\ntrain_ds = TensorDataset(x_train, y_train)\ntrain_dl = DataLoader(train_ds, batch_size=bs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Previously, our loop iterated over batches (xb, yb) like this:\n::\n for i in range((n-1)//bs + 1):\n xb,yb = train_ds[i*bs : i*bs+bs]\n pred = model(xb)\n\nNow, our loop is much cleaner, as (xb, yb) are loaded automatically from the data loader:\n::\n for xb,yb in train_dl:\n pred = model(xb)\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "model, opt = get_model()\n\nfor epoch in range(epochs):\n for xb, yb in train_dl:\n pred = model(xb)\n loss = loss_func(pred, yb)\n\n loss.backward()\n opt.step()\n opt.zero_grad()\n\nprint(loss_func(model(xb), yb))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Thanks to Pytorch's ``nn.Module``, ``nn.Parameter``, ``Dataset``, and ``DataLoader``,\nour training loop is now dramatically smaller and easier to understand. Let's\nnow try to add the basic features necessary to create effective models in practice.\n\nAdd validation\n-----------------------\n\nIn section 1, we were just trying to get a reasonable training loop set up for\nuse on our training data. In reality, you **always** should also have\na `validation set `_, in order\nto identify if you are overfitting.\n\nShuffling the training data is\n`important `_\nto prevent correlation between batches and overfitting. 
On the other hand, the\nvalidation loss will be identical whether we shuffle the validation set or not.\nSince shuffling takes extra time, it makes no sense to shuffle the validation data.\n\nWe'll use a batch size for the validation set that is twice as large as\nthat for the training set. This is because the validation set does not\nneed backpropagation and thus takes less memory (it doesn't need to\nstore the gradients). We take advantage of this to use a larger batch\nsize and compute the loss more quickly.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "train_ds = TensorDataset(x_train, y_train)\ntrain_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)\n\nvalid_ds = TensorDataset(x_valid, y_valid)\nvalid_dl = DataLoader(valid_ds, batch_size=bs * 2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will calculate and print the validation loss at the end of each epoch.\n\n(Note that we always call ``model.train()`` before training, and ``model.eval()``\nbefore inference, because these are used by layers such as ``nn.BatchNorm2d``\nand ``nn.Dropout`` to ensure appropriate behaviour for these different phases.)\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "model, opt = get_model()\n\nfor epoch in range(epochs):\n model.train()\n for xb, yb in train_dl:\n pred = model(xb)\n loss = loss_func(pred, yb)\n\n loss.backward()\n opt.step()\n opt.zero_grad()\n\n model.eval()\n with torch.no_grad():\n valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)\n\n print(epoch, valid_loss / len(valid_dl))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create fit() and get_data()\n----------------------------------\n\nWe'll now do a little refactoring of our own. 
Since we go through a similar\nprocess twice of calculating the loss for both the training set and the\nvalidation set, let's make that into its own function, ``loss_batch``, which\ncomputes the loss for one batch.\n\nWe pass an optimizer in for the training set, and use it to perform\nbackprop. For the validation set, we don't pass an optimizer, so the\nmethod doesn't perform backprop.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def loss_batch(model, loss_func, xb, yb, opt=None):\n loss = loss_func(model(xb), yb)\n\n if opt is not None:\n loss.backward()\n opt.step()\n opt.zero_grad()\n\n return loss.item(), len(xb)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "``fit`` runs the necessary operations to train our model and compute the\ntraining and validation losses for each epoch.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import numpy as np\n\ndef fit(epochs, model, loss_func, opt, train_dl, valid_dl):\n for epoch in range(epochs):\n model.train()\n for xb, yb in train_dl:\n loss_batch(model, loss_func, xb, yb, opt)\n\n model.eval()\n with torch.no_grad():\n losses, nums = zip(\n *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]\n )\n val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)\n\n print(epoch, val_loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "``get_data`` returns dataloaders for the training and validation sets.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def get_data(train_ds, valid_ds, bs):\n return (\n DataLoader(train_ds, batch_size=bs, shuffle=True),\n DataLoader(valid_ds, batch_size=bs * 2),\n )" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"Now, our whole process of obtaining the data loaders and fitting the\nmodel can be run in 3 lines of code:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "train_dl, valid_dl = get_data(train_ds, valid_ds, bs)\nmodel, opt = get_model()\nfit(epochs, model, loss_func, opt, train_dl, valid_dl)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can use these basic 3 lines of code to train a wide variety of models.\nLet's see if we can use them to train a convolutional neural network (CNN)!\n\nSwitch to CNN\n-------------\n\nWe are now going to build our neural network with three convolutional layers.\nBecause none of the functions in the previous section assume anything about\nthe model form, we'll be able to use them to train a CNN without any modification.\n\nWe will use Pytorch's predefined\n`Conv2d `_ class\nas our convolutional layer. We define a CNN with 3 convolutional layers.\nEach convolution is followed by a ReLU. At the end, we perform an\naverage pooling. 
(Note that ``view`` is PyTorch's version of numpy's\n``reshape``)\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "class Mnist_CNN(nn.Module):\n def __init__(self):\n super().__init__()\n self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)\n self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)\n self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)\n\n def forward(self, xb):\n xb = xb.view(-1, 1, 28, 28)\n xb = F.relu(self.conv1(xb))\n xb = F.relu(self.conv2(xb))\n xb = F.relu(self.conv3(xb))\n xb = F.avg_pool2d(xb, 4)\n return xb.view(-1, xb.size(1))\n\nlr = 0.1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`Momentum `_ is a variation on\nstochastic gradient descent that takes previous updates into account as well\nand generally leads to faster training.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "model = Mnist_CNN()\nopt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)\n\nfit(epochs, model, loss_func, opt, train_dl, valid_dl)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "nn.Sequential\n------------------------\n\n``torch.nn`` has another handy class we can use to simplify our code:\n`Sequential `_ .\nA ``Sequential`` object runs each of the modules contained within it, in a\nsequential manner. This is a simpler way of writing our neural network.\n\nTo take advantage of this, we need to be able to easily define a\n**custom layer** from a given function. For instance, PyTorch doesn't\nhave a `view` layer, and we need to create one for our network. 
``Lambda``\nwill create a layer that we can then use when defining a network with\n``Sequential``.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "class Lambda(nn.Module):\n def __init__(self, func):\n super().__init__()\n self.func = func\n\n def forward(self, x):\n return self.func(x)\n\n\ndef preprocess(x):\n return x.view(-1, 1, 28, 28)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The model created with ``Sequential`` is simply:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "model = nn.Sequential(\n Lambda(preprocess),\n nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),\n nn.ReLU(),\n nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),\n nn.ReLU(),\n nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),\n nn.ReLU(),\n nn.AvgPool2d(4),\n Lambda(lambda x: x.view(x.size(0), -1)),\n)\n\nopt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)\n\nfit(epochs, model, loss_func, opt, train_dl, valid_dl)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Wrapping DataLoader\n-----------------------------\n\nOur CNN is fairly concise, but it only works with MNIST, because:\n - It assumes the input is a 28\\*28 long vector\n - It assumes that the final CNN grid size is 4\\*4 (since that's the average\npooling kernel size we used)\n\nLet's get rid of these two assumptions, so our model works with any 2d\nsingle channel image. 
First, we can remove the initial Lambda layer by\nmoving the data preprocessing into a generator:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def preprocess(x, y):\n return x.view(-1, 1, 28, 28), y\n\n\nclass WrappedDataLoader:\n def __init__(self, dl, func):\n self.dl = dl\n self.func = func\n\n def __len__(self):\n return len(self.dl)\n\n def __iter__(self):\n batches = iter(self.dl)\n for b in batches:\n yield (self.func(*b))\n\ntrain_dl, valid_dl = get_data(train_ds, valid_ds, bs)\ntrain_dl = WrappedDataLoader(train_dl, preprocess)\nvalid_dl = WrappedDataLoader(valid_dl, preprocess)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we can replace ``nn.AvgPool2d`` with ``nn.AdaptiveAvgPool2d``, which\nallows us to define the size of the *output* tensor we want, rather than\nthe *input* tensor we have. As a result, our model will work with any\nsize input.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "model = nn.Sequential(\n nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),\n nn.ReLU(),\n nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),\n nn.ReLU(),\n nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),\n nn.ReLU(),\n nn.AdaptiveAvgPool2d(1),\n Lambda(lambda x: x.view(x.size(0), -1)),\n)\n\nopt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's try it out:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using your GPU\n---------------\n\nIf you're lucky enough to have access to a CUDA-capable GPU (you can\nrent one for 
about $0.50/hour from most cloud providers) you can\nuse it to speed up your code. First check that your GPU is working in\nPytorch:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "print(torch.cuda.is_available())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And then create a device object for it:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "dev = torch.device(\n \"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's update ``preprocess`` to move batches to the GPU:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "def preprocess(x, y):\n return x.view(-1, 1, 28, 28).to(dev), y.to(dev)\n\n\ntrain_dl, valid_dl = get_data(train_ds, valid_ds, bs)\ntrain_dl = WrappedDataLoader(train_dl, preprocess)\nvalid_dl = WrappedDataLoader(valid_dl, preprocess)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, we can move our model to the GPU.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "model.to(dev)\nopt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You should find it runs faster now:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fit(epochs, model, loss_func, opt, train_dl, valid_dl)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Closing thoughts\n-----------------\n\nWe now have a general data pipeline and training loop which you can 
use for\ntraining many types of models using Pytorch. To see how simple training a model\ncan now be, take a look at the `mnist_sample` sample notebook.\n\nOf course, there are many things you'll want to add, such as data augmentation,\nhyperparameter tuning, monitoring training, transfer learning, and so forth.\nThese features are available in the fastai library, which has been developed\nusing the same design approach shown in this tutorial, providing a natural\nnext step for practitioners looking to take their models further.\n\nWe promised at the start of this tutorial we'd explain through example each of\n``torch.nn``, ``torch.optim``, ``Dataset``, and ``DataLoader``. So let's summarize\nwhat we've seen:\n\n - **torch.nn**\n\n + ``Module``: creates a callable which behaves like a function, but can also\n contain state(such as neural net layer weights). It knows what ``Parameter`` (s) it\n contains and can zero all their gradients, loop through them for weight updates, etc.\n + ``Parameter``: a wrapper for a tensor that tells a ``Module`` that it has weights\n that need updating during backprop. 
Only tensors with the `requires_grad` attribute set are updated\n + ``functional``: a module(usually imported into the ``F`` namespace by convention)\n which contains activation functions, loss functions, etc, as well as non-stateful\n versions of layers such as convolutional and linear layers.\n - ``torch.optim``: Contains optimizers such as ``SGD``, which update the weights\n of ``Parameter`` during the backward step\n - ``Dataset``: An abstract interface of objects with a ``__len__`` and a ``__getitem__``,\n including classes provided with Pytorch such as ``TensorDataset``\n - ``DataLoader``: Takes any ``Dataset`` and creates an iterator which returns batches of data.\n\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.12" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file diff --git a/pytorch/殷康龙的笔记/9 TensorBoard.ipynb b/pytorch/殷康龙的笔记/9 TensorBoard.ipynb new file mode 100644 index 00000000..e69de29b