diff --git a/.vscode/settings.json b/.vscode/settings.json
index fddb9882..00689eab 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -79,5 +79,5 @@
         "xtr1common": "cpp",
         "xutility": "cpp"
     },
-    "python.pythonPath": "D:\\anaconda\\envs\\tensorflow\\python.exe"
+    "python.pythonPath": "C:\\Python\\python.exe",
 }
\ No newline at end of file
diff --git a/Python/python标准库2/9网络和进程通信.md b/Python/python标准库2/9网络和进程通信.md
index e69de29b..6467a749 100644
--- a/Python/python标准库2/9网络和进程通信.md
+++ b/Python/python标准库2/9网络和进程通信.md
@@ -0,0 +1 @@
+# asyncio --- Asynchronous I/O
\ No newline at end of file
diff --git a/pytorch/Pysyft实例/websockets-example-MNIST-parallel/Asynchronous-federated-learning-on-MNIST.ipynb b/pytorch/Pysyft实例/websockets-example-MNIST-parallel/Asynchronous-federated-learning-on-MNIST.ipynb
new file mode 100644
index 00000000..dada802a
--- /dev/null
+++ b/pytorch/Pysyft实例/websockets-example-MNIST-parallel/Asynchronous-federated-learning-on-MNIST.ipynb
@@ -0,0 +1,508 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Federated learning setup\n",
+    "A federated learning setup using TrainConfig involves different participants:\n",
+    "\n",
+    "* Workers: hold their own datasets.\n",
+    "\n",
+    "* Coordinator: the entity that knows the workers and the names of the datasets present on each worker.\n",
+    "\n",
+    "* Evaluator: holds the test data and tracks model performance.\n",
+    "\n",
+    "Each worker is represented by two parts: a proxy local to the scheduler (the websocket client worker) and the remote instance that holds the data and performs the computations. The remote part is called the websocket server worker."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "# The inspect module lets us look at live objects, e.g. to display their source code.\n",
+    "import inspect"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1 Preparation: start the websocket server workers\n",
+    "\n",
+    "First of all, we need to create the remote workers. For this, you need to run the following in a terminal (it cannot be run from within this notebook):\n",
+    "\n",
+    "python start_websocket_servers.py\n",
+    "\n",
+    "\n",
+    "What does this do?\n",
+    "The script instantiates three workers, Alice, Bob and Charlie, and prepares their local data. Each worker is set up to hold a subset of the MNIST training set: Alice holds all images of the digits 0-3, Bob all images of the digits 4-6, and Charlie all images of the digits 7-9.\n",
+    "\n",
+    "| Worker      | Digits in local dataset | Number of samples |\n",
+    "| ----------- | ----------------------- | ----------------- |\n",
+    "| Alice       | 0-3                     | 24754             |\n",
+    "| Bob         | 4-6                     | 17181             |\n",
+    "| Charlie     | 7-9                     | 18065             |\n",
+    "\n",
+    "\n",
+    "The evaluator is called \"testing\" and holds the entire MNIST test set.\n",
+    "\n",
+    "| Evaluator   | Digits in local dataset | Number of samples |\n",
+    "| ----------- | ----------------------- | ----------------- |\n",
+    "| Testing     | 0-9                     | 10000             |\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "def start_websocket_server_worker(id, host, port, hook, verbose, keep_labels=None, training=True):\n    \"\"\"Helper function for spinning up a websocket server and setting up the local datasets.\"\"\"\n\n    server = websocket_server.WebsocketServerWorker(\n        id=id, host=host, port=port, hook=hook, verbose=verbose\n    )\n\n    # Setup toy data (mnist example)\n    mnist_dataset = datasets.MNIST(\n        root=\"../../官方教程/data\",\n        train=training,\n        download=True,\n        transform=transforms.Compose(\n            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]\n        ),\n    )\n\n    if training:\n        indices = np.isin(mnist_dataset.targets, keep_labels).astype(\"uint8\")\n        logger.info(\"number of true indices: %s\", indices.sum())\n        selected_data = (\n            torch.native_masked_select(mnist_dataset.data.transpose(0, 2), torch.tensor(indices))\n            .view(28, 28, -1)\n            .transpose(2, 0)\n        )\n        logger.info(\"after selection: 
%s\", selected_data.shape)\n selected_targets = torch.native_masked_select(mnist_dataset.targets, torch.tensor(indices))\n\n dataset = sy.BaseDataset(\n data=selected_data, targets=selected_targets, transform=mnist_dataset.transform\n )\n key = \"mnist\"\n else:\n dataset = sy.BaseDataset(\n data=mnist_dataset.data,\n targets=mnist_dataset.targets,\n transform=mnist_dataset.transform,\n )\n key = \"mnist_testing\"\n\n server.add_dataset(dataset, key=key)\n count = [0] * 10\n logger.info(\n \"MNIST dataset (%s set), available numbers on %s: \", \"train\" if training else \"test\", id\n )\n for i in range(10):\n count[i] = (dataset.targets == i).sum().item()\n logger.info(\" %s: %s\", i, count[i])\n\n logger.info(\"datasets: %s\", server.datasets)\n if training:\n logger.info(\"len(datasets[mnist]): %s\", len(server.datasets[key]))\n\n server.start()\n return server\n\n" + ] + } + ], + "source": [ + "import run_websocket_server\n", + "# 用来查看模块内部的代码。\n", + "print(inspect.getsource(run_websocket_server.start_websocket_server_worker))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "在继续之前,我们首先需要导入依赖项,设置所需的参数并配置日志记录。" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# 导入模块\n", + "import sys\n", + "# python中异步IO的实现方法。提供了websocket一种应用层全双工的异步、非阻塞通信方式,通过消息响应实现通信。\n", + "import asyncio\n", + "\n", + "# syft模块主要封装实现了基于websocket的异步通信。\n", + "import syft as sy\n", + "from syft.workers.websocket_client import WebsocketClientWorker\n", + "from syft.frameworks.torch.fl import utils\n", + "\n", + "# torch主要提供了机器学习的算法。\n", + "import torch\n", + "from torchvision import datasets, transforms\n", + "import numpy as np\n", + "\n", + "# rwc提供了客户端运行的主要方法。\n", + "import run_websocket_client as rwc\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# 将syft与torch建立联系\n", + "hook = sy.TorchHook(torch)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Namespace(batch_size=32, cuda=False, federate_after_n_batches=10, lr=0.1, save_model=False, seed=1, test_batch_size=128, training_rounds=40, verbose=False)\n" + ] + } + ], + "source": [ + "# 配置训练过程中的相关列参数。\n", + "# batch_size batch大小\n", + "# cuda 是否启用GPU\n", + "# federate_after_n_batches多少轮之后进行联邦平均\n", + "# lr学习率\n", + "# test_batch_size测试数据集\n", + "# training_round worker上训练的次数。\n", + "# verbose 概要?用来做什么的不清楚。\n", + "\n", + "args = rwc.define_and_get_arguments(args=[])\n", + "use_cuda = args.cuda and torch.cuda.is_available()\n", + "torch.manual_seed(args.seed)\n", + "device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n", + "print(args)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# 配置一个日志模块。使用python原本的logging模块。\n", + "import logging\n", + "\n", + "# 获得一个命名的记录器\n", + "logger = logging.getLogger(\"run_websocket_client\")\n", + "\n", + "if not len(logger.handlers):\n", + " # print(123)\n", + " FORMAT = \"%(asctime)s - %(message)s\"\n", + " DATE_FMT = \"%H:%M:%S\"\n", + " formatter = logging.Formatter(FORMAT, DATE_FMT)\n", + " handler = logging.StreamHandler()\n", + " handler.setFormatter(formatter)\n", + " logger.addHandler(handler)\n", + " logger.propagate = False\n", + "LOG_LEVEL = logging.DEBUG\n", + "logger.setLevel(LOG_LEVEL)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"现在,让我们实例化websocket客户端工作程序,即远程工作程序的本地代理。请注意,如果websocket服务器工作程序未在运行,则此步骤将失败。\n", + "\n", + "工人Alice,Bob和Charlie将进行培训,然后由测试人员托管测试数据并进行评估。" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# 在客户端定义服务端的句柄。通过websocketclientworker类,建立通信。每一个类维护一个通信链接。\n", + "# 将客户端websocket与启动worker服务端的websocket建立一对一链接。\n", + "# pysyft通过设置,将通信模块单独剥离出来。\n", + "kwargs_websocket = {\"host\": \"127.0.0.1\", \"hook\": hook, \"verbose\": args.verbose}\n", + "alice = WebsocketClientWorker(id=\"alice\", port=8777, **kwargs_websocket)\n", + "bob = WebsocketClientWorker(id=\"bob\", port=8778, **kwargs_websocket)\n", + "charlie = WebsocketClientWorker(id=\"charlie\", port=8779, **kwargs_websocket)\n", + "testing = WebsocketClientWorker(id=\"testing\", port=8780, **kwargs_websocket)\n", + "\n", + "# 用来试下通信的句柄。\n", + "worker_instances = [alice, bob, charlie]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2 设置培训\n", + "\n", + "让我们实例化机器学习模型。这是一个具有2个卷积层和2个完全连接层的小型神经网络。它使用ReLU激活和最大池化。" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "class Net(nn.Module):\n def __init__(self):\n super(Net, self).__init__()\n self.conv1 = nn.Conv2d(1, 20, 5, 1)\n self.conv2 = nn.Conv2d(20, 50, 5, 1)\n self.fc1 = nn.Linear(4 * 4 * 50, 500)\n self.fc2 = nn.Linear(500, 10)\n\n def forward(self, x):\n x = F.relu(self.conv1(x))\n x = F.max_pool2d(x, 2, 2)\n x = F.relu(self.conv2(x))\n x = F.max_pool2d(x, 2, 2)\n x = x.view(-1, 4 * 4 * 50)\n x = F.relu(self.fc1(x))\n x = self.fc2(x)\n return F.log_softmax(x, dim=1)\n\n" + ] + } + ], + "source": [ + "# 输出模型。\n", + "print(inspect.getsource(rwc.Net))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Net(\n (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))\n (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))\n (fc1): Linear(in_features=800, out_features=500, bias=True)\n (fc2): Linear(in_features=500, out_features=10, bias=True)\n)\n" + ] + } + ], + "source": [ + "model = rwc.Net().to(device)\n", + "print(model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 使模型可序列化\n", + "为了将模型发送给工作人员,我们需要模型可序列化,为此我们使用jit。" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# 将需要训练的模型进行序列化。\n", + "# jit提供了一种不依赖Python环境的执行方法。这样在发送到客户端之后,即是没有导入相关的包。也能运行模型,进行梯度下降。\n", + "traced_model = torch.jit.trace(model, torch.zeros([1, 1, 28, 28], dtype=torch.float))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3 让我们开始训练\n", + "现在我们准备开始联合培训。我们将分别对每个工人进行给定数量的批次培训,然后计算所得模型的联合平均值。\n", + "\n", + "每隔10轮培训,我们将评估工人返回的模型以及通过联合平均获得的模型的性能。\n", + "\n", + "性能将作为准确性(正确预测的比率)和预测数字的直方图给出。这很有趣,因为每个工人仅拥有数字的一个子集。因此,在开始时,每个工作人员将仅预测他们的人数,并且仅通过联合平均过程知道其他人数。\n", + "\n", + "培训以异步方式完成。这意味着调度程序仅告诉工人进行培训,而不会阻止与下一个工人交谈之前等待培训的结果。\n", + "\n", + "训练的参数在参数中给出。每个工作人员将按照给定数量的批次进行培训,该数量由federate_after_n_batches的值给出。还配置了培训批次大小和学习率。" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Federate_after_n_batches: 10\nBatch size: 32\nInitial learning rate: 0.1\n" + ] + } + ], + "source": [ + "print(\"Federate_after_n_batches: \" + str(args.federate_after_n_batches))\n", + "print(\"Batch size: \" + 
str(args.batch_size))\n", + "print(\"Initial learning rate: \" + str(args.lr))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "20:21:34 - Training round 1/40\n", + "20:21:42 - Evaluating models\n", + "20:21:45 - Model update alice: Percentage numbers 0-3: 100%, 4-6: 0%, 7-9: 0%\n", + "20:21:45 - Model update alice: Average loss: 0.0216, Accuracy: 1498/10000 (14.98%)\n", + "20:21:49 - Model update bob: Percentage numbers 0-3: 0%, 4-6: 100%, 7-9: 0%\n", + "20:21:49 - Model update bob: Average loss: 0.0441, Accuracy: 892/10000 (8.92%)\n", + "20:21:52 - Model update charlie: Percentage numbers 0-3: 0%, 4-6: 0%, 7-9: 100%\n", + "20:21:52 - Model update charlie: Average loss: 0.0323, Accuracy: 1092/10000 (10.92%)\n", + "20:21:56 - Federated model: Percentage numbers 0-3: 0%, 4-6: 99%, 7-9: 0%\n", + "20:21:56 - Federated model: Average loss: 0.0177, Accuracy: 892/10000 (8.92%)\n", + "20:21:56 - Training round 2/40\n", + "20:22:02 - Training round 3/40\n", + "20:22:10 - Training round 4/40\n", + "20:22:17 - Training round 5/40\n", + "20:22:24 - Training round 6/40\n", + "20:22:32 - Training round 7/40\n", + "20:22:39 - Training round 8/40\n", + "20:22:46 - Training round 9/40\n", + "20:22:53 - Training round 10/40\n", + "20:23:00 - Training round 11/40\n", + "20:23:07 - Evaluating models\n", + "20:23:10 - Model update alice: Percentage numbers 0-3: 80%, 4-6: 13%, 7-9: 5%\n", + "20:23:10 - Model update alice: Average loss: 0.0106, Accuracy: 5668/10000 (56.68%)\n", + "20:23:14 - Model update bob: Percentage numbers 0-3: 17%, 4-6: 73%, 7-9: 8%\n", + "20:23:14 - Model update bob: Average loss: 0.0133, Accuracy: 5320/10000 (53.20%)\n", + "20:23:17 - Model update charlie: Percentage numbers 0-3: 11%, 4-6: 2%, 7-9: 86%\n", + "20:23:17 - Model update charlie: Average loss: 0.0192, Accuracy: 4246/10000 (42.46%)\n", + "20:23:20 - Federated model: Percentage numbers 0-3: 42%, 4-6: 21%, 7-9: 36%\n", + "20:23:20 - Federated model: Average loss: 0.0030, Accuracy: 8647/10000 (86.47%)\n", + "20:23:20 - Training round 12/40\n", + "20:23:27 - Training round 13/40\n", + "20:23:34 - Training round 14/40\n", + "20:23:41 - Training round 15/40\n", + "20:23:47 - Training round 16/40\n", + "20:23:54 - Training round 17/40\n", + "20:24:01 - Training round 18/40\n", + "20:24:08 - Training round 19/40\n", + "20:24:15 - Training round 20/40\n", + "20:24:22 - Training round 21/40\n", + "20:24:29 - Evaluating models\n", + "20:24:32 - Model update alice: Percentage numbers 0-3: 70%, 4-6: 16%, 7-9: 13%\n", + "20:24:32 - Model update alice: Average loss: 0.0092, Accuracy: 6886/10000 (68.86%)\n", + "20:24:36 - Model update bob: Percentage numbers 0-3: 28%, 4-6: 59%, 7-9: 11%\n", + "20:24:36 - Model update bob: Average loss: 0.0072, Accuracy: 6739/10000 (67.39%)\n", + "20:24:39 - Model update charlie: Percentage numbers 0-3: 31%, 4-6: 8%, 7-9: 59%\n", + "20:24:39 - Model update charlie: Average loss: 0.0072, Accuracy: 6914/10000 (69.14%)\n", + "20:24:43 - Federated model: Percentage numbers 0-3: 44%, 4-6: 26%, 7-9: 28%\n", + "20:24:43 - Federated model: Average loss: 0.0017, Accuracy: 9354/10000 (93.54%)\n", + "20:24:43 - Training round 22/40\n", + "20:24:50 - Training round 23/40\n", + "20:24:56 - Training round 24/40\n", + "20:25:03 - Training round 25/40\n", + "20:25:10 - Training round 26/40\n", + "20:25:17 - Training round 27/40\n", + "20:25:23 - Training round 28/40\n", + "20:25:30 - Training round 29/40\n", + 
"20:25:37 - Training round 30/40\n", + "20:25:44 - Training round 31/40\n", + "20:25:51 - Evaluating models\n", + "20:25:54 - Model update alice: Percentage numbers 0-3: 54%, 4-6: 24%, 7-9: 21%\n", + "20:25:54 - Model update alice: Average loss: 0.0035, Accuracy: 8525/10000 (85.25%)\n", + "20:25:57 - Model update bob: Percentage numbers 0-3: 34%, 4-6: 49%, 7-9: 16%\n", + "20:25:57 - Model update bob: Average loss: 0.0050, Accuracy: 7755/10000 (77.55%)\n", + "20:26:01 - Model update charlie: Percentage numbers 0-3: 32%, 4-6: 13%, 7-9: 54%\n", + "20:26:01 - Model update charlie: Average loss: 0.0059, Accuracy: 7441/10000 (74.41%)\n", + "20:26:04 - Federated model: Percentage numbers 0-3: 41%, 4-6: 28%, 7-9: 29%\n", + "20:26:04 - Federated model: Average loss: 0.0012, Accuracy: 9572/10000 (95.72%)\n", + "20:26:04 - Training round 32/40\n", + "20:26:11 - Training round 33/40\n", + "20:26:18 - Training round 34/40\n", + "20:26:25 - Training round 35/40\n", + "20:26:32 - Training round 36/40\n", + "20:26:38 - Training round 37/40\n", + "20:26:45 - Training round 38/40\n", + "20:26:52 - Training round 39/40\n", + "20:26:59 - Training round 40/40\n", + "20:27:06 - Evaluating models\n", + "20:27:09 - Model update alice: Percentage numbers 0-3: 54%, 4-6: 23%, 7-9: 21%\n", + "20:27:09 - Model update alice: Average loss: 0.0036, Accuracy: 8535/10000 (85.35%)\n", + "20:27:13 - Model update bob: Percentage numbers 0-3: 36%, 4-6: 43%, 7-9: 19%\n", + "20:27:13 - Model update bob: Average loss: 0.0035, Accuracy: 8318/10000 (83.18%)\n", + "20:27:16 - Model update charlie: Percentage numbers 0-3: 31%, 4-6: 12%, 7-9: 55%\n", + "20:27:16 - Model update charlie: Average loss: 0.0057, Accuracy: 7364/10000 (73.64%)\n", + "20:27:19 - Federated model: Percentage numbers 0-3: 41%, 4-6: 28%, 7-9: 30%\n", + "20:27:19 - Federated model: Average loss: 0.0010, Accuracy: 9631/10000 (96.31%)\n" + ] + } + ], + "source": [ + "learning_rate = args.lr\n", + "device = \"cpu\" #torch.device(\"cpu\")\n", + "traced_model = torch.jit.trace(model, torch.zeros([1, 1, 28, 28], dtype=torch.float))\n", + "for curr_round in range(1, args.training_rounds + 1):\n", + " logger.info(\"Training round %s/%s\", curr_round, args.training_rounds)\n", + "\n", + " # 异步调用多个客户端执行并行训练。await等待多个异步调用执行完成。\n", + " # 这里包含了模型的发送过程和取回过程。\n", + " results = await asyncio.gather(\n", + " *[\n", + " rwc.fit_model_on_worker(\n", + " worker=worker,\n", + " traced_model=traced_model,\n", + " batch_size=args.batch_size,\n", + " curr_round=curr_round,\n", + " max_nr_batches=args.federate_after_n_batches,\n", + " lr=learning_rate,\n", + " )\n", + " for worker in worker_instances\n", + " ]\n", + " )\n", + " models = {}\n", + " loss_values = {}\n", + " \n", + " # 每10轮进行一次test。使用test客户端检验当前结果的准确性。\n", + " # 这里主要测试,每个客户端发过来的模型的准确率。\n", + " test_models = curr_round % 10 == 1 or curr_round == args.training_rounds\n", + " if test_models:\n", + " logger.info(\"Evaluating models\")\n", + " np.set_printoptions(formatter={\"float\": \"{: .0f}\".format})\n", + " for worker_id, worker_model, _ in results:\n", + " rwc.evaluate_model_on_worker(\n", + " model_identifier=\"Model update \" + worker_id,\n", + " worker=testing,\n", + " dataset_key=\"mnist_testing\",\n", + " model=worker_model,\n", + " nr_bins=10,\n", + " batch_size=128,\n", + " print_target_hist=False,\n", + " device=device\n", + " )\n", + "\n", + " # 将并行执行的多个客户端训练的结果,进行聚合。\n", + " for worker_id, worker_model, worker_loss in results:\n", + " if worker_model is not None:\n", + " models[worker_id] = worker_model\n", + " 
loss_values[worker_id] = worker_loss\n", + "\n", + " # 调用联邦平均算法,对分布式models进行聚合。\n", + " traced_model = utils.federated_avg(models)\n", + "\n", + " # 每10轮进行一次test。使用test客户端检验当前结果的准确性。\n", + " # 这里主要测试,模型聚合后,模型的准确率。\n", + " if test_models:\n", + " rwc.evaluate_model_on_worker(\n", + " model_identifier=\"Federated model\",\n", + " worker=testing,\n", + " dataset_key=\"mnist_testing\",\n", + " model=traced_model,\n", + " nr_bins=10,\n", + " batch_size=128,\n", + " print_target_hist=False,\n", + " device=device\n", + " )\n", + "\n", + " # decay learning rate\n", + " learning_rate = max(0.98 * learning_rate, args.lr * 0.01)\n", + "\n", + "if args.save_model:\n", + " torch.save(model.state_dict(), \"mnist_cnn.pt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "经过40轮训练,我们在整个测试数据集上的准确率均达到95%以上。鉴于没有工人能使用超过4位数字,这给人留下了深刻的印象!" + ] + } + ], + "metadata": { + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/pytorch/Pysyft实例/websockets-example-MNIST-parallel/readme.md b/pytorch/Pysyft实例/websockets-example-MNIST-parallel/readme.md new file mode 100644 index 00000000..e3fc4852 --- /dev/null +++ b/pytorch/Pysyft实例/websockets-example-MNIST-parallel/readme.md @@ -0,0 +1,48 @@ +# 代码中涉及到的pysyft模块。 + + +* 发现代码中只涉及少量的syft模块。 +* 可以考虑,去掉syft模块,将syft模块直接导入到当前工程中,既方便修改,也方便理解。 +* 可以让最后的工程变得更小型化,并且可以定制。 + +## 1 pytorch相关模块 + +### torch.nn + +### torch.nn.function + +### torchvision.datasets & torchvision.transforms +> 用来下载手写体数据 + + +### torch.jit +> 用来序列化pytorch的模型,然后进行训练。 + + +## 2 syft相关模块 + + +### syft +调用了hook方法,与pytorch建立联系。 + + + + +### syft.workers.websocket_client.WebsocketClientWorker +实现客户端与多个服务器建立连接。 + +然后将连接的实体导入到rwc中,进行通信和训练迭代。 + + +### syft.frameworks.torch.fl.utils + + +## 3 Python标准库 + +### asyncio + +### logging + +### argparse + +### websockets diff --git a/pytorch/Pysyft实例/websockets-example-MNIST-parallel/run_websocket_client.py b/pytorch/Pysyft实例/websockets-example-MNIST-parallel/run_websocket_client.py new file mode 100644 index 00000000..bba02b79 --- /dev/null +++ b/pytorch/Pysyft实例/websockets-example-MNIST-parallel/run_websocket_client.py @@ -0,0 +1,275 @@ +import logging +import argparse +import sys +import asyncio +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +import syft as sy +from syft.workers import websocket_client +from syft.frameworks.torch.fl import utils + +LOG_INTERVAL = 25 +logger = logging.getLogger("run_websocket_client") + + +# Loss function +@torch.jit.script +def loss_fn(pred, target): + return F.nll_loss(input=pred, target=target) + + +# Model +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 20, 5, 1) + self.conv2 = nn.Conv2d(20, 50, 5, 1) + self.fc1 = nn.Linear(4 * 4 * 50, 500) + self.fc2 = nn.Linear(500, 10) + + def forward(self, x): + x = F.relu(self.conv1(x)) + x = F.max_pool2d(x, 2, 2) + x = F.relu(self.conv2(x)) + x = F.max_pool2d(x, 2, 2) + x = x.view(-1, 4 * 4 * 50) + x = F.relu(self.fc1(x)) + x = self.fc2(x) + return F.log_softmax(x, dim=1) + + +def 
define_and_get_arguments(args=sys.argv[1:]):
+    # First, define a parser
+    parser = argparse.ArgumentParser(
+        description="Run federated learning using websocket client workers."
+    )
+    parser.add_argument("--batch_size", type=int, default=32, help="batch size of the training")
+    parser.add_argument(
+        "--test_batch_size", type=int, default=128, help="batch size used for the test data"
+    )
+    parser.add_argument(
+        "--training_rounds", type=int, default=40, help="number of federated learning rounds"
+    )
+    parser.add_argument(
+        "--federate_after_n_batches",
+        type=int,
+        default=10,
+        help="number of training steps performed on each remote worker before averaging",
+    )
+    parser.add_argument("--lr", type=float, default=0.1, help="learning rate")
+    parser.add_argument("--cuda", action="store_true", help="use cuda")
+    parser.add_argument("--seed", type=int, default=1, help="seed used for randomization")
+    parser.add_argument("--save_model", action="store_true", help="if set, model will be saved")
+    parser.add_argument(
+        "--verbose",
+        "-v",
+        action="store_true",
+        help="if set, websocket client workers will be started in verbose mode",
+    )
+
+    # Use the parser to parse the arguments
+    args = parser.parse_args(args=args)
+    return args
+
+
+async def fit_model_on_worker(
+    worker: websocket_client.WebsocketClientWorker,
+    traced_model: torch.jit.ScriptModule,
+    batch_size: int,
+    curr_round: int,
+    max_nr_batches: int,
+    lr: float,
+):
+    """Send the model to the worker and fit the model on the worker's training data.
+
+    Args:
+        worker: Remote location, where the model shall be trained.
+        traced_model: Model which shall be trained.
+        batch_size: Batch size of each training step.
+        curr_round: Index of the current training round (for logging purposes).
+        max_nr_batches: If > 0, training on worker will stop at min(max_nr_batches, nr_available_batches).
+        lr: Learning rate of each training step.
+
+    Returns:
+        A tuple containing:
+            * worker_id: Union[int, str], id of the worker.
+            * improved model: torch.jit.ScriptModule, model after training at the worker.
+            * loss: Loss on last training batch, torch.tensor.
+ """ + train_config = sy.TrainConfig( + model=traced_model, + loss_fn=loss_fn, + batch_size=batch_size, + shuffle=True, + max_nr_batches=max_nr_batches, + epochs=1, + optimizer="SGD", + optimizer_args={"lr": lr}, + ) + train_config.send(worker) + loss = await worker.async_fit(dataset_key="mnist", return_ids=[0]) + model = train_config.model_ptr.get().obj + return worker.id, model, loss + + +def evaluate_model_on_worker( + model_identifier, + worker, + dataset_key, + model, + nr_bins, + batch_size, + device, + print_target_hist=False, +): + model.eval() + + # Create and send train config + train_config = sy.TrainConfig( + batch_size=batch_size, model=model, loss_fn=loss_fn, optimizer_args=None, epochs=1 + ) + + train_config.send(worker) + + result = worker.evaluate( + dataset_key=dataset_key, + return_histograms=True, + nr_bins=nr_bins, + return_loss=True, + return_raw_accuracy=True, + device=device, + ) + test_loss = result["loss"] + correct = result["nr_correct_predictions"] + len_dataset = result["nr_predictions"] + hist_pred = result["histogram_predictions"] + hist_target = result["histogram_target"] + + if print_target_hist: + logger.info("Target histogram: %s", hist_target) + percentage_0_3 = int(100 * sum(hist_pred[0:4]) / len_dataset) + percentage_4_6 = int(100 * sum(hist_pred[4:7]) / len_dataset) + percentage_7_9 = int(100 * sum(hist_pred[7:10]) / len_dataset) + logger.info( + "%s: Percentage numbers 0-3: %s%%, 4-6: %s%%, 7-9: %s%%", + model_identifier, + percentage_0_3, + percentage_4_6, + percentage_7_9, + ) + + logger.info( + "%s: Average loss: %s, Accuracy: %s/%s (%s%%)", + model_identifier, + f"{test_loss:.4f}", + correct, + len_dataset, + f"{100.0 * correct / len_dataset:.2f}", + ) + + +async def main(): + args = define_and_get_arguments() + + hook = sy.TorchHook(torch) + + kwargs_websocket = {"hook": hook, "verbose": args.verbose, "host": "127.0.0.1"} + alice = websocket_client.WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket) + bob = websocket_client.WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket) + charlie = websocket_client.WebsocketClientWorker(id="charlie", port=8779, **kwargs_websocket) + testing = websocket_client.WebsocketClientWorker(id="testing", port=8780, **kwargs_websocket) + + for wcw in [alice, bob, charlie, testing]: + wcw.clear_objects_remote() + + worker_instances = [alice, bob, charlie] + + use_cuda = args.cuda and torch.cuda.is_available() + + torch.manual_seed(args.seed) + + device = torch.device("cuda" if use_cuda else "cpu") + + model = Net().to(device) + + traced_model = torch.jit.trace(model, torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device)) + learning_rate = args.lr + + for curr_round in range(1, args.training_rounds + 1): + logger.info("Training round %s/%s", curr_round, args.training_rounds) + + results = await asyncio.gather( + *[ + fit_model_on_worker( + worker=worker, + traced_model=traced_model, + batch_size=args.batch_size, + curr_round=curr_round, + max_nr_batches=args.federate_after_n_batches, + lr=learning_rate, + ) + for worker in worker_instances + ] + ) + models = {} + loss_values = {} + + test_models = curr_round % 10 == 1 or curr_round == args.training_rounds + if test_models: + logger.info("Evaluating models") + np.set_printoptions(formatter={"float": "{: .0f}".format}) + for worker_id, worker_model, _ in results: + evaluate_model_on_worker( + model_identifier="Model update " + worker_id, + worker=testing, + dataset_key="mnist_testing", + model=worker_model, + nr_bins=10, + batch_size=128, + 
device=device,
+                    print_target_hist=False,
+                )
+
+        # Federate models (note that this will also change the model in models[0])
+        for worker_id, worker_model, worker_loss in results:
+            if worker_model is not None:
+                models[worker_id] = worker_model
+                loss_values[worker_id] = worker_loss
+
+        traced_model = utils.federated_avg(models)
+
+        if test_models:
+            evaluate_model_on_worker(
+                model_identifier="Federated model",
+                worker=testing,
+                dataset_key="mnist_testing",
+                model=traced_model,
+                nr_bins=10,
+                batch_size=128,
+                device=device,
+                print_target_hist=False,
+            )
+
+        # decay learning rate
+        learning_rate = max(0.98 * learning_rate, args.lr * 0.01)
+
+    if args.save_model:
+        torch.save(model.state_dict(), "mnist_cnn.pt")
+
+
+if __name__ == "__main__":
+    # Logging setup
+    FORMAT = "%(asctime)s | %(message)s"
+    logging.basicConfig(format=FORMAT)
+    logger.setLevel(level=logging.DEBUG)
+
+    # Websockets setup
+    websockets_logger = logging.getLogger("websockets")
+    websockets_logger.setLevel(logging.INFO)
+    websockets_logger.addHandler(logging.StreamHandler())
+
+    # Run main
+    asyncio.get_event_loop().run_until_complete(main())
diff --git a/pytorch/Pysyft实例/websockets-example-MNIST-parallel/run_websocket_server.py b/pytorch/Pysyft实例/websockets-example-MNIST-parallel/run_websocket_server.py
new file mode 100644
index 00000000..c19475ae
--- /dev/null
+++ b/pytorch/Pysyft实例/websockets-example-MNIST-parallel/run_websocket_server.py
@@ -0,0 +1,120 @@
+import logging
+import argparse
+import numpy as np
+import torch
+from torchvision import datasets
+from torchvision import transforms
+
+import syft as sy
+from syft.workers import websocket_server
+
+KEEP_LABELS_DICT = {
+    "alice": [0, 1, 2, 3],
+    "bob": [4, 5, 6],
+    "charlie": [7, 8, 9],
+    "testing": list(range(10)),
+    None: list(range(10)),
+}
+
+
+def start_websocket_server_worker(id, host, port, hook, verbose, keep_labels=None, training=True):
+    """Helper function for spinning up a websocket server and setting up the local datasets."""
+
+    server = websocket_server.WebsocketServerWorker(
+        id=id, host=host, port=port, hook=hook, verbose=verbose
+    )
+
+    # Setup toy data (mnist example)
+    mnist_dataset = datasets.MNIST(
+        root="../../官方教程/data",
+        train=training,
+        download=True,
+        transform=transforms.Compose(
+            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
+        ),
+    )
+
+    if training:
+        indices = np.isin(mnist_dataset.targets, keep_labels).astype("uint8")
+        logger.info("number of true indices: %s", indices.sum())
+        selected_data = (
+            torch.native_masked_select(mnist_dataset.data.transpose(0, 2), torch.tensor(indices))
+            .view(28, 28, -1)
+            .transpose(2, 0)
+        )
+        logger.info("after selection: %s", selected_data.shape)
+        selected_targets = torch.native_masked_select(mnist_dataset.targets, torch.tensor(indices))
+
+        dataset = sy.BaseDataset(
+            data=selected_data, targets=selected_targets, transform=mnist_dataset.transform
+        )
+        key = "mnist"
+    else:
+        dataset = sy.BaseDataset(
+            data=mnist_dataset.data,
+            targets=mnist_dataset.targets,
+            transform=mnist_dataset.transform,
+        )
+        key = "mnist_testing"
+
+    server.add_dataset(dataset, key=key)
+    count = [0] * 10
+    logger.info(
+        "MNIST dataset (%s set), available numbers on %s: ", "train" if training else "test", id
+    )
+    for i in range(10):
+        count[i] = (dataset.targets == i).sum().item()
+        logger.info("      %s: %s", i, count[i])
+
+    logger.info("datasets: %s", server.datasets)
+    if training:
+        logger.info("len(datasets[mnist]): %s", len(server.datasets[key]))
+
+    server.start()
+    return 
server + + +if __name__ == "__main__": + # Logging setup + FORMAT = "%(asctime)s | %(message)s" + logging.basicConfig(format=FORMAT) + logger = logging.getLogger("run_websocket_server") + logger.setLevel(level=logging.DEBUG) + + # Parse args + parser = argparse.ArgumentParser(description="Run websocket server worker.") + parser.add_argument( + "--port", + "-p", + type=int, + help="port number of the websocket server worker, e.g. --port 8777", + ) + parser.add_argument("--host", type=str, default="localhost", help="host for the connection") + parser.add_argument( + "--id", type=str, help="name (id) of the websocket server worker, e.g. --id alice" + ) + parser.add_argument( + "--testing", + action="store_true", + help="if set, websocket server worker will load the test dataset instead of the training dataset", + ) + parser.add_argument( + "--verbose", + "-v", + action="store_true", + help="if set, websocket server worker will be started in verbose mode", + ) + + args = parser.parse_args() + + # Hook and start server + hook = sy.TorchHook(torch) + server = start_websocket_server_worker( + id=args.id, + host=args.host, + port=args.port, + hook=hook, + verbose=args.verbose, + keep_labels=KEEP_LABELS_DICT[args.id], + training=not args.testing, + ) diff --git a/pytorch/Pysyft实例/websockets-example-MNIST-parallel/start_websocket_servers.py b/pytorch/Pysyft实例/websockets-example-MNIST-parallel/start_websocket_servers.py new file mode 100644 index 00000000..4e40ba45 --- /dev/null +++ b/pytorch/Pysyft实例/websockets-example-MNIST-parallel/start_websocket_servers.py @@ -0,0 +1,87 @@ +import subprocess + +from torchvision import datasets +from torchvision import transforms + +import signal +import sys + + +# Downloads MNIST dataset +mnist_trainset = datasets.MNIST( + root="../../官方教程/data", + train=True, + download=True, + transform=transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ), +) + +call_alice = [ + "python", + "run_websocket_server.py", + "--port", + "8777", + "--id", + "alice", + "--host", + "127.0.0.1", +] + +call_bob = [ + "python", + "run_websocket_server.py", + "--port", + "8778", + "--id", + "bob", + "--host", + "127.0.0.1", +] + +call_charlie = [ + "python", + "run_websocket_server.py", + "--port", + "8779", + "--id", + "charlie", + "--host", + "127.0.0.1", +] + +call_testing = [ + "python", + "run_websocket_server.py", + "--port", + "8780", + "--id", + "testing", + "--testing", + "--host", + "127.0.0.1", +] + +print("Starting server for Alice") +process_alice = subprocess.Popen(call_alice) + +print("Starting server for Bob") +process_bob = subprocess.Popen(call_bob) + +print("Starting server for Charlie") +process_charlie = subprocess.Popen(call_charlie) + +print("Starting server for Testing") +process_testing = subprocess.Popen(call_testing) + + +def signal_handler(sig, frame): + print("You pressed Ctrl+C!") + for p in [process_alice, process_bob, process_charlie, process_testing]: + p.terminate() + sys.exit(0) + + +signal.signal(signal.SIGINT, signal_handler) + +signal.pause() diff --git a/pytorch/Pysyft实例/websockets-example-MNIST/Federated learning with websockets and federated averaging.ipynb b/pytorch/Pysyft实例/websockets-example-MNIST/Federated learning with websockets and federated averaging.ipynb new file mode 100644 index 00000000..613c627f --- /dev/null +++ b/pytorch/Pysyft实例/websockets-example-MNIST/Federated learning with websockets and federated averaging.ipynb @@ -0,0 +1,404 @@ +{ + "cells": [ + { + "cell_type": "code", + 
"execution_count": 1, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Please ignore these variable, they only provide options for our CI system.\n", + "args = []\n", + "abort_after_one = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 教程:使用websockets进行联合学习,并对可能遇到的问题的可能解决方案进行联合平均\n", + "\n", + "安装websocket库\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1 启动websocket服务工作程序\n", + "\n", + "每个工作程序由两部分组成,本地句柄和保存数据并执行计算的远程实例。远程部分称为Websocket服务器工作程序。\n", + "\n", + "因此,首先,您需要转到cd此笔记本以及其他用于运行服务器和客户端的其他文件所在的文件夹\n", + "\n", + "需要在终端中运行以下命令。\n", + "\n", + "```bash\n", + "python start_websocket_servers.py\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2 设置websocket客户端工作程序\n", + "\n", + "导入并设置一些参数和变量。" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import syft as sy\n", + "from syft.workers.websocket_client import WebsocketClientWorker\n", + "import torch\n", + "from torchvision import datasets, transforms\n", + "\n", + "from syft.frameworks.torch.fl import utils" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import run_websocket_client as rwc" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Namespace(batch_size=64, cuda=False, epochs=2, federate_after_n_batches=50, lr=0.01, save_model=False, seed=1, test_batch_size=1000, use_virtual=False, verbose=False)\n" + ] + } + ], + "source": [ + "args = rwc.define_and_get_arguments(args=args)\n", + "use_cuda = args.cuda and torch.cuda.is_available()\n", + "torch.manual_seed(args.seed)\n", + "device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n", + "print(args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "现在,让我们实例化websocket客户端工作程序,这是我们到远程工作程序的本地访问点。请注意,如果websocket服务器工作程序未在运行,则此步骤将失败。" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[, , ]\n" + ] + } + ], + "source": [ + "hook = sy.TorchHook(torch)\n", + "\n", + "kwargs_websocket = {\"host\": \"localhost\", \"hook\": hook, \"verbose\": args.verbose}\n", + "alice = WebsocketClientWorker(id=\"alice\", port=8777, **kwargs_websocket)\n", + "bob = WebsocketClientWorker(id=\"bob\", port=8778, **kwargs_websocket)\n", + "charlie = WebsocketClientWorker(id=\"charlie\", port=8779, **kwargs_websocket)\n", + "\n", + "workers = [alice, bob, charlie]\n", + "print(workers)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3 准备和分发训练数据\n", + "我们将使用MNIST数据集并将数据随机分配到工作人员上。对于联合培训设置而言,这是不现实的,因为在远程培训中,数据通常通常已经可用。\n", + "\n", + "我们实例化了两个FederatedDataLoader,一个用于训练,一个用于MNIST数据集的测试集。" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "#run this box only if the the next box gives pipeline error\n", + "torch.utils.data.DataLoader(\n", + " datasets.MNIST(\n", + " \"../../官方教程/data/\",\n", + " 
train=True,download=True))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "federated_train_loader = sy.FederatedDataLoader(\n", + " datasets.MNIST(\n", + " \"../../官方教程/data\",\n", + " train=True,\n", + " download=True,\n", + " transform=transforms.Compose(\n", + " [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]\n", + " ),\n", + " ).federate(tuple(workers)),\n", + " batch_size=args.batch_size,\n", + " shuffle=True,\n", + " iter_per_worker=True\n", + ")\n", + "\n", + "test_loader = torch.utils.data.DataLoader(\n", + " datasets.MNIST(\n", + " \"../../官方教程/data\",\n", + " train=False,\n", + " transform=transforms.Compose(\n", + " [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]\n", + " ),\n", + " ),\n", + " batch_size=args.test_batch_size,\n", + " shuffle=True\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "接下来,我们需要实例化机器学习模型。这是一个具有2个卷积层和2个完全连接层的小型神经网络。它使用ReLU激活和最大池化。" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Net(\n (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))\n (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))\n (fc1): Linear(in_features=800, out_features=500, bias=True)\n (fc2): Linear(in_features=500, out_features=10, bias=True)\n)\n" + ] + } + ], + "source": [ + "model = rwc.Net().to(device)\n", + "print(model)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import sys\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.DEBUG)\n", + "handler = logging.StreamHandler(sys.stderr)\n", + "formatter = logging.Formatter(\"%(asctime)s %(levelname)s %(filename)s(l:%(lineno)d) - %(message)s\")\n", + "handler.setFormatter(formatter)\n", + "logger.handlers = [handler]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4 让我们开始训练\n", + "现在我们准备开始联合培训。我们将分别对每个工人进行给定数量的批次培训,然后计算所得模型的联合平均值,并计算该模型的测试准确性" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Starting epoch 1/2\n", + "2021-05-08 07:30:49,922 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [0, 50]\n", + "2021-05-08 07:30:51,082 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 2.312261\n", + "2021-05-08 07:30:53,140 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 2.254601\n", + "2021-05-08 07:30:58,222 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/50 (0%)]\tLoss: 2.301047\n", + "2021-05-08 07:30:59,940 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 2.247398\n", + "2021-05-08 07:31:04,928 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 2.324203\n", + "2021-05-08 07:31:06,843 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 2.244190\n", + "2021-05-08 07:31:28,701 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [50, 100]\n", + "2021-05-08 07:31:29,527 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 2.138325\n", + "2021-05-08 07:31:31,377 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 1.969910\n", + "2021-05-08 07:31:36,403 DEBUG run_websocket_client.py(l:72) - Train 
Worker bob: [0/50 (0%)]\tLoss: 2.151269\n", + "2021-05-08 07:31:38,178 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 1.928512\n", + "2021-05-08 07:31:43,205 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 2.153984\n", + "2021-05-08 07:31:45,160 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 1.915180\n", + "2021-05-08 07:32:06,354 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [100, 150]\n", + "2021-05-08 07:32:07,190 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 1.458981\n", + "2021-05-08 07:32:08,918 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 1.025910\n", + "2021-05-08 07:32:13,790 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/50 (0%)]\tLoss: 1.516349\n", + "2021-05-08 07:32:15,488 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 1.068960\n", + "2021-05-08 07:32:20,267 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 1.422252\n", + "2021-05-08 07:32:21,991 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 0.936653\n", + "2021-05-08 07:32:42,900 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [150, 200]\n", + "2021-05-08 07:32:43,715 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 0.655009\n", + "2021-05-08 07:32:45,441 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 0.514669\n", + "2021-05-08 07:32:50,506 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/50 (0%)]\tLoss: 0.856013\n", + "2021-05-08 07:32:52,366 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 0.497991\n", + "2021-05-08 07:32:57,283 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 0.723168\n", + "2021-05-08 07:32:59,055 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 0.481013\n", + "2021-05-08 07:33:20,036 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [200, 250]\n", + "2021-05-08 07:33:20,885 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 0.449779\n", + "2021-05-08 07:33:22,576 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 0.745741\n", + "2021-05-08 07:33:27,466 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/50 (0%)]\tLoss: 0.462799\n", + "2021-05-08 07:33:29,214 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 0.429508\n", + "2021-05-08 07:33:34,177 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 0.374787\n", + "2021-05-08 07:33:35,898 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 0.610133\n", + "2021-05-08 07:33:57,142 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [250, 300]\n", + "2021-05-08 07:33:57,969 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 0.274819\n", + "2021-05-08 07:33:59,728 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 0.364993\n", + "2021-05-08 07:34:04,760 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/50 (0%)]\tLoss: 0.439275\n", + "2021-05-08 07:34:06,597 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 0.288160\n", + "2021-05-08 07:34:11,593 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 
0.301319\n", + "2021-05-08 07:34:13,378 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 0.371563\n", + "2021-05-08 07:34:21,885 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [300, 350]\n", + "2021-05-08 07:34:22,715 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/13 (0%)]\tLoss: 0.238244\n", + "2021-05-08 07:34:26,747 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/13 (0%)]\tLoss: 0.351565\n", + "2021-05-08 07:34:30,730 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/13 (0%)]\tLoss: 0.346110\n", + "2021-05-08 07:34:34,004 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [350, 400]\n", + "2021-05-08 07:34:34,013 DEBUG run_websocket_client.py(l:142) - At least one worker ran out of data, stopping.\n", + "2021-05-08 07:34:37,068 DEBUG run_websocket_client.py(l:166) - \n", + "\n", + "2021-05-08 07:34:37,069 INFO run_websocket_client.py(l:168) - Test set: Average loss: 0.3303, Accuracy: 9064/10000 (91%)\n", + "\n", + "Starting epoch 2/2\n", + "2021-05-08 07:34:54,177 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [0, 50]\n", + "2021-05-08 07:34:54,991 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 0.357613\n", + "2021-05-08 07:34:56,873 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 0.315697\n", + "2021-05-08 07:35:02,034 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/50 (0%)]\tLoss: 0.386555\n", + "2021-05-08 07:35:03,788 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 0.501474\n", + "2021-05-08 07:35:08,634 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 0.317542\n", + "2021-05-08 07:35:10,364 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 0.371363\n", + "2021-05-08 07:35:31,315 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [50, 100]\n", + "2021-05-08 07:35:32,152 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 0.314157\n", + "2021-05-08 07:35:33,959 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 0.273596\n", + "2021-05-08 07:35:39,089 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/50 (0%)]\tLoss: 0.178877\n", + "2021-05-08 07:35:40,845 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 0.249033\n", + "2021-05-08 07:35:46,076 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 0.332668\n", + "2021-05-08 07:35:47,875 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 0.287937\n", + "2021-05-08 07:36:08,669 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [100, 150]\n", + "2021-05-08 07:36:09,494 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 0.340244\n", + "2021-05-08 07:36:11,351 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 0.243478\n", + "2021-05-08 07:36:16,331 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/50 (0%)]\tLoss: 0.594652\n", + "2021-05-08 07:36:19,380 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 0.289975\n", + "2021-05-08 07:36:24,405 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 0.301248\n", + "2021-05-08 07:36:26,203 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 0.261488\n", + "2021-05-08 
07:36:46,982 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [150, 200]\n", + "2021-05-08 07:36:47,817 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 0.298423\n", + "2021-05-08 07:36:49,697 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 0.363770\n", + "2021-05-08 07:36:55,125 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/50 (0%)]\tLoss: 0.277870\n", + "2021-05-08 07:36:56,971 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 0.099462\n", + "2021-05-08 07:37:02,173 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 0.179270\n", + "2021-05-08 07:37:03,984 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 0.250538\n", + "2021-05-08 07:37:25,135 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [200, 250]\n", + "2021-05-08 07:37:25,980 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 0.322626\n", + "2021-05-08 07:37:27,800 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 0.446016\n", + "2021-05-08 07:37:32,884 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/50 (0%)]\tLoss: 0.518507\n", + "2021-05-08 07:37:34,742 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 0.293078\n", + "2021-05-08 07:37:40,028 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 0.150113\n", + "2021-05-08 07:37:42,054 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 0.135191\n", + "2021-05-08 07:38:03,169 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [250, 300]\n", + "2021-05-08 07:38:03,999 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/50 (0%)]\tLoss: 0.426932\n", + "2021-05-08 07:38:05,752 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [25/50 (50%)]\tLoss: 0.170185\n", + "2021-05-08 07:38:10,679 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/50 (0%)]\tLoss: 0.161225\n", + "2021-05-08 07:38:12,414 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [25/50 (50%)]\tLoss: 0.264307\n", + "2021-05-08 07:38:17,249 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/50 (0%)]\tLoss: 0.286649\n", + "2021-05-08 07:38:19,006 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [25/50 (50%)]\tLoss: 0.179075\n", + "2021-05-08 07:38:27,289 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [300, 350]\n", + "2021-05-08 07:38:28,111 DEBUG run_websocket_client.py(l:72) - Train Worker alice: [0/13 (0%)]\tLoss: 0.324086\n", + "2021-05-08 07:38:32,056 DEBUG run_websocket_client.py(l:72) - Train Worker bob: [0/13 (0%)]\tLoss: 0.336624\n", + "2021-05-08 07:38:35,983 DEBUG run_websocket_client.py(l:72) - Train Worker charlie: [0/13 (0%)]\tLoss: 0.220500\n", + "2021-05-08 07:38:39,175 DEBUG run_websocket_client.py(l:130) - Starting training round, batches [350, 400]\n", + "2021-05-08 07:38:39,184 DEBUG run_websocket_client.py(l:142) - At least one worker ran out of data, stopping.\n", + "2021-05-08 07:38:42,194 DEBUG run_websocket_client.py(l:166) - \n", + "\n", + "2021-05-08 07:38:42,195 INFO run_websocket_client.py(l:168) - Test set: Average loss: 0.2021, Accuracy: 9416/10000 (94%)\n", + "\n" + ] + } + ], + "source": [ + "for epoch in range(1, args.epochs + 1):\n", + " print(\"Starting epoch {}/{}\".format(epoch, args.epochs))\n", + " model = rwc.train(model, device, 
federated_train_loader, args.lr, args.federate_after_n_batches, \n", + " abort_after_one=abort_after_one)\n", + " rwc.test(model, device, test_loader)" + ] + } + ], + "metadata": { + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/pytorch/Pysyft实例/websockets-example-MNIST/README.md b/pytorch/Pysyft实例/websockets-example-MNIST/README.md new file mode 100644 index 00000000..b2eeb412 --- /dev/null +++ b/pytorch/Pysyft实例/websockets-example-MNIST/README.md @@ -0,0 +1,25 @@ +# Federated learning using websockets - MNIST example + +The scripts in this folder let you execute a federated training via three websocket connections. + +The script start_websocket_servers.py will start the Websocket server workers for Alice, Bob and Charlie. +``` +$ python start_websocket_servers.py +``` + +The training is then started by running the script run_websocket_client.py: +``` +$ python run_websocket_client.py +``` +This script + * loads the MNIST dataset, + * distributes it onto the three workers + * starts a federated training. + + The federated training loop contains the following steps + * the current model is sent to the workers + * the workers train on a fixed number of batches + * the three models from Alice, Bob and Charlie are then averaged (federated averaging) + + This training loop is then executed for a given number of epochs. + The performance on the test set of MNIST is shown after each epoch. 
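+
+The core of this loop, as implemented in run_websocket_client.py in this folder, is roughly the following (a simplified sketch: `get_next_batches`, `train_on_batches` and `utils.federated_avg` are the actual helpers defined in that script, while the scaffolding around them is abbreviated here):
+
+```python
+for epoch in range(1, args.epochs + 1):
+    # fetch the next federate_after_n_batches batches for every worker
+    batches = get_next_batches(federated_train_loader, args.federate_after_n_batches)
+    models = {}
+    for worker, worker_batches in batches.items():
+        # the model is sent to the worker and trained on the worker's local batches
+        models[worker], _ = train_on_batches(worker, worker_batches, model, device, args.lr)
+    # federated averaging of the models returned by Alice, Bob and Charlie
+    model = utils.federated_avg(models)
+```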
diff --git a/pytorch/Pysyft实例/websockets-example-MNIST/run_websocket_client.py b/pytorch/Pysyft实例/websockets-example-MNIST/run_websocket_client.py new file mode 100644 index 00000000..798a7366 --- /dev/null +++ b/pytorch/Pysyft实例/websockets-example-MNIST/run_websocket_client.py @@ -0,0 +1,281 @@ +import torch +import torch.nn as nn +import torch.nn.functional as f +import torch.optim as optim +from torchvision import datasets, transforms +import logging +import argparse +import sys + +import syft as sy +from syft.workers.websocket_client import WebsocketClientWorker +from syft.workers.virtual import VirtualWorker +from syft.frameworks.torch.fl import utils + +logger = logging.getLogger(__name__) + +LOG_INTERVAL = 25 + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 20, 5, 1) + self.conv2 = nn.Conv2d(20, 50, 5, 1) + self.fc1 = nn.Linear(4 * 4 * 50, 500) + self.fc2 = nn.Linear(500, 10) + + def forward(self, x): + x = f.relu(self.conv1(x)) + x = f.max_pool2d(x, 2, 2) + x = f.relu(self.conv2(x)) + x = f.max_pool2d(x, 2, 2) + x = x.view(-1, 4 * 4 * 50) + x = f.relu(self.fc1(x)) + x = self.fc2(x) + return f.log_softmax(x, dim=1) + + +def train_on_batches(worker, batches, model_in, device, lr): + """Train the model on the worker on the provided batches + + Args: + worker(syft.workers.BaseWorker): worker on which the + training will be executed + batches: batches of data of this worker + model_in: machine learning model, training will be done on a copy + device (torch.device): where to run the training + lr: learning rate of the training steps + + Returns: + model, loss: obtained model and loss after training + + """ + model = model_in.copy() + optimizer = optim.SGD(model.parameters(), lr=lr) # TODO momentum is not supported at the moment + + model.train() + model.send(worker) + loss_local = False + + for batch_idx, (data, target) in enumerate(batches): + loss_local = False + data, target = data.to(device), target.to(device) + optimizer.zero_grad() + output = model(data) + loss = f.nll_loss(output, target) + loss.backward() + optimizer.step() + if batch_idx % LOG_INTERVAL == 0: + loss = loss.get() # <-- NEW: get the loss back + loss_local = True + logger.debug( + "Train Worker {}: [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format( + worker.id, + batch_idx, + len(batches), + 100.0 * batch_idx / len(batches), + loss.item(), + ) + ) + + if not loss_local: + loss = loss.get() # <-- NEW: get the loss back + model.get() # <-- NEW: get the model back + return model, loss + + +def get_next_batches(fdataloader: sy.FederatedDataLoader, nr_batches: int): + """retrieve next nr_batches of the federated data loader and group + the batches by worker + + Args: + fdataloader (sy.FederatedDataLoader): federated data loader + over which the function will iterate + nr_batches (int): number of batches (per worker) to retrieve + + Returns: + Dict[syft.workers.BaseWorker, List[batches]] + + """ + batches = {} + for worker_id in fdataloader.workers: + worker = fdataloader.federated_dataset.datasets[worker_id].location + batches[worker] = [] + try: + for i in range(nr_batches): + next_batches = next(fdataloader) + for worker in next_batches: + batches[worker].append(next_batches[worker]) + except StopIteration: + pass + return batches + + +def train( + model, device, federated_train_loader, lr, federate_after_n_batches, abort_after_one=False +): + model.train() + + nr_batches = federate_after_n_batches + + models = {} + loss_values = {} + + iter(federated_train_loader) # 
initialize iterators + batches = get_next_batches(federated_train_loader, nr_batches) + counter = 0 + + while True: + logger.debug(f"Starting training round, batches [{counter}, {counter + nr_batches}]") + data_for_all_workers = True + for worker in batches: + curr_batches = batches[worker] + if curr_batches: + models[worker], loss_values[worker] = train_on_batches( + worker, curr_batches, model, device, lr + ) + else: + data_for_all_workers = False + counter += nr_batches + if not data_for_all_workers: + logger.debug("At least one worker ran out of data, stopping.") + break + + model = utils.federated_avg(models) + batches = get_next_batches(federated_train_loader, nr_batches) + if abort_after_one: + break + return model + + +def test(model, device, test_loader): + model.eval() + test_loss = 0 + correct = 0 + with torch.no_grad(): + for data, target in test_loader: + data, target = data.to(device), target.to(device) + output = model(data) + test_loss += f.nll_loss(output, target, reduction="sum").item() # sum up batch loss + pred = output.argmax(1, keepdim=True) # get the index of the max log-probability + correct += pred.eq(target.view_as(pred)).sum().item() + + test_loss /= len(test_loader.dataset) + + logger.debug("\n") + accuracy = 100.0 * correct / len(test_loader.dataset) + logger.info( + "Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format( + test_loss, correct, len(test_loader.dataset), accuracy + ) + ) + + +def define_and_get_arguments(args=sys.argv[1:]): + parser = argparse.ArgumentParser( + description="Run federated learning using websocket client workers." + ) + parser.add_argument("--batch_size", type=int, default=64, help="batch size of the training") + parser.add_argument( + "--test_batch_size", type=int, default=1000, help="batch size used for the test data" + ) + parser.add_argument("--epochs", type=int, default=2, help="number of epochs to train") + parser.add_argument( + "--federate_after_n_batches", + type=int, + default=50, + help="number of training steps performed on each remote worker " "before averaging", + ) + parser.add_argument("--lr", type=float, default=0.01, help="learning rate") + parser.add_argument("--cuda", action="store_true", help="use cuda") + parser.add_argument("--seed", type=int, default=1, help="seed used for randomization") + parser.add_argument("--save_model", action="store_true", help="if set, model will be saved") + parser.add_argument( + "--verbose", + "-v", + action="store_true", + help="if set, websocket client workers will " "be started in verbose mode", + ) + parser.add_argument( + "--use_virtual", action="store_true", help="if set, virtual workers will be used" + ) + + args = parser.parse_args(args=args) + return args + + +def main(): + args = define_and_get_arguments() + + hook = sy.TorchHook(torch) + + if args.use_virtual: + alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose) + bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose) + charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose) + else: + kwargs_websocket = {"host": "localhost", "hook": hook, "verbose": args.verbose} + alice = WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket) + bob = WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket) + charlie = WebsocketClientWorker(id="charlie", port=8779, **kwargs_websocket) + + workers = [alice, bob, charlie] + + use_cuda = args.cuda and torch.cuda.is_available() + + torch.manual_seed(args.seed) + + device = torch.device("cuda" if use_cuda else "cpu") + + 
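# pin_memory=True speeds up host-to-GPU copies, so these extra
+ # DataLoader settings are only enabled when CUDA is in use
+ 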
kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {} + + federated_train_loader = sy.FederatedDataLoader( + datasets.MNIST( + "../data", + train=True, + download=True, + transform=transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ), + ).federate(tuple(workers)), + batch_size=args.batch_size, + shuffle=True, + iter_per_worker=True, + **kwargs, + ) + + test_loader = torch.utils.data.DataLoader( + datasets.MNIST( + "../data", + train=False, + transform=transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ), + ), + batch_size=args.test_batch_size, + shuffle=True, + **kwargs, + ) + + model = Net().to(device) + + for epoch in range(1, args.epochs + 1): + logger.info("Starting epoch %s/%s", epoch, args.epochs) + model = train(model, device, federated_train_loader, args.lr, args.federate_after_n_batches) + test(model, device, test_loader) + + if args.save_model: + torch.save(model.state_dict(), "mnist_cnn.pt") + + +if __name__ == "__main__": + FORMAT = "%(asctime)s %(levelname)s %(filename)s(l:%(lineno)d) - %(message)s" + LOG_LEVEL = logging.DEBUG + logging.basicConfig(format=FORMAT, level=LOG_LEVEL) + + websockets_logger = logging.getLogger("websockets") + websockets_logger.setLevel(logging.DEBUG) + websockets_logger.addHandler(logging.StreamHandler()) + + main() diff --git a/pytorch/Pysyft实例/websockets-example-MNIST/run_websocket_server.py b/pytorch/Pysyft实例/websockets-example-MNIST/run_websocket_server.py new file mode 100644 index 00000000..50affb4f --- /dev/null +++ b/pytorch/Pysyft实例/websockets-example-MNIST/run_websocket_server.py @@ -0,0 +1,57 @@ +from multiprocessing import Process +import syft as sy +from syft.workers.websocket_server import WebsocketServerWorker +import torch +import argparse +import os + +hook = sy.TorchHook(torch) + + +def start_proc(participant, kwargs): # pragma: no cover + """ helper function for spinning up a websocket participant """ + + def target(): + server = participant(**kwargs) + server.start() + + p = Process(target=target) + p.start() + return p + + +parser = argparse.ArgumentParser(description="Run websocket server worker.") + +parser.add_argument( + "--port", "-p", type=int, help="port number of the websocket server worker, e.g. --port 8777" +) + +parser.add_argument("--host", type=str, default="localhost", help="host for the connection") + +parser.add_argument( + "--id", type=str, help="name (id) of the websocket server worker, e.g. 
--id alice" +) + +parser.add_argument( + "--verbose", + "-v", + action="store_true", + help="if set, websocket server worker will be started in verbose mode", +) + +args = parser.parse_args() + +kwargs = { + "id": args.id, + "host": args.host, + "port": args.port, + "hook": hook, + "verbose": args.verbose, +} + + +if os.name != "nt": + server = start_proc(WebsocketServerWorker, kwargs) +else: + server = WebsocketServerWorker(**kwargs) + server.start() diff --git a/pytorch/Pysyft实例/websockets-example-MNIST/start_websocket_servers.py b/pytorch/Pysyft实例/websockets-example-MNIST/start_websocket_servers.py new file mode 100644 index 00000000..77b4cceb --- /dev/null +++ b/pytorch/Pysyft实例/websockets-example-MNIST/start_websocket_servers.py @@ -0,0 +1,24 @@ +import subprocess +import sys +import os + +if os.name == "nt": + python = "python" +else: + python = "python" + sys.version[0:3] + +call_alice = [python, "run_websocket_server.py", "--port", "8777", "--id", "alice"] + +call_bob = [python, "run_websocket_server.py", "--port", "8778", "--id", "bob"] + +call_charlie = [python, "run_websocket_server.py", "--port", "8779", "--id", "charlie"] + + +print("Starting server for Alice") +subprocess.Popen(call_alice) + +print("Starting server for Bob") +subprocess.Popen(call_bob) + +print("Starting server for Charlie") +subprocess.Popen(call_charlie) diff --git a/pytorch/官方教程/.gitignore b/pytorch/官方教程/.gitignore index 6320cd24..ff7915f6 100644 --- a/pytorch/官方教程/.gitignore +++ b/pytorch/官方教程/.gitignore @@ -1 +1,3 @@ -data \ No newline at end of file +data +runs +mnist_train \ No newline at end of file diff --git a/pytorch/官方教程/01 Pytorch 概述.md b/pytorch/官方教程/00 Pytorch 概述.md similarity index 95% rename from pytorch/官方教程/01 Pytorch 概述.md rename to pytorch/官方教程/00 Pytorch 概述.md index 98984279..403e44a1 100644 --- a/pytorch/官方教程/01 Pytorch 概述.md +++ b/pytorch/官方教程/00 Pytorch 概述.md @@ -1,5 +1,11 @@ # 学习 PyTorch +> 教程说明 +> * 0* 开头的是 5 套特殊的教程,一步步重构。 +> * 1* 开头的是 pysyft 相关的文章和教程。 +> * 2* 开头的是 pytorch 的基础教程,包含构建一个完整的神经网络的标准步骤。 + + ## 过程 1. 
获取数据集 @@ -86,4 +92,5 @@ torch.Size([32, 3, 4, 3, 2]) * 算子。 * 自己定义的算子,需要自己声明参数并初始化。 * 使用系统定义的算子,系统会自动添加参数。 - * 过程:前项传播的函数。后向传播的函数。误差计算的函数。梯度下降的函数。 \ No newline at end of file + * 过程:前项传播的函数。后向传播的函数。误差计算的函数。梯度下降的函数。 + diff --git a/pytorch/官方教程/00目录.md b/pytorch/官方教程/00目录.md deleted file mode 100644 index ac9323cb..00000000 --- a/pytorch/官方教程/00目录.md +++ /dev/null @@ -1 +0,0 @@ -# Summary diff --git a/pytorch/官方教程/01 Pytorch 概述.ipynb b/pytorch/官方教程/02 Pytorch 60分钟(1).ipynb similarity index 100% rename from pytorch/官方教程/01 Pytorch 概述.ipynb rename to pytorch/官方教程/02 Pytorch 60分钟(1).ipynb diff --git a/pytorch/官方教程/02 Pytorch 60分钟.ipynb b/pytorch/官方教程/02 Pytorch 60分钟(2).ipynb similarity index 100% rename from pytorch/官方教程/02 Pytorch 60分钟.ipynb rename to pytorch/官方教程/02 Pytorch 60分钟(2).ipynb diff --git a/pytorch/官方教程/03 Pytorch 重构网络1.ipynb b/pytorch/官方教程/03 Pytorch 实例学习.ipynb similarity index 100% rename from pytorch/官方教程/03 Pytorch 重构网络1.ipynb rename to pytorch/官方教程/03 Pytorch 实例学习.ipynb diff --git a/pytorch/官方教程/03 Pytorch 重构网络1.md b/pytorch/官方教程/03 Pytorch 实例学习.md similarity index 100% rename from pytorch/官方教程/03 Pytorch 重构网络1.md rename to pytorch/官方教程/03 Pytorch 实例学习.md diff --git a/pytorch/官方教程/04 Pytorch 重构网络2.ipynb b/pytorch/官方教程/04 Pytorch nn本质.ipynb similarity index 100% rename from pytorch/官方教程/04 Pytorch 重构网络2.ipynb rename to pytorch/官方教程/04 Pytorch nn本质.ipynb diff --git a/pytorch/官方教程/04 Pytorch 重构网络2.md b/pytorch/官方教程/04 Pytorch nn本质.md similarity index 100% rename from pytorch/官方教程/04 Pytorch 重构网络2.md rename to pytorch/官方教程/04 Pytorch nn本质.md diff --git a/pytorch/官方教程/05 Pytorch 可视化.ipynb b/pytorch/官方教程/05 Pytorch 可视化.ipynb index e69de29b..dedf777a 100644 --- a/pytorch/官方教程/05 Pytorch 可视化.ipynb +++ b/pytorch/官方教程/05 Pytorch 可视化.ipynb @@ -0,0 +1,69 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd0527a93331b4b1a8345148922acc34427fb7591433d63b66d32040b6fbbc6d593", + "display_name": "Python 3.8.8 64-bit ('pytorch': conda)" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torchvision\n", + "from torchvision import datasets, transforms\n", + "\n", + "# 数据处理\n", + "transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])\n", + "trainset = datasets.MNIST('mnist_train', train=True, download=True, transform=transform)\n", + "trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)\n", + "\n", + "# 定义模型\n", + "model = torchvision.models.resnet50(False)\n", + "# Have ResNet model take in grayscale rather than RGB\n", + "model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)\n", + "\n", + "# 取出数据\n", + "images, labels = next(iter(trainloader))\n", + "grid = torchvision.utils.make_grid(images)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "# 写入数据\n", + "from torch.utils.tensorboard import SummaryWriter\n", + "\n", + "# Writer will output to ./runs/ directory by default\n", + "writer = SummaryWriter()\n", + "\n", + "writer.add_image('images', grid, 0)\n", + "writer.add_graph(model, images)\n", + 
"writer.add_histogram('hello',1,1)\n", + "writer.add_histogram('hello',2,2)\n", + "writer.close()" + ] + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/05 Pytorch 可视化.md b/pytorch/官方教程/05 Pytorch 可视化.md index 12d5eeb4..e39163b5 100644 --- a/pytorch/官方教程/05 Pytorch 可视化.md +++ b/pytorch/官方教程/05 Pytorch 可视化.md @@ -1,10 +1,44 @@ -# 使用 TensorBoard 可视化模型,数据和训练 +# tensorboard 使用说明 + + +## 1 数据形式 + +Tensorboard可以记录与展示以下数据形式: +1. 标量Scalars +2. 图片Images +3. 音频Audio +4. 计算图Graph +5. 数据分布Distribution +6. 直方图Histograms +7. 嵌入向量Embeddings + +## 2 操作流程tensorflow-tensorboard + +Tensorboard的可视化过程 + +1. 首先肯定是先建立一个graph,你想从这个graph中获取某些数据的信息 +2. 确定要在graph中的哪些节点放置summary operations以记录信息 + +``` +使用tf.summary.scalar记录标量 +使用tf.summary.histogram记录数据的直方图 +使用tf.summary.distribution记录数据的分布图 +使用tf.summary.image记录图像数据 +``` + +3. operations并不会去真的执行计算,除非你告诉他们需要去run,或者它被其他的需要run的operation所依赖。而我们上一步创建的这些summary operations其实并不被其他节点依赖,因此,我们需要特地去运行所有的summary节点。但是呢,一份程序下来可能有超多这样的summary 节点,要手动一个一个去启动自然是及其繁琐的,因此我们可以使用tf.summary.merge_all去将所有summary节点合并成一个节点,只要运行这个节点,就能产生所有我们之前设置的summary data。 + +4. 使用tf.summary.FileWriter将运行后输出的数据都保存到本地磁盘中 + +5. 运行整个程序,并在命令行输入运行tensorboard的指令,之后打开web端可查看可视化的结果 + + +## 3 操作流程pytorch-tensorboard + > 原文: -在 [60 分钟突击](https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html)中,我们向您展示了如何加载数据,如何通过定义为`nn.Module`子类的模型提供数据,如何在训练数据上训练该模型以及在测试数据上对其进行测试。 为了了解发生的情况,我们在模型训练期间打印一些统计数据,以了解训练是否在进行中。 但是,我们可以做得更好:PyTorch 与 TensorBoard 集成在一起,TensorBoard 是一种工具,用于可视化神经网络训练运行的结果。 本教程使用 [Fashion-MNIST 数据集](https://github.com/zalandoresearch/fashion-mnist)说明了其某些功能,可以使用`torchvision.datasets`将其读入 PyTorch。 - -在本教程中,我们将学习如何: +教程使用 [Fashion-MNIST 数据集](https://github.com/zalandoresearch/fashion-mnist)说明了其某些功能,可以使用`torchvision.datasets`将其读入 PyTorch。在本教程中,我们将学习如何: > 1. 读取数据并进行适当的转换(与先前的教程几乎相同)。 > 2. 设置 TensorBoard。 @@ -18,6 +52,9 @@ > * 在训练模型时如何跟踪其表现 > * 在训练后如何评估模型的表现。 + + +# pytorch-tensorboard实例 我们将从 [CIFAR-10 教程](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)中类似的样板代码开始: ```py @@ -32,7 +69,8 @@ import torchvision.transforms as transforms import torch.nn as nn import torch.nn.functional as F import torch.optim as optim - +``` +```py # transforms transform = transforms.Compose( [transforms.ToTensor(), @@ -107,7 +145,7 @@ optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) ``` -## 1\. TensorBoard 设置 +## 1. TensorBoard 设置 现在,我们将设置 TensorBoard,从`torch.utils`导入`tensorboard`并定义`SummaryWriter`,这是将信息写入 TensorBoard 的关键对象。 @@ -121,7 +159,7 @@ writer = SummaryWriter('runs/fashion_mnist_experiment_1') 请注意,仅此行会创建一个`runs/fashion_mnist_experiment_1`文件夹。 -## 2\. 写入 TensorBoard +## 2. 写入 TensorBoard 现在,使用[`make_grid`](https://pytorch.org/docs/stable/torchvision/utils.html#torchvision.utils.make_grid)将图像写入到 TensorBoard 中,具体来说就是网格。 @@ -154,7 +192,7 @@ tensorboard --logdir=runs 现在您知道如何使用 TensorBoard 了! 但是,此示例可以在 Jupyter 笔记本中完成-TensorBoard 真正擅长的地方是创建交互式可视化。 接下来,我们将介绍其中之一,并在本教程结束时介绍更多内容。 -## 3\. 使用 TensorBoard 检查模型 +## 3. 使用 TensorBoard 检查模型 TensorBoard 的优势之一是其可视化复杂模型结构的能力。 让我们可视化我们构建的模型。 @@ -172,7 +210,7 @@ writer.close() TensorBoard 具有非常方便的功能,可在低维空间中可视化高维数据,例如图像数据。 接下来我们将介绍这一点。 -## 4\. 在 TensorBoard 中添加“投影仪” +## 4. 在 TensorBoard 中添加“投影仪” 我们可以通过[`add_embedding`](https://pytorch.org/docs/stable/tensorboard.html#torch.utils.tensorboard.writer.SummaryWriter.add_embedding)方法可视化高维数据的低维表示 @@ -208,7 +246,7 @@ writer.close() 现在我们已经彻底检查了我们的数据,让我们展示了 TensorBoard 如何从训练开始就可以使跟踪模型的训练和评估更加清晰。 -## 5\. 使用 TensorBoard 跟踪模型训练 +## 5. 
使用 TensorBoard 跟踪模型训练 在前面的示例中,我们仅*每 2000 次迭代*打印该模型的运行损失。 现在,我们将运行损失记录到 TensorBoard 中,并通过`plot_classes_preds`函数查看模型所做的预测。 @@ -299,7 +337,7 @@ print('Finished Training') 在之前的教程中,我们研究了模型训练后的每类准确率; 在这里,我们将使用 TensorBoard 绘制每个类别的精确调用曲线([在这里解释](https://www.scikit-yb.org/en/latest/api/classifier/prcurve.html))。 -## 6\. 使用 TensorBoard 评估经过训练的模型 +## 6. 使用 TensorBoard 评估经过训练的模型 ```py # 1\. gets the probability predictions in a test_size x num_classes Tensor @@ -345,4 +383,5 @@ for i in range(len(classes)): ![intermediate/../../_static/img/tensorboard_pr_curves.png](img/d15de2be2b754f9a4f46418764232b5e.png) -这是 TensorBoard 和 PyTorch 与之集成的介绍。 当然,您可以在 Jupyter 笔记本中完成 TensorBoard 的所有操作,但是使用 TensorBoard 时,默认情况下会获得交互式的视觉效果。 \ No newline at end of file +这是 TensorBoard 和 PyTorch 与之集成的介绍。 当然,您可以在 Jupyter 笔记本中完成 TensorBoard 的所有操作,但是使用 TensorBoard 时,默认情况下会获得交互式的视觉效果。 + diff --git a/pytorch/官方教程/10 Pysyft send&get.ipynb b/pytorch/官方教程/10 Pysyft send&get.ipynb new file mode 100644 index 00000000..3f08dd85 --- /dev/null +++ b/pytorch/官方教程/10 Pysyft send&get.ipynb @@ -0,0 +1,337 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "metadata": { + "interpreter": { + "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## syft==0.2.4 教程" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "## 1 send tensor to bob's machine" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3, 4, 5])" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "import numpy as np \n", + "import syft as sy # Run this cell to see if things work\n", + "import sys\n", + "\n", + "import torch\n", + "from torch.nn import Parameter\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "\n", + "import syft as sy\n", + "hook = sy.TorchHook(torch)\n", + "\n", + "torch.tensor([1,2,3,4,5])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([ 2, 4, 6, 8, 10])\n" + ] + } + ], + "source": [ + "x = torch.tensor([1,2,3,4,5])\n", + "y = x + x\n", + "print(y)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "bob = sy.VirtualWorker(hook, id=\"bob\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "x = torch.tensor([1,2,3,4,5])\n", + "y = torch.tensor([1,1,1,1,1])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(Wrapper)>[PointerTensor | me:2749667760 -> bob:24084234239]\n(Wrapper)>[PointerTensor | me:90763379457 -> bob:66772298697]\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + 
"execution_count": 22 + } + ], + "source": [ + "x_ptr = x.send(bob)\n", + "y_ptr = y.send(bob)\n", + "print(x_ptr)\n", + "print(y_ptr)\n", + "x_ptr.location\n", + "x_ptr.id_at_location\n", + "x_ptr.owner" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{24084234239: tensor([1, 2, 3, 4, 5]), 66772298697: tensor([1, 1, 1, 1, 1])}" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "bob._objects" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(Wrapper)>[PointerTensor | me:43302083549 -> bob:80050839170]\n" + ] + } + ], + "source": [ + "z = x_ptr + x_ptr\n", + "print(z)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{24084234239: tensor([1, 2, 3, 4, 5]),\n", + " 66772298697: tensor([1, 1, 1, 1, 1]),\n", + " 44465068457: tensor([ 2, 4, 6, 8, 10]),\n", + " 57491926079: tensor([1, 2, 3, 4, 5]),\n", + " 11932127750: tensor([1, 1, 1, 1, 1])}" + ] + }, + "metadata": {}, + "execution_count": 33 + } + ], + "source": [ + "bob._objects" + ] + }, + { + "source": [ + "## 2 using tensor pointer" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "bob = sy.VirtualWorker(hook, id=\"tom\")" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(Wrapper)>[PointerTensor | me:41193493464 -> tom:49833880055]\n" + ] + } + ], + "source": [ + "x = torch.tensor([1,2,3,4,5]).send(bob)\n", + "y = torch.tensor([1,1,1,1,1]).send(bob)\n", + "z = x+y\n", + "print(z)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{78395822529: tensor([1, 2, 3, 4, 5]),\n", + " 72335892048: tensor([1, 1, 1, 1, 1]),\n", + " 49833880055: tensor([2, 3, 4, 5, 6])}" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ], + "source": [ + "bob._objects" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([2, 3, 4, 5, 6])" + ] + }, + "metadata": {}, + "execution_count": 41 + } + ], + "source": [ + "z.get()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{78395822529: tensor([1, 2, 3, 4, 5]), 72335892048: tensor([1, 1, 1, 1, 1])}" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ], + "source": [ + "bob._objects" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([1., 2., 3., 4., 5.], requires_grad=True)\n" + ] + } + ], + "source": [ + "x = torch.tensor([1,2,3,4,5.], requires_grad=True).send(bob)\n", + "y = torch.tensor([1,1,1,1,1.], requires_grad=True).send(bob)\n", + "z = (x + y).sum()\n", + "z.backward()\n", + "x = x.get()\n", + "print(x)\n", + "print(x.grad)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline 
at end of file diff --git a/pytorch/官方教程/10 Pysyft 概述.md b/pytorch/官方教程/10 Pysyft 概述.md index 7c08f4d4..f85436db 100644 --- a/pytorch/官方教程/10 Pysyft 概述.md +++ b/pytorch/官方教程/10 Pysyft 概述.md @@ -3,6 +3,7 @@ > 参考文献 > * [A generic framework for privacy preserving deep learning](https://zhuanlan.zhihu.com/p/114774133) +> * [FedAvg 的 Pytorch 实现](https://zhuanlan.zhihu.com/p/259806876?utm_source=wechat_session) ## 1 论文阅读 @@ -39,3 +40,18 @@ PySyft是用于安全和隐私深度学习的Python库,它在主流深度学 2. MPC工具箱不仅包含基本运算(例如加法和乘法),还包含预处理工具,用于生成例如用于乘法的三元组,还包含针对神经网络的更具体的运算,包括矩阵乘法。由于MPC的特殊性,对卷积网络的传统元素进行了一些调整:如文献2中所述,我们用average pooling代替max pooling,并近似了higher-degree sigmoid来代替relu作为激活函数。 3. SPDZ协议假定数据是以整数形式给出的,因此我们在链中添加了一个FixedPrecisionTensor节点,该节点将浮点数转换为固定精度数字。该节点将值编码为一个整数,并存储小数点的位置。 图3总结了实现SPDZ的张量的完整结构。![](image/2021-04-28-15-48-37.png) 4. 与文献2提出的MPC协议不同,参与者在我们的框架中并不平等,因为有一方是模型的所有者(称为本地worker)。他通过控制所有其他方(远程worker)的训练过程来充当领导者。为了减轻处理数据时的这种集中化偏差,本地worker可以在他不拥有且看不到的数据上创建远程共享张量。 + +## 2 fedavg算法实现过程 + +### fedavg算法原理 + +在 Federated Learning 中,每个客户端都在本地用自己的数据分散地训练模型,仅将学习到的模型参数发送给受信任的 Server,由其通过差分隐私加密和安全聚合等技术得到主模型。然后,受信任的 Server 将聚合后的主模型发回给这些客户端,并重复此过程。 + +这里准备了一个基于 IID(独立同分布)数据的简单实现,以演示如何用 FedAvg 方法聚合在不同节点上运行的数百个不同模型的参数,并检验聚合出的模型是否能给出合理的结果。此实现是在 MNIST 数据集上执行的。MNIST 数据集包含数字 0 到 9 的 28 * 28 像素灰度图像。 + +### 训练过程 +1. 由于主模型的参数和节点中所有局部模型的参数都是随机初始化的,所有这些参数将彼此不同。因此,在对节点中的本地模型进行训练之前,主模型会将模型参数发送给节点。 +2. 节点使用这些参数在其自身的数据上训练本地模型。 +3. 每个节点在训练自己的模型时都会更新其参数。训练过程完成后,每个节点会将其参数发送到主模型。 +4. 主模型采用这些参数的平均值并将其设置为新的权重参数,再将其传递回节点以进行下一次迭代。 + diff --git a/pytorch/官方教程/11 Pysyft 多端训练.ipynb b/pytorch/官方教程/11 Pysyft 多端训练.ipynb new file mode 100644 index 00000000..fa3bc717 --- /dev/null +++ b/pytorch/官方教程/11 Pysyft 多端训练.ipynb @@ -0,0 +1,253 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "metadata": { + "interpreter": { + "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## 1 pytorch 训练线性模型" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from torch import nn\n", + "from torch import optim" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ], + "source": [ + "\n", + "# A Toy Dataset\n", + "data = torch.tensor([[0,0],[0,1],[1,0],[1,1.]])\n", + "target = torch.tensor([[0],[0],[1],[1.]])\n", + "\n", + "# A Toy Model\n", + "model = nn.Linear(2,1)\n", + "# print(model.parameters())\n", + "def train():\n", + " # Training Logic\n", + " opt = optim.SGD(params=model.parameters(),lr=0.1)\n", + " for iter in range(20):\n", + "\n", + " # 1) erase previous gradients (if they exist)\n", + " opt.zero_grad()\n", + "\n", + " # 2) make a prediction\n", + " pred = model(data)\n", + "\n", + " # 3) calculate how much we missed\n", + " loss = ((pred - target)**2).sum()\n", + "\n", + " # 4) figure out which weights caused us to miss\n", + " loss.backward()\n", + "\n", + " # 5) change those weights\n", + " 
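# the SGD step below applies: param = param - lr * param.grad\n",
+ " 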
opt.step()\n", + "\n", + " # 6) print our progress\n", + " print(loss.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor(0.2655)\ntensor(0.1714)\ntensor(0.1110)\ntensor(0.0721)\ntensor(0.0469)\ntensor(0.0306)\ntensor(0.0201)\ntensor(0.0132)\ntensor(0.0087)\ntensor(0.0058)\ntensor(0.0039)\ntensor(0.0026)\ntensor(0.0018)\ntensor(0.0012)\ntensor(0.0008)\ntensor(0.0006)\ntensor(0.0004)\ntensor(0.0003)\ntensor(0.0002)\ntensor(0.0001)\n" + ] + } + ], + "source": [ + "train()" + ] + }, + { + "source": [ + "## 2 syft训练\n", + "\n", + "> 1. 注意这里的model.send和model.get会对model中每一个变量执行a = a.send()和b=b.get()\n", + "> 2. 这样,每次训练的时候,模型都会被完全转移到客户端。训练完成后,客户端的模型被完全转移到服务端。\n", + "> 3. 这样,表示两个端进行连续的训练,一端训练完成模型再由另一个客户端进行训练。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import syft as sy\n", + "hook = sy.TorchHook(torch)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# create a couple workers\n", + "\n", + "bob = sy.VirtualWorker(hook, id=\"bob\")\n", + "alice = sy.VirtualWorker(hook, id=\"alice\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# A Toy Dataset\n", + "data = torch.tensor([[0,0],[0,1],[1,0],[1,1.]], requires_grad=True)\n", + "target = torch.tensor([[0],[0],[1],[1.]], requires_grad=True)\n", + "\n", + "# get pointers to training data on each worker by\n", + "# sending some training data to bob and alice\n", + "data_bob = data[0:2]\n", + "target_bob = target[0:2]\n", + "\n", + "data_alice = data[2:]\n", + "target_alice = target[2:]\n", + "\n", + "# Iniitalize A Toy Model\n", + "model = nn.Linear(2,1)\n", + "\n", + "data_bob = data_bob.send(bob)\n", + "data_alice = data_alice.send(alice)\n", + "target_bob = target_bob.send(bob)\n", + "target_alice = target_alice.send(alice)\n", + "\n", + "# organize pointers into a list\n", + "datasets = [(data_bob,target_bob),(data_alice,target_alice)]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from syft.federated.floptimizer import Optims\n", + "workers = ['bob', 'alice']\n", + "optims = Optims(workers, optim=optim.Adam(params=model.parameters(),lr=0.1))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def train():\n", + " # Training Logic\n", + " for iter in range(10):\n", + " \n", + " # NEW) iterate through each worker's dataset\n", + " for data,target in datasets:\n", + " \n", + " # NEW) send model to correct worker\n", + " model.send(data.location)\n", + " \n", + " #Call the optimizer for the worker using get_optim\n", + " opt = optims.get_optim(data.location.id)\n", + " #print(data.location.id)\n", + "\n", + " # 1) erase previous gradients (if they exist)\n", + " opt.zero_grad()\n", + "\n", + " # 2) make a prediction\n", + " pred = model(data)\n", + "\n", + " # 3) calculate how much we missed\n", + " loss = ((pred - target)**2).sum()\n", + "\n", + " # 4) figure out which weights caused us to miss\n", + " loss.backward()\n", + "\n", + " # 5) change those weights\n", + " opt.step()\n", + " \n", + " # NEW) get model (with gradients)\n", + " model.get()\n", + "\n", + " # 6) print our progress\n", + " print(loss.get()) # NEW) slight edit... 
need to call .get() on loss\\\n", + " \n", + "# federated averaging" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor(0.2523, requires_grad=True)\ntensor(1.1397, requires_grad=True)\ntensor(0.0076, requires_grad=True)\ntensor(0.3003, requires_grad=True)\ntensor(0.1095, requires_grad=True)\ntensor(0.0394, requires_grad=True)\ntensor(0.2870, requires_grad=True)\ntensor(0.0093, requires_grad=True)\ntensor(0.3682, requires_grad=True)\ntensor(0.0296, requires_grad=True)\ntensor(0.3454, requires_grad=True)\ntensor(0.0340, requires_grad=True)\ntensor(0.2584, requires_grad=True)\ntensor(0.0178, requires_grad=True)\ntensor(0.1542, requires_grad=True)\ntensor(0.0018, requires_grad=True)\ntensor(0.0689, requires_grad=True)\ntensor(0.0040, requires_grad=True)\ntensor(0.0196, requires_grad=True)\ntensor(0.0250, requires_grad=True)\n" + ] + } + ], + "source": [ + "train()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/12 Pysyft 指针.ipynb b/pytorch/官方教程/12 Pysyft 指针.ipynb new file mode 100644 index 00000000..1d7eab7c --- /dev/null +++ b/pytorch/官方教程/12 Pysyft 指针.ipynb @@ -0,0 +1,225 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "metadata": { + "interpreter": { + "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:root:Torch was already hooked... 
skipping hooking process\n" + ] + } + ], + "source": [ + "import torch\n", + "import syft as sy\n", + "hook = sy.TorchHook(torch)" + ] + }, + { + "source": [ + "## 1 指向指针的指针" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([1, 2, 3, 4])\n(Wrapper)>[PointerTensor | me:42050510610 -> bob:82174302252]\n(Wrapper)>[PointerTensor | me:53433269588 -> alice:42050510610]\n" + ] + } + ], + "source": [ + "bob = sy.VirtualWorker(hook, id='bob')\n", + "alice = sy.VirtualWorker(hook, id='alice')\n", + "\n", + "# this is a local tensor\n", + "x = torch.tensor([1,2,3,4])\n", + "print(x)\n", + "\n", + "# this sends the local tensor to Bob\n", + "x_ptr = x.send(bob)\n", + "\n", + "# this is now a pointer\n", + "print(x_ptr)\n", + "\n", + "# now we can SEND THE POINTER to alice!!!\n", + "pointer_to_x_ptr = x_ptr.send(alice)\n", + "\n", + "print(pointer_to_x_ptr)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{82174302252: tensor([1, 2, 3, 4])}\n{55713210485: tensor([1, 2, 3, 4, 5]), 42050510610: (Wrapper)>[PointerTensor | alice:42050510610 -> bob:82174302252]}\n(Wrapper)>[PointerTensor | me:42050510610 -> bob:82174302252]\n" + ] + } + ], + "source": [ + "# As you can see above, Bob still has the actual data (data is always stored in a LocalTensor type). \n", + "print(bob._objects)\n", + "# Alice, on the other hand, has x_ptr!! (notice how it points at bob)\n", + "print(alice._objects)\n", + "\n", + "x_ptr = pointer_to_x_ptr.get()\n", + "print(x_ptr)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1, 2, 3, 4])" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "# and then we can use x_ptr to get x back from Bob!\n", + "\n", + "x = x_ptr.get()\n", + "x" + ] + }, + { + "source": [ + "## 2 指针链操作" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# x is now a pointer to the data which lives on Bob's machine\n", + "x = torch.tensor([1,2,3,4,5]).send(bob)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " bob: {55713210485: tensor([1, 2, 3, 4, 5])}\nalice: {}\n" + ] + } + ], + "source": [ + "print(' bob:', bob._objects)\n", + "print('alice:',alice._objects)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "x = x.move(alice)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " bob: {}\nalice: {55713210485: tensor([1, 2, 3, 4, 5])}\n" + ] + } + ], + "source": [ + "print(' bob:', bob._objects)\n", + "print('alice:',alice._objects)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(Wrapper)>[PointerTensor | me:65315350067 -> alice:55713210485]" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/13 Pysyft 联邦平均.ipynb b/pytorch/官方教程/13 Pysyft 联邦平均.ipynb new file mode 100644 index 00000000..88d4ec13 --- /dev/null +++ b/pytorch/官方教程/13 Pysyft 联邦平均.ipynb @@ -0,0 +1,321 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "metadata": { + "interpreter": { + "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import syft as sy\n", + "import copy\n", + "hook = sy.TorchHook(torch)\n", + "from torch import nn, optim" + ] + }, + { + "source": [ + "## 步骤1 创建数据的所有者" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# create a couple workers\n", + "\n", + "bob = sy.VirtualWorker(hook, id=\"bob\")\n", + "alice = sy.VirtualWorker(hook, id=\"alice\")\n", + "secure_worker = sy.VirtualWorker(hook, id=\"secure_worker\")\n", + "\n", + "\n", + "# A Toy Dataset\n", + "data = torch.tensor([[0,0],[0,1],[1,0],[1,1.]], requires_grad=True)\n", + "target = torch.tensor([[0],[0],[1],[1.]], requires_grad=True)\n", + "\n", + "# get pointers to training data on each worker by\n", + "# sending some training data to bob and alice\n", + "bobs_data = data[0:2].send(bob)\n", + "bobs_target = target[0:2].send(bob)\n", + "\n", + "alices_data = data[2:].send(alice)\n", + "alices_target = target[2:].send(alice)" + ] + }, + { + "source": [ + "## 步骤2 创建模型" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Iniitalize A Toy Model\n", + "model = nn.Linear(2,1)" + ] + }, + { + "source": [ + "## 步骤3 将模型发送给Alice和Bob\n", + "\n", + "> 1. 创建模型,前项传播计算中间结果,计算损失值,损失值反向传播形成梯度,使用优化器进行梯度下降。\n", + "> 2. 在联邦学习模型中,聚合的是模型的权重,而不是模型的梯度。是经过一次梯度下降操作后的模型的权重。\n", + "> 3. adam优化器中的参数包括两个部分:一个是动量梯度计算,一个是RMscrop。都是由历史的梯度得出来的。\n", + "> 4. 
所以要想服务器知道模型的adam优化器的结果,必须将历史的梯度也进行上传,显然不合理。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "bobs_model = model.copy().send(bob)\n", + "alices_model = model.copy().send(alice)\n", + "\n", + "bobs_opt = optim.SGD(params=bobs_model.parameters(),lr=0.1)\n", + "alices_opt = optim.SGD(params=alices_model.parameters(),lr=0.1)" + ] + }, + { + "source": [ + "## 步骤4 并行训练Bob和Alice的模型" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Bob:tensor(0.1430) Alice:tensor(1.3841)\nBob:tensor(0.0712) Alice:tensor(0.0938)\nBob:tensor(0.0492) Alice:tensor(0.0699)\nBob:tensor(0.0394) Alice:tensor(0.0582)\nBob:tensor(0.0330) Alice:tensor(0.0484)\nBob:tensor(0.0280) Alice:tensor(0.0403)\nBob:tensor(0.0239) Alice:tensor(0.0335)\nBob:tensor(0.0204) Alice:tensor(0.0279)\nBob:tensor(0.0174) Alice:tensor(0.0232)\nBob:tensor(0.0148) Alice:tensor(0.0193)\n" + ] + } + ], + "source": [ + "for i in range(10):\n", + "\n", + " # Train Bob's Model\n", + " bobs_opt.zero_grad()\n", + " bobs_pred = bobs_model(bobs_data)\n", + " bobs_loss = ((bobs_pred - bobs_target)**2).sum()\n", + " bobs_loss.backward()\n", + "\n", + " bobs_opt.step()\n", + " bobs_loss = bobs_loss.get().data\n", + "\n", + " # Train Alice's Model\n", + " alices_opt.zero_grad()\n", + " alices_pred = alices_model(alices_data)\n", + " alices_loss = ((alices_pred - alices_target)**2).sum()\n", + " alices_loss.backward()\n", + "\n", + " alices_opt.step()\n", + " alices_loss = alices_loss.get().data\n", + " \n", + " print(\"Bob:\" + str(bobs_loss) + \" Alice:\" + str(alices_loss))" + ] + }, + { + "source": [ + "## 步骤5 客户端发送模型到服务器" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "alices_model.move(secure_worker)\n", + "bobs_model.move(secure_worker)" + ] + }, + { + "source": [ + "## 步骤6 模型平均" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "with torch.no_grad():\n", + " model.weight.set_(((alices_model.weight.data + bobs_model.weight.data) / 2).get())\n", + " model.bias.set_(((alices_model.bias.data + bobs_model.bias.data) / 2).get())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "OrderedDict([('weight', Parameter containing:\ntensor([[0.6258, 0.1664]], requires_grad=True)), ('bias', Parameter containing:\ntensor([-0.0167], requires_grad=True))])\n" + ] + } + ], + "source": [ + "print(model._parameters)" + ] + }, + { + "source": [ + "## 步骤7 迭代以上步骤" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Bob:tensor(0.0046) Alice:tensor(0.0139)\n", + "Bob:tensor(0.0013) Alice:tensor(0.0066)\n", + "Bob:tensor(0.0003) Alice:tensor(0.0030)\n", + "Bob:tensor(4.8089e-05) Alice:tensor(0.0014)\n", + "Bob:tensor(4.9928e-05) Alice:tensor(0.0006)\n", + "Bob:tensor(9.4057e-05) Alice:tensor(0.0003)\n", + "Bob:tensor(0.0001) Alice:tensor(0.0001)\n", + "Bob:tensor(0.0001) Alice:tensor(7.4250e-05)\n", + "Bob:tensor(0.0001) Alice:tensor(3.8461e-05)\n", + 
"Bob:tensor(0.0001) Alice:tensor(2.0683e-05)\n" + ] + } + ], + "source": [ + "iterations = 10\n", + "worker_iters = 5\n", + "\n", + "for a_iter in range(iterations):\n", + " \n", + " bobs_model = model.copy().send(bob)\n", + " alices_model = model.copy().send(alice)\n", + "\n", + " bobs_opt = optim.SGD(params=bobs_model.parameters(),lr=0.1)\n", + " alices_opt = optim.SGD(params=alices_model.parameters(),lr=0.1)\n", + "\n", + " for wi in range(worker_iters):\n", + "\n", + " # Train Bob's Model\n", + " bobs_opt.zero_grad()\n", + " bobs_pred = bobs_model(bobs_data)\n", + " bobs_loss = ((bobs_pred - bobs_target)**2).sum()\n", + " bobs_loss.backward()\n", + "\n", + " bobs_opt.step()\n", + " bobs_loss = bobs_loss.get().data\n", + "\n", + " # Train Alice's Model\n", + " alices_opt.zero_grad()\n", + " alices_pred = alices_model(alices_data)\n", + " alices_loss = ((alices_pred - alices_target)**2).sum()\n", + " alices_loss.backward()\n", + "\n", + " alices_opt.step()\n", + " alices_loss = alices_loss.get().data\n", + " \n", + " alices_model.move(secure_worker)\n", + " bobs_model.move(secure_worker)\n", + " with torch.no_grad():\n", + " model.weight.set_(((alices_model.weight.data + bobs_model.weight.data) / 2).get())\n", + " model.bias.set_(((alices_model.bias.data + bobs_model.bias.data) / 2).get())\n", + " \n", + " print(\"Bob:\" + str(bobs_loss) + \" Alice:\" + str(alices_loss))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# 验证模型\n", + "preds = model(data)\n", + "loss = ((preds - target) ** 2).sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[0.0356],\n [0.0321],\n [0.9589],\n [0.9553]], grad_fn=)\ntensor([[0.],\n [0.],\n [1.],\n [1.]], requires_grad=True)\ntensor(0.0060)\n" + ] + } + ], + "source": [ + "print(preds)\n", + "print(target)\n", + "print(loss.data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/14 Pysyft sandbox.ipynb b/pytorch/官方教程/14 Pysyft sandbox.ipynb new file mode 100644 index 00000000..1bf45167 --- /dev/null +++ b/pytorch/官方教程/14 Pysyft sandbox.ipynb @@ -0,0 +1,216 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "metadata": { + "interpreter": { + "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## 1 创建sandbox" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Setting up Sandbox...\n", + "\t- Hooking PyTorch\n", + "\t- Creating Virtual Workers:\n", + "\t\t- bob\n", + "\t\t- theo\n", + "\t\t- jason\n", + "\t\t- alice\n", + "\t\t- andy\n", + "\t\t- jon\n", + "\tStoring hook and workers as global variables...\n", + "\tLoading datasets from SciKit Learn...\n", + "\t\t- 
Boston Housing Dataset\n", + "\t\t- Diabetes Dataset\n", + "\t\t- Breast Cancer Dataset\n", + "\t- Digits Dataset\n", + "\t\t- Iris Dataset\n", + "\t\t- Wine Dataset\n", + "\t\t- Linnerud Dataset\n", + "\tDistributing Datasets Amongst Workers...\n", + "\tCollecting workers into a VirtualGrid...\n", + "Done!\n" + ] + } + ], + "source": [ + "import torch\n", + "import syft as sy\n", + "sy.create_sandbox(globals())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[, , , , , ]\n\n\n" + ] + } + ], + "source": [ + "print(workers)\n", + "print(hook)\n", + "print(bob)" + ] + }, + { + "source": [ + "## 2 数据集搜索功能" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "x = torch.tensor([1,2,3,4,5]).tag(\"#fun\", \"#boston\", \"#housing\").describe(\"The input datapoints to the boston housing dataset.\")\n", + "y = torch.tensor([1,2,3,4,5]).tag(\"#fun\", \"#boston\", \"#housing\").describe(\"The input datapoints to the boston housing dataset.\")\n", + "z = torch.tensor([1,2,3,4,5]).tag(\"#fun\", \"#mnist\",).describe(\"The images in the MNIST training dataset.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([1, 2, 3, 4, 5])\n\tTags: #housing #fun #boston \n\tDescription: The input datapoints to the boston housing dataset....\n\tShape: torch.Size([5])\n" + ] + } + ], + "source": [ + "print(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "x = x.send(bob)\n", + "y = y.send(bob)\n", + "z = z.send(bob)\n", + "\n", + "# this searches for exact match within a tag or within the description\n", + "results = bob.search([\"#boston\", \"#housing\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[tensor([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,\n 4.9800e+00],\n [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,\n 9.1400e+00],\n [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,\n 4.0300e+00],\n ...,\n [4.4620e-02, 2.5000e+01, 4.8600e+00, ..., 1.9000e+01, 3.9563e+02,\n 7.2200e+00],\n [3.6590e-02, 2.5000e+01, 4.8600e+00, ..., 1.9000e+01, 3.9690e+02,\n 6.7200e+00],\n [3.5510e-02, 2.5000e+01, 4.8600e+00, ..., 1.9000e+01, 3.9064e+02,\n 7.5100e+00]])\n\tTags: #housing #data #boston_housing .. #boston _boston_dataset: \n\tDescription: .. 
_boston_dataset:...\n\tShape: torch.Size([84, 13]), tensor([1, 2, 3, 4, 5])\n\tTags: #housing #fun #boston \n\tDescription: The input datapoints to the boston housing dataset....\n\tShape: torch.Size([5]), tensor([1, 2, 3, 4, 5])\n\tTags: #housing #fun #boston \n\tDescription: The input datapoints to the boston housing dataset....\n\tShape: torch.Size([5]), tensor([24.0000, 21.6000, 34.7000, 33.4000, 36.2000, 28.7000, 22.9000, 27.1000,\n 16.5000, 18.9000, 15.0000, 18.9000, 21.7000, 20.4000, 18.2000, 19.9000,\n 23.1000, 17.5000, 20.2000, 18.2000, 13.6000, 19.6000, 15.2000, 14.5000,\n 15.6000, 13.9000, 16.6000, 14.8000, 18.4000, 21.0000, 12.7000, 14.5000,\n 13.2000, 13.1000, 13.5000, 18.9000, 20.0000, 21.0000, 24.7000, 30.8000,\n 34.9000, 26.6000, 25.3000, 24.7000, 21.2000, 19.3000, 20.0000, 16.6000,\n 14.4000, 19.4000, 19.7000, 20.5000, 25.0000, 23.4000, 18.9000, 35.4000,\n 24.7000, 31.6000, 23.3000, 19.6000, 18.7000, 16.0000, 22.2000, 25.0000,\n 33.0000, 23.5000, 19.4000, 22.0000, 17.4000, 20.9000, 24.2000, 21.7000,\n 22.8000, 23.4000, 24.1000, 21.4000, 20.0000, 20.8000, 21.2000, 20.3000,\n 28.0000, 23.9000, 24.8000, 22.9000])\n\tTags: #target #housing #boston_housing .. #boston _boston_dataset: \n\tDescription: .. _boston_dataset:...\n\tShape: torch.Size([84])]\n" + ] + } + ], + "source": [ + "print(results)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(results[0].describe)" + ] + }, + { + "source": [ + "## 3 虚拟网格\n", + "\n", + "网格只是工作人员的集合,为您要将数据集放在一起时提供了一些便捷功能。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'bob': [(Wrapper)>[PointerTensor | me:92293275269 -> bob:47788767151]\n\tTags: .. #boston _boston_dataset: #target #housing #boston_housing \n\tShape: torch.Size([84])\n\tDescription: .. _boston_dataset:...], 'theo': [(Wrapper)>[PointerTensor | me:35544867868 -> theo:34457620693]\n\tTags: .. #boston _boston_dataset: #target #housing #boston_housing \n\tShape: torch.Size([84])\n\tDescription: .. _boston_dataset:...], 'jason': [(Wrapper)>[PointerTensor | me:81365469339 -> jason:15167770837]\n\tTags: .. #boston _boston_dataset: #target #housing #boston_housing \n\tShape: torch.Size([84])\n\tDescription: .. _boston_dataset:...], 'alice': [(Wrapper)>[PointerTensor | me:66469100711 -> alice:93240075334]\n\tTags: .. #boston _boston_dataset: #target #housing #boston_housing \n\tShape: torch.Size([84])\n\tDescription: .. _boston_dataset:...], 'andy': [(Wrapper)>[PointerTensor | me:37370447686 -> andy:37274546577]\n\tTags: .. #boston _boston_dataset: #target #housing #boston_housing \n\tShape: torch.Size([84])\n\tDescription: .. _boston_dataset:...], 'jon': [(Wrapper)>[PointerTensor | me:26110954914 -> jon:9765914045]\n\tTags: .. #boston _boston_dataset: #target #housing #boston_housing \n\tShape: torch.Size([86])\n\tDescription: .. 
_boston_dataset:...]}\n" + ] + } + ], + "source": [ + "grid = sy.PrivateGridNetwork(*workers)\n", + "results = grid.search(\"#boston\")\n", + "boston_data = grid.search(\"#boston\",\"#data\")\n", + "boston_target = grid.search(\"#boston\",\"#target\")\n", + "\n", + "print(boston_target)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/15 Pysyft Mnist实例.ipynb b/pytorch/官方教程/15 Pysyft Mnist实例.ipynb new file mode 100644 index 00000000..9e75a092 --- /dev/null +++ b/pytorch/官方教程/15 Pysyft Mnist实例.ipynb @@ -0,0 +1,299 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "metadata": { + "interpreter": { + "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# 引入包\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "from torchvision import datasets, transforms" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:root:Torch was already hooked... 
skipping hooking process\n" + ] + } + ], + "source": [ + "# 引入工作者\n", + "import syft as sy # <-- NEW: import the Pysyft library\n", + "hook = sy.TorchHook(torch) # <-- NEW: hook PyTorch ie add extra functionalities to support Federated Learning\n", + "bob = sy.VirtualWorker(hook, id=\"bob\") # <-- NEW: define remote worker bob\n", + "alice = sy.VirtualWorker(hook, id=\"alice\") # <-- NEW: and alice" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# 定义相关的参数\n", + "class Arguments():\n", + " def __init__(self):\n", + " self.batch_size = 64\n", + " self.test_batch_size = 1000\n", + " self.epochs = 1\n", + " self.lr = 0.01\n", + " self.momentum = 0.5\n", + " self.no_cuda = False\n", + " self.seed = 1\n", + " self.log_interval = 30\n", + " self.save_model = False\n", + "\n", + "args = Arguments()\n", + "\n", + "use_cuda = not args.no_cuda and torch.cuda.is_available()\n", + "\n", + "torch.manual_seed(args.seed)\n", + "\n", + "device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n", + "\n", + "kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}" + ] + }, + { + "source": [ + "## 1 数据加载并发送给工作人员\n", + "我们首先加载数据,然后使用该.federate方法将训练数据集转换为跨工作人员划分的联合数据集。现在,该联合数据集已提供给Federated DataLoader。测试数据集保持不变。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader \n", + " datasets.MNIST('./data', train=True, download=True,\n", + " transform=transforms.Compose([\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.1307,), (0.3081,))\n", + " ]))\n", + " .federate((bob, alice)), # <-- NEW: we distribute the dataset across all the workers, it's now a FederatedDataset\n", + " batch_size=args.batch_size, shuffle=True, **kwargs)\n", + "\n", + "test_loader = torch.utils.data.DataLoader(\n", + " datasets.MNIST('./data', train=False, transform=transforms.Compose([\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.1307,), (0.3081,))\n", + " ])),\n", + " batch_size=args.test_batch_size, shuffle=True, **kwargs)" + ] + }, + { + "source": [ + "## 3.1 定义网络模型\n", + "我们使用与官方示例中完全相同的CNN。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.conv1 = nn.Conv2d(1, 20, 5, 1)\n", + " self.conv2 = nn.Conv2d(20, 50, 5, 1)\n", + " self.fc1 = nn.Linear(4*4*50, 500)\n", + " self.fc2 = nn.Linear(500, 10)\n", + "\n", + " def forward(self, x):\n", + " x = F.relu(self.conv1(x))\n", + " x = F.max_pool2d(x, 2, 2)\n", + " x = F.relu(self.conv2(x))\n", + " x = F.max_pool2d(x, 2, 2)\n", + " x = x.view(-1, 4*4*50)\n", + " x = F.relu(self.fc1(x))\n", + " x = self.fc2(x)\n", + " return F.log_softmax(x, dim=1)" + ] + }, + { + "source": [ + "## 3.5 定义训练和测试函数\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def train(args, model, device, federated_train_loader, optimizer, epoch):\n", + " model.train()\n", + " for batch_idx, (data, target) in enumerate(federated_train_loader): # <-- now it is a distributed dataset\n", + " model.send(data.location) # <-- NEW: send the model to the right location\n", + " data, target = data.to(device), 
target.to(device)\n", + " optimizer.zero_grad()\n", + " output = model(data)\n", + " loss = F.nll_loss(output, target)\n", + " loss.backward()\n", + " optimizer.step()\n", + " model.get() # <-- NEW: get the model back\n", + " if batch_idx % args.log_interval == 0:\n", + " loss = loss.get() # <-- NEW: get the loss back\n", + " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", + " epoch, batch_idx * args.batch_size, len(federated_train_loader) * args.batch_size,\n", + " 100. * batch_idx / len(federated_train_loader), loss.item()))" + ] + }, + { + "source": [ + "## 4 定义测试函数" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def test(args, model, device, test_loader):\n", + " model.eval()\n", + " test_loss = 0\n", + " correct = 0\n", + " with torch.no_grad():\n", + " for data, target in test_loader:\n", + " data, target = data.to(device), target.to(device)\n", + " output = model(data)\n", + " test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss\n", + " pred = output.argmax(1, keepdim=True) # get the index of the max log-probability \n", + " correct += pred.eq(target.view_as(pred)).sum().item()\n", + "\n", + " test_loss /= len(test_loader.dataset)\n", + "\n", + " print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n", + " test_loss, correct, len(test_loader.dataset),\n", + " 100. * correct / len(test_loader.dataset)))" + ] + }, + { + "source": [ + "## 5 训练过程" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Train Epoch: 1 [0/60032 (0%)]\tLoss: 2.305134\n", + "Train Epoch: 1 [1920/60032 (3%)]\tLoss: 2.156802\n", + "Train Epoch: 1 [3840/60032 (6%)]\tLoss: 1.896611\n", + "Train Epoch: 1 [5760/60032 (10%)]\tLoss: 1.440396\n", + "Train Epoch: 1 [7680/60032 (13%)]\tLoss: 0.867146\n", + "Train Epoch: 1 [9600/60032 (16%)]\tLoss: 0.654510\n", + "Train Epoch: 1 [11520/60032 (19%)]\tLoss: 0.593326\n", + "Train Epoch: 1 [13440/60032 (22%)]\tLoss: 0.455695\n", + "Train Epoch: 1 [15360/60032 (26%)]\tLoss: 0.370734\n", + "Train Epoch: 1 [17280/60032 (29%)]\tLoss: 0.303786\n", + "Train Epoch: 1 [19200/60032 (32%)]\tLoss: 0.312873\n", + "Train Epoch: 1 [21120/60032 (35%)]\tLoss: 0.369926\n", + "Train Epoch: 1 [23040/60032 (38%)]\tLoss: 0.237502\n", + "Train Epoch: 1 [24960/60032 (42%)]\tLoss: 0.187541\n", + "Train Epoch: 1 [26880/60032 (45%)]\tLoss: 0.523300\n", + "Train Epoch: 1 [28800/60032 (48%)]\tLoss: 0.225545\n", + "Train Epoch: 1 [30720/60032 (51%)]\tLoss: 0.144113\n", + "Train Epoch: 1 [32640/60032 (54%)]\tLoss: 0.267781\n", + "Train Epoch: 1 [34560/60032 (58%)]\tLoss: 0.186732\n", + "Train Epoch: 1 [36480/60032 (61%)]\tLoss: 0.303092\n", + "Train Epoch: 1 [38400/60032 (64%)]\tLoss: 0.239764\n", + "Train Epoch: 1 [40320/60032 (67%)]\tLoss: 0.254949\n", + "Train Epoch: 1 [42240/60032 (70%)]\tLoss: 0.193145\n", + "Train Epoch: 1 [44160/60032 (74%)]\tLoss: 0.174407\n", + "Train Epoch: 1 [46080/60032 (77%)]\tLoss: 0.220799\n", + "Train Epoch: 1 [48000/60032 (80%)]\tLoss: 0.324493\n", + "Train Epoch: 1 [49920/60032 (83%)]\tLoss: 0.275526\n", + "Train Epoch: 1 [51840/60032 (86%)]\tLoss: 0.130198\n", + "Train Epoch: 1 [53760/60032 (90%)]\tLoss: 0.183578\n", + "Train Epoch: 1 [55680/60032 (93%)]\tLoss: 0.222406\n", + "Train Epoch: 1 [57600/60032 (96%)]\tLoss: 0.081534\n", + 
"Train Epoch: 1 [59520/60032 (99%)]\tLoss: 0.143510\n", + "\n", + "Test set: Average loss: 0.1576, Accuracy: 9511/10000 (95%)\n", + "\n", + "Wall time: 1min 37s\n" + ] + } + ], + "source": [ + "%%time\n", + "model = Net().to(device)\n", + "optimizer = optim.SGD(model.parameters(), lr=args.lr) # TODO momentum is not supported at the moment\n", + "\n", + "for epoch in range(1, args.epochs + 1):\n", + " train(args, model, device, federated_train_loader, optimizer, epoch)\n", + " test(args, model, device, test_loader)\n", + "\n", + "if (args.save_model):\n", + " torch.save(model.state_dict(), \"mnist_cnn.pt\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/16 Pysyft 联邦数据实例.ipynb b/pytorch/官方教程/16 Pysyft 联邦数据实例.ipynb new file mode 100644 index 00000000..aab4e644 --- /dev/null +++ b/pytorch/官方教程/16 Pysyft 联邦数据实例.ipynb @@ -0,0 +1,181 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "metadata": { + "interpreter": { + "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Setting up Sandbox...\n", + "Done!\n" + ] + } + ], + "source": [ + "import torch as th\n", + "import syft as sy\n", + "sy.create_sandbox(globals(), verbose=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## 1 导入数据\n", + "grid用来帮忙管理数据。(sandbox)。" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "boston_data = grid.search(\"#boston\", \"#data\")\n", + "boston_target = grid.search(\"#boston\", \"#target\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(Wrapper)>[PointerTensor | me:27894392138 -> alice:84849301825]\n\tTags: .. #boston_housing #housing _boston_dataset: #boston #data \n\tShape: torch.Size([84, 13])\n\tDescription: .. 
_boston_dataset:...\n" + ] + } + ], + "source": [ + "# 3 定义模型\n", + "n_features = boston_data['alice'][0].shape[1]\n", + "n_targets = 1\n", + "\n", + "model = th.nn.Linear(n_features, n_targets)\n", + "print(boston_data['alice'][0])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['bob', 'theo', 'jason', 'alice', 'andy', 'jon']\n" + ] + } + ], + "source": [ + "# 3 定义多个数据和多个优化器\n", + "# Cast the result in BaseDatasets\n", + "datasets = []\n", + "for worker in boston_data.keys():\n", + " dataset = sy.BaseDataset(boston_data[worker][0], boston_target[worker][0])\n", + " datasets.append(dataset)\n", + "\n", + "# Build the FederatedDataset object\n", + "dataset = sy.FederatedDataset(datasets)\n", + "print(dataset.workers)\n", + "optimizers = {}\n", + "for worker in dataset.workers:\n", + " optimizers[worker] = th.optim.Adam(params=model.parameters(),lr=1e-2)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "train_loader = sy.FederatedDataLoader(dataset, batch_size=32, shuffle=False, drop_last=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Train Epoch: 1 [0/16 (0%)]\tBatch loss: 56.270569\n", + "Train Epoch: 1 [8/16 (50%)]\tBatch loss: 199.320129\n", + "Train Epoch: 1 [16/16 (100%)]\tBatch loss: 359.854309\n", + "Total loss 4278.3408851623535\n" + ] + } + ], + "source": [ + "# 对分布式数据进行训练\n", + "epochs = 1\n", + "for epoch in range(1, epochs + 1):\n", + " loss_accum = 0\n", + " for batch_idx, (data, target) in enumerate(train_loader):\n", + " model.send(data.location)\n", + " \n", + " optimizer = optimizers[data.location.id]\n", + " optimizer.zero_grad()\n", + " pred = model(data)\n", + " loss = ((pred.view(-1) - target)**2).mean()\n", + " loss.backward()\n", + " optimizer.step()\n", + " \n", + " model.get()\n", + " loss = loss.get()\n", + " \n", + " loss_accum += float(loss)\n", + " \n", + " if batch_idx % 8 == 0:\n", + " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tBatch loss: {:.6f}'.format(\n", + " epoch, batch_idx, len(train_loader),\n", + " 100. 
* batch_idx / len(train_loader), loss.item())) \n", + " \n", + " print('Total loss', loss_accum)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/17 Pysyft 计划操作.ipynb b/pytorch/官方教程/17 Pysyft 计划操作.ipynb new file mode 100644 index 00000000..b283e6d8 --- /dev/null +++ b/pytorch/官方教程/17 Pysyft 计划操作.ipynb @@ -0,0 +1,588 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "metadata": { + "interpreter": { + "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## 1 How plans work\n", + "A plan turns a function into a pointer to a remote function: the computation is described once with local operations and can then be executed remotely.\n", + "\n", + "Take gradient descent as an example. I may not be able to control the exact code a client executes, because I can only transmit data.\n", + "\n", + "In that case I can transmit a plan instead, and the client executes the code contained in the plan to carry out gradient descent and the training process.\n", + "\n", + "The client does not need to know in advance what to do, how to train the model, or which hyperparameters to use: all of that is packaged into the plan, handed to the client, and executed there." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Make the worker of the current process act as the server\n", + "import syft as sy # import the Pysyft library\n", + "hook = sy.TorchHook(torch) # hook PyTorch ie add extra functionalities \n", + "\n", + "\n", + "# IMPORTANT: Local worker should not be a client worker\n", + "hook.local_worker.is_client_worker = False\n", + "\n", + "\n", + "server = hook.local_worker" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the client workers and their data\n", + "x11 = torch.tensor([-1, 2.]).tag('input_data')\n", + "x12 = torch.tensor([1, -2.]).tag('input_data2')\n", + "x21 = torch.tensor([-1, 2.]).tag('input_data')\n", + "x22 = torch.tensor([1, -2.]).tag('input_data2')\n", + "\n", + "device_1 = sy.VirtualWorker(hook, id=\"device_1\", data=(x11, x12)) \n", + "device_2 = sy.VirtualWorker(hook, id=\"device_2\", data=(x21, x22))\n", + "devices = device_1, device_2" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Define a plan function\n", + "@sy.func2plan()\n", + "def plan_double_abs(x):\n", + " x = x + x\n", + " x = torch.abs(x)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "\n", + "def plan_double_abs():\n", + " return " + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "plan_double_abs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-1., 2.])\n", + "\tTags: input_data \n", + "\tShape: torch.Size([2])" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "pointer_to_data = device_1.search('input_data')[0]\n", + "pointer_to_data" + ] + }, + {
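"source": [ + "Before a plan can be sent it has to be built: the function is run once on example data and the sequence of tensor operations is recorded, so that an operation list, rather than Python code, is what gets shipped to the worker. The next cells demonstrate this. As a rough mental model only (a self-contained sketch; TracedValue, build and replay are made-up names, not the PySyft API), tracing works like this:" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Conceptual sketch of tracing -- NOT the PySyft API.\n", + "# build() runs the function once on example data while recording each\n", + "# primitive operation on a tape; replay() re-executes the tape on new input.\n", + "class TracedValue:\n", + " def __init__(self, value, tape):\n", + " self.value = value\n", + " self.tape = tape\n", + " def __add__(self, other):\n", + " self.tape.append(('add', other))\n", + " return TracedValue(self.value + other, self.tape)\n", + " def __abs__(self):\n", + " self.tape.append(('abs', None))\n", + " return TracedValue(abs(self.value), self.tape)\n", + "\n", + "def build(func, example):\n", + " tape = []\n", + " func(TracedValue(example, tape)) # trace once\n", + " return tape # this list is what gets shipped\n", + "\n", + "def replay(tape, x):\n", + " for op, arg in tape:\n", + " x = x + arg if op == 'add' else abs(x)\n", + " return x\n", + "\n", + "tape = build(lambda x: abs(x + 3), example=1.0)\n", + "print(tape) # [('add', 3), ('abs', None)]\n", + "print(replay(tape, -5.0)) # 2.0" + ] + }, + {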
"cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "False" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "plan_double_abs.is_built" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "A plan needs to be built before being sent to a worker.\n" + ] + } + ], + "source": [ + "# Sending non-built Plan will fail\n", + "try:\n", + " plan_double_abs.send(device_1)\n", + "except RuntimeError as error:\n", + " print(error)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "# 使用设备上的数据构建plan计划\n", + "plan_double_abs.build(torch.tensor([1., -2.]))\n", + "plan_double_abs.is_built" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[PointerPlan | me:99455482860 -> device_1:95989162597]" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "# This cell is executed successfully\n", + "pointer_plan = plan_double_abs.send(device_1)\n", + "pointer_plan" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(Wrapper)>[PointerTensor | me:71552086868 -> device_1:41296790067]\n" + ] + } + ], + "source": [ + "# 运行指针上的计算\n", + "pointer_to_result = pointer_plan(pointer_to_data)\n", + "print(pointer_to_result)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([2., 4.])" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ], + "source": [ + "pointer_to_result.get()" + ] + }, + { + "source": [ + "## 2 具体实例\n", + "\n", + "减少通信的方法:通过计划," + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "class Net(sy.Plan):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.fc1 = nn.Linear(2, 3)\n", + " self.fc2 = nn.Linear(3, 2)\n", + "\n", + " def forward(self, x):\n", + " x = F.relu(self.fc1(x))\n", + " x = self.fc2(x)\n", + " return F.log_softmax(x, dim=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\ndef Net():\n return \n" + ] + } + ], + "source": [ + "net = Net()\n", + "print(net)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# 使用数据构建计划\n", + "net.build(torch.tensor([1., 2.]))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[PointerPlan | me:96392609349 -> device_1:55855385282]" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "# 将数据发送给远程设备\n", + "pointer_to_net = net.send(device_1)\n", + "pointer_to_net\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "pointer_to_data = device_1.search('input_data')[0]" + ] + }, + { + 
"cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(Wrapper)>[PointerTensor | me:25850645014 -> device_1:63083167324]" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "pointer_to_result = pointer_to_net(pointer_to_data)\n", + "pointer_to_result" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-1.3050, -0.3163], requires_grad=True)" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "pointer_to_result.get()" + ] + }, + { + "source": [ + "## 3 在工作人员之间进行切换" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "class Net(sy.Plan):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.fc1 = nn.Linear(2, 3)\n", + " self.fc2 = nn.Linear(3, 2)\n", + "\n", + " def forward(self, x):\n", + " x = F.relu(self.fc1(x))\n", + " x = self.fc2(x)\n", + " return F.log_softmax(x, dim=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "net = Net()\n", + "\n", + "# Build plan\n", + "net.build(torch.tensor([1., 2.]))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-0.6172, -0.7753], requires_grad=True)" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "# 在设备1上构建计划\n", + "pointer_to_net_1 = net.send(device_1)\n", + "pointer_to_data = device_1.search('input_data')[0]\n", + "pointer_to_result = pointer_to_net_1(pointer_to_data)\n", + "pointer_to_result.get()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([-0.6172, -0.7753], requires_grad=True)" + ] + }, + "metadata": {}, + "execution_count": 24 + } + ], + "source": [ + "# 在设备2上构建计划\n", + "pointer_to_net_2 = net.send(device_2)\n", + "pointer_to_data = device_2.search('input_data')[0]\n", + "pointer_to_result = pointer_to_net_2(pointer_to_data)\n", + "pointer_to_result.get()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "## 4 自动构建计划\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ], + "source": [ + "@sy.func2plan(args_shape=[(-1, 1)])\n", + "def plan_double_abs(x):\n", + " x = x + x\n", + " x = torch.abs(x)\n", + " return x\n", + "\n", + "plan_double_abs.is_built" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ], + "source": [ + "@sy.func2plan(args_shape=[(1, 2), (-1, 2)])\n", + "def plan_sum_abs(x, y):\n", + " s = x + y\n", + " return torch.abs(s)\n", + "\n", + "plan_sum_abs.is_built" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "@sy.func2plan(args_shape=[(1,)], state=(torch.tensor([1]), ))\n", + "def plan_abs(x, state):\n", + " bias, = 
state.read()\n", + " x = x.abs()\n", + " return x + bias" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([2, 1])" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ], + "source": [ + "pointer_plan = plan_abs.send(device_1)\n", + "x_ptr = torch.tensor([-1, 0]).send(device_1)\n", + "p = pointer_plan(x_ptr)\n", + "p.get()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/18 Pysyft 协议操作.ipynb b/pytorch/官方教程/18 Pysyft 协议操作.ipynb new file mode 100644 index 00000000..08c958bd --- /dev/null +++ b/pytorch/官方教程/18 Pysyft 协议操作.ipynb @@ -0,0 +1,380 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "metadata": { + "interpreter": { + "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## 协议简介\n", + "协议协调一系列计划,将其部署在远距离的工作人员上,并一次通过。\n", + "\n", + "它是一个高级对象,其中包含分布在多个工作程序中的复杂计算的逻辑。协议的主要特征是能够在工作人员之间发送/搜索/取回,并最终部署到已确定的工作人员的能力。因此,用户可以设计协议,然后将其上载到Cloud Worker,其他任何Worker都可以搜索,下载并在其所连接的Worker上应用其包含的计算程序。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "## 1 创建部署\n", + "通过提供成对列表来创建协议(worker, plan)。worker可以是实际工作人员,也可以是工作人员ID,也可以是表示虚拟工作人员的字符串。在创建时可以使用后一种情况来指定部署时同一工作人员应拥有(或不拥有)两个计划。plan可以是计划或PointerPlan。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch as th\n", + "import syft as sy\n", + "hook = sy.TorchHook(th)\n", + "\n", + "# IMPORTANT: Local worker should not be a client worker\n", + "hook.local_worker.is_client_worker = False" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# 定义一系列计划\n", + "@sy.func2plan(args_shape=[(1,)])\n", + "def inc1(x):\n", + " return x + 1\n", + "\n", + "@sy.func2plan(args_shape=[(1,)])\n", + "def inc2(x):\n", + " return x + 1\n", + "\n", + "@sy.func2plan(args_shape=[(1,)])\n", + "def inc3(x):\n", + " return x + 1\n", + "\n", + "protocol = sy.Protocol([(\"worker1\", inc1), (\"worker2\", inc2), (\"worker3\", inc3)])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "\n", + " - alice: [PointerPlan | me:67767771677 -> alice:59400908940]\n", + " - bob: [PointerPlan | me:73508574980 -> bob:39724673319]\n", + " - charlie: [PointerPlan | me:21217029022 -> charlie:68100273012]" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "# 将协议绑定到worker\n", + "bob = sy.VirtualWorker(hook, id=\"bob\")\n", + "alice = sy.VirtualWorker(hook, id=\"alice\")\n", + "charlie = sy.VirtualWorker(hook, id=\"charlie\")\n", + "\n", + "workers = alice, bob, charlie\n", + "protocol.deploy(*workers)" + ] + }, + { + "source": [ + "## 2. 
运行协议\n", + "运行协议意味着依次执行所有计划。为此,您提供一些输入数据,该数据将发送到第一个计划位置。运行第一个计划,并将其输出移到第二个计划位置,依此类推。运行完所有计划后,将返回最终结果,该结果由指向最后一个计划位置的指针组成。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "send alice\nmove alice -> bob\nmove bob -> charlie\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(Wrapper)>[PointerTensor | me:18000403678 -> charlie:54564292641]" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "x = th.tensor([1.0])\n", + "ptr = protocol.run(x)\n", + "ptr" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([4.])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "# 输入1.0经历了3个计划,因此增加了3倍,这就是为什么它现在等于4.0的原因!\n", + "ptr.get()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# 运行远程协议。protocol也能发送到远程。\n", + "james = sy.VirtualWorker(hook, id=\"james\")\n", + "protocol.send(james)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "send remote run request to james\nsend alice\nmove alice -> bob\nmove bob -> charlie\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(Wrapper)>[PointerTensor | me:53893386564 -> james:23188330447]" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "x = th.tensor([1.0]).send(james)\n", + "ptr = protocol.run(x)\n", + "ptr" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(Wrapper)>[PointerTensor | me:23188330447 -> charlie:73651850441]" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "ptr = ptr.get()\n", + "ptr" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([4.])" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "ptr = ptr.get()\n", + "ptr" + ] + }, + { + "source": [ + "## 3. 
搜索协议\n", + "在实际设置中,您可能希望下载一个远程协议,将其部署在您的工作程序上并与您的数据一起运行:" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "protocol = sy.Protocol([(\"worker1\", inc1), (\"worker2\", inc2), (\"worker3\", inc3)])\n", + "protocol.tag('my_protocol')\n", + "protocol.send(james)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# 得到当前的worker\n", + "me = sy.hook.local_worker # get access to me as a local worker" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[[PointerProtocol | me:125920778 -> james:69669625366]]" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ], + "source": [ + "# 在james上搜索协议\n", + "responses = me.request_search(['my_protocol'], location=james)\n", + "responses" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# 得到第一条协议\n", + "ptr_protocol = responses[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "\n", + " - me: \n", + "def inc1(arg_1):\n", + " _2 = _1.__add__(1)\n", + " return _2\n", + " - me: \n", + "def inc2(arg_1):\n", + " _2 = _1.__add__(1)\n", + " return _2\n", + " - me: \n", + "def inc3(arg_1):\n", + " _2 = _1.__add__(1)\n", + " return _2" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "# 去除协议中的计划\n", + "protocol_back = ptr_protocol.get()\n", + "protocol_back" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "send alice\nmove alice -> bob\nmove bob -> charlie\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([4.])" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "# 运行协议\n", + "protocol_back.deploy(alice, bob, charlie)\n", + "\n", + "x = th.tensor([1.0])\n", + "ptr = protocol_back.run(x)\n", + "ptr.get()" + ] + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/19 Pysyft 加密操作.ipynb b/pytorch/官方教程/19 Pysyft 加密操作.ipynb new file mode 100644 index 00000000..bfcf5e44 --- /dev/null +++ b/pytorch/官方教程/19 Pysyft 加密操作.ipynb @@ -0,0 +1,546 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "metadata": { + "interpreter": { + "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "## 步骤1 使用安全多方计算加密\n", + "\n", + "将一个值拆分成多个值。(例子中是3方的值)。只有这三个值合起来才能知道原来的值是多少。也可以拆成多个其他的值。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Q = 1234567891011\n", + "x = 25\n", + "\n", + "import random\n", + "\n", + "def encrypt(x):\n", + " share_a = random.randint(-Q,Q)\n", + " share_b = 
random.randint(-Q,Q)\n", + " share_c = (x - share_a - share_b) % Q\n", + " return (share_a, share_b, share_c)\n", + "encrypt(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def decrypt(*shares):\n", + " return sum(shares) % Q\n", + "a,b,c = encrypt(25)\n", + "decrypt(a, b, c)" + ] + }, + { + "source": [ + "## 步骤2 使用SMPC的基本算法" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x = encrypt(25)\n", + "y = encrypt(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def add(x, y):\n", + " z = list()\n", + " # the first worker adds their shares together\n", + " z.append((x[0] + y[0]) % Q)\n", + " \n", + " # the second worker adds their shares together\n", + " z.append((x[1] + y[1]) % Q)\n", + " \n", + " # the third worker adds their shares together\n", + " z.append((x[2] + y[2]) % Q)\n", + " \n", + " return z" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "decrypt(*add(x,y))" + ] + }, + { + "source": [ + "## 步骤3 使用SMPC pysyft" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import syft as sy\n", + "hook = sy.TorchHook(torch)\n", + "\n", + "bob = sy.VirtualWorker(hook, id=\"bob\")\n", + "alice = sy.VirtualWorker(hook, id=\"alice\")\n", + "bill = sy.VirtualWorker(hook, id=\"bill\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([25])\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([25])" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "# 基本加密/解密\n", + "# 加密就像获取任何PySyft张量并调用.share()一样简单。解密就像在共享变量上调用.get()一样简单\n", + "x = torch.tensor([25])\n", + "print(x)\n", + "encrypted_x = x.share(bob, alice,bill)\n", + "encrypted_x.get()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{}" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ], + "source": [ + "# 内省加密值\n", + "# 如果我们仔细观察鲍勃,爱丽丝和比尔的工人,我们可以看到所创造的份额!\n", + "bob._objects" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "x = torch.tensor([25]).share(bob, alice,bill)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1689317694142364100])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "# Bob's share\n", + "bobs_share = list(bob._objects.values())[0]\n", + "bobs_share" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([3665791413858103916])" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "# Alice's share\n", + "alices_share = list(alice._objects.values())[0]\n", + "alices_share" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + 
"tensor([3868262928854307817])" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "# Bill's share\n", + "bills_share = list(bill._objects.values())[0]\n", + "bills_share" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([25])" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "# 解密数据\n", + "Q = x.child.field\n", + "\n", + "(bobs_share + alices_share + bills_share) % Q" + ] + }, + { + "source": [ + "## 步骤4 加密运算" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "## 加法运算" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([30])\ntensor([20])\n" + ] + } + ], + "source": [ + "# 加法运算\n", + "x = torch.tensor([25]).share(bob,alice)\n", + "y = torch.tensor([5]).share(bob,alice)\n", + "\n", + "z = x + y\n", + "print(z.get())\n", + "\n", + "z = x - y\n", + "print(z.get())" + ] + }, + { + "source": [ + "## 乘法运算\n", + "对于乘法,我们需要一个额外的参与者,该参与者负责一致地生成随机数(并且不与任何其他参与者进行合谋)。我们称此人为“密码提供者”。对于所有密集用途,加密提供者只是一个额外的VirtualWorker,但必须承认加密提供者不是“所有者”,因为他/她不拥有股份,而是需要信任才能避免串通的人与任何现有股东。\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "crypto_provider = sy.VirtualWorker(hook, id=\"crypto_provider\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "x = torch.tensor([25]).share(bob,alice, crypto_provider=crypto_provider)\n", + "y = torch.tensor([5]).share(bob,alice, crypto_provider=crypto_provider)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([125])" + ] + }, + "metadata": {}, + "execution_count": 12 + } + ], + "source": [ + "# multiplication\n", + "\n", + "z = x * y\n", + "z.get()" + ] + }, + { + "source": [ + "## 矩阵乘法" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# 矩阵乘法\n", + "x = torch.tensor([[1, 2],[3,4]]).share(bob,alice, crypto_provider=crypto_provider)\n", + "y = torch.tensor([[2, 0],[0,2]]).share(bob,alice, crypto_provider=crypto_provider)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[2, 4],\n", + " [6, 8]])" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ], + "source": [ + "# matrix multiplication\n", + "\n", + "z = x.mm(y)\n", + "z.get()" + ] + }, + { + "source": [ + "## 加密比较\n", + "\n", + "私有值之间的私有比较也是可能的。我们在这里依靠SecureNN协议,其详细信息可以在这里找到。比较的结果也是私有共享张量。" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "x = torch.tensor([25]).share(bob,alice, crypto_provider=crypto_provider)\n", + "y = torch.tensor([5]).share(bob,alice, crypto_provider=crypto_provider)\n", + "\n", + "z = x > y\n", + "z.get()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": 
"source": [ + "## Matrix multiplication" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# Shared matrices\n", + "x = torch.tensor([[1, 2],[3,4]]).share(bob,alice, crypto_provider=crypto_provider)\n", + "y = torch.tensor([[2, 0],[0,2]]).share(bob,alice, crypto_provider=crypto_provider)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([[2, 4],\n", + " [6, 8]])" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ], + "source": [ + "# matrix multiplication\n", + "\n", + "z = x.mm(y)\n", + "z.get()" + ] + }, + { + "source": [ + "## Encrypted comparison\n", + "\n", + "Private comparison between private values is also possible. Here we rely on the SecureNN protocol (see the SecureNN paper for details). The result of a comparison is also a private shared tensor." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1])" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "x = torch.tensor([25]).share(bob,alice, crypto_provider=crypto_provider)\n", + "y = torch.tensor([5]).share(bob,alice, crypto_provider=crypto_provider)\n", + "\n", + "z = x > y\n", + "z.get()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([0])" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "z = x <= y\n", + "z.get()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([1])" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "z = x == y + 20\n", + "z.get()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([4])" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "x = torch.tensor([2, 3, 4, 1]).share(bob,alice, crypto_provider=crypto_provider)\n", + "x.max().get()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensor([4, 3])" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "x = torch.tensor([[2, 3], [4, 1]]).share(bob,alice, crypto_provider=crypto_provider)\n", + "max_values, max_ids = x.max(dim=0)\n", + "max_values.get()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/20 Pysyft 联邦安全聚合实例.ipynb b/pytorch/官方教程/20 Pysyft 联邦安全聚合实例.ipynb new file mode 100644 index 00000000..00b42e54 --- /dev/null +++ b/pytorch/官方教程/20 Pysyft 联邦安全聚合实例.ipynb @@ -0,0 +1,652 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4", + "display_name": "Python 3.8.8 64-bit ('pysyft': conda)" + }, + "metadata": { + "interpreter": { + "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Plain federated learning\n", + "\n", + "## 1 Set up the parameters" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "from torch.utils.data import TensorDataset, DataLoader\n", + "\n", + "class Parser:\n", + " \"\"\"Parameters for training\"\"\"\n", + " def __init__(self):\n", + " self.epochs = 10\n", + " self.lr = 0.001\n", + " self.test_batch_size = 8\n", + " self.batch_size = 8\n", + " self.log_interval = 10\n", + " self.seed = 1\n", + " \n", + "args = Parser()\n", + "\n", + "torch.manual_seed(args.seed)\n", + "kwargs = {}" + ] + }, + { + "source": [ + "## 2 Load the dataset" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "with open('./data/BostonHousing/boston_housing.pickle','rb') as f:\n", + " ((X, y), (X_test, y_test)) = pickle.load(f)\n", + "\n", + "X = torch.from_numpy(X).float()\n", + "y = torch.from_numpy(y).float()\n", + "X_test = torch.from_numpy(X_test).float()\n", + "y_test = torch.from_numpy(y_test).float()\n", + "# preprocessing\n", + "mean = X.mean(0, keepdim=True)\n", + "dev = X.std(0, keepdim=True)\n", + "mean[:, 3] = 0. # the feature at column 3 is binary,\n", + "dev[:, 3] = 1. # so we don't standardize it\n", + "X = (X - mean) / dev\n", + "X_test = (X_test - mean) / dev\n", + "train = TensorDataset(X, y)\n", + "test = TensorDataset(X_test, y_test)\n", + "train_loader = DataLoader(train, batch_size=args.batch_size, shuffle=True, **kwargs)\n", + "test_loader = DataLoader(test, batch_size=args.test_batch_size, shuffle=True, **kwargs)" + ] + }, + { + "source": [ + "## 3 Define the neural network" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.fc1 = nn.Linear(13, 32)\n", + " self.fc2 = nn.Linear(32, 24)\n", + " self.fc3 = nn.Linear(24, 1)\n", + "\n", + " def forward(self, x):\n", + " x = x.view(-1, 13)\n", + " x = F.relu(self.fc1(x))\n", + " x = F.relu(self.fc2(x))\n", + " x = self.fc3(x)\n", + " return x\n", + "\n", + "model = Net()\n", + "optimizer = optim.SGD(model.parameters(), lr=args.lr)" + ] + }, + { + "source": [ + "## 4 Configure PySyft" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import syft as sy\n", + "\n", + "hook = sy.TorchHook(torch)\n", + "bob = sy.VirtualWorker(hook, id=\"bob\")\n", + "alice = sy.VirtualWorker(hook, id=\"alice\")\n", + "james = sy.VirtualWorker(hook, id=\"james\")\n", + "\n", + "compute_nodes = [bob, alice]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Send the data to the workers\n", + "# Normally the workers would already hold their data; we send it manually here purely for demonstration\n", + "\n", + "train_distributed_dataset = []\n", + "\n", + "for batch_idx, (data,target) in enumerate(train_loader):\n", + " data = data.send(compute_nodes[batch_idx % len(compute_nodes)])\n", + " target = target.send(compute_nodes[batch_idx % len(compute_nodes)])\n", + " train_distributed_dataset.append((data, target))" + ] + }, + { + "source": [ + "## 5 Define the training function" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def train(epoch):\n", + " model.train()\n", + " for batch_idx, (data,target) in enumerate(train_distributed_dataset):\n", + " worker = data.location\n", + " model.send(worker)\n", + "\n", + " optimizer.zero_grad()\n", + " # update the model\n", + " pred = model(data)\n", + " loss = F.mse_loss(pred.view(-1), target)\n", + " loss.backward()\n", + " optimizer.step()\n", + " model.get()\n", + " \n", + " if batch_idx % args.log_interval == 0:\n", + " loss = loss.get()\n", + " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", + " epoch, batch_idx * data.shape[0], len(train_loader),\n", + " 100. 
* batch_idx / len(train_loader), loss.item()))\n", + " \n" + ] + }, + { + "source": [ + "## 6 Define the test function" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def test():\n", + " model.eval()\n", + " test_loss = 0\n", + " for data, target in test_loader:\n", + " output = model(data)\n", + " test_loss += F.mse_loss(output.view(-1), target, reduction='sum').item() # sum up batch loss\n", + " pred = output.data.max(1, keepdim=True)[1] # leftover from the classification example; unused in this regression task\n", + " \n", + " test_loss /= len(test_loader.dataset)\n", + " print('\\nTest set: Average loss: {:.4f}\\n'.format(test_loss))" + ] + }, + { + "source": [ + "## 7 Run the training" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Train Epoch: 1 [0/51 (0%)]\tLoss: 499.737000\n", + "Train Epoch: 1 [80/51 (20%)]\tLoss: 444.433716\n", + "Train Epoch: 1 [160/51 (39%)]\tLoss: 332.781464\n", + "Train Epoch: 1 [240/51 (59%)]\tLoss: 133.071625\n", + "Train Epoch: 1 [320/51 (78%)]\tLoss: 202.246490\n", + "Train Epoch: 1 [200/51 (98%)]\tLoss: 16.212315\n", + "Train Epoch: 2 [0/51 (0%)]\tLoss: 43.216484\n", + "Train Epoch: 2 [80/51 (20%)]\tLoss: 8.341544\n", + "Train Epoch: 2 [160/51 (39%)]\tLoss: 21.476643\n", + "Train Epoch: 2 [240/51 (59%)]\tLoss: 24.701031\n", + "Train Epoch: 2 [320/51 (78%)]\tLoss: 135.487106\n", + "Train Epoch: 2 [200/51 (98%)]\tLoss: 16.858931\n", + "Train Epoch: 3 [0/51 (0%)]\tLoss: 31.058376\n", + "Train Epoch: 3 [80/51 (20%)]\tLoss: 6.284318\n", + "Train Epoch: 3 [160/51 (39%)]\tLoss: 16.468420\n", + "Train Epoch: 3 [240/51 (59%)]\tLoss: 20.231100\n", + "Train Epoch: 3 [320/51 (78%)]\tLoss: 109.964317\n", + "Train Epoch: 3 [200/51 (98%)]\tLoss: 14.596965\n", + "Train Epoch: 4 [0/51 (0%)]\tLoss: 31.536047\n", + "Train Epoch: 4 [80/51 (20%)]\tLoss: 6.582750\n", + "Train Epoch: 4 [160/51 (39%)]\tLoss: 14.647826\n", + "Train Epoch: 4 [240/51 (59%)]\tLoss: 15.352848\n", + "Train Epoch: 4 [320/51 (78%)]\tLoss: 90.883652\n", + "Train Epoch: 4 [200/51 (98%)]\tLoss: 13.207232\n", + "Train Epoch: 5 [0/51 (0%)]\tLoss: 31.258972\n", + "Train Epoch: 5 [80/51 (20%)]\tLoss: 7.210756\n", + "Train Epoch: 5 [160/51 (39%)]\tLoss: 13.199810\n", + "Train Epoch: 5 [240/51 (59%)]\tLoss: 11.943946\n", + "Train Epoch: 5 [320/51 (78%)]\tLoss: 78.488693\n", + "Train Epoch: 5 [200/51 (98%)]\tLoss: 12.271082\n", + "Train Epoch: 6 [0/51 (0%)]\tLoss: 29.944614\n", + "Train Epoch: 6 [80/51 (20%)]\tLoss: 7.750119\n", + "Train Epoch: 6 [160/51 (39%)]\tLoss: 11.655151\n", + "Train Epoch: 6 [240/51 (59%)]\tLoss: 10.470257\n", + "Train Epoch: 6 [320/51 (78%)]\tLoss: 70.250122\n", + "Train Epoch: 6 [200/51 (98%)]\tLoss: 11.836300\n", + "Train Epoch: 7 [0/51 (0%)]\tLoss: 29.140289\n", + "Train Epoch: 7 [80/51 (20%)]\tLoss: 7.879148\n", + "Train Epoch: 7 [160/51 (39%)]\tLoss: 9.965446\n", + "Train Epoch: 7 [240/51 (59%)]\tLoss: 9.868064\n", + "Train Epoch: 7 [320/51 (78%)]\tLoss: 64.482697\n", + "Train Epoch: 7 [200/51 (98%)]\tLoss: 11.616804\n", + "Train Epoch: 8 [0/51 (0%)]\tLoss: 28.429148\n", + "Train Epoch: 8 [80/51 (20%)]\tLoss: 8.152041\n", + "Train Epoch: 8 [160/51 (39%)]\tLoss: 8.601873\n", + "Train Epoch: 8 [240/51 (59%)]\tLoss: 9.669197\n", + "Train Epoch: 8 [320/51 (78%)]\tLoss: 59.962414\n", + "Train Epoch: 8 [200/51 (98%)]\tLoss: 11.714998\n", + "Train Epoch: 9 [0/51 (0%)]\tLoss: 28.531382\n", + "Train Epoch: 9 [80/51 (20%)]\tLoss: 8.310322\n", + "Train Epoch: 9 [160/51 (39%)]\tLoss: 7.499841\n", + "Train Epoch: 9 [240/51 (59%)]\tLoss: 9.452386\n", + "Train Epoch: 9 [320/51 (78%)]\tLoss: 56.551567\n", + "Train Epoch: 9 [200/51 (98%)]\tLoss: 11.322638\n", + "Train Epoch: 10 [0/51 (0%)]\tLoss: 28.026115\n", + "Train Epoch: 10 [80/51 (20%)]\tLoss: 8.640520\n", + "Train Epoch: 10 [160/51 (39%)]\tLoss: 6.647425\n", + "Train Epoch: 10 [240/51 (59%)]\tLoss: 9.245568\n", + "Train Epoch: 10 [320/51 (78%)]\tLoss: 53.482048\n", + "Train Epoch: 10 [200/51 (98%)]\tLoss: 10.839175\n", + "Total 7.91 s\n", + "\n", + "Test set: Average loss: 20.7677\n", + "\n" + ] + } + ], + "source": [ + "import time\n", + "\n", + "t = time.time()\n", + "\n", + "for epoch in range(1, args.epochs + 1):\n", + " train(epoch)\n", + "\n", + " \n", + "total_time = time.time() - t\n", + "print('Total', round(total_time, 2), 's')\n", + "\n", + "test()" + ] + }, + { + "source": [ + "# Adding encrypted aggregation\n", + "\n", + "## 1 Modify the training logic" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "remote_dataset = (list(),list())\n", + "\n", + "train_distributed_dataset = []\n", + "\n", + "for batch_idx, (data,target) in enumerate(train_loader):\n", + " data = data.send(compute_nodes[batch_idx % len(compute_nodes)])\n", + " target = target.send(compute_nodes[batch_idx % len(compute_nodes)])\n", + " remote_dataset[batch_idx % len(compute_nodes)].append((data, target))\n", + "\n", + "def update(data, target, model, optimizer):\n", + " model.send(data.location)\n", + " optimizer.zero_grad()\n", + " pred = model(data)\n", + " loss = F.mse_loss(pred.view(-1), target)\n", + " loss.backward()\n", + " optimizer.step()\n", + " return model\n", + "\n", + "bobs_model = Net()\n", + "alices_model = Net()\n", + "\n", + "bobs_optimizer = optim.SGD(bobs_model.parameters(), lr=args.lr)\n", + "alices_optimizer = optim.SGD(alices_model.parameters(), lr=args.lr)\n", + "\n", + "models = [bobs_model, alices_model]\n", + "params = [list(bobs_model.parameters()), list(alices_model.parameters())]\n", + "optimizers = [bobs_optimizer, alices_optimizer]" + ] + }, + { + "source": [ + "## 2 Train" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# this is selecting which batch to train on\n", + "data_index = 0\n", + "# update remote models\n", + "# we could iterate this multiple times before proceeding, but we're only iterating once per worker here\n", + "for remote_index in range(len(compute_nodes)):\n", + " data, target = remote_dataset[remote_index][data_index]\n", + " models[remote_index] = update(data, target, models[remote_index], optimizers[remote_index])\n" + ] + }, + { + "source": [ + "## 3 Encrypted aggregation" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# create a list where we'll deposit our encrypted model average\n", + "new_params = list()" + ] + }, + {
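"source": [ + "The idea of this step: each worker secret-shares its parameters, the shares are aggregated, and only the aggregate is ever decrypted, so no individual update is revealed. A self-contained sketch of that idea in plain Python additive sharing over a ring (illustrative values only, not the fix_precision()/share() pipeline used in the next cell):" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Conceptual sketch of secure aggregation -- not PySyft's implementation.\n", + "import random\n", + "\n", + "Q = 2**62 # ring size\n", + "\n", + "def share(v, n=2):\n", + " shares = [random.randrange(Q) for _ in range(n - 1)]\n", + " shares.append((v - sum(shares)) % Q)\n", + " return shares\n", + "\n", + "# Two workers' parameter updates, already encoded as integers\n", + "# (floats would first need a fixed-precision encoding).\n", + "bob_param, alice_param = 103, 97\n", + "summed = [(b + a) % Q for b, a in zip(share(bob_param), share(alice_param))]\n", + "print((sum(summed) % Q) / 2) # 100.0 -- only the average is revealed" + ] + }, + {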
"cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "error", + "ename": "AttributeError", + "evalue": "'numpy.ndarray' object has no attribute 'owner'", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + 
"\u001b[1;32mD:\\anaconda\\envs\\pysyft\\lib\\site-packages\\syft\\generic\\frameworks\\hook\\hook_args.py\u001b[0m in \u001b[0;36mregister_response\u001b[1;34m(attr, response, response_ids, owner)\u001b[0m\n\u001b[0;32m 663\u001b[0m \u001b[1;31m# Load the utility function to register the response and transform tensors with pointers\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 664\u001b[1;33m \u001b[0mregister_response_function\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mregister_response_functions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mattr_id\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 665\u001b[0m \u001b[1;31m# Try running it\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: 'numpy'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[1;31m# to use Integers to store decimal information. In other words,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[1;31m# we need to use \"Fixed Precision\" encoding.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 14\u001b[1;33m \u001b[0mfixed_precision_param\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcopy_of_parameter\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfix_precision\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 15\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 16\u001b[0m \u001b[1;31m# now we encrypt it on the remote machine. 
Note that\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mD:\\anaconda\\envs\\pysyft\\lib\\site-packages\\syft\\frameworks\\torch\\tensors\\interpreters\\native.py\u001b[0m in \u001b[0;36mfix_prec\u001b[1;34m(self, storage, field_type, no_wrap, *args, **kwargs)\u001b[0m\n\u001b[0;32m 765\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 766\u001b[0m \u001b[0mmax_precision\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_get_maximum_precision\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 767\u001b[1;33m \u001b[0mneed_large_prec\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_requires_large_precision\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmax_precision\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbase\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mprec_fractional\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 768\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 769\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mstorage\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"crt\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mD:\\anaconda\\envs\\pysyft\\lib\\site-packages\\syft\\frameworks\\torch\\tensors\\interpreters\\native.py\u001b[0m in \u001b[0;36m_requires_large_precision\u001b[1;34m(self, max_precision, base, precision_fractional)\u001b[0m\n\u001b[0;32m 837\u001b[0m \u001b[1;31m# We need to use NumPy here as log2 is not yet implemented for LongTensor PyTorch objects\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 838\u001b[0m return np.any(\n\u001b[1;32m--> 839\u001b[1;33m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlog2\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mabs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclone\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnumpy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mbase_fractional\u001b[0m \u001b[1;33m>\u001b[0m \u001b[0mmax_precision\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 840\u001b[0m )\n\u001b[0;32m 841\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32mD:\\anaconda\\envs\\pysyft\\lib\\site-packages\\syft\\generic\\frameworks\\hook\\trace.py\u001b[0m in \u001b[0;36mtrace_wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 81\u001b[0m \u001b[0msyft\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhook\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrace\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlogs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresponse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 82\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 83\u001b[1;33m \u001b[0mresponse\u001b[0m \u001b[1;33m=\u001b[0m 
func(*args, **kwargs)\n     84 \n     85     return response\n",
+        "(intermediate frames condensed: the call descends through syft's hook layer (overloaded_native_method, overloaded_pointer_method), the worker messaging layer (send_command, send_msg, _send_msg, _recv_msg, recv_msg, execute_tensor_command, execute_computation_action) and into hook_args.register_response)\n",
+        "D:\\anaconda\\envs\\pysyft\\lib\\site-packages\\syft\\generic\\frameworks\\hook\\hook_args.py in register_tensor(tensor, owner, response_ids)\n--> 713     tensor.owner = owner\n",
+        "AttributeError: 'numpy.ndarray' object has no attribute 'owner'"
+       ]
+      }
+     ],
+     "source": [
+      "# iterate through each parameter\n",
+      "for param_i in range(len(params[0])):\n",
+      "\n",
+      "    # for each worker\n",
+      "    spdz_params = list()\n",
+      "    for remote_index in range(len(compute_nodes)):\n",
+      "\n",
+      "        # select the identical parameter from each worker and copy it\n",
+      "        copy_of_parameter = params[remote_index][param_i].copy()\n",
+      "\n",
+      "        # since SMPC can only work with integers (not floats), we need\n",
+      "        # integers to store the decimal information. In other words,\n",
+      "        # we need to use \"Fixed Precision\" encoding.\n",
+      "        fixed_precision_param = copy_of_parameter.fix_precision()\n",
+      "\n",
+      "        # now we encrypt it on the remote machine. Note that\n",
+      "        # fixed_precision_param is ALREADY a pointer. Thus, when\n",
+      "        # we call share, it actually encrypts the data that the\n",
+      "        # pointer is pointing TO. This returns a POINTER to the\n",
+      "        # MPC secret shared object, which we need to fetch.\n",
+      "        encrypted_param = fixed_precision_param.share(bob, alice, crypto_provider=james)\n",
+      "\n",
+      "        # now we fetch the pointer to the MPC shared value\n",
+      "        param = encrypted_param.get()\n",
+      "\n",
+      "        # save the parameter so we can average it with the same parameter\n",
+      "        # from the other workers\n",
+      "        spdz_params.append(param)\n",
+      "\n",
+      "    # average params from multiple workers, fetch them to the local machine,\n",
+      "    # decrypt and decode (from fixed precision) back into a floating point number\n",
+      "    new_param = (spdz_params[0] + spdz_params[1]).get().float_precision()/2\n",
+      "\n",
+      "    # save the new averaged parameter\n",
+      "    new_params.append(new_param)"
+     ]
+    },
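+    {
+     "source": [
+      "What the encrypted aggregation computes is just a per-parameter mean. As a sanity check, the sketch below does the same averaging in the clear (no SMPC at all); it assumes the `params` pointer lists and `compute_nodes` defined earlier in this notebook and simply fetches local copies:"
+     ],
+     "cell_type": "markdown",
+     "metadata": {}
+    },
+    {
+     "cell_type": "code",
+     "execution_count": null,
+     "metadata": {},
+     "outputs": [],
+     "source": [
+      "# Plaintext sanity check of the secure averaging above (a sketch, no SMPC).\n",
+      "# Fetches a local copy of each worker's parameter and averages it directly.\n",
+      "cleartext_avg = []\n",
+      "for param_i in range(len(params[0])):\n",
+      "    copies = [\n",
+      "        params[remote_index][param_i].copy().get()\n",
+      "        for remote_index in range(len(compute_nodes))\n",
+      "    ]\n",
+      "    # the SPDZ sum divided by the number of workers equals this mean\n",
+      "    cleartext_avg.append(torch.stack(copies).mean(dim=0))"
+     ]
+    },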
+    {
+     "source": [
+      "## 4 Cleanup"
+     ],
+     "cell_type": "markdown",
+     "metadata": {}
+    },
+    {
+     "cell_type": "code",
+     "execution_count": 17,
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "error",
+       "ename": "IndexError",
+       "evalue": "list index out of range",
+       "traceback": [
+        "IndexError                                Traceback (most recent call last)\n",
+        "<ipython-input> in <module>\n      9 for remote_index in range(len(compute_nodes)):\n     10     for param_index in range(len(params[remote_index])):\n---> 11         params[remote_index][param_index].set_(new_params[param_index])\n",
+        "IndexError: list index out of range"
+       ]
+      }
+     ],
+     "source": [
+      "with torch.no_grad():\n",
+      "    for model in params:\n",
+      "        for param in model:\n",
+      "            param *= 0\n",
+      "\n",
+      "    for model in models:\n",
+      "        model.get()\n",
+      "\n",
+      "    # set_ copies the averaged values into the existing parameter tensors.\n",
+      "    # It fails here with an IndexError because the aggregation cell above\n",
+      "    # errored out before new_params was filled.\n",
+      "    for remote_index in range(len(compute_nodes)):\n",
+      "        for param_index in range(len(params[remote_index])):\n",
+      "            params[remote_index][param_index].set_(new_params[param_index])"
+     ]
+    },
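+    {
+     "source": [
+      "`set_` swaps the data of an existing tensor in place, so every reference to that parameter (including the ones held by the optimizers) sees the new values. A minimal torch-only illustration, independent of syft:"
+     ],
+     "cell_type": "markdown",
+     "metadata": {}
+    },
+    {
+     "cell_type": "code",
+     "execution_count": null,
+     "metadata": {},
+     "outputs": [],
+     "source": [
+      "# Minimal sketch of in-place set_: the tensor object keeps its identity,\n",
+      "# only its storage is replaced, so aliases observe the update.\n",
+      "w = torch.zeros(2, 2)      # stands in for a model parameter\n",
+      "alias = w                  # e.g. the reference an optimizer holds\n",
+      "w.set_(torch.ones(2, 2))\n",
+      "print(alias)               # all ones: the alias sees the new data"
+     ]
+    },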
+    {
+     "source": [
+      "## 5 Final training"
+     ],
+     "cell_type": "markdown",
+     "metadata": {}
+    },
+    {
+     "cell_type": "code",
+     "execution_count": 18,
+     "metadata": {},
+     "outputs": [],
+     "source": [
+      "def train(epoch):\n",
+      "    for data_index in range(len(remote_dataset[0])-1):\n",
+      "        # update remote models\n",
+      "        for remote_index in range(len(compute_nodes)):\n",
+      "            data, target = remote_dataset[remote_index][data_index]\n",
+      "            models[remote_index] = update(data, target, models[remote_index], optimizers[remote_index])\n",
+      "\n",
+      "        # encrypted aggregation\n",
+      "        new_params = list()\n",
+      "        for param_i in range(len(params[0])):\n",
+      "            spdz_params = list()\n",
+      "            for remote_index in range(len(compute_nodes)):\n",
+      "                spdz_params.append(params[remote_index][param_i].copy().fix_precision().share(bob, alice, crypto_provider=james).get())\n",
+      "\n",
+      "            new_param = (spdz_params[0] + spdz_params[1]).get().float_precision()/2\n",
+      "            new_params.append(new_param)\n",
+      "\n",
+      "        # cleanup\n",
+      "        with torch.no_grad():\n",
+      "            for model in params:\n",
+      "                for param in model:\n",
+      "                    param *= 0\n",
+      "\n",
+      "            for model in models:\n",
+      "                model.get()\n",
+      "\n",
+      "            for remote_index in range(len(compute_nodes)):\n",
+      "                for param_index in range(len(params[remote_index])):\n",
+      "                    params[remote_index][param_index].set_(new_params[param_index])"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "execution_count": 19,
+     "metadata": {},
+     "outputs": [],
+     "source": [
+      "def test():\n",
+      "    models[0].eval()\n",
+      "    test_loss = 0\n",
+      "    for data, target in test_loader:\n",
+      "        output = models[0](data)\n",
+      "        test_loss += F.mse_loss(output.view(-1), target, reduction='sum').item() # sum up batch loss\n",
+      "        pred = output.data.max(1, keepdim=True)[1] # index of the max log-probability (computed but unused here)\n",
+      "\n",
+      "    test_loss /= len(test_loader.dataset)\n",
+      "    print('Test set: Average loss: {:.4f}\\n'.format(test_loss))"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "execution_count": 20,
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "name": "stdout",
+       "text": [
+        "Epoch 1\n"
+       ]
+      },
+      {
+       "output_type": "error",
+       "ename": "AttributeError",
+       "evalue": "'numpy.ndarray' object has no attribute 'owner'",
+       "traceback": [
+        "AttributeError                            Traceback (most recent call last)\n",
+        "<ipython-input> in <module>\n----> 5     train(epoch)\n",
+        "<ipython-input> in train(epoch)\n---> 13             spdz_params.append(params[remote_index][param_i].copy().fix_precision().share(bob, alice, crypto_provider=james).get())\n",
+        "D:\\anaconda\\envs\\pysyft\\lib\\site-packages\\syft\\frameworks\\torch\\tensors\\interpreters\\native.py in fix_prec(self, storage, field_type, no_wrap, *args, **kwargs)\n--> 767     need_large_prec = self._requires_large_precision(max_precision, base, prec_fractional)\n",
+        "(intermediate frames condensed: _requires_large_precision calls np.log2/np.any, and the resulting numpy array travels back through syft's hook and worker layers into hook_args.register_tensor)\n",
+        "D:\\anaconda\\envs\\pysyft\\lib\\site-packages\\syft\\generic\\frameworks\\hook\\hook_args.py in register_tensor(tensor, owner, response_ids)\n--> 713     tensor.owner = owner\n",
+        "AttributeError: 'numpy.ndarray' object has no attribute 'owner'"
+       ]
+      }
+     ],
+     "source": [
+      "t = time.time()\n",
+      "\n",
+      "for epoch in range(args.epochs):\n",
+      "    print(f\"Epoch {epoch + 1}\")\n",
+      "    train(epoch)\n",
+      "    test()\n",
+      "\n",
+      "\n",
+      "total_time = time.time() - t\n",
+      "print('Total', round(total_time, 2), 's')"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "execution_count": null,
+     "metadata": {},
+     "outputs": [],
+     "source": []
+    }
+   ]
+  }
\ No newline at end of file
diff --git a/pytorch/官方教程/21 Pysyft 安全数据分类实例.ipynb b/pytorch/官方教程/21 Pysyft 安全数据分类实例.ipynb
new file mode 100644
index 00000000..4131dc9f
--- /dev/null
+++ b/pytorch/官方教程/21 Pysyft 安全数据分类实例.ipynb
@@ -0,0 +1,286 @@
+{
+ "metadata": {
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  },
+  "orig_nbformat": 2,
+  "kernelspec": {
+   "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4",
+   "display_name": "Python 3.8.8 64-bit ('pysyft': conda)"
+  },
+  "metadata": {
+   "interpreter": {
+    "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+  {
+   "source": [
+    "## 1 Import packages\n"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "import torch.optim as optim\n",
+    "from torchvision import datasets, transforms"
+   ]
+  },
+  {
+   "source": [
+    "## 2 Set up federated learning"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import syft as sy\n",
+    "hook = sy.TorchHook(torch) \n",
+    "client = sy.VirtualWorker(hook, id=\"client\")\n",
+    "bob = sy.VirtualWorker(hook, id=\"bob\")\n",
+    "alice = sy.VirtualWorker(hook, id=\"alice\")\n",
+    "crypto_provider = sy.VirtualWorker(hook, id=\"crypto_provider\") "
+   ]
+  },
+  {
+   "source": [
+    "## 3 Set the learning parameters"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "epochs=1\n",
+    "class Arguments():\n",
+    "    def __init__(self):\n",
+    "        self.batch_size = 64\n",
+    "        self.test_batch_size = 50\n",
+    "        self.epochs = epochs\n",
+    "        self.lr = 0.001\n",
+    "        self.log_interval = 100\n",
+    "\n",
+    "args = Arguments()"
+   ]
+  },
+  {
+   "source": [
+    "## 4 Load the data and send it to the workers\n",
+    "\n",
+    "In our setting, we assume that the server has access to some data on which to train its model first: the MNIST training set.\n",
+    "\n",
+    "The client, in turn, holds some data and wants predictions on it from the server's model. The client encrypts its data by additively secret sharing it between the two workers alice and bob."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_loader = torch.utils.data.DataLoader(\n",
+    "    datasets.MNIST('./data', train=True, download=True,\n",
+    "                   transform=transforms.Compose([\n",
+    "                       transforms.ToTensor(),\n",
+    "                       transforms.Normalize((0.1307,), (0.3081,))\n",
+    "                   ])),\n",
+    "    batch_size=args.batch_size, shuffle=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_loader = torch.utils.data.DataLoader(\n",
+    "    datasets.MNIST('./data', train=False,\n",
+    "                   transform=transforms.Compose([\n",
+    "                       transforms.ToTensor(),\n",
+    "                       transforms.Normalize((0.1307,), (0.3081,))\n",
+    "                   ])),\n",
+    "    batch_size=args.test_batch_size, shuffle=True)\n",
+    "\n",
+    "private_test_loader = []\n",
+    "# Secret share each test batch between alice and bob so that, later on,\n",
+    "# inference can run on encrypted values; sending the client's data in\n",
+    "# the clear would defeat the purpose of this setup.\n",
+    "for data, target in test_loader:\n",
+    "    private_test_loader.append((\n",
+    "        data.fix_precision().share(alice, bob, crypto_provider=crypto_provider),\n",
+    "        target.fix_precision().share(alice, bob, crypto_provider=crypto_provider)\n",
+    "    ))"
+   ]
+  },
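+  {
+   "source": [
+    "The remaining cells train and evaluate the model in the clear. The encrypted prediction step that the setup above is built for would, following the official PySyft tutorial this notebook is based on, look roughly like the sketch below (to be run after training, with `model`, `private_test_loader`, `alice`, `bob` and `crypto_provider` as defined in this notebook):"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: encrypted inference on the secret-shared test set.\n",
+    "# The model is fixed-precision encoded and secret shared like the data,\n",
+    "# so the whole forward pass runs on encrypted values.\n",
+    "encrypted_model = model.fix_precision().share(alice, bob, crypto_provider=crypto_provider)\n",
+    "n_correct = 0\n",
+    "for data, target in private_test_loader:\n",
+    "    output = encrypted_model(data)\n",
+    "    pred = output.argmax(dim=1)\n",
+    "    n_correct += pred.eq(target.view_as(pred)).sum()\n",
+    "# only the final count is decrypted, not the individual predictions\n",
+    "print(n_correct.copy().get().float_precision())"
+   ]
+  },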
"## 5 定义神经网络" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.fc1 = nn.Linear(784, 500)\n", + " self.fc2 = nn.Linear(500, 10)\n", + "\n", + " def forward(self, x):\n", + " x = x.view(-1, 784)\n", + " x = self.fc1(x)\n", + " x = F.relu(x)\n", + " x = self.fc2(x)\n", + " return x" + ] + }, + { + "source": [ + "## 6 定义训练方法" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def train(args, model, train_loader, optimizer, epoch):\n", + " model.train()\n", + " for batch_idx, (data, target) in enumerate(train_loader):\n", + " optimizer.zero_grad()\n", + " output = model(data)\n", + " output = F.log_softmax(output, dim=1)\n", + " loss = F.nll_loss(output, target)\n", + " loss.backward()\n", + " optimizer.step()\n", + " if batch_idx % args.log_interval == 0:\n", + " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", + " epoch, batch_idx * args.batch_size, len(train_loader) * args.batch_size,\n", + " 100. * batch_idx / len(train_loader), loss.item()))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Train Epoch: 1 [0/60032 (0%)]\tLoss: 2.364058\n", + "Train Epoch: 1 [6400/60032 (11%)]\tLoss: 0.197535\n", + "Train Epoch: 1 [12800/60032 (21%)]\tLoss: 0.219191\n", + "Train Epoch: 1 [19200/60032 (32%)]\tLoss: 0.206442\n", + "Train Epoch: 1 [25600/60032 (43%)]\tLoss: 0.062148\n", + "Train Epoch: 1 [32000/60032 (53%)]\tLoss: 0.186572\n", + "Train Epoch: 1 [38400/60032 (64%)]\tLoss: 0.050391\n", + "Train Epoch: 1 [44800/60032 (75%)]\tLoss: 0.134885\n", + "Train Epoch: 1 [51200/60032 (85%)]\tLoss: 0.140885\n", + "Train Epoch: 1 [57600/60032 (96%)]\tLoss: 0.162966\n" + ] + } + ], + "source": [ + "model = Net()\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)\n", + "\n", + "for epoch in range(1, args.epochs + 1):\n", + " train(args, model, train_loader, optimizer, epoch)" + ] + }, + { + "source": [ + "## 7 定义测试方法" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "def test(args, model, test_loader):\n", + " model.eval()\n", + " test_loss = 0\n", + " correct = 0\n", + " with torch.no_grad():\n", + " for data, target in test_loader:\n", + " output = model(data)\n", + " output = F.log_softmax(output, dim=1)\n", + " test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss\n", + " pred = output.argmax(1, keepdim=True) # get the index of the max log-probability \n", + " correct += pred.eq(target.view_as(pred)).sum().item()\n", + "\n", + " test_loss /= len(test_loader.dataset)\n", + "\n", + " print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n", + " test_loss, correct, len(test_loader.dataset),\n", + " 100. 
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def test(args, model, test_loader):\n",
+    "    model.eval()\n",
+    "    test_loss = 0\n",
+    "    correct = 0\n",
+    "    with torch.no_grad():\n",
+    "        for data, target in test_loader:\n",
+    "            output = model(data)\n",
+    "            output = F.log_softmax(output, dim=1)\n",
+    "            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss\n",
+    "            pred = output.argmax(1, keepdim=True) # get the index of the max log-probability\n",
+    "            correct += pred.eq(target.view_as(pred)).sum().item()\n",
+    "\n",
+    "    test_loss /= len(test_loader.dataset)\n",
+    "\n",
+    "    print('\\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\\n'.format(\n",
+    "        test_loss, correct, len(test_loader.dataset),\n",
+    "        100. * correct / len(test_loader.dataset)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "\nTest set: Average loss: 0.0991, Accuracy: 9692/10000 (97%)\n\n"
+     ]
+    }
+   ],
+   "source": [
+    "test(args, model, test_loader)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ]
+}
\ No newline at end of file
diff --git a/pytorch/官方教程/22 Pysyft 安全模型分类实例.ipynb b/pytorch/官方教程/22 Pysyft 安全模型分类实例.ipynb
new file mode 100644
index 00000000..bfc0179a
--- /dev/null
+++ b/pytorch/官方教程/22 Pysyft 安全模型分类实例.ipynb
@@ -0,0 +1,191 @@
+{
+ "metadata": {
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  },
+  "orig_nbformat": 2,
+  "kernelspec": {
+   "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4",
+   "display_name": "Python 3.8.8 64-bit ('pysyft': conda)"
+  },
+  "metadata": {
+   "interpreter": {
+    "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+  {
+   "source": [
+    "# Secure model classification example\r\n",
+    "\r\n",
+    "In this notebook we use all the techniques learned so far to train (and predict with) a neural network while both the model and the data stay encrypted.\r\n",
+    "\r\n",
+    "## 1 Create data and set up syft"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "import torch.optim as optim\n",
+    "import syft as sy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set everything up\n",
+    "hook = sy.TorchHook(torch) \n",
+    "\n",
+    "alice = sy.VirtualWorker(id=\"alice\", hook=hook)\n",
+    "bob = sy.VirtualWorker(id=\"bob\", hook=hook)\n",
+    "james = sy.VirtualWorker(id=\"james\", hook=hook)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A Toy Dataset\n",
+    "data = torch.tensor([[0,0],[0,1],[1,0],[1,1.]])\n",
+    "target = torch.tensor([[0],[0],[1],[1.]])\n",
+    "\n",
+    "# A Toy Model\n",
+    "class Net(nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super(Net, self).__init__()\n",
+    "        self.fc1 = nn.Linear(2, 2)\n",
+    "        self.fc2 = nn.Linear(2, 1)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = self.fc1(x)\n",
+    "        x = F.relu(x)\n",
+    "        x = self.fc2(x)\n",
+    "        return x\n",
+    "model = Net()"
+   ]
+  },
+  {
+   "source": [
+    "## 2 Encrypt the model and data"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We encode everything\n",
+    "data = data.fix_precision().share(bob, alice, crypto_provider=james, requires_grad=True)\n",
+    "target = target.fix_precision().share(bob, alice, crypto_provider=james, requires_grad=True)\n",
+    "model = model.fix_precision().share(bob, alice, crypto_provider=james, requires_grad=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "(Wrapper)>AutogradTensor>FixedPrecisionTensor>[AdditiveSharingTensor]\n\t-> [PointerTensor | me:37577169362 -> bob:13920372678]\n\t-> [PointerTensor | me:72102385192 -> alice:93469225639]\n\t*crypto provider: james*\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(data)"
+   ]
+  },
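+  {
+   "source": [
+    "The printed chain shows the wrapping order: autograd on top of fixed-precision encoding on top of additive shares held by bob and alice. To check that the encoding round-trips, one can decrypt a copy (a sketch; `copy()` is used so the original shares stay intact):"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: get() collects the additive shares, float_precision() decodes\n",
+    "# the fixed-point integers back into floats.\n",
+    "print(data.copy().get().float_precision())"
+   ]
+  },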
+  {
+   "source": [
+    "## 3 Training"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "tensor(1.0490)\n",
+      "tensor(0.9820)\n",
+      "tensor(0.9470)\n",
+      "tensor(0.8940)\n",
+      "tensor(0.8300)\n",
+      "tensor(0.7630)\n",
+      "tensor(0.6830)\n",
+      "tensor(0.5860)\n",
+      "tensor(0.5040)\n",
+      "tensor(0.3940)\n",
+      "tensor(0.2920)\n",
+      "tensor(0.2050)\n",
+      "tensor(0.1720)\n",
+      "tensor(0.1150)\n",
+      "tensor(0.0940)\n",
+      "tensor(0.0740)\n",
+      "tensor(0.0550)\n",
+      "tensor(0.0550)\n",
+      "tensor(0.0330)\n",
+      "tensor(0.0370)\n"
+     ]
+    }
+   ],
+   "source": [
+    "opt = optim.SGD(params=model.parameters(),lr=0.1).fix_precision()\n",
+    "\n",
+    "for iter in range(20):\n",
+    "    # 1) erase previous gradients (if they exist)\n",
+    "    opt.zero_grad()\n",
+    "\n",
+    "    # 2) make a prediction\n",
+    "    pred = model(data)\n",
+    "\n",
+    "    # 3) calculate how much we missed\n",
+    "    loss = ((pred - target)**2).sum()\n",
+    "\n",
+    "    # 4) figure out which weights caused us to miss\n",
+    "    loss.backward()\n",
+    "\n",
+    "    # 5) change those weights\n",
+    "    opt.step()\n",
+    "\n",
+    "    # 6) print our progress (only the loss is decrypted, for logging)\n",
+    "    print(loss.get().float_precision())"
+   ]
+  },
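+  {
+   "source": [
+    "Once training has converged, the owner can pull the model back and decode it. A sketch of that last step (assuming this syft build supports `get`/`float_precision` on hooked modules, as the corresponding tensor calls above suggest):"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: reassemble and decode the trained model in the clear.\n",
+    "# get() collects every parameter's shares from bob and alice,\n",
+    "# float_precision() decodes the fixed-point weights back to floats.\n",
+    "decrypted_model = model.get().float_precision()\n",
+    "print(list(decrypted_model.parameters()))"
+   ]
+  }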
+ ]
+}
\ No newline at end of file
diff --git a/pytorch/官方教程/23 Pysyft 安全Mnist分类实例.ipynb b/pytorch/官方教程/23 Pysyft 安全Mnist分类实例.ipynb
new file mode 100644
index 00000000..fde7cecc
--- /dev/null
+++ b/pytorch/官方教程/23 Pysyft 安全Mnist分类实例.ipynb
@@ -0,0 +1,477 @@
+{
+ "metadata": {
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  },
+  "orig_nbformat": 2,
+  "kernelspec": {
+   "name": "python388jvsc74a57bd057f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4",
+   "display_name": "Python 3.8.8 64-bit ('pysyft': conda)"
+  },
+  "metadata": {
+   "interpreter": {
+    "hash": "57f55249afac9e3bb90b27c0916a1d44f0a08c86299e4ac4c83ac98b0a805cf4"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "cells": [
+  {
+   "source": [
+    "# Secure training and evaluation\n",
+    "\n",
+    "When building machine-learning-as-a-service (MLaaS) solutions, a company may need to ask other partners for access to data in order to train its model. In health or finance, both the model and the data are critical: the model parameters are a business asset, while the data is strictly regulated personal data.\n",
+    "\n",
+    "In this context, one possible solution is to encrypt both the model and the data, and to train the machine learning model on the encrypted values. This guarantees, for example, that the company never sees the patients' medical records, and that the health facilities cannot observe the model they contribute to. Several encryption schemes allow computation over encrypted data, among them secure multi-party computation (SMPC), homomorphic encryption (FHE/SHE) and functional encryption (FE). We focus here on secure multi-party computation (introduced in tutorial 5), which consists of private additive sharing and relies on the crypto protocols SecureNN and SPDZ.\n",
+    "\n",
+    "The exact setting of this tutorial is the following: you are the server and you want to train your model on data held by n workers. The server secret shares its model and sends each share to a worker. The workers also secret share their data and exchange the shares among themselves. In the configuration we study there are two workers, alice and bob. After the exchange, each of them holds one of its own shares, a share of the other worker's data and a share of the model. Computation can now begin, using the appropriate crypto protocols to train the model privately. Once the model is trained, all the shares can be sent back to the server for decryption."
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
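+  {
+   "source": [
+    "Concretely, additive secret sharing splits a value x into random shares that sum to x modulo a large ring size Q; no single share reveals anything about x, yet the workers can add their shares locally and still reconstruct the correct sum. A minimal arithmetic sketch in pure Python (Q is an arbitrary modulus chosen for this illustration, not syft's internal field size):"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import random\n",
+    "\n",
+    "Q = 2**62  # ring size: an arbitrary large modulus for this sketch\n",
+    "\n",
+    "def share(x):\n",
+    "    s1 = random.randrange(Q)\n",
+    "    s2 = (x - s1) % Q\n",
+    "    return s1, s2  # one share per worker; each alone is uniform noise\n",
+    "\n",
+    "def reconstruct(s1, s2):\n",
+    "    return (s1 + s2) % Q\n",
+    "\n",
+    "a1, a2 = share(25)\n",
+    "b1, b2 = share(17)\n",
+    "# each worker adds the shares it holds locally; the sums reconstruct to a + b\n",
+    "assert reconstruct((a1 + b1) % Q, (a2 + b2) % Q) == 42"
+   ]
+  },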
return private_train_loader, private_test_loader\n", + " \n", + " \n", + "private_train_loader, private_test_loader = get_private_data_loaders(\n", + " precision_fractional=args.precision_fractional,\n", + " workers=workers,\n", + " crypto_provider=crypto_provider\n", + ")" + ] + }, + { + "source": [ + "## 3 实现模型\n", + "\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.fc1 = nn.Linear(28 * 28, 128)\n", + " self.fc2 = nn.Linear(128, 64)\n", + " self.fc3 = nn.Linear(64, 10)\n", + "\n", + " def forward(self, x):\n", + " x = x.view(-1, 28 * 28)\n", + " x = F.relu(self.fc1(x))\n", + " x = F.relu(self.fc2(x))\n", + " x = self.fc3(x)\n", + " return x" + ] + }, + { + "source": [ + "## 4 训练和测试" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def train(args, model, private_train_loader, optimizer, epoch):\n", + " model.train()\n", + " for batch_idx, (data, target) in enumerate(private_train_loader): # <-- now it is a private dataset\n", + " start_time = time.time()\n", + " \n", + " optimizer.zero_grad()\n", + " \n", + " output = model(data)\n", + " \n", + " # loss = F.nll_loss(output, target) <-- not possible here\n", + " batch_size = output.shape[0]\n", + " loss = ((output - target)**2).sum().refresh()/batch_size\n", + " \n", + " loss.backward()\n", + " \n", + " optimizer.step()\n", + "\n", + " if batch_idx % args.log_interval == 0:\n", + " loss = loss.get().float_precision()\n", + " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}\\tTime: {:.3f}s'.format(\n", + " epoch, batch_idx * args.batch_size, len(private_train_loader) * args.batch_size,\n", + " 100. * batch_idx / len(private_train_loader), loss.item(), time.time() - start_time))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def test(args, model, private_test_loader):\n", + " model.eval()\n", + " test_loss = 0\n", + " correct = 0\n", + " with torch.no_grad():\n", + " for data, target in private_test_loader:\n", + " start_time = time.time()\n", + " \n", + " output = model(data)\n", + " pred = output.argmax(dim=1)\n", + " correct += pred.eq(target.view_as(pred)).sum()\n", + "\n", + " correct = correct.get().float_precision()\n", + " print('\\nTest set: Accuracy: {}/{} ({:.0f}%)\\n'.format(\n", + " correct.item(), len(private_test_loader)* args.test_batch_size,\n", + " 100. 
* correct.item() / (len(private_test_loader) * args.test_batch_size)))" + ] + }, + { + "source": [ + "## 5 训练" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Train Epoch: 1 [0/640 (0%)]\tLoss: 1.128000\tTime: 3.911s\n", + "Train Epoch: 1 [64/640 (10%)]\tLoss: 1.012000\tTime: 3.977s\n", + "Train Epoch: 1 [128/640 (20%)]\tLoss: 0.989000\tTime: 4.051s\n", + "Train Epoch: 1 [192/640 (30%)]\tLoss: 0.902000\tTime: 4.048s\n", + "Train Epoch: 1 [256/640 (40%)]\tLoss: 0.888000\tTime: 3.872s\n", + "Train Epoch: 1 [320/640 (50%)]\tLoss: 0.876000\tTime: 3.966s\n", + "Train Epoch: 1 [384/640 (60%)]\tLoss: 0.854000\tTime: 3.994s\n", + "Train Epoch: 1 [448/640 (70%)]\tLoss: 0.853000\tTime: 4.016s\n", + "Train Epoch: 1 [512/640 (80%)]\tLoss: 0.829000\tTime: 4.067s\n", + "Train Epoch: 1 [576/640 (90%)]\tLoss: 0.841000\tTime: 4.133s\n", + "\n", + "Test set: Accuracy: 227.0/640 (35%)\n", + "\n", + "Train Epoch: 2 [0/640 (0%)]\tLoss: 0.781000\tTime: 3.979s\n", + "Train Epoch: 2 [64/640 (10%)]\tLoss: 0.733000\tTime: 3.990s\n", + "Train Epoch: 2 [128/640 (20%)]\tLoss: 0.791000\tTime: 4.032s\n", + "Train Epoch: 2 [192/640 (30%)]\tLoss: 0.717000\tTime: 4.037s\n", + "Train Epoch: 2 [256/640 (40%)]\tLoss: 0.707000\tTime: 4.151s\n", + "Train Epoch: 2 [320/640 (50%)]\tLoss: 0.706000\tTime: 3.998s\n", + "Train Epoch: 2 [384/640 (60%)]\tLoss: 0.709000\tTime: 4.048s\n", + "Train Epoch: 2 [448/640 (70%)]\tLoss: 0.721000\tTime: 4.199s\n", + "Train Epoch: 2 [512/640 (80%)]\tLoss: 0.710000\tTime: 4.113s\n", + "Train Epoch: 2 [576/640 (90%)]\tLoss: 0.743000\tTime: 4.070s\n", + "\n", + "Test set: Accuracy: 360.0/640 (56%)\n", + "\n", + "Train Epoch: 3 [0/640 (0%)]\tLoss: 0.667000\tTime: 3.908s\n", + "Train Epoch: 3 [64/640 (10%)]\tLoss: 0.596000\tTime: 3.894s\n", + "Train Epoch: 3 [128/640 (20%)]\tLoss: 0.692000\tTime: 3.958s\n", + "Train Epoch: 3 [192/640 (30%)]\tLoss: 0.600000\tTime: 3.913s\n", + "Train Epoch: 3 [256/640 (40%)]\tLoss: 0.589000\tTime: 3.933s\n", + "Train Epoch: 3 [320/640 (50%)]\tLoss: 0.590000\tTime: 3.900s\n", + "Train Epoch: 3 [384/640 (60%)]\tLoss: 0.606000\tTime: 3.946s\n", + "Train Epoch: 3 [448/640 (70%)]\tLoss: 0.628000\tTime: 4.041s\n", + "Train Epoch: 3 [512/640 (80%)]\tLoss: 0.619000\tTime: 3.918s\n", + "Train Epoch: 3 [576/640 (90%)]\tLoss: 0.668000\tTime: 3.974s\n", + "\n", + "Test set: Accuracy: 401.0/640 (63%)\n", + "\n", + "Train Epoch: 4 [0/640 (0%)]\tLoss: 0.584000\tTime: 3.958s\n", + "Train Epoch: 4 [64/640 (10%)]\tLoss: 0.499000\tTime: 4.126s\n", + "Train Epoch: 4 [128/640 (20%)]\tLoss: 0.618000\tTime: 3.908s\n", + "Train Epoch: 4 [192/640 (30%)]\tLoss: 0.518000\tTime: 3.876s\n", + "Train Epoch: 4 [256/640 (40%)]\tLoss: 0.512000\tTime: 3.933s\n", + "Train Epoch: 4 [320/640 (50%)]\tLoss: 0.511000\tTime: 3.976s\n", + "Train Epoch: 4 [384/640 (60%)]\tLoss: 0.535000\tTime: 3.972s\n", + "Train Epoch: 4 [448/640 (70%)]\tLoss: 0.562000\tTime: 3.889s\n", + "Train Epoch: 4 [512/640 (80%)]\tLoss: 0.552000\tTime: 3.968s\n", + "Train Epoch: 4 [576/640 (90%)]\tLoss: 0.611000\tTime: 4.015s\n", + "\n", + "Test set: Accuracy: 424.0/640 (66%)\n", + "\n", + "Train Epoch: 5 [0/640 (0%)]\tLoss: 0.525000\tTime: 4.026s\n", + "Train Epoch: 5 [64/640 (10%)]\tLoss: 0.435000\tTime: 4.066s\n", + "Train Epoch: 5 [128/640 (20%)]\tLoss: 0.559000\tTime: 4.073s\n", + "Train Epoch: 5 [192/640 (30%)]\tLoss: 0.459000\tTime: 4.088s\n", + "Train Epoch: 5 [256/640 
(40%)]\tLoss: 0.454000\tTime: 4.103s\n", + "Train Epoch: 5 [320/640 (50%)]\tLoss: 0.451000\tTime: 4.087s\n", + "Train Epoch: 5 [384/640 (60%)]\tLoss: 0.480000\tTime: 4.112s\n", + "Train Epoch: 5 [448/640 (70%)]\tLoss: 0.510000\tTime: 4.130s\n", + "Train Epoch: 5 [512/640 (80%)]\tLoss: 0.501000\tTime: 4.170s\n", + "Train Epoch: 5 [576/640 (90%)]\tLoss: 0.567000\tTime: 4.097s\n", + "\n", + "Test set: Accuracy: 449.0/640 (70%)\n", + "\n", + "Train Epoch: 6 [0/640 (0%)]\tLoss: 0.476000\tTime: 4.153s\n", + "Train Epoch: 6 [64/640 (10%)]\tLoss: 0.387000\tTime: 4.176s\n", + "Train Epoch: 6 [128/640 (20%)]\tLoss: 0.516000\tTime: 4.239s\n", + "Train Epoch: 6 [192/640 (30%)]\tLoss: 0.410000\tTime: 4.286s\n", + "Train Epoch: 6 [256/640 (40%)]\tLoss: 0.412000\tTime: 4.359s\n", + "Train Epoch: 6 [320/640 (50%)]\tLoss: 0.406000\tTime: 4.303s\n", + "Train Epoch: 6 [384/640 (60%)]\tLoss: 0.438000\tTime: 4.292s\n", + "Train Epoch: 6 [448/640 (70%)]\tLoss: 0.471000\tTime: 4.288s\n", + "Train Epoch: 6 [512/640 (80%)]\tLoss: 0.462000\tTime: 4.347s\n", + "Train Epoch: 6 [576/640 (90%)]\tLoss: 0.529000\tTime: 4.359s\n", + "\n", + "Test set: Accuracy: 464.0/640 (72%)\n", + "\n", + "Train Epoch: 7 [0/640 (0%)]\tLoss: 0.434000\tTime: 4.554s\n", + "Train Epoch: 7 [64/640 (10%)]\tLoss: 0.352000\tTime: 4.660s\n", + "Train Epoch: 7 [128/640 (20%)]\tLoss: 0.476000\tTime: 4.629s\n", + "Train Epoch: 7 [192/640 (30%)]\tLoss: 0.378000\tTime: 4.719s\n", + "Train Epoch: 7 [256/640 (40%)]\tLoss: 0.375000\tTime: 4.860s\n", + "Train Epoch: 7 [320/640 (50%)]\tLoss: 0.368000\tTime: 4.745s\n", + "Train Epoch: 7 [384/640 (60%)]\tLoss: 0.403000\tTime: 4.513s\n", + "Train Epoch: 7 [448/640 (70%)]\tLoss: 0.440000\tTime: 4.559s\n", + "Train Epoch: 7 [512/640 (80%)]\tLoss: 0.428000\tTime: 4.649s\n", + "Train Epoch: 7 [576/640 (90%)]\tLoss: 0.499000\tTime: 4.660s\n", + "\n", + "Test set: Accuracy: 469.0/640 (73%)\n", + "\n", + "Train Epoch: 8 [0/640 (0%)]\tLoss: 0.407000\tTime: 4.558s\n", + "Train Epoch: 8 [64/640 (10%)]\tLoss: 0.323000\tTime: 4.625s\n", + "Train Epoch: 8 [128/640 (20%)]\tLoss: 0.447000\tTime: 4.692s\n", + "Train Epoch: 8 [192/640 (30%)]\tLoss: 0.349000\tTime: 5.024s\n", + "Train Epoch: 8 [256/640 (40%)]\tLoss: 0.348000\tTime: 4.977s\n", + "Train Epoch: 8 [320/640 (50%)]\tLoss: 0.342000\tTime: 4.871s\n", + "Train Epoch: 8 [384/640 (60%)]\tLoss: 0.375000\tTime: 4.719s\n", + "Train Epoch: 8 [448/640 (70%)]\tLoss: 0.411000\tTime: 4.706s\n", + "Train Epoch: 8 [512/640 (80%)]\tLoss: 0.403000\tTime: 4.809s\n", + "Train Epoch: 8 [576/640 (90%)]\tLoss: 0.475000\tTime: 4.738s\n", + "\n", + "Test set: Accuracy: 474.0/640 (74%)\n", + "\n", + "Train Epoch: 9 [0/640 (0%)]\tLoss: 0.384000\tTime: 4.954s\n", + "Train Epoch: 9 [64/640 (10%)]\tLoss: 0.301000\tTime: 5.081s\n", + "Train Epoch: 9 [128/640 (20%)]\tLoss: 0.421000\tTime: 5.052s\n", + "Train Epoch: 9 [192/640 (30%)]\tLoss: 0.327000\tTime: 5.100s\n", + "Train Epoch: 9 [256/640 (40%)]\tLoss: 0.325000\tTime: 5.167s\n", + "Train Epoch: 9 [320/640 (50%)]\tLoss: 0.318000\tTime: 5.194s\n", + "Train Epoch: 9 [384/640 (60%)]\tLoss: 0.353000\tTime: 5.207s\n", + "Train Epoch: 9 [448/640 (70%)]\tLoss: 0.391000\tTime: 5.322s\n", + "Train Epoch: 9 [512/640 (80%)]\tLoss: 0.379000\tTime: 5.285s\n", + "Train Epoch: 9 [576/640 (90%)]\tLoss: 0.455000\tTime: 5.268s\n", + "\n", + "Test set: Accuracy: 481.0/640 (75%)\n", + "\n", + "Train Epoch: 10 [0/640 (0%)]\tLoss: 0.363000\tTime: 5.530s\n", + "Train Epoch: 10 [64/640 (10%)]\tLoss: 0.281000\tTime: 5.555s\n", + "Train Epoch: 10 [128/640 
(20%)]\tLoss: 0.398000\tTime: 6.025s\n", + "Train Epoch: 10 [192/640 (30%)]\tLoss: 0.306000\tTime: 5.286s\n", + "Train Epoch: 10 [256/640 (40%)]\tLoss: 0.306000\tTime: 5.300s\n", + "Train Epoch: 10 [320/640 (50%)]\tLoss: 0.296000\tTime: 5.410s\n", + "Train Epoch: 10 [384/640 (60%)]\tLoss: 0.332000\tTime: 5.457s\n", + "Train Epoch: 10 [448/640 (70%)]\tLoss: 0.371000\tTime: 5.527s\n", + "Train Epoch: 10 [512/640 (80%)]\tLoss: 0.356000\tTime: 5.646s\n", + "Train Epoch: 10 [576/640 (90%)]\tLoss: 0.435000\tTime: 5.506s\n", + "\n", + "Test set: Accuracy: 488.0/640 (76%)\n", + "\n" + ] + } + ], + "source": [ + "model = Net()\n", + "model = model.fix_precision().share(*workers, crypto_provider=crypto_provider, requires_grad=True)\n", + "\n", + "optimizer = optim.SGD(model.parameters(), lr=args.lr)\n", + "optimizer = optimizer.fix_precision() \n", + "\n", + "for epoch in range(1, args.epochs + 1):\n", + " train(args, model, private_train_loader, optimizer, epoch)\n", + " test(args, model, private_test_loader)" + ] + }, + { + "source": [ + "## 6 相关讨论\n", + "\n", + "## 6.1计算时间\n", + "\n", + "第一件事显然是运行时间!您肯定已经注意到,它比纯文本训练要慢得多。特别是,在1批64项上进行一次迭代需要3.2 s,而在纯PyTorch中只有13 ms。尽管这似乎是一个阻止程序,但请回想一下,这里的所有事情都是远程发生的,并且是在加密的世界中发生的:没有单个数据项被公开。更具体地说,处理一项的时间为50ms,这还不错。真正的问题是分析何时需要加密训练以及何时仅加密预测就足够了。例如,在生产就绪的情况下,完全可以接受50毫秒执行预测!\n", + "\n", + "一个主要的瓶颈是昂贵的激活功能的使用:SMPC的relu激活非常昂贵,因为它使用私有比较和SecureNN协议。举例说明,如果我们用二次激活代替relu,就像在CryptoNets等加密计算的几篇论文中所做的那样,我们将从3.2s降到1.2s。\n", + "\n", + "通常,要删除的关键思想是仅加密必要的内容,本教程向您展示了它的简单性\n", + "\n", + "## 6.2使用SMPC进行反向传播\n", + "您可能想知道我们如何执行反向传播和梯度更新,尽管我们正在有限域中使用整数。为此,我们开发了一个新的syft张量,称为AutogradTensor。尽管您可能还没有看过本教程,但它还是大量使用它!让我们通过打印模型的重量进行检查:\n", + "\n", + "## 6.3安全保障\n", + "最后,让我们给出一些有关我们在此处实现的安全性的提示:我们在这里考虑的对手是诚实但好奇的:这意味着对手无法通过运行此协议来学习有关数据的任何信息,但是恶意的对手可以仍然偏离协议,例如尝试破坏共享以破坏计算。在此类SMPC计算(包括私有比较)中针对恶意对手的安全性仍然是一个未解决的问题。\n", + "\n", + "此外,即使“安全多方计算”确保不访问培训数据,此处仍然存在来自纯文本世界的许多威胁。例如,当您可以向模型提出请求时(在MLaaS的上下文中),您可以获得可能泄露有关训练数据集信息的预测。特别是,您没有针对成员资格攻击的任何保护措施,这是对机器学习服务的常见攻击,在这种攻击中,对手要确定是否在数据集中使用了特定项目。除此之外,其他攻击,例如意外的记忆过程(模型学习有关数据项的特定特征的模型),模型倒置或提取,仍然是可能的。\n", + "\n", + "对上述许多威胁有效的一种通用解决方案是添加差异隐私。它可以与安全的多方计算完美地结合在一起,并且可以提供非常有趣的安全性保证。我们目前正在研究几种实现方式,并希望提出一个将两者结合起来的示例!" 
+ ], + "cell_type": "markdown", + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/30 快速入门.ipynb b/pytorch/官方教程/30 快速入门.ipynb new file mode 100644 index 00000000..04368ca0 --- /dev/null +++ b/pytorch/官方教程/30 快速入门.ipynb @@ -0,0 +1,363 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python380jvsc74a57bd038740d3277777e2cd7c6c2cc9d8addf5118fdf3f82b1b39231fd12aeac8aee8b", + "display_name": "Python 3.8.0 64-bit" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from torch import nn\n", + "from torch.utils.data import DataLoader\n", + "from torchvision import datasets\n", + "from torchvision.transforms import ToTensor, Lambda, Compose\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Download training data from open datasets.\n", + "training_data = datasets.FashionMNIST(\n", + " root=\"data\",\n", + " train=True,\n", + " download=True,\n", + " transform=ToTensor(),\n", + ")\n", + "\n", + "# Download test data from open datasets.\n", + "test_data = datasets.FashionMNIST(\n", + " root=\"data\",\n", + " train=False,\n", + " download=True,\n", + " transform=ToTensor(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])\nShape of y: torch.Size([64]) torch.int64\n" + ] + } + ], + "source": [ + "batch_size = 64\n", + "\n", + "# Create data loaders.\n", + "train_dataloader = DataLoader(training_data, batch_size=batch_size)\n", + "test_dataloader = DataLoader(test_data, batch_size=batch_size)\n", + "\n", + "for X, y in test_dataloader:\n", + " print(\"Shape of X [N, C, H, W]: \", X.shape)\n", + " print(\"Shape of y: \", y.shape, y.dtype)\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Using cpu device\nNeuralNetwork(\n (flatten): Flatten(start_dim=1, end_dim=-1)\n (linear_relu_stack): Sequential(\n (0): Linear(in_features=784, out_features=512, bias=True)\n (1): ReLU()\n (2): Linear(in_features=512, out_features=512, bias=True)\n (3): ReLU()\n (4): Linear(in_features=512, out_features=10, bias=True)\n (5): ReLU()\n )\n)\n" + ] + } + ], + "source": [ + "# Get cpu or gpu device for training.\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "print(\"Using {} device\".format(device))\n", + "\n", + "# Define model\n", + "class NeuralNetwork(nn.Module):\n", + " def __init__(self):\n", + " super(NeuralNetwork, self).__init__()\n", + " self.flatten = nn.Flatten()\n", + " self.linear_relu_stack = nn.Sequential(\n", + " nn.Linear(28*28, 512),\n", + " nn.ReLU(),\n", + " nn.Linear(512, 512),\n", + " nn.ReLU(),\n", + " nn.Linear(512, 10),\n", + " nn.ReLU()\n", + " )\n", + "\n", + " def forward(self, x):\n", + " x = self.flatten(x)\n", + " logits = self.linear_relu_stack(x)\n", + " return logits\n", + "\n", + "model = NeuralNetwork().to(device)\n", + "print(model)" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "loss_fn = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def train(dataloader, model, loss_fn, optimizer):\n", + " size = len(dataloader.dataset)\n", + " for batch, (X, y) in enumerate(dataloader):\n", + " X, y = X.to(device), y.to(device)\n", + "\n", + " # Compute prediction error\n", + " pred = model(X)\n", + " loss = loss_fn(pred, y)\n", + "\n", + " # Backpropagation\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if batch % 100 == 0:\n", + " loss, current = loss.item(), batch * len(X)\n", + " print(f\"loss: {loss:>7f} [{current:>5d}/{size:>5d}]\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def test(dataloader, model):\n", + " size = len(dataloader.dataset)\n", + " model.eval()\n", + " test_loss, correct = 0, 0\n", + " with torch.no_grad():\n", + " for X, y in dataloader:\n", + " X, y = X.to(device), y.to(device)\n", + " pred = model(X)\n", + " test_loss += loss_fn(pred, y).item()\n", + " correct += (pred.argmax(1) == y).type(torch.float).sum().item()\n", + " test_loss /= size\n", + " correct /= size\n", + " print(f\"Test Error: \\n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Epoch 1\n", + "-------------------------------\n", + "loss: 2.298708 [ 0/60000]\n", + "loss: 2.291671 [ 6400/60000]\n", + "loss: 2.278315 [12800/60000]\n", + "loss: 2.286206 [19200/60000]\n", + "loss: 2.279234 [25600/60000]\n", + "loss: 2.258433 [32000/60000]\n", + "loss: 2.269818 [38400/60000]\n", + "loss: 2.244914 [44800/60000]\n", + "loss: 2.260405 [51200/60000]\n", + "loss: 2.256901 [57600/60000]\n", + "Test Error: \n", + " Accuracy: 32.6%, Avg loss: 0.035208 \n", + "\n", + "Epoch 2\n", + "-------------------------------\n", + "loss: 2.229076 [ 0/60000]\n", + "loss: 2.227827 [ 6400/60000]\n", + "loss: 2.187566 [12800/60000]\n", + "loss: 2.230924 [19200/60000]\n", + "loss: 2.211191 [25600/60000]\n", + "loss: 2.171593 [32000/60000]\n", + "loss: 2.207811 [38400/60000]\n", + "loss: 2.151896 [44800/60000]\n", + "loss: 2.199616 [51200/60000]\n", + "loss: 2.197517 [57600/60000]\n", + "Test Error: \n", + " Accuracy: 34.0%, Avg loss: 0.033942 \n", + "\n", + "Epoch 3\n", + "-------------------------------\n", + "loss: 2.135999 [ 0/60000]\n", + "loss: 2.131395 [ 6400/60000]\n", + "loss: 2.054416 [12800/60000]\n", + "loss: 2.149383 [19200/60000]\n", + "loss: 2.106937 [25600/60000]\n", + "loss: 2.046117 [32000/60000]\n", + "loss: 2.119213 [38400/60000]\n", + "loss: 2.021386 [44800/60000]\n", + "loss: 2.125361 [51200/60000]\n", + "loss: 2.128896 [57600/60000]\n", + "Test Error: \n", + " Accuracy: 35.1%, Avg loss: 0.032325 \n", + "\n", + "Epoch 4\n", + "-------------------------------\n", + "loss: 2.018915 [ 0/60000]\n", + "loss: 2.011311 [ 6400/60000]\n", + "loss: 1.897095 [12800/60000]\n", + "loss: 2.053746 [19200/60000]\n", + "loss: 2.002030 [25600/60000]\n", + "loss: 1.924695 [32000/60000]\n", + "loss: 2.035108 [38400/60000]\n", + "loss: 1.902785 [44800/60000]\n", + "loss: 2.062397 [51200/60000]\n", + "loss: 2.083949 [57600/60000]\n", + "Test Error: \n", + " Accuracy: 
38.4%, Avg loss: 0.030988 \n", + "\n", + "Epoch 5\n", + "-------------------------------\n", + "loss: 1.921658 [ 0/60000]\n", + "loss: 1.915269 [ 6400/60000]\n", + "loss: 1.775001 [12800/60000]\n", + "loss: 1.972878 [19200/60000]\n", + "loss: 1.932084 [25600/60000]\n", + "loss: 1.845431 [32000/60000]\n", + "loss: 1.974809 [38400/60000]\n", + "loss: 1.825955 [44800/60000]\n", + "loss: 2.015837 [51200/60000]\n", + "loss: 2.051906 [57600/60000]\n", + "Test Error: \n", + " Accuracy: 39.2%, Avg loss: 0.030034 \n", + "\n", + "Done!\n" + ] + } + ], + "source": [ + "epochs = 5\n", + "for t in range(epochs):\n", + " print(f\"Epoch {t+1}\\n-------------------------------\")\n", + " train(train_dataloader, model, loss_fn, optimizer)\n", + " test(test_dataloader, model)\n", + "print(\"Done!\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saved PyTorch Model State to model.pth\n" + ] + } + ], + "source": [ + "torch.save(model.state_dict(), \"model.pth\")\n", + "print(\"Saved PyTorch Model State to model.pth\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "model = NeuralNetwork()\n", + "model.load_state_dict(torch.load(\"model.pth\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Predicted: \"Ankle boot\", Actual: \"Ankle boot\"\n" + ] + } + ], + "source": [ + "classes = [\n", + " \"T-shirt/top\",\n", + " \"Trouser\",\n", + " \"Pullover\",\n", + " \"Dress\",\n", + " \"Coat\",\n", + " \"Sandal\",\n", + " \"Shirt\",\n", + " \"Sneaker\",\n", + " \"Bag\",\n", + " \"Ankle boot\",\n", + "]\n", + "\n", + "model.eval()\n", + "x, y = test_data[0][0], test_data[0][1]\n", + "with torch.no_grad():\n", + " pred = model(x)\n", + " predicted, actual = classes[pred[0].argmax(0)], classes[y]\n", + " print(f'Predicted: \"{predicted}\", Actual: \"{actual}\"')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} \ No newline at end of file diff --git a/pytorch/官方教程/30 快速入门.md b/pytorch/官方教程/30 快速入门.md new file mode 100644 index 00000000..f8abae7a --- /dev/null +++ b/pytorch/官方教程/30 快速入门.md @@ -0,0 +1,200 @@ +# 快速入门 +## 目录 + +0. 快速入门 +1. 张量 +2. 数据集和数据加载器 +3. 转换 +4. 建立模型 +5. 自动区分 +6. 优化循环 +7. 保存,加载和使用模型 + + +## 0 包加载 + +PyTorch有两个处理数据的原语: torch.utils.data.DataLoader和torch.utils.data.Dataset。 Dataset存储样本及其相应的标签,并DataLoader在周围包裹一个可迭代的对象Dataset。 + +```py +import torch +from torch import nn +from torch.utils.data import DataLoader +from torchvision import datasets +from torchvision.transforms import ToTensor, Lambda, Compose +import matplotlib.pyplot as plt +``` + +## 1 数据库和数据导入 + +PyTorch提供了特定领域的库,例如TorchText, TorchVision和TorchAudio,所有这些库都包含数据集。在本教程中,我们将使用TorchVision数据集。 + +```py +# Download training data from open datasets. +training_data = datasets.FashionMNIST( + root="data", + train=True, + download=True, + transform=ToTensor(), +) + +# Download test data from open datasets. 
+test_data = datasets.FashionMNIST( + root="data", + train=False, + download=True, + transform=ToTensor(), +) +``` + +## 2 数据处理 + +将Dataset当作参数传递给DataLoader。这在我们的数据集上包装了一个可迭代的对象,并支持自动批处理,采样,改组和多进程数据加载。在这里,我们将批处理大小定义为64,即,可迭代的数据加载器中的每个元素将返回一批64个功能部件和标签。 + +```py +batch_size = 64 + +# Create data loaders. +train_dataloader = DataLoader(training_data, batch_size=batch_size) +test_dataloader = DataLoader(test_data, batch_size=batch_size) + +for X, y in test_dataloader: + print("Shape of X [N, C, H, W]: ", X.shape) + print("Shape of y: ", y.shape, y.dtype) + break +``` + +## 3.1 创建模型 +为了在PyTorch中定义一个神经网络,我们创建了一个从nn.Module继承的类。我们在__init__函数中定义网络的层,并在函数中指定数据如何通过网络forward。为了加速神经网络中的操作,我们将其移至GPU(如果有) +```py +# Get cpu or gpu device for training. +device = "cuda" if torch.cuda.is_available() else "cpu" +print("Using {} device".format(device)) + +# Define model +class NeuralNetwork(nn.Module): + def __init__(self): + super(NeuralNetwork, self).__init__() + self.flatten = nn.Flatten() + self.linear_relu_stack = nn.Sequential( + nn.Linear(28*28, 512), + nn.ReLU(), + nn.Linear(512, 512), + nn.ReLU(), + nn.Linear(512, 10), + nn.ReLU() + ) + + def forward(self, x): + x = self.flatten(x) + logits = self.linear_relu_stack(x) + return logits + +model = NeuralNetwork().to(device) +print(model) +``` + +## 3.2-4 前项传播/损失函数/优化器 +训​​练模型,我们需要损失函数 和优化器。 + +```py +loss_fn = nn.CrossEntropyLoss() +optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) +``` + +## 3.5 反向传播 + +在单个训练循环中,模型对训练数据集进行预测(分批进给),然后反向传播预测误差以调整模型的参数。 + +```py +def train(dataloader, model, loss_fn, optimizer): + size = len(dataloader.dataset) + for batch, (X, y) in enumerate(dataloader): + X, y = X.to(device), y.to(device) + + # Compute prediction error + pred = model(X) + loss = loss_fn(pred, y) + + # Backpropagation + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if batch % 100 == 0: + loss, current = loss.item(), batch * len(X) + print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]") +``` + +## 4 验证模型 + +我们还将对照测试数据集检查模型的性能,以确保模型正在学习。 + +```py +def test(dataloader, model): + size = len(dataloader.dataset) + model.eval() + test_loss, correct = 0, 0 + with torch.no_grad(): + for X, y in dataloader: + X, y = X.to(device), y.to(device) + pred = model(X) + test_loss += loss_fn(pred, y).item() + correct += (pred.argmax(1) == y).type(torch.float).sum().item() + test_loss /= size + correct /= size + print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n") +``` + +训练过程是在几个迭代(历元)上进行的。在每个时期,模型都会学习参数以做出更好的预测。我们在每个时期打印模型的准确性和损失;我们希望看到每个时期的精度都会提高而损耗会降低。 + +``` +epochs = 5 +for t in range(epochs): + print(f"Epoch {t+1}\n-------------------------------") + train(train_dataloader, model, loss_fn, optimizer) + test(test_dataloader, model) +print("Done!") +``` + +## 5 使用模型 + +### 保存模型 + +保存模型的常用方法是序列化内部状态字典(包含模型参数)。 + +``` +torch.save(model.state_dict(), "model.pth") +print("Saved PyTorch Model State to model.pth") +``` +### 加载模型 +加载模型的过程包括重新创建模型结构并将状态字典加载到其中 + +```py +model = NeuralNetwork() +model.load_state_dict(torch.load("model.pth")) +``` + +### 模型预测 +现在可以使用该模型进行预测。 + +```py +classes = [ + "T-shirt/top", + "Trouser", + "Pullover", + "Dress", + "Coat", + "Sandal", + "Shirt", + "Sneaker", + "Bag", + "Ankle boot", +] + +model.eval() +x, y = test_data[0][0], test_data[0][1] +with torch.no_grad(): + pred = model(x) + predicted, actual = classes[pred[0].argmax(0)], classes[y] + print(f'Predicted: "{predicted}", Actual: "{actual}"') +``` \ No newline at end of file diff --git a/pytorch/官方教程/31 张量.md b/pytorch/官方教程/31 
张量.md new file mode 100644 index 00000000..54e0cd86 --- /dev/null +++ b/pytorch/官方教程/31 张量.md @@ -0,0 +1,287 @@ +# 张量 + +张量如同数组和矩阵一样, 是一种特殊的数据结构。在`PyTorch`中, 神经网络的输入、输出以及网络的参数等数据, 都是使用张量来进行描述。 + +张量的使用和`Numpy`中的`ndarrays`很类似, 区别在于张量可以在`GPU`或其它专用硬件上运行, 这样可以得到更快的加速效果。如果你对`ndarrays`很熟悉的话, 张量的使用对你来说就很容易了。如果不太熟悉的话, 希望这篇有关张量`API`的快速入门教程能够帮到你。 + +```python +import torch +import numpy as np +``` + +## 1 张量初始化 + +张量有很多种不同的初始化方法, 先来看看四个简单的例子: + +### **1. 直接生成张量** + +由原始数据直接生成张量, 张量类型由原始数据类型决定。 + +```python +data = [[1, 2], [3, 4]] +x_data = torch.tensor(data) +``` + +### **2. 通过Numpy数组来生成张量** + +由已有的`Numpy`数组来生成张量(反过来也可以由张量来生成`Numpy`数组, 参考[张量与Numpy之间的转换](#jump))。 + +```python +np_array = np.array(data) +x_np = torch.from_numpy(np_array) +``` +### **3. 通过已有的张量来生成新的张量** + +新的张量将继承已有张量的数据属性(结构、类型), 也可以重新指定新的数据类型。 + +```python +x_ones = torch.ones_like(x_data) # 保留 x_data 的属性 +print(f"Ones Tensor: \n {x_ones} \n") + +x_rand = torch.rand_like(x_data, dtype=torch.float) # 重写 x_data 的数据类型 + +print(f"Random Tensor: \n {x_rand} \n") +``` + +显示: + +```python +Ones Tensor: + tensor([[1, 1], + [1, 1]]) + +Random Tensor: + tensor([[0.0381, 0.5780], + [0.3963, 0.0840]]) +``` +**4. 通过指定数据维度来生成张量** + +`shape`是元组类型, 用来描述张量的维数, 下面3个函数通过传入`shape`来指定生成张量的维数。 + +```python +shape = (2,3,) +rand_tensor = torch.rand(shape) +ones_tensor = torch.ones(shape) +zeros_tensor = torch.zeros(shape) + +print(f"Random Tensor: \n {rand_tensor} \n") +print(f"Ones Tensor: \n {ones_tensor} \n") +print(f"Zeros Tensor: \n {zeros_tensor}") +``` + +显示: + +```python +Random Tensor: + tensor([[0.0266, 0.0553, 0.9843], + [0.0398, 0.8964, 0.3457]]) + +Ones Tensor: + tensor([[1., 1., 1.], + [1., 1., 1.]]) + +Zeros Tensor: + tensor([[0., 0., 0.], + [0., 0., 0.]]) +``` + +## 2 张量属性 + +从张量属性我们可以得到张量的维数、数据类型以及它们所存储的设备(CPU或GPU)。 + +来看一个简单的例子: + +```python +tensor = torch.rand(3,4) + +print(f"Shape of tensor: {tensor.shape}") +print(f"Datatype of tensor: {tensor.dtype}") +print(f"Device tensor is stored on: {tensor.device}") +``` + +显示: + +```python +Shape of tensor: torch.Size([3, 4]) # 维数 +Datatype of tensor: torch.float32 # 数据类型 +Device tensor is stored on: cpu # 存储设备 +``` + +## 3 张量运算 + +有超过100种张量相关的运算操作, 例如转置、索引、切片、数学运算、线性代数、随机采样等。更多的运算可以在这里[查看](https://pytorch.org/docs/stable/torch.html)。 + +所有这些运算都可以在GPU上运行(相对于CPU来说可以达到更高的运算速度)。如果你使用的是Google的Colab环境, 可以通过 `Edit > Notebook Settings` 来分配一个GPU使用。 + +```python +# 判断当前环境GPU是否可用, 然后将tensor导入GPU内运行 +if torch.cuda.is_available(): + tensor = tensor.to('cuda') +``` + +光说不练假把式, 接下来的例子一定要动手跑一跑。如果你对Numpy的运算非常熟悉的话, 那tensor的运算对你来说就是小菜一碟。 + +**1. 张量的索引和切片** + +```python +tensor = torch.ones(4, 4) +tensor[:,1] = 0 # 将第1列(从0开始)的数据全部赋值为0 +print(tensor) +``` + +显示: + +```python +tensor([[1., 0., 1., 1.], + [1., 0., 1., 1.], + [1., 0., 1., 1.], + [1., 0., 1., 1.]]) +``` + +**2. 张量的拼接** + +你可以通过`torch.cat`方法将一组张量按照指定的维度进行拼接, 也可以参考[`torch.stack`](https://pytorch.org/docs/stable/generated/torch.stack.html)方法。这个方法也可以实现拼接操作, 但和`torch.cat`稍微有点不同。 + +```python +t1 = torch.cat([tensor, tensor, tensor], dim=1) +print(t1) +``` + + 显示: + +``` +tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.], + [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.], + [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.], + [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]]) +``` + +**3. 
张量的乘积和矩阵乘法** + +```python +# 逐个元素相乘结果 +tensor.mul(tensor) +# 等价写法: +tensor * tensor +# 使用torch中的方法 +torch.mul(tensor, tensor, out=z3) +``` + +显示: + +```python +tensor.mul(tensor): + tensor([[1., 0., 1., 1.], + [1., 0., 1., 1.], + [1., 0., 1., 1.], + [1., 0., 1., 1.]]) + +tensor * tensor: + tensor([[1., 0., 1., 1.], + [1., 0., 1., 1.], + [1., 0., 1., 1.], + [1., 0., 1., 1.]]) +``` + +下面写法表示张量与张量的矩阵乘法: + +```python +tensor.matmul(tensor.T) +# 等价写法: +tensor @ tensor.T +# 使用torch中的方法 +torch.matmul(tensor,tensor.T,out=y3) +``` + +显示: + +```python +tensor.matmul(tensor.T): + tensor([[3., 3., 3., 3.], + [3., 3., 3., 3.], + [3., 3., 3., 3.], + [3., 3., 3., 3.]]) + +tensor @ tensor.T: + tensor([[3., 3., 3., 3.], + [3., 3., 3., 3.], + [3., 3., 3., 3.], + [3., 3., 3., 3.]]) +``` + +**4. 自动赋值运算** + +自动赋值运算通常在方法后有 `_` 作为后缀, 例如: `x.copy_(y)`, `x.t_()`操作会改变 `x` 的取值。 + +```python +print(tensor, "\n") +tensor.add_(5) +print(tensor) +``` + +显示: + +```python +tensor([[1., 0., 1., 1.], + [1., 0., 1., 1.], + [1., 0., 1., 1.], + [1., 0., 1., 1.]]) + +tensor([[6., 5., 6., 6.], + [6., 5., 6., 6.], + [6., 5., 6., 6.], + [6., 5., 6., 6.]]) +``` + +> 注意: +> +> 自动赋值运算虽然可以节省内存, 但在求导时会因为丢失了中间过程而导致一些问题, 所以我们并不鼓励使用它。 + +## 4 Tensor与Numpy的转化 +张量和`Numpy array`数组在CPU上可以共用一块内存区域, 改变其中一个另一个也会随之改变。 +**1. 由张量变换为Numpy array数组** +```python +t = torch.ones(5) +print(f"t: {t}") +n = t.numpy() +print(f"n: {n}") +``` +显示: +```python +t: tensor([1., 1., 1., 1., 1.]) +n: [1. 1. 1. 1. 1.] +``` +修改张量的值,则`Numpy array`数组值也会随之改变。 +```python +t.add_(1) +print(f"t: {t}") +print(f"n: {n}") +``` +显示: +```python +t: tensor([2., 2., 2., 2., 2.]) +n: [2. 2. 2. 2. 2.] +``` + +**2. 由Numpy array数组转为张量** + +```python +n = np.ones(5) +t = torch.from_numpy(n) +``` + +修改`Numpy array`数组的值,则张量值也会随之改变。 + +```python +np.add(n, 1, out=n) +print(f"t: {t}") +print(f"n: {n}") +``` + +显示: + +```python +t: tensor([2., 2., 2., 2., 2.], dtype=torch.float64) +n: [2. 2. 2. 2. 2.] 
+``` + diff --git a/pytorch/官方教程/10 Pysyft 概述.ipynb b/pytorch/官方教程/32 数据集和数据加载器.md similarity index 100% rename from pytorch/官方教程/10 Pysyft 概述.ipynb rename to pytorch/官方教程/32 数据集和数据加载器.md diff --git a/pytorch/官方教程/33 转换transform.md b/pytorch/官方教程/33 转换transform.md new file mode 100644 index 00000000..e69de29b diff --git a/pytorch/官方教程/34 建立神经网络.md b/pytorch/官方教程/34 建立神经网络.md new file mode 100644 index 00000000..e69de29b diff --git a/pytorch/官方教程/35 自动微分.md b/pytorch/官方教程/35 自动微分.md new file mode 100644 index 00000000..e69de29b diff --git a/pytorch/官方教程/36 优化模型参数.md b/pytorch/官方教程/36 优化模型参数.md new file mode 100644 index 00000000..e69de29b diff --git a/pytorch/官方教程/36.md b/pytorch/官方教程/36.md deleted file mode 100644 index 4876704a..00000000 --- a/pytorch/官方教程/36.md +++ /dev/null @@ -1 +0,0 @@ -# 在生产中部署 PyTorch 模型 \ No newline at end of file diff --git a/pytorch/官方教程/37 保存并加载模型.md b/pytorch/官方教程/37 保存并加载模型.md new file mode 100644 index 00000000..e69de29b diff --git a/pytorch/官方教程/37.md b/pytorch/官方教程/37.md deleted file mode 100644 index 4f6d134f..00000000 --- a/pytorch/官方教程/37.md +++ /dev/null @@ -1,319 +0,0 @@ -# 通过使用 Flask 的 REST API 在 Python 中部署 PyTorch - -> 原文: - -**作者**: [Avinash Sajjanshetty](https://avi.im) - -在本教程中,我们将使用 Flask 部署 PyTorch 模型,并公开用于模型推理的 REST API。 特别是,我们将部署预训练的 DenseNet 121 模型来检测图像。 - -小费 - -此处使用的所有代码均以 MIT 许可发布,可在 [Github](https://github.com/avinassh/pytorch-flask-api) 上找到。 - -这是在生产中部署 PyTorch 模型的系列教程中的第一篇。 到目前为止,以这种方式使用 Flask 是开始为 PyTorch 模型提供服务的最简单方法,但不适用于具有高性能要求的用例。 为了那个原因: - -> * 如果您已经熟悉 TorchScript,则可以直接进入我们的[通过 C++ 加载 TorchScript 模型](https://pytorch.org/tutorials/advanced/cpp_export.html)的教程。 -> * 如果您首先需要在 TorchScript 上进行复习,请查看我们的 [TorchScript 入门](https://pytorch.org/tutorials/beginner/Intro_to_TorchScript_tutorial.html)教程。 - -## API 定义 - -我们将首先定义 API 端点,请求和响应类型。 我们的 API 端点将位于`/predict`,它通过包含图片的`file`参数接受 HTTP POST 请求。 响应将是包含预测的 JSON 响应: - -```py -{"class_id": "n02124075", "class_name": "Egyptian_cat"} - -``` - -## 依赖项 - -通过运行以下命令来安装所需的依赖项: - -```py -$ pip install Flask==1.0.3 torchvision-0.3.0 - -``` - -## 简单的 Web 服务器 - -以下是一个简单的网络服务器,摘自 Flask 的文档 - -```py -from flask import Flask -app = Flask(__name__) - -@app.route('/') -def hello(): - return 'Hello World!' - -``` - -将以上代码段保存在名为`app.py`的文件中,您现在可以通过输入以下内容来运行 Flask 开发服务器: - -```py -$ FLASK_ENV=development FLASK_APP=app.py flask run - -``` - -当您在网络浏览器中访问`http://localhost:5000/`时,您会看到`Hello World!`文字 - -我们将对上面的代码片段进行一些更改,以使其适合我们的 API 定义。 首先,我们将方法重命名为`predict`。 我们将端点路径更新为`/predict`。 由于图像文件将通过 HTTP POST 请求发送,因此我们将对其进行更新,使其也仅接受 POST 请求: - -```py -@app.route('/predict', methods=['POST']) -def predict(): - return 'Hello World!' 
- -``` - -我们还将更改响应类型,以使其返回包含 ImageNet 类 ID 和名称的 JSON 响应。 更新后的`app.py`文件现在为: - -```py -from flask import Flask, jsonify -app = Flask(__name__) - -@app.route('/predict', methods=['POST']) -def predict(): - return jsonify({'class_id': 'IMAGE_NET_XXX', 'class_name': 'Cat'}) - -``` - -## 推断 - -在下一部分中,我们将重点介绍编写推理代码。 这将涉及两部分,第一部分是准备图像,以便可以将其馈送到 DenseNet;第二部分,我们将编写代码以从模型中获取实际的预测。 - -### 准备图像 - -DenseNet 模型要求图像为尺寸为`224 x 224`的 3 通道 RGB 图像。我们还将使用所需的均值和标准差值对图像张量进行归一化。 您可以在上阅读有关它的更多信息。 - -我们将使用`torchvision`库中的`transforms`并建立一个转换管道,该转换管道可根据需要转换图像。 [您可以这里阅读有关转换的更多信息](https://pytorch.org/docs/stable/torchvision/transforms.html)。 - -```py -import io - -import torchvision.transforms as transforms -from PIL import Image - -def transform_image(image_bytes): - my_transforms = transforms.Compose([transforms.Resize(255), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize( - [0.485, 0.456, 0.406], - [0.229, 0.224, 0.225])]) - image = Image.open(io.BytesIO(image_bytes)) - return my_transforms(image).unsqueeze(0) - -``` - -上面的方法以字节为单位获取图像数据,应用一系列变换并返回张量。 要测试上述方法,请以字节模式读取图像文件(首先将`../_static/img/sample_file.jpeg`替换为计算机上文件的实际路径),然后查看是否取回张量: - -```py -with open("../_static/img/sample_file.jpeg", 'rb') as f: - image_bytes = f.read() - tensor = transform_image(image_bytes=image_bytes) - print(tensor) - -``` - -出: - -```py -tensor([[[[ 0.4508, 0.4166, 0.3994, ..., -1.3473, -1.3302, -1.3473], - [ 0.5364, 0.4851, 0.4508, ..., -1.2959, -1.3130, -1.3302], - [ 0.7077, 0.6392, 0.6049, ..., -1.2959, -1.3302, -1.3644], - ..., - [ 1.3755, 1.3927, 1.4098, ..., 1.1700, 1.3584, 1.6667], - [ 1.8893, 1.7694, 1.4440, ..., 1.2899, 1.4783, 1.5468], - [ 1.6324, 1.8379, 1.8379, ..., 1.4783, 1.7352, 1.4612]], - - [[ 0.5728, 0.5378, 0.5203, ..., -1.3704, -1.3529, -1.3529], - [ 0.6604, 0.6078, 0.5728, ..., -1.3004, -1.3179, -1.3354], - [ 0.8529, 0.7654, 0.7304, ..., -1.3004, -1.3354, -1.3704], - ..., - [ 1.4657, 1.4657, 1.4832, ..., 1.3256, 1.5357, 1.8508], - [ 2.0084, 1.8683, 1.5182, ..., 1.4657, 1.6583, 1.7283], - [ 1.7458, 1.9384, 1.9209, ..., 1.6583, 1.9209, 1.6408]], - - [[ 0.7228, 0.6879, 0.6531, ..., -1.6476, -1.6302, -1.6476], - [ 0.8099, 0.7576, 0.7228, ..., -1.6476, -1.6476, -1.6650], - [ 1.0017, 0.9145, 0.8797, ..., -1.6476, -1.6650, -1.6999], - ..., - [ 1.6291, 1.6291, 1.6465, ..., 1.6291, 1.8208, 2.1346], - [ 2.1868, 2.0300, 1.6814, ..., 1.7685, 1.9428, 2.0125], - [ 1.9254, 2.0997, 2.0823, ..., 1.9428, 2.2043, 1.9080]]]]) - -``` - -### 预测 - -现在将使用预训练的 DenseNet 121 模型来预测图像类别。 我们将使用`torchvision`库中的一个,加载模型并进行推断。 在此示例中,我们将使用预训练模型,但您可以对自己的模型使用相同的方法。 在此[教程](../beginner/saving_loading_models.html)中查看有关加载模型的更多信息。 - -```py -from torchvision import models - -# Make sure to pass `pretrained` as `True` to use the pretrained weights: -model = models.densenet121(pretrained=True) -# Since we are using our model only for inference, switch to `eval` mode: -model.eval() - -def get_prediction(image_bytes): - tensor = transform_image(image_bytes=image_bytes) - outputs = model.forward(tensor) - _, y_hat = outputs.max(1) - return y_hat - -``` - -张量`y_hat`将包含预测的类 ID 的索引。 但是,我们需要一个人类可读的类名。 为此,我们需要一个类 ID 来进行名称映射。 将[这个文件](https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json)下载为`imagenet_class_index.json`,并记住它的保存位置(或者,如果您按照本教程中的确切步骤操作,请将其保存在`tutorials/_static`中)。 此文件包含 ImageNet 类 ID 到 ImageNet 类名称的映射。 我们将加载此 JSON 文件并获取预测索引的类名称。 - -```py -import json - -imagenet_class_index = json.load(open('../_static/imagenet_class_index.json')) - -def get_prediction(image_bytes): - tensor = 
transform_image(image_bytes=image_bytes) - outputs = model.forward(tensor) - _, y_hat = outputs.max(1) - predicted_idx = str(y_hat.item()) - return imagenet_class_index[predicted_idx] - -``` - -在使用`imagenet_class_index`字典之前,首先我们将张量值转换为字符串值,因为`imagenet_class_index`字典中的键是字符串。 我们将测试上述方法: - -```py -with open("../_static/img/sample_file.jpeg", 'rb') as f: - image_bytes = f.read() - print(get_prediction(image_bytes=image_bytes)) - -``` - -出: - -```py -['n02124075', 'Egyptian_cat'] - -``` - -您应该得到如下响应: - -```py -['n02124075', 'Egyptian_cat'] - -``` - -数组中的第一项是 ImageNet 类 ID,第二项是人类可读的名称。 - -注意 - -您是否注意到`model`变量不属于`get_prediction`方法? 还是为什么模型是全局变量? 就内存和计算而言,加载模型可能是一项昂贵的操作。 如果我们以`get_prediction`方法加载模型,则每次调用该方法时都会不必要地加载该模型。 由于我们正在构建一个 Web 服务器,因此每秒可能有成千上万的请求,因此我们不应该浪费时间为每个推断重复加载模型。 因此,我们仅将模型加载到内存中一次。 在生产系统中,必须高效使用计算以能够大规模处理请求,因此通常应在处理请求之前加载模型。 - -## 将模型集成到我们的 API 服务器中 - -在最后一部分中,我们将模型添加到 Flask API 服务器中。 由于我们的 API 服务器应该获取图像文件,因此我们将更新`predict`方法以从请求中读取文件: - -```py -from flask import request - -@app.route('/predict', methods=['POST']) -def predict(): - if request.method == 'POST': - # we will get the file from the request - file = request.files['file'] - # convert that to bytes - img_bytes = file.read() - class_id, class_name = get_prediction(image_bytes=img_bytes) - return jsonify({'class_id': class_id, 'class_name': class_name}) - -``` - -`app.py`文件现在完成。 以下是完整版本; 将路径替换为保存文件的路径,它应运行: - -```py -import io -import json - -from torchvision import models -import torchvision.transforms as transforms -from PIL import Image -from flask import Flask, jsonify, request - -app = Flask(__name__) -imagenet_class_index = json.load(open('/imagenet_class_index.json')) -model = models.densenet121(pretrained=True) -model.eval() - -def transform_image(image_bytes): - my_transforms = transforms.Compose([transforms.Resize(255), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize( - [0.485, 0.456, 0.406], - [0.229, 0.224, 0.225])]) - image = Image.open(io.BytesIO(image_bytes)) - return my_transforms(image).unsqueeze(0) - -def get_prediction(image_bytes): - tensor = transform_image(image_bytes=image_bytes) - outputs = model.forward(tensor) - _, y_hat = outputs.max(1) - predicted_idx = str(y_hat.item()) - return imagenet_class_index[predicted_idx] - -@app.route('/predict', methods=['POST']) -def predict(): - if request.method == 'POST': - file = request.files['file'] - img_bytes = file.read() - class_id, class_name = get_prediction(image_bytes=img_bytes) - return jsonify({'class_id': class_id, 'class_name': class_name}) - -if __name__ == '__main__': - app.run() - -``` - -让我们测试一下我们的网络服务器! 
跑: - -```py -$ FLASK_ENV=development FLASK_APP=app.py flask run - -``` - -我们可以使用[`requests`](https://pypi.org/project/requests/)库向我们的应用发送 POST 请求: - -```py -import requests - -resp = requests.post("http://localhost:5000/predict", - files={"file": open('/cat.jpg','rb')}) - -``` - -现在打印`resp.json()`将显示以下内容: - -```py -{"class_id": "n02124075", "class_name": "Egyptian_cat"} - -``` - -## 后续步骤 - -我们编写的服务器非常琐碎,可能无法完成生产应用所需的一切。 因此,您可以采取一些措施来改善它: - -* 端点`/predict`假定请求中始终会有一个图像文件。 这可能并不适用于所有请求。 我们的用户可能发送带有其他参数的图像,或者根本不发送任何图像。 -* 用户也可以发送非图像类型的文件。 由于我们没有处理错误,因此这将破坏我们的服务器。 添加显式的错误处理路径将引发异常,这将使我们能够更好地处理错误的输入 -* 即使模型可以识别大量类别的图像,也可能无法识别所有图像。 增强实现以处理模型无法识别图像中的任何情况的情况。 -* 我们在开发模式下运行 Flask 服务器,该服务器不适合在生产中进行部署。 您可以查看[本教程](https://flask.palletsprojects.com/en/1.1.x/tutorial/deploy/),以便在生产环境中部署 Flask 服务器。 -* 您还可以通过创建一个带有表单的页面来添加 UI,该表单可以拍摄图像并显示预测。 查看类似项目的[演示](https://pytorch-imagenet.herokuapp.com/)及其[源代码](https://github.com/avinassh/pytorch-flask-api-heroku)。 -* 在本教程中,我们仅展示了如何构建可以一次返回单个图像预测的服务。 我们可以修改服务以能够一次返回多个图像的预测。 此外,[service-streamer](https://github.com/ShannonAI/service-streamer) 库自动将对服务的请求排队,并将请求采样到微型批量中,这些微型批量可输入模型中。 您可以查看[本教程](https://github.com/ShannonAI/service-streamer/wiki/Vision-Recognition-Service-with-Flask-and-service-streamer)。 -* 最后,我们鼓励您在页面顶部查看链接到的其他 PyTorch 模型部署教程。 - -**脚本的总运行时间**:(0 分钟 1.232 秒) - -[下载 Python 源码:`flask_rest_api_tutorial.py`](../_downloads/146c514e84d7e33f2a302bcc3ae793cb/flask_rest_api_tutorial.py) - -[下载 Jupyter 笔记本:`flask_rest_api_tutorial.ipynb`](../_downloads/6c042f3d39855d2a2de414758e5f9836/flask_rest_api_tutorial.ipynb) - -[由 Sphinx 画廊](https://sphinx-gallery.readthedocs.io)生成的画廊 \ No newline at end of file diff --git a/pytorch/官方教程/38 Pytorch 序列化.md b/pytorch/官方教程/38 Pytorch 序列化.md new file mode 100644 index 00000000..24af4bff --- /dev/null +++ b/pytorch/官方教程/38 Pytorch 序列化.md @@ -0,0 +1,105 @@ +# Torch Script 简介 + +## 1 为什么 + +* TorchScript是PyTorch模型(的子类nn.Module)的中间表示,可以在高性能环境(例如C ++)中运行。 + +* TorchScript是一种从PyTorch代码创建可序列化和可优化模型的方法。任何TorchScript程序都可以从Python进程中保存,并在没有Python依赖项的进程中加载​​。 + +* 我们提供了将模型从纯Python程序逐步过渡到可以 **独立于Python运行的TorchScript程序** 的工具,例如在独立的C ++程序中。这样就可以使用Python中熟悉的工具在PyTorch中训练模型,然后通过TorchScript将模型导出到生产环境中。 + +## 2 怎样做-trace + +### 实现一个模型 + +1. 构造函数,为调用准备模块 +2. 一组Parameters和Modules。这些由构造函数初始化,并且可以在调用期间由模块使用。 +3. 一个forward功能。这是调用模块时运行的代码。 + +```py +class MyCell(torch.nn.Module): + def __init__(self): + super(MyCell, self).__init__() + self.linear = torch.nn.Linear(4, 4) + + def forward(self, x, h): + new_h = torch.tanh(self.linear(x) + h) + return new_h, new_h + + +x = torch.rand(3, 4) +h = torch.rand(3, 4) + +my_cell = MyCell() +print(my_cell) +print(my_cell(x, h)) +``` + + +### 对模型序列化 + +```py +traced_cell = torch.jit.trace(my_cell, (x, h)) +print(traced_cell) +traced_cell(x, h) + +print(traced_cell.graph) +print(traced_cell.code) +``` + +1. TorchScript代码可以在其自己的解释器中调用,该解释器基本上是受限制的Python解释器。该解释器不获取全局解释器锁定,因此可以在同一实例上同时处理许多请求。 +2. 这种格式允许我们将整个模型保存到磁盘上,并将其加载到另一个环境中,例如在以Python以外的语言编写的服务器中 +3. TorchScript为我们提供了一种表示形式,其中我们可以对代码进行编译器优化以提供更有效的执行 +4. TorchScript允许我们与许多后端/设备运行时进行接口,这些运行时比单个操作员需要更广泛的程序视图。 + + +## 3 怎样做-script + +1. 
将模型转换为trace格式 +``` +class MyDecisionGate(torch.nn.Module): + def forward(self, x): + if x.sum() > 0: + return x + else: + return -x + +class MyCell(torch.nn.Module): + def __init__(self, dg): + super(MyCell, self).__init__() + self.dg = dg + self.linear = torch.nn.Linear(4, 4) + + def forward(self, x, h): + new_h = torch.tanh(self.dg(self.linear(x)) + h) + return new_h, new_h + +my_cell = MyCell(MyDecisionGate()) +traced_cell = torch.jit.trace(my_cell, (x, h)) + +print(traced_cell.dg.code) +print(traced_cell.code) +``` + +2. 提供了一个 脚本编译器,它可以直接分析您的Python源代码以将其转换为TorchScript。让我们MyDecisionGate 使用脚本编译器进行转换: + +``` +scripted_gate = torch.jit.script(MyDecisionGate()) + +my_cell = MyCell(scripted_gate) +scripted_cell = torch.jit.script(my_cell) + +print(scripted_gate.code) +print(scripted_cell.code) +``` + +## 4 保存和加载模型 + +``` +traced.save('wrapped_rnn.pt') + +loaded = torch.jit.load('wrapped_rnn.pt') + +print(loaded) +print(loaded.code) +``` \ No newline at end of file diff --git a/pytorch/官方教程/38.md b/pytorch/官方教程/38.md deleted file mode 100644 index c37b2170..00000000 --- a/pytorch/官方教程/38.md +++ /dev/null @@ -1,480 +0,0 @@ -# TorchScript 简介 - -> 原文: - -*James Reed (jamesreed@fb.com),Michael Suo (suo@fb.com)*,修订 2 - -本教程是 TorchScript 的简介,TorchScript 是 PyTorch 模型(`nn.Module`的子类)的中间表示,可以在高性能环境(例如 C++)中运行。 - -在本教程中,我们将介绍: - -1. PyTorch 中模型创作的基础,包括: - -* 模组 -* 定义`forward`函数 -* 将模块组成模块的层次结构 - -2. 将 PyTorch 模块转换为 TorchScript(我们的高性能部署运行时)的特定方法 - -* 跟踪现有模块 -* 使用脚本直接编译模块 -* 如何组合两种方法 -* 保存和加载 TorchScript 模块 - -我们希望在完成本教程之后,您将继续学习[后续教程](https://pytorch.org/tutorials/advanced/cpp_export.html),该教程将引导您完成一个从 C++ 实际调用 TorchScript 模型的示例。 - -```py -import torch # This is all you need to use both PyTorch and TorchScript! -print(torch.__version__) - -``` - -出: - -```py -1.7.1 - -``` - -## PyTorch 模型创建基础 - -首先定义一个简单的`Module`。 `Module`是 PyTorch 中组成的基本单位。 它包含: - -1. 为调用准备模块的构造器 -2. 一组`Parameters`和子`Modules`。 这些由构造器初始化,并且可以在调用期间由模块使用。 -3. `forward`函数。 这是调用模块时运行的代码。 - -我们来看一个小例子: - -```py -class MyCell(torch.nn.Module): - def __init__(self): - super(MyCell, self).__init__() - - def forward(self, x, h): - new_h = torch.tanh(x + h) - return new_h, new_h - -my_cell = MyCell() -x = torch.rand(3, 4) -h = torch.rand(3, 4) -print(my_cell(x, h)) - -``` - -出: - -```py -(tensor([[0.8837, 0.5372, 0.4951, 0.9124], - [0.6124, 0.7072, 0.6395, 0.9585], - [0.6178, 0.8701, 0.8071, 0.2415]]), tensor([[0.8837, 0.5372, 0.4951, 0.9124], - [0.6124, 0.7072, 0.6395, 0.9585], - [0.6178, 0.8701, 0.8071, 0.2415]])) - -``` - -因此,我们已经: - -1. 创建了一个子类`torch.nn.Module`的类。 -2. 定义一个构造器。 构造器没有做很多事情,只是调用`super`的构造器。 -3. 
定义了`forward`函数,该函数具有两个输入并返回两个输出。 `forward`函数的实际内容并不是很重要,但它是一种伪造的 [RNN 单元](https://colah.github.io/posts/2015-08-Understanding-LSTMs/),即,该函数应用于循环。 - -我们实例化了该模块,并制作了`x`和`y`,它们只是`3x4`随机值矩阵。 然后,我们使用`my_cell(x, h)`调用该单元格。 这依次调用我们的`forward`函数。 - -让我们做一些更有趣的事情: - -```py -class MyCell(torch.nn.Module): - def __init__(self): - super(MyCell, self).__init__() - self.linear = torch.nn.Linear(4, 4) - - def forward(self, x, h): - new_h = torch.tanh(self.linear(x) + h) - return new_h, new_h - -my_cell = MyCell() -print(my_cell) -print(my_cell(x, h)) - -``` - -出: - -```py -MyCell( - (linear): Linear(in_features=4, out_features=4, bias=True) -) -(tensor([[ 0.5042, 0.8137, -0.1593, 0.4167], - [ 0.1716, 0.8078, -0.2267, 0.7011], - [ 0.5616, 0.8753, 0.1597, -0.3899]], grad_fn=), tensor([[ 0.5042, 0.8137, -0.1593, 0.4167], - [ 0.1716, 0.8078, -0.2267, 0.7011], - [ 0.5616, 0.8753, 0.1597, -0.3899]], grad_fn=)) - -``` - -我们已经重新定义了模块`MyCell`,但是这次我们添加了`self.linear`属性,并且在`forward`函数中调用了`self.linear`。 - -这里到底发生了什么? `torch.nn.Linear`是 PyTorch 标准库中的`Module`。 就像`MyCell`一样,可以使用调用语法来调用它。 我们正在建立`Module`的层次结构。 - -`Module`上的`print`将直观地表示`Module`的子类层次结构。 在我们的示例中,我们可以看到`Linear`子类及其参数。 - -通过以这种方式组成`Module`,我们可以简洁易读地编写具有可重用组件的模型。 - -您可能已经在输出上注意到`grad_fn`。 这是 PyTorch 自动微分方法的详细信息,称为 [autograd](https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html) 。 简而言之,该系统允许我们通过潜在的复杂程序来计算导数。 该设计为模型创作提供了极大的灵活性。 - -现在,让我们检查一下灵活性: - -```py -class MyDecisionGate(torch.nn.Module): - def forward(self, x): - if x.sum() > 0: - return x - else: - return -x - -class MyCell(torch.nn.Module): - def __init__(self): - super(MyCell, self).__init__() - self.dg = MyDecisionGate() - self.linear = torch.nn.Linear(4, 4) - - def forward(self, x, h): - new_h = torch.tanh(self.dg(self.linear(x)) + h) - return new_h, new_h - -my_cell = MyCell() -print(my_cell) -print(my_cell(x, h)) - -``` - -出: - -```py -MyCell( - (dg): MyDecisionGate() - (linear): Linear(in_features=4, out_features=4, bias=True) -) -(tensor([[0.8636, 0.5572, 0.6262, 0.8546], - [0.7766, 0.5056, 0.5357, 0.8360], - [0.7293, 0.7581, 0.7117, 0.2432]], grad_fn=), tensor([[0.8636, 0.5572, 0.6262, 0.8546], - [0.7766, 0.5056, 0.5357, 0.8360], - [0.7293, 0.7581, 0.7117, 0.2432]], grad_fn=)) - -``` - -我们再次重新定义了`MyCell`类,但是在这里我们定义了`MyDecisionGate`。 该模块利用**控制流**。 控制流包括循环和`if`语句之类的内容。 - -给定完整的程序表示形式,许多框架都采用计算符号派生的方法。 但是,在 PyTorch 中,我们使用梯度色带。 我们记录发生的操作,并在计算派生时向后回放。 这样,框架不必为语言中的所有构造显式定义派生类。 - -![How autograd works](img/beccc5ac5df1571304e11d6b12772a99.png) - -Autograd 的工作原理 - -## TorchScript 的基础 - -现在,让我们以正在运行的示例为例,看看如何应用 TorchScript。 - -简而言之,即使 PyTorch 具有灵活和动态的特性,TorchScript 也提供了捕获模型定义的工具。 让我们开始研究所谓的**跟踪**。 - -### 跟踪`Modules` - -```py -class MyCell(torch.nn.Module): - def __init__(self): - super(MyCell, self).__init__() - self.linear = torch.nn.Linear(4, 4) - - def forward(self, x, h): - new_h = torch.tanh(self.linear(x) + h) - return new_h, new_h - -my_cell = MyCell() -x, h = torch.rand(3, 4), torch.rand(3, 4) -traced_cell = torch.jit.trace(my_cell, (x, h)) -print(traced_cell) -traced_cell(x, h) - -``` - -出: - -```py -MyCell( - original_name=MyCell - (linear): Linear(original_name=Linear) -) - -``` - -我们倒退了一点,并学习了`MyCell`类的第二版。 和以前一样,我们实例化了它,但是这一次,我们调用了`torch.jit.trace`,将其传递给`Module`,并传递给了*示例输入*,网络可能会看到。 - -这到底是做什么的? 
它调用了`Module`,记录了运行`Module`时发生的操作,并创建了`torch.jit.ScriptModule`的实例(其中`TracedModule`是实例) - -TorchScript 将其定义记录在中间表示(或 IR)中,在深度学习中通常称为*图*。 我们可以检查带有`.graph`属性的图: - -```py -print(traced_cell.graph) - -``` - -出: - -```py -graph(%self.1 : __torch__.MyCell, - %input : Float(3:4, 4:1, requires_grad=0, device=cpu), - %h : Float(3:4, 4:1, requires_grad=0, device=cpu)): - %19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="linear"](%self.1) - %21 : Tensor = prim::CallMethod[name="forward"](%19, %input) - %12 : int = prim::Constant[value=1]() # /var/lib/jenkins/workspace/beginner_source/Intro_to_TorchScript_tutorial.py:188:0 - %13 : Float(3:4, 4:1, requires_grad=1, device=cpu) = aten::add(%21, %h, %12) # /var/lib/jenkins/workspace/beginner_source/Intro_to_TorchScript_tutorial.py:188:0 - %14 : Float(3:4, 4:1, requires_grad=1, device=cpu) = aten::tanh(%13) # /var/lib/jenkins/workspace/beginner_source/Intro_to_TorchScript_tutorial.py:188:0 - %15 : (Float(3:4, 4:1, requires_grad=1, device=cpu), Float(3:4, 4:1, requires_grad=1, device=cpu)) = prim::TupleConstruct(%14, %14) - return (%15) - -``` - -但是,这是一个非常低级的表示形式,图中包含的大多数信息对最终用户没有用。 相反,我们可以使用`.code`属性来给出代码的 Python 语法解释: - -```py -print(traced_cell.code) - -``` - -出: - -```py -def forward(self, - input: Tensor, - h: Tensor) -> Tuple[Tensor, Tensor]: - _0 = torch.add((self.linear).forward(input, ), h, alpha=1) - _1 = torch.tanh(_0) - return (_1, _1) - -``` - -那么**为什么我们要进行所有这些操作? 有以下几个原因**: - -1. TorchScript 代码可以在其自己的解释器中调用,该解释器基本上是受限制的 Python 解释器。 该解释器不获取全局解释器锁定,因此可以在同一实例上同时处理许多请求。 -2. 这种格式允许我们将整个模型保存到磁盘上,然后将其加载到另一个环境中,例如在以 Python 以外的语言编写的服务器中 -3. TorchScript 为我们提供了一种表示形式,其中我们可以对代码进行编译器优化以提供更有效的执行 -4. TorchScript 允许我们与许多后端/设备运行时进行交互,与单个运算符相比,它们要求更广泛的程序视图。 - -我们可以看到,调用`traced_cell`会产生与 Python 模块相同的结果: - -```py -print(my_cell(x, h)) -print(traced_cell(x, h)) - -``` - -出: - -```py -(tensor([[-0.3869, 0.0678, 0.5692, 0.6332], - [ 0.1230, 0.4653, 0.8051, 0.3346], - [-0.5288, 0.2767, 0.9063, 0.4727]], grad_fn=), tensor([[-0.3869, 0.0678, 0.5692, 0.6332], - [ 0.1230, 0.4653, 0.8051, 0.3346], - [-0.5288, 0.2767, 0.9063, 0.4727]], grad_fn=)) -(tensor([[-0.3869, 0.0678, 0.5692, 0.6332], - [ 0.1230, 0.4653, 0.8051, 0.3346], - [-0.5288, 0.2767, 0.9063, 0.4727]], grad_fn=), tensor([[-0.3869, 0.0678, 0.5692, 0.6332], - [ 0.1230, 0.4653, 0.8051, 0.3346], - [-0.5288, 0.2767, 0.9063, 0.4727]], grad_fn=)) - -``` - -## 使用脚本转换模块 - -有一个原因是我们使用了模块的第二版,而不是使用带有大量控制流的子模块。 现在让我们检查一下: - -```py -class MyDecisionGate(torch.nn.Module): - def forward(self, x): - if x.sum() > 0: - return x - else: - return -x - -class MyCell(torch.nn.Module): - def __init__(self, dg): - super(MyCell, self).__init__() - self.dg = dg - self.linear = torch.nn.Linear(4, 4) - - def forward(self, x, h): - new_h = torch.tanh(self.dg(self.linear(x)) + h) - return new_h, new_h - -my_cell = MyCell(MyDecisionGate()) -traced_cell = torch.jit.trace(my_cell, (x, h)) -print(traced_cell.code) - -``` - -出: - -```py -def forward(self, - input: Tensor, - h: Tensor) -> Tuple[Tensor, Tensor]: - _0 = self.dg - _1 = (self.linear).forward(input, ) - _2 = (_0).forward(_1, ) - _3 = torch.tanh(torch.add(_1, h, alpha=1)) - return (_3, _3) - -``` - -查看`.code`输出,可以发现找不到`if-else`分支! 为什么? 跟踪完全按照我们所说的去做:运行代码,记录发生的操作,并构造一个执行此操作的`ScriptModule`。 不幸的是,诸如控制流之类的东西被擦除了。 - -我们如何在 TorchScript 中忠实地表示此模块? 
我们提供了**脚本编译器**,它可以直接分析您的 Python 源代码以将其转换为 TorchScript。 让我们使用脚本编译器转换`MyDecisionGate`: - -```py -scripted_gate = torch.jit.script(MyDecisionGate()) - -my_cell = MyCell(scripted_gate) -traced_cell = torch.jit.script(my_cell) -print(traced_cell.code) - -``` - -出: - -```py -def forward(self, - x: Tensor, - h: Tensor) -> Tuple[Tensor, Tensor]: - _0 = (self.dg).forward((self.linear).forward(x, ), ) - new_h = torch.tanh(torch.add(_0, h, alpha=1)) - return (new_h, new_h) - -``` - -万岁! 现在,我们已经忠实地捕获了我们在 TorchScript 中程序的行为。 现在,让我们尝试运行该程序: - -```py -# New inputs -x, h = torch.rand(3, 4), torch.rand(3, 4) -traced_cell(x, h) - -``` - -### 混合脚本和跟踪 - -在某些情况下,需要使用跟踪而不是脚本(例如,一个模块具有许多基于不变的 Python 值做出的架构决策,而我们不希望它们出现在 TorchScript 中)。 在这种情况下,可以通过跟踪来编写脚本:`torch.jit.script`将内联被跟踪模块的代码,而跟踪将内联脚本模块的代码。 - -第一种情况的示例: - -```py -class MyRNNLoop(torch.nn.Module): - def __init__(self): - super(MyRNNLoop, self).__init__() - self.cell = torch.jit.trace(MyCell(scripted_gate), (x, h)) - - def forward(self, xs): - h, y = torch.zeros(3, 4), torch.zeros(3, 4) - for i in range(xs.size(0)): - y, h = self.cell(xs[i], h) - return y, h - -rnn_loop = torch.jit.script(MyRNNLoop()) -print(rnn_loop.code) - -``` - -出: - -```py -def forward(self, - xs: Tensor) -> Tuple[Tensor, Tensor]: - h = torch.zeros([3, 4], dtype=None, layout=None, device=None, pin_memory=None) - y = torch.zeros([3, 4], dtype=None, layout=None, device=None, pin_memory=None) - y0 = y - h0 = h - for i in range(torch.size(xs, 0)): - _0 = (self.cell).forward(torch.select(xs, 0, i), h0, ) - y1, h1, = _0 - y0, h0 = y1, h1 - return (y0, h0) - -``` - -还有第二种情况的示例: - -```py -class WrapRNN(torch.nn.Module): - def __init__(self): - super(WrapRNN, self).__init__() - self.loop = torch.jit.script(MyRNNLoop()) - - def forward(self, xs): - y, h = self.loop(xs) - return torch.relu(y) - -traced = torch.jit.trace(WrapRNN(), (torch.rand(10, 3, 4))) -print(traced.code) - -``` - -出: - -```py -def forward(self, - argument_1: Tensor) -> Tensor: - _0, h, = (self.loop).forward(argument_1, ) - return torch.relu(h) - -``` - -这样,当情况需要它们时,可以使用脚本和跟踪并将它们一起使用。 - -## 保存和加载模型 - -我们提供 API,以存档格式将 TorchScript 模块保存到磁盘或从磁盘加载 TorchScript 模块。 这种格式包括代码,参数,属性和调试信息,这意味着归档文件是模型的独立表示形式,可以在完全独立的过程中加载。 让我们保存并加载包装好的 RNN 模块: - -```py -traced.save('wrapped_rnn.zip') - -loaded = torch.jit.load('wrapped_rnn.zip') - -print(loaded) -print(loaded.code) - -``` - -出: - -```py -RecursiveScriptModule( - original_name=WrapRNN - (loop): RecursiveScriptModule( - original_name=MyRNNLoop - (cell): RecursiveScriptModule( - original_name=MyCell - (dg): RecursiveScriptModule(original_name=MyDecisionGate) - (linear): RecursiveScriptModule(original_name=Linear) - ) - ) -) -def forward(self, - argument_1: Tensor) -> Tensor: - _0, h, = (self.loop).forward(argument_1, ) - return torch.relu(h) - -``` - -如您所见,序列化保留了模块层次结构和我们一直在研究的代码。 [也可以将模型加载到 C++ 中](https://pytorch.org/tutorials/advanced/cpp_export.html),以实现不依赖 Python 的执行。 - -### 进一步阅读 - -我们已经完成了教程! 
有关更多涉及的演示,[请查看 NeurIPS 演示来使用 TorchScript 转换机器翻译模型](https://colab.research.google.com/drive/1HiICg6jRkBnr5hvK2-VnMi88Vi9pUzEJ)。 - -**脚本的总运行时间**:(0 分钟 0.269 秒) - -[下载 Python 源码:`Intro_to_TorchScript_tutorial.py`](../_downloads/bf4ee4ef1ffde8b469d9ed4001a28ee8/Intro_to_TorchScript_tutorial.py) - -[下载 Jupyter 笔记本:`Intro_to_TorchScript_tutorial.ipynb`](../_downloads/0fd9e9bc92ac80a422914e974021c007/Intro_to_TorchScript_tutorial.ipynb) - -[由 Sphinx 画廊](https://sphinx-gallery.readthedocs.io)生成的画廊 \ No newline at end of file diff --git a/pytorch/官方教程/39.md b/pytorch/官方教程/39.md deleted file mode 100644 index 6d569ec1..00000000 --- a/pytorch/官方教程/39.md +++ /dev/null @@ -1,298 +0,0 @@ -# 在 C++ 中加载 TorchScript 模型 - -> 原文: - -顾名思义,PyTorch 的主要接口是 Python 编程语言。 尽管 Python 是许多需要动态性和易于迭代的场景的合适且首选的语言,但是在同样许多情况下,Python 的这些属性恰恰是不利的。 后者经常应用的一种环境是*生产* –低延迟和严格部署要求的土地。 对于生产场景,即使仅将 C++ 绑定到 Java,Rust 或 Go 之类的另一种语言中,它也是经常选择的语言。 以下各段将概述 PyTorch 提供的从现有 Python 模型到序列化表示形式的路径,该序列化表示形式可以完全由 C++ *加载*和*执行*,不依赖于 Python。 - -## 第 1 步:将 PyTorch 模型转换为 Torch 脚本 - -PyTorch 模型从 Python 到 C++ 的旅程由 [Torch 脚本](https://pytorch.org/docs/master/jit.html)启用,它是 PyTorch 模型的一种表示形式,可以由 Torch 脚本编译器理解,编译和序列化。 如果您从使用原始“渴望” API 编写的现有 PyTorch 模型开始,则必须首先将模型转换为 Torch 脚本。 在最常见的情况下(如下所述),只需很少的努力。 如果您已经有了 Torch 脚本模块,则可以跳到本教程的下一部分。 - -有两种将 PyTorch 模型转换为 Torch 脚本的方法。 第一种称为*跟踪*,该机制通过使用示例输入对模型的结构进行一次评估,并记录这些输入在模型中的流量来捕获模型的结构。 这适用于有限使用控制流的模型。 第二种方法是在模型中添加显式注解,以告知 TorchScript 编译器可以根据 Torch Script 语言施加的约束直接解析和编译模型代码。 - -小费 - -您可以在官方 [Torch 脚本参考](https://pytorch.org/docs/master/jit.html)中找到这两种方法的完整文档以及使用方法的进一步指导。 - -### 通过跟踪转换为 Torch 脚本 - -要将 PyTorch 模型通过跟踪转换为 Torch 脚本,必须将模型的实例以及示例输入传递给`torch.jit.trace`函数。 这将产生一个`torch.jit.ScriptModule`对象,并将模型评估的轨迹嵌入到模块的`forward`方法中: - -```py -import torch -import torchvision - -# An instance of your model. -model = torchvision.models.resnet18() - -# An example input you would normally provide to your model's forward() method. -example = torch.rand(1, 3, 224, 224) - -# Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing. 
-traced_script_module = torch.jit.trace(model, example) - -``` - -现在可以像对常规 PyTorch 模块一样,对跟踪得到的`ScriptModule`求值: - -```py -In[1]: output = traced_script_module(torch.ones(1, 3, 224, 224)) -In[2]: output[0, :5] -Out[2]: tensor([-0.2698, -0.0381, 0.4023, -0.3010, -0.0448], grad_fn=<SliceBackward>) - -``` - -### 通过注解转换为 Torch 脚本 - -在某些情况下,例如,如果模型采用特定形式的控制流,则可能需要直接在 Torch 脚本中编写模型并相应地注解模型。 例如,假设您具有以下原始 PyTorch 模型: - -```py -import torch - -class MyModule(torch.nn.Module): - def __init__(self, N, M): - super(MyModule, self).__init__() - self.weight = torch.nn.Parameter(torch.rand(N, M)) - - def forward(self, input): - if input.sum() > 0: - output = self.weight.mv(input) - else: - output = self.weight + input - return output - -``` - -因为此模块的`forward`方法使用取决于输入的控制流,所以它不适合跟踪。 相反,我们可以将其转换为`ScriptModule`。 为了将模块转换为`ScriptModule`,需要使用`torch.jit.script`编译模块,如下所示: - -```py -class MyModule(torch.nn.Module): - def __init__(self, N, M): - super(MyModule, self).__init__() - self.weight = torch.nn.Parameter(torch.rand(N, M)) - - def forward(self, input): - if input.sum() > 0: - output = self.weight.mv(input) - else: - output = self.weight + input - return output - -my_module = MyModule(10,20) -sm = torch.jit.script(my_module) - -``` - -如果`nn.Module`中的某些方法使用了 TorchScript 尚不支持的 Python 特性,需要将它们排除在编译之外,则可以使用`@torch.jit.ignore`注解这些方法。 - -`my_module`是已准备好进行序列化的`ScriptModule`的实例。 - -## 第 2 步:将脚本模块序列化为文件 - -跟踪或注解 PyTorch 模型后,一旦有了`ScriptModule`,就可以将其序列化为文件了。 稍后,您将能够使用 C++ 从此文件加载模块并执行它,而无需依赖 Python。 假设我们要序列化先前在跟踪示例中显示的`ResNet18`模型。 要执行此序列化,只需在模块上调用[`save`](https://pytorch.org/docs/master/jit.html#torch.jit.ScriptModule.save)并为其传递文件名: - -```py -traced_script_module.save("traced_resnet_model.pt") - -``` - -这将在您的工作目录中生成一个`traced_resnet_model.pt`文件。 如果您还想序列化`my_module`,请调用`my_module.save("my_module_model.pt")`。 我们现在已经正式离开 Python 领域,并准备跨入 C++ 领域。 - -## 第 3 步:在 C++ 中加载脚本模块 - -要在 C++ 中加载序列化的 PyTorch 模型,您的应用必须依赖于 PyTorch C++ API,也称为 *LibTorch*。 LibTorch 发行版包含一组共享库、头文件和 CMake 构建配置文件。 虽然 CMake 不是依赖 LibTorch 的必要条件,但它是推荐的方法,将来也会得到良好的支持。 在本教程中,我们将使用 CMake 和 LibTorch 构建一个最小的 C++ 应用,该应用简单地加载并执行序列化的 PyTorch 模型。 - -### 最小的 C++ 应用 - -让我们从讨论加载模块的代码开始。 下面的代码就可以做到这一点: - -```py -#include <torch/script.h> // One-stop header. - -#include <iostream> -#include <memory> - -int main(int argc, const char* argv[]) { - if (argc != 2) { - std::cerr << "usage: example-app <path-to-exported-script-module>\n"; - return -1; - } - - torch::jit::script::Module module; - try { - // Deserialize the ScriptModule from a file using torch::jit::load(). 
- module = torch::jit::load(argv[1]); - } - catch (const c10::Error& e) { - std::cerr << "error loading the model\n"; - return -1; - } - - std::cout << "ok\n"; -} - -``` - -``标头包含了运行示例所需的 LibTorch 库中的所有相关包含。 我们的应用接受序列化的 PyTorch `ScriptModule`的文件路径作为其唯一的命令行参数,然后继续使用`torch::jit::load()`函数对该模块进行反序列化,该函数将该文件路径作为输入。 作为回报,我们收到一个`torch::jit::script::Module`对象。 我们将稍后讨论如何执行它。 - -### 依赖 LibTorch 并构建应用 - -假设我们将以上代码存储到名为`example-app.cpp`的文件中。 最小的`CMakeLists.txt`构建起来看起来很简单: - -```py -cmake_minimum_required(VERSION 3.0 FATAL_ERROR) -project(custom_ops) - -find_package(Torch REQUIRED) - -add_executable(example-app example-app.cpp) -target_link_libraries(example-app "${TORCH_LIBRARIES}") -set_property(TARGET example-app PROPERTY CXX_STANDARD 14) - -``` - -建立示例应用的最后一件事是 LibTorch 发行版。 您可以随时从 PyTorch 网站上的[下载页面](https://pytorch.org/)获取最新的稳定版本。 如果下载并解压缩最新的归档文件,则应该收到具有以下目录结构的文件夹: - -```py -libtorch/ - bin/ - include/ - lib/ - share/ - -``` - -* `lib/`文件夹包含您必须链接的共享库, -* `include/`文件夹包含程序需要包含的头文件, -* `share/`文件夹包含必要的 CMake 配置,以启用上面的简单`find_package(Torch)`命令。 - -小费 - -在 Windows 上,调试和发行版本不兼容 ABI。 如果计划以调试模式构建项目,请尝试使用 LibTorch 的调试版本。 另外,请确保在下面的`cmake --build .`行中指定正确的配置。 - -最后一步是构建应用。 为此,假定示例目录的布局如下: - -```py -example-app/ - CMakeLists.txt - example-app.cpp - -``` - -现在,我们可以运行以下命令从`example-app/`文件夹中构建应用: - -```py -mkdir build -cd build -cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. -cmake --build . --config Release - -``` - -其中`/path/to/libtorch`应该是解压缩的 LibTorch 发行版的完整路径。 如果一切顺利,它将看起来像这样: - -```py -root@4b5a67132e81:/example-app# mkdir build -root@4b5a67132e81:/example-app# cd build -root@4b5a67132e81:/example-app/build# cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. --- The C compiler identification is GNU 5.4.0 --- The CXX compiler identification is GNU 5.4.0 --- Check for working C compiler: /usr/bin/cc --- Check for working C compiler: /usr/bin/cc -- works --- Detecting C compiler ABI info --- Detecting C compiler ABI info - done --- Detecting C compile features --- Detecting C compile features - done --- Check for working CXX compiler: /usr/bin/c++ --- Check for working CXX compiler: /usr/bin/c++ -- works --- Detecting CXX compiler ABI info --- Detecting CXX compiler ABI info - done --- Detecting CXX compile features --- Detecting CXX compile features - done --- Looking for pthread.h --- Looking for pthread.h - found --- Looking for pthread_create --- Looking for pthread_create - not found --- Looking for pthread_create in pthreads --- Looking for pthread_create in pthreads - not found --- Looking for pthread_create in pthread --- Looking for pthread_create in pthread - found --- Found Threads: TRUE --- Configuring done --- Generating done --- Build files have been written to: /example-app/build -root@4b5a67132e81:/example-app/build# make -Scanning dependencies of target example-app -[ 50%] Building CXX object CMakeFiles/example-app.dir/example-app.cpp.o -[100%] Linking CXX executable example-app -[100%] Built target example-app - -``` - -如果我们提供到先前创建的跟踪`ResNet18`模型`traced_resnet_model.pt`到生成的`example-app`二进制文件的路径,则应该以友好的“确定”来回报。 请注意,如果尝试使用`my_module_model.pt`运行此示例,则会收到一条错误消息,提示您输入的形状不兼容。 `my_module_model.pt`期望使用 1D 而不是 4D。 - -```py -root@4b5a67132e81:/example-app/build# ./example-app /traced_resnet_model.pt -ok - -``` - -## 步骤 4:在 C++ 中执行脚本模块 - -在用 C++ 成功加载序列化的`ResNet18`之后,我们现在离执行它仅几行代码了! 让我们将这些行添加到 C++ 应用的`main()`函数中: - -```py -// Create a vector of inputs. -std::vector inputs; -inputs.push_back(torch::ones({1, 3, 224, 224})); - -// Execute the model and turn its output into a tensor. 
-at::Tensor output = module.forward(inputs).toTensor(); -std::cout << output.slice(/*dim=*/1, /*start=*/0, /*end=*/5) << '\n'; - -``` - -前两行设置了模型的输入。 我们创建一个`torch::jit::IValue`的向量(类型擦除的值类型`script::Module`方法接受并返回),并添加单个输入。 要创建输入张量,我们使用`torch::ones()`,等效于 C++ API 中的`torch.ones`。 然后,我们运行`script::Module`的`forward`方法,并将其传递给我们创建的输入向量。 作为回报,我们得到了一个新的`IValue`,我们可以通过调用`toTensor()`将其转换为张量。 - -小费 - -要总体上了解有关`torch::ones`和 PyTorch C++ API 之类的功能的更多信息,请参阅[这个页面](https://pytorch.org/cppdocs)上的文档。 PyTorch C++ API 提供了与 Python API 几乎相同的功能,使您可以像在 Python 中一样进一步操纵和处理张量。 - -在最后一行,我们打印输出的前五个条目。 由于在本教程前面的部分中,我们为 Python 中的模型提供了相同的输入,因此理想情况下,我们应该看到相同的输出。 让我们通过重新编译我们的应用并以相同的序列化模型运行它来进行尝试: - -```py -root@4b5a67132e81:/example-app/build# make -Scanning dependencies of target example-app -[ 50%] Building CXX object CMakeFiles/example-app.dir/example-app.cpp.o -[100%] Linking CXX executable example-app -[100%] Built target example-app -root@4b5a67132e81:/example-app/build# ./example-app traced_resnet_model.pt --0.2698 -0.0381 0.4023 -0.3010 -0.0448 -[ Variable[CPUFloatType]{1,5} ] - -``` - -作为参考,Python 以前的输出为: - -```py -tensor([-0.2698, -0.0381, 0.4023, -0.3010, -0.0448], grad_fn=) - -``` - -看起来很不错! - -小费 - -要将模型移至 GPU 内存,可以编写`model.to(at::kCUDA);`。 通过调用`tensor.to(at::kCUDA)`来确保模型的输入也位于 CUDA 内存中,这将在 CUDA 内存中返回新的张量。 - -## 第 5 步:获得帮助并探索 API - -本教程有望使您对 PyTorch 模型从 Python 到 C++ 的路径有一个大致的了解。 利用本教程中介绍的概念,您应该能够从原始的“急切的” PyTorch 模型,到 Python 中的已编译`ScriptModule`,再到磁盘上的序列化文件,以及–结束循环–到可执行文件`script::Module`在 C++ 中。 - -当然,有许多我们没有介绍的概念。 例如,您可能会发现自己想要扩展使用 C++ 或 CUDA 实现的自定义运算符来扩展`ScriptModule`,并希望在纯 C++ 生产环境中加载的`ScriptModule`内执行该自定义运算符。 好消息是:这是可能的,并且得到了很好的支持! 现在,您可以浏览[这个](https://github.com/pytorch/pytorch/tree/master/test/custom_operator)文件夹作为示例,我们将很快提供一个教程。 目前,以下链接通常可能会有所帮助: - -* [Torch 脚本参考](https://pytorch.org/docs/master/jit.html) -* [PyTorch C++ API 文档](https://pytorch.org/cppdocs/) -* [PyTorch Python API 文档](https://pytorch.org/docs/) - -与往常一样,如果您遇到任何问题或疑问,可以使用我们的[论坛](https://discuss.pytorch.org/)或 [GitHub ISSUE](https://github.com/pytorch/pytorch/issues) 进行联系。 \ No newline at end of file diff --git a/pytorch/官方教程/60.md b/pytorch/官方教程/40 Pytorch分布式概述.md similarity index 100% rename from pytorch/官方教程/60.md rename to pytorch/官方教程/40 Pytorch分布式概述.md diff --git a/pytorch/官方教程/40.md b/pytorch/官方教程/40.md deleted file mode 100644 index c0c8e0f6..00000000 --- a/pytorch/官方教程/40.md +++ /dev/null @@ -1,219 +0,0 @@ -# 将模型从 PyTorch 导出到 ONNX 并使用 ONNX 运行时运行它(可选) - -> 原文: - -在本教程中,我们描述了如何将 PyTorch 中定义的模型转换为 ONNX 格式,然后在 ONNX 运行时中运行它。 - -ONNX 运行时是针对 ONNX 模型的以性能为中心的引擎,可在多个平台和硬件(Windows,Linux 和 Mac 以及 CPU 和 GPU 上)高效地进行推理。 事实证明,如[此处](https://cloudblogs.microsoft.com/opensource/2019/05/22/onnx-runtime-machine-learning-inferencing-0-4-release)所述,ONNX 运行时大大提高了多个模型的性能。 - -对于本教程,您将需要安装 [ONNX](https://github.com/onnx/onnx) 和 [ONNX 运行时](https://github.com/microsoft/onnxruntime)。 您可以使用`pip install onnx onnxruntime`获得 ONNX 和 ONNX 运行时的二进制版本。 请注意,ONNX 运行时与 Python 3.5 至 3.7 版本兼容。 - -`NOTE`:本教程需要 PyTorch `master`分支,[可以按照此处的说明进行安装](https://github.com/pytorch/pytorch#from-source) - -```py -# Some standard imports -import io -import numpy as np - -from torch import nn -import torch.utils.model_zoo as model_zoo -import torch.onnx - -``` - -超分辨率是一种提高图像,视频分辨率的方法,广泛用于图像处理或视频编辑中。 在本教程中,我们将使用一个小的超分辨率模型。 - -首先,让我们在 PyTorch 中创建一个`SuperResolution`模型。 该模型使用了[《使用高效的子像素卷积神经网络的实时单幅图像和视频超分辨率》](https://arxiv.org/abs/1609.05158)(Shi 等人)中所述的高效子像素卷积层来提高图像的分辨率受向上缩放因子的影响。 该模型期望图像的 YCbCr 的 Y 分量作为输入,并以超分辨率输出放大的 Y 分量。 - 
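该方法的核心是高效的子像素卷积层,其关键操作 `PixelShuffle` 会把形状为 `[N, C*r*r, H, W]` 的张量重排为 `[N, C, H*r, W*r]`,用通道维度换取空间分辨率。下面是一个最小示意(这里假设放大因子 `r = 3`,与后文创建的模型一致):

```py
import torch
import torch.nn as nn

pixel_shuffle = nn.PixelShuffle(3)   # upscale factor r = 3
x = torch.randn(1, 9, 224, 224)      # [N, C*r*r, H, W],其中 C = 1
print(pixel_shuffle(x).shape)        # torch.Size([1, 1, 672, 672])
```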
-[模型](https://github.com/pytorch/examples/blob/master/super_resolution/model.py)直接来自 PyTorch 的示例,未经修改: - -```py -# Super Resolution model definition in PyTorch -import torch.nn as nn -import torch.nn.init as init - -class SuperResolutionNet(nn.Module): - def __init__(self, upscale_factor, inplace=False): - super(SuperResolutionNet, self).__init__() - - self.relu = nn.ReLU(inplace=inplace) - self.conv1 = nn.Conv2d(1, 64, (5, 5), (1, 1), (2, 2)) - self.conv2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1)) - self.conv3 = nn.Conv2d(64, 32, (3, 3), (1, 1), (1, 1)) - self.conv4 = nn.Conv2d(32, upscale_factor ** 2, (3, 3), (1, 1), (1, 1)) - self.pixel_shuffle = nn.PixelShuffle(upscale_factor) - - self._initialize_weights() - - def forward(self, x): - x = self.relu(self.conv1(x)) - x = self.relu(self.conv2(x)) - x = self.relu(self.conv3(x)) - x = self.pixel_shuffle(self.conv4(x)) - return x - - def _initialize_weights(self): - init.orthogonal_(self.conv1.weight, init.calculate_gain('relu')) - init.orthogonal_(self.conv2.weight, init.calculate_gain('relu')) - init.orthogonal_(self.conv3.weight, init.calculate_gain('relu')) - init.orthogonal_(self.conv4.weight) - -# Create the super-resolution model by using the above model definition. -torch_model = SuperResolutionNet(upscale_factor=3) - -``` - -通常,您现在将训练此模型。 但是,在本教程中,我们将下载一些预训练的权重。 请注意,此模型未经过充分训练以提供良好的准确率,此处仅用于演示目的。 - -在导出模型之前,请先调用`torch_model.eval()`或`torch_model.train(False)`,以将模型转换为推理模式,这一点很重要。 这是必需的,因为像`dropout`或`batchnorm`这样的运算符在推断和训练模式下的行为会有所不同。 - -```py -# Load pretrained model weights -model_url = 'https://s3.amazonaws.com/pytorch/test_data/export/superres_epoch100-44c6958e.pth' -batch_size = 1 # just a random number - -# Initialize model with the pretrained weights -map_location = lambda storage, loc: storage -if torch.cuda.is_available(): - map_location = None -torch_model.load_state_dict(model_zoo.load_url(model_url, map_location=map_location)) - -# set the model to inference mode -torch_model.eval() - -``` - -在 PyTorch 中导出模型是通过跟踪或脚本编写的。 本教程将以通过跟踪导出的模型为例。 要导出模型,我们调用`torch.onnx.export()`函数。 这将执行模型,并记录使用什么运算符计算输出的轨迹。 因为`export`运行模型,所以我们需要提供输入张量`x`。 只要是正确的类型和大小,其中的值就可以是随机的。 请注意,除非指定为动态轴,否则输入尺寸将在导出的 ONNX 图中固定为所有输入尺寸。 在此示例中,我们使用输入`batch_size 1`导出模型,但随后在`torch.onnx.export()`的`dynamic_axes`参数中将第一维指定为动态。 因此,导出的模型将接受大小为`[batch_size, 1, 224, 224]`的输入,其中`batch_size`可以是可变的。 - -要了解有关 PyTorch 导出接口的更多详细信息,请查看[`torch.onnx`文档](https://pytorch.org/docs/master/onnx.html)。 - -```py -# Input to the model -x = torch.randn(batch_size, 1, 224, 224, requires_grad=True) -torch_out = torch_model(x) - -# Export the model -torch.onnx.export(torch_model, # model being run - x, # model input (or a tuple for multiple inputs) - "super_resolution.onnx", # where to save the model (can be a file or file-like object) - export_params=True, # store the trained parameter weights inside the model file - opset_version=10, # the ONNX version to export the model to - do_constant_folding=True, # whether to execute constant folding for optimization - input_names = ['input'], # the model's input names - output_names = ['output'], # the model's output names - dynamic_axes={'input' : {0 : 'batch_size'}, # variable lenght axes - 'output' : {0 : 'batch_size'}}) - -``` - -我们还计算了`torch_out`(模型之后的输出),我们将用来验证导出的模型在 ONNX 运行时中运行时是否计算出相同的值。 - -但是,在通过 ONNX 运行时验证模型的输出之前,我们将使用 ONNX 的 API 检查 ONNX 模型。 首先,`onnx.load("super_resolution.onnx")`将加载保存的模型并输出`onnx.ModelProto`结构(用于捆绑 ML 模型的顶级文件/容器格式。有关更多信息,请参见[`onnx.proto`文档](https://github.com/onnx/onnx/blob/master/onnx/onnx.proto)。 
然后,`onnx.checker.check_model(onnx_model)`将验证模型的结构并确认模型具有有效的架构。 通过检查模型的版本,图的结构以及节点及其输入和输出,可以验证 ONNX 图的有效性。 - -```py -import onnx - -onnx_model = onnx.load("super_resolution.onnx") -onnx.checker.check_model(onnx_model) - -``` - -现在,我们使用 ONNX 运行时的 Python API 计算输出。 这部分通常可以在单独的过程中或在另一台机器上完成,但是我们将继续同一过程,以便我们可以验证 ONNX 运行时和 PyTorch 正在为网络计算相同的值。 - -为了使用 ONNX 运行时运行模型,我们需要使用所选的配置参数为模型创建一个推理会话(此处使用默认配置)。 创建会话后,我们将使用`run()` API 评估模型。 此调用的输出是一个列表,其中包含由 ONNX 运行时计算的模型的输出。 - -```py -import onnxruntime - -ort_session = onnxruntime.InferenceSession("super_resolution.onnx") - -def to_numpy(tensor): - return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy() - -# compute ONNX Runtime output prediction -ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)} -ort_outs = ort_session.run(None, ort_inputs) - -# compare ONNX Runtime and PyTorch results -np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05) - -print("Exported model has been tested with ONNXRuntime, and the result looks good!") - -``` - -我们应该看到 PyTorch 和 ONNX 运行时的输出在数值上与给定的精度匹配(`rtol = 1e-03`和`atol = 1e-05`)。 附带说明一下,如果它们不匹配,则说明 ONNX 导出器中存在问题,因此请与我们联系。 - -## 使用 ONNX 运行时在图像上运行模型 - -到目前为止,我们已经从 PyTorch 导出了一个模型,并演示了如何使用虚拟张量作为输入在 ONNX 运行时中加载和运行该模型。 - -在本教程中,我们将使用广泛使用的著名猫图像,如下图所示 - -![cat](img/35d54d0c48ca1c52d56850a202a2c160.png) - -首先,让我们加载图片,然后使用标准的 PIL python 库对其进行预处理。 请注意,此预处理是处理数据以训练/测试神经网络的标准做法。 - -我们首先调整图像大小以适合模型输入的大小(`224x224`)。 然后,我们将图像分为 Y,Cb 和 Cr 分量。 这些分量代表灰度图像(Y),以及蓝差(Cb)和红差(Cr)色度分量。 Y 分量对人眼更敏感,我们对将要转换的这个分量很感兴趣。 提取 Y 分量后,我们将其转换为张量,这将是模型的输入。 - -```py -from PIL import Image -import torchvision.transforms as transforms - -img = Image.open("./_static/img/cat.jpg") - -resize = transforms.Resize([224, 224]) -img = resize(img) - -img_ycbcr = img.convert('YCbCr') -img_y, img_cb, img_cr = img_ycbcr.split() - -to_tensor = transforms.ToTensor() -img_y = to_tensor(img_y) -img_y.unsqueeze_(0) - -``` - -现在,作为下一步,让我们使用代表灰度尺寸调整后的猫图像的张量,并按照先前的说明在 ONNX 运行时中运行超分辨率模型。 - -```py -ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(img_y)} -ort_outs = ort_session.run(None, ort_inputs) -img_out_y = ort_outs[0] - -``` - -此时,模型的输出为张量。 现在,我们将处理模型的输出,以根据输出张量构造最终的输出图像,并保存图像。 采用了来自此处的[超分辨率模型的 PyTorch 实现](https://github.com/pytorch/examples/blob/master/super_resolution/super_resolve.py)的后处理步骤。 - -```py -img_out_y = Image.fromarray(np.uint8((img_out_y[0] * 255.0).clip(0, 255)[0]), mode='L') - -# get the output image follow post-processing step from PyTorch implementation -final_img = Image.merge( - "YCbCr", [ - img_out_y, - img_cb.resize(img_out_y.size, Image.BICUBIC), - img_cr.resize(img_out_y.size, Image.BICUBIC), - ]).convert("RGB") - -# Save the image, we will compare this with the output image from mobile device -final_img.save("./_static/img/cat_superres_with_ort.jpg") - -``` - -![output\_cat](img/efb29904552d032a076d8512d4e60b95.png) - -ONNX 运行时是跨平台引擎,您可以在多个平台上以及在 CPU 和 GPU 上运行它。 - -还可以使用 Azure 机器学习服务将 ONNX 运行时部署到云中以进行模型推断。 更多信息在[此处](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-onnx)。 - -[在这里了解有关 ONNX 运行时性能的更多信息](https://github.com/microsoft/onnxruntime#high-performance)。 - -有关 ONNX 运行时的更多信息,[请点击这里](https://github.com/microsoft/onnxruntime)。 - -**脚本的总运行时间**:(0 分钟 0.000 秒) - -[下载 Python 源码:`super_resolution_with_onnxruntime.py`](../_downloads/58ce6e85b9b9e9647d302d6b48feccb0/super_resolution_with_onnxruntime.py) - -[下载 Jupyter 笔记本:`super_resolution_with_onnxruntime.ipynb`](../_downloads/8c7f0be1e1c3803fcb4c41bcd9f4226b/super_resolution_with_onnxruntime.ipynb) - -[由 Sphinx 
画廊](https://sphinx-gallery.readthedocs.io)生成的画廊 \ No newline at end of file diff --git a/pytorch/官方教程/61.md b/pytorch/官方教程/41 单机模型并行.md similarity index 100% rename from pytorch/官方教程/61.md rename to pytorch/官方教程/41 单机模型并行.md diff --git a/pytorch/官方教程/41.md b/pytorch/官方教程/41.md deleted file mode 100644 index 4f909d3e..00000000 --- a/pytorch/官方教程/41.md +++ /dev/null @@ -1 +0,0 @@ -# 前端 API \ No newline at end of file diff --git a/pytorch/官方教程/62.md b/pytorch/官方教程/42 分布式数据并行.md similarity index 100% rename from pytorch/官方教程/62.md rename to pytorch/官方教程/42 分布式数据并行.md diff --git a/pytorch/官方教程/42.md b/pytorch/官方教程/42.md deleted file mode 100644 index 64d1de4f..00000000 --- a/pytorch/官方教程/42.md +++ /dev/null @@ -1,607 +0,0 @@ -# PyTorch 中的命名张量简介(原型) - -> 原文: - -**作者**: [Richard Zou](https://github.com/zou3519) - -命名张量旨在通过允许用户将显式名称与张量维度相关联来使张量更易于使用。 在大多数情况下,采用尺寸参数的操作将接受尺寸名称,而无需按位置跟踪尺寸。 此外,命名张量使用名称来自动检查运行时是否正确使用了 API,从而提供了额外的安全性。 名称也可以用于重新排列尺寸,例如,支持“按名称广播”而不是“按位置广播”。 - -本教程旨在作为 1.3 启动中将包含的功能的指南。 到最后,您将能够: - -* 创建具有命名尺寸的张量,以及删除或重命名这些尺寸 -* 了解操作如何传播维度名称的基础 -* 了解命名尺寸如何在两个关键区域实现更清晰的代码: - - * 广播操作 - * 重塑和展开尺寸 - -最后,我们将通过使用命名张量编写一个多头注意力模块来将其付诸实践。 - -PyTorch 中的命名张量受 [Sasha Rush](https://tech.cornell.edu/people/alexander-rush/) 的启发并与之合作。 Sasha 在他的 [2019 年 1 月博客文章](http://nlp.seas.harvard.edu/NamedTensor)中提出了最初的想法和概念证明。 - -## 基础知识:命名维度 - -PyTorch 现在允许张量具有命名维度; 工厂函数采用新的名称参数,该参数将名称与每个维度相关联。 这适用于大多数工厂函数,例如 - -* `tensor` -* `empty` -* `ones` -* `zeros` -* `randn` -* `rand` - -这里我们用名字构造一个张量: - -```py -import torch -imgs = torch.randn(1, 2, 2, 3, names=('N', 'C', 'H', 'W')) -print(imgs.names) - -``` - -出: - -```py -('N', 'C', 'H', 'W') - -``` - -与[命名张量的原始博客文章](http://nlp.seas.harvard.edu/NamedTensor)不同,命名维度是有序的:`tensor.names[i]`是`tensor`的第`i`个维度的名称。 - -重命名`Tensor`尺寸的方法有两种: - -```py -# Method #1: set the .names attribute (this changes name in-place) -imgs.names = ['batch', 'channel', 'width', 'height'] -print(imgs.names) - -# Method #2: specify new names (this changes names out-of-place) -imgs = imgs.rename(channel='C', width='W', height='H') -print(imgs.names) - -``` - -出: - -```py -('batch', 'channel', 'width', 'height') -('batch', 'C', 'W', 'H') - -``` - -删除名称的首选方法是调用`tensor.rename(None)`: - -```py -imgs = imgs.rename(None) -print(imgs.names) - -``` - -出: - -```py -(None, None, None, None) - -``` - -未命名的张量(没有命名尺寸的张量)仍然可以正常工作,并且在其`repr`中没有名称。 - -```py -unnamed = torch.randn(2, 1, 3) -print(unnamed) -print(unnamed.names) - -``` - -出: - -```py -tensor([[[-0.7420, -0.3646, 0.1424]], - - [[-0.6065, -1.4888, 0.2935]]]) -(None, None, None) - -``` - -命名张量不需要命名所有尺寸。 - -```py -imgs = torch.randn(3, 1, 1, 2, names=('N', None, None, None)) -print(imgs.names) - -``` - -出: - -```py -('N', None, None, None) - -``` - -由于命名张量可以与未命名张量共存,因此我们需要一种不错的方式来编写可识别命名张量的代码,该代码可用于命名张量和未命名张量。 使用`tensor.refine_names(*names)`优化尺寸并将未命名的暗淡提升为已命名的暗淡。 细化维度定义为“重命名”,并具有以下限制: - -* 可以将`None`暗号细化为任何名称 -* 命名的维度只能精简为具有相同的名称。 - -```py -imgs = torch.randn(3, 1, 1, 2) -named_imgs = imgs.refine_names('N', 'C', 'H', 'W') -print(named_imgs.names) - -# Refine the last two dims to 'H' and 'W'. In Python 2, use the string '...' -# instead of ... -named_imgs = imgs.refine_names(..., 'H', 'W') -print(named_imgs.names) - -def catch_error(fn): - try: - fn() - assert False - except RuntimeError as err: - err = str(err) - if len(err) > 180: - err = err[:180] + "..." 
- print(err) - -named_imgs = imgs.refine_names('N', 'C', 'H', 'W') - -# Tried to refine an existing name to a different name -catch_error(lambda: named_imgs.refine_names('N', 'C', 'H', 'width')) - -``` - -出: - -```py -('N', 'C', 'H', 'W') -(None, None, 'H', 'W') -refine_names: cannot coerce Tensor['N', 'C', 'H', 'W'] to Tensor['N', 'C', 'H', 'width'] because 'W' is different from 'width' at index 3 - -``` - -大多数简单的操作都会传播名称。 命名张量的最终目标是所有操作以合理,直观的方式传播名称。 在 1.3 版本发布时,已添加了对许多常用操作的支持。 例如,这里是`.abs()`: - -```py -print(named_imgs.abs().names) - -``` - -出: - -```py -('N', 'C', 'H', 'W') - -``` - -### 访问器和归约 - -可以使用尺寸名称来引用尺寸而不是位置尺寸。 这些操作还传播名称。 索引(基本索引和高级索引)尚未实现,但仍在规划中。 使用上面的`named_imgs`张量,我们可以执行以下操作: - -```py -output = named_imgs.sum('C') # Perform a sum over the channel dimension -print(output.names) - -img0 = named_imgs.select('N', 0) # get one image -print(img0.names) - -``` - -出: - -```py -('N', 'H', 'W') -('C', 'H', 'W') - -``` - -### 名称推断 - -名称在称为**名称推断**的两步过程中在操作上传播: - -1. **检查名称**:运算符可以在运行时执行自动检查,以检查某些尺寸名称是否匹配。 -2. **传播名称**:名称推断将输出名称传播到输出张量。 - -让我们看一个非常小的例子,添加 2 个一维张量,不进行广播。 - -```py -x = torch.randn(3, names=('X',)) -y = torch.randn(3) -z = torch.randn(3, names=('Z',)) - -``` - -**检查名称**:首先,我们将检查这两个张量的名称是否相匹配。 当且仅当两个名称相等(字符串相等)或至少一个为`None`(`None`本质上是一个特殊的通配符名称)时,两个名称才匹配。 因此,这三者中唯一会出错的是`x + z`: - -```py -catch_error(lambda: x + z) - -``` - -出: - -```py -Error when attempting to broadcast dims ['X'] and dims ['Z']: dim 'X' and dim 'Z' are at the same position from the right but do not match. - -``` - -**传播名称**:通过返回两个名称中最精确的名称来统一这两个名称。 使用`x + y`时,`X`比`None`更精细。 - -```py -print((x + y).names) - -``` - -出: - -```py -('X',) - -``` - -大多数名称推断规则都很简单明了,但是其中一些可能具有意想不到的语义。 让我们来看看您可能会遇到的一对:广播和矩阵乘法。 - -#### 广播 - -命名张量不会改变广播行为; 他们仍然按位置广播。 但是,在检查两个尺寸是否可以广播时,PyTorch 还会检查这些尺寸的名称是否匹配。 - -这导致命名张量防止广播操作期间意外对齐。 在下面的示例中,我们将`per_batch_scale`应用于`imgs`。 - -```py -imgs = torch.randn(2, 2, 2, 2, names=('N', 'C', 'H', 'W')) -per_batch_scale = torch.rand(2, names=('N',)) -catch_error(lambda: imgs * per_batch_scale) - -``` - -出: - -```py -Error when attempting to broadcast dims ['N', 'C', 'H', 'W'] and dims ['N']: dim 'W' and dim 'N' are at the same position from the right but do not match. 
- -``` - -如果没有`names`,则`per_batch_scale`张量与`imgs`的最后一个尺寸对齐,这不是我们想要的。 我们确实想通过将`per_batch_scale`与`imgs`的批量尺寸对齐来执行操作。 有关如何按名称对齐张量的信息,请参见新的“按名称显式广播”功能,如下所述。 - -#### 矩阵乘法 - -`torch.mm(A, B)`在`A`的第二个暗角和`B`的第一个暗角之间执行点积,返回具有`A`的第一个暗角和`B`的第二个暗角的张量。 (其他`matmul`函数,例如`torch.matmul`,`torch.mv`和`torch.dot`的行为类似)。 - -```py -markov_states = torch.randn(128, 5, names=('batch', 'D')) -transition_matrix = torch.randn(5, 5, names=('in', 'out')) - -# Apply one transition -new_state = markov_states @ transition_matrix -print(new_state.names) - -``` - -出: - -```py -('batch', 'out') - -``` - -如您所见,矩阵乘法不会检查收缩尺寸是否具有相同的名称。 - -接下来,我们将介绍命名张量启用的两个新行为:按名称的显式广播以及按名称的展平和展平尺寸 - -### 新行为:按名称显式广播 - -有关使用多个维度的主要抱怨之一是需要`unsqueeze`“虚拟”维度,以便可以进行操作。 例如,在之前的每批比例示例中,使用未命名的张量,我们将执行以下操作: - -```py -imgs = torch.randn(2, 2, 2, 2) # N, C, H, W -per_batch_scale = torch.rand(2) # N - -correct_result = imgs * per_batch_scale.view(2, 1, 1, 1) # N, C, H, W -incorrect_result = imgs * per_batch_scale.expand_as(imgs) -assert not torch.allclose(correct_result, incorrect_result) - -``` - -通过使用名称,我们可以使这些操作更安全(并且易于与尺寸数量无关)。 我们提供了一个新的`tensor.align_as(other)`操作,可以对张量的尺寸进行排列以匹配`other.names`中指定的顺序,并在适当的地方添加一个尺寸的尺寸(`tensor.align_to(*names)`也可以): - -```py -imgs = imgs.refine_names('N', 'C', 'H', 'W') -per_batch_scale = per_batch_scale.refine_names('N') - -named_result = imgs * per_batch_scale.align_as(imgs) -# note: named tensors do not yet work with allclose -assert torch.allclose(named_result.rename(None), correct_result) - -``` - -### 新行为:按名称展平或取消展平维度 - -一种常见的操作是展平和展平尺寸。 现在,用户可以使用`view`,`reshape`或`flatten`来执行此操作; 用例包括将批量尺寸展平以将张量发送到必须采用一定数量尺寸的输入的运算符(即`conv2d`采用 4D 输入)。 - -为了使这些操作比查看或整形更具语义意义,我们引入了一种新的`tensor.unflatten(dim, namedshape)`方法并更新`flatten`以使用名称:`tensor.flatten(dims, new_dim)`。 - -`flatten`只能展平相邻的尺寸,但也可以用于不连续的维度。 必须将名称和形状传递到`unflatten`中,该形状是`(dim, size)`元组的列表,以指定如何展开维度。 可以在`flatten`期间保存`unflatten`的尺寸,但我们尚未这样做。 - -```py -imgs = imgs.flatten(['C', 'H', 'W'], 'features') -print(imgs.names) - -imgs = imgs.unflatten('features', (('C', 2), ('H', 2), ('W', 2))) -print(imgs.names) - -``` - -出: - -```py -('N', 'features') -('N', 'C', 'H', 'W') - -``` - -### Autograd 支持 - -Autograd 当前会忽略所有张量上的名称,只是将它们视为常规张量。 梯度计算是正确的,但是我们失去了名称赋予我们的安全性。 在路线图上引入名称以自动微分的处理。 - -```py -x = torch.randn(3, names=('D',)) -weight = torch.randn(3, names=('D',), requires_grad=True) -loss = (x - weight).abs() -grad_loss = torch.randn(3) -loss.backward(grad_loss) - -correct_grad = weight.grad.clone() -print(correct_grad) # Unnamed for now. Will be named in the future - -weight.grad.zero_() -grad_loss = grad_loss.refine_names('C') -loss = (x - weight).abs() -# Ideally we'd check that the names of loss and grad_loss match, but we don't -# yet -loss.backward(grad_loss) - -print(weight.grad) # still unnamed -assert torch.allclose(weight.grad, correct_grad) - -``` - -出: - -```py -tensor([0.5398, 0.7907, 0.7784]) -tensor([0.5398, 0.7907, 0.7784]) - -``` - -### 其他受支持的(和不受支持的)功能 - -[有关 1.3 发行版支持的功能的详细分类,请参见此处](https://pytorch.org/docs/stable/named_tensor.html)。 - -特别是,我们要指出当前不支持的三个重要函数: - -* 通过`torch.save`或`torch.load`保存或加载命名张量 -* 通过`torch.multiprocessing`进行多重处理 -* JIT 支持; 例如,以下将错误 - -```py -imgs_named = torch.randn(1, 2, 2, 3, names=('N', 'C', 'H', 'W')) - -@torch.jit.script -def fn(x): - return x - -catch_error(lambda: fn(imgs_named)) - -``` - -出: - -```py -NYI: Named tensors are currently unsupported in TorchScript. As a workaround please drop names via `tensor = tensor.rename(None)`. 
- -``` - -解决方法是,在使用尚不支持命名张量的任何东西之前,请通过`tensor = tensor.rename(None)`删除名称。 - -### 更长的例子:多头关注 - -现在,我们将通过一个完整的示例来实现一个常见的 PyTorch `nn.Module`:多头注意。 我们假设读者已经熟悉多头注意; 要进行复习,请查看[此说明](https://nlp.seas.harvard.edu/2018/04/03/attention.html)或[此说明](http://jalammar.github.io/illustrated-transformer/)。 - -我们采用 [ParlAI](https://github.com/facebookresearch/ParlAI) 来实现多头注意力的实现; 具体来说[此处](https://github.com/facebookresearch/ParlAI/blob/f7db35cba3f3faf6097b3e6b208442cd564783d9/parlai/agents/transformer/modules.py#L907)。 阅读该示例中的代码; 然后,与下面的代码进行比较,注意有四个标记为(I),(II),(III)和(IV)的位置,使用命名张量可以使代码更易读; 在代码块之后,我们将深入探讨其中的每一个。 - -```py -import torch.nn as nn -import torch.nn.functional as F -import math - -class MultiHeadAttention(nn.Module): - def __init__(self, n_heads, dim, dropout=0): - super(MultiHeadAttention, self).__init__() - self.n_heads = n_heads - self.dim = dim - - self.attn_dropout = nn.Dropout(p=dropout) - self.q_lin = nn.Linear(dim, dim) - self.k_lin = nn.Linear(dim, dim) - self.v_lin = nn.Linear(dim, dim) - nn.init.xavier_normal_(self.q_lin.weight) - nn.init.xavier_normal_(self.k_lin.weight) - nn.init.xavier_normal_(self.v_lin.weight) - self.out_lin = nn.Linear(dim, dim) - nn.init.xavier_normal_(self.out_lin.weight) - - def forward(self, query, key=None, value=None, mask=None): - # (I) - query = query.refine_names(..., 'T', 'D') - self_attn = key is None and value is None - if self_attn: - mask = mask.refine_names(..., 'T') - else: - mask = mask.refine_names(..., 'T', 'T_key') # enc attn - - dim = query.size('D') - assert dim == self.dim, \ - f'Dimensions do not match: {dim} query vs {self.dim} configured' - assert mask is not None, 'Mask is None, please specify a mask' - n_heads = self.n_heads - dim_per_head = dim // n_heads - scale = math.sqrt(dim_per_head) - - # (II) - def prepare_head(tensor): - tensor = tensor.refine_names(..., 'T', 'D') - return (tensor.unflatten('D', [('H', n_heads), ('D_head', dim_per_head)]) - .align_to(..., 'H', 'T', 'D_head')) - - assert value is None - if self_attn: - key = value = query - elif value is None: - # key and value are the same, but query differs - key = key.refine_names(..., 'T', 'D') - value = key - dim = key.size('D') - - # Distinguish between query_len (T) and key_len (T_key) dims. 
- k = prepare_head(self.k_lin(key)).rename(T='T_key') - v = prepare_head(self.v_lin(value)).rename(T='T_key') - q = prepare_head(self.q_lin(query)) - - dot_prod = q.div_(scale).matmul(k.align_to(..., 'D_head', 'T_key')) - dot_prod.refine_names(..., 'H', 'T', 'T_key') # just a check - - # (III) - attn_mask = (mask == 0).align_as(dot_prod) - dot_prod.masked_fill_(attn_mask, -float(1e20)) - - attn_weights = self.attn_dropout(F.softmax(dot_prod / scale, - dim='T_key')) - - # (IV) - attentioned = ( - attn_weights.matmul(v).refine_names(..., 'H', 'T', 'D_head') - .align_to(..., 'T', 'H', 'D_head') - .flatten(['H', 'D_head'], 'D') - ) - - return self.out_lin(attentioned).refine_names(..., 'T', 'D') - -``` - -(I)细化输入张量维度 - -```py -def forward(self, query, key=None, value=None, mask=None): - # (I) - query = query.refine_names(..., 'T', 'D') - -``` - -`query = query.refine_names(..., 'T', 'D')`用作可执行的文档,并将输入尺寸提升为名称。 它检查最后两个维度是否可以调整为`['T', 'D']`,以防止在以后出现潜在的无声或混乱的尺寸不匹配错误。 - -(II)在`prepare_head`中操纵尺寸 - -```py -# (II) -def prepare_head(tensor): - tensor = tensor.refine_names(..., 'T', 'D') - return (tensor.unflatten('D', [('H', n_heads), ('D_head', dim_per_head)]) - .align_to(..., 'H', 'T', 'D_head')) - -``` - -首先要注意的是代码如何清楚地说明输入和输出尺寸:输入张量必须以`T`和`D`变暗结束,输出张量应以`H`,`T`和`D_head`维度结束。 - -要注意的第二件事是代码清楚地描述了正在发生的事情。 `prepare_head`获取键,查询和值,并将嵌入的维度拆分为多个头部,最后将维度顺序重新排列为`[..., 'H', 'T', 'D_head']`。 ParlAI 使用`view`和`transpose`操作实现以下`prepare_head`: - -```py -def prepare_head(tensor): - # input is [batch_size, seq_len, n_heads * dim_per_head] - # output is [batch_size * n_heads, seq_len, dim_per_head] - batch_size, seq_len, _ = tensor.size() - tensor = tensor.view(batch_size, tensor.size(1), n_heads, dim_per_head) - tensor = ( - tensor.transpose(1, 2) - .contiguous() - .view(batch_size * n_heads, seq_len, dim_per_head) - ) - return tensor - -``` - -我们命名的张量变量使用的操作虽然较为冗长,但比`view`和`transpose`具有更多的语义含义,并包含以名称形式出现的可执行文档。 - -(III)按名称显式广播 - -```py -def ignore(): - # (III) - attn_mask = (mask == 0).align_as(dot_prod) - dot_prod.masked_fill_(attn_mask, -float(1e20)) - -``` - -`mask`通常具有暗淡`[N, T]`(在自我关注的情况下)或`[N, T, T_key]`(对于编码器注意的情况),而`dot_prod`具有暗淡的`[N, H, T, T_key]`。 为了使`mask`与`dot_prod`正确广播,我们通常会在自注意的情况下将的调暗`1`和`-1`压下,在编码器的情况下,我们将`unsqueeze`调暗`unsqueeze` 。 使用命名张量,我们只需使用`align_as`将`attn_mask`与`dot_prod`对齐,而不必担心`unsqueeze`变暗的位置。 - -(IV)使用`align_to`和`flatten`进行更多尺寸操作 - -```py -def ignore(): - # (IV) - attentioned = ( - attn_weights.matmul(v).refine_names(..., 'H', 'T', 'D_head') - .align_to(..., 'T', 'H', 'D_head') - .flatten(['H', 'D_head'], 'D') - ) - -``` - -在这里,与(II)一样,`align_to`和`flatten`在语义上比`view`和`transpose`更有意义(尽管更冗长)。 - -### 运行示例 - -```py -n, t, d, h = 7, 5, 2 * 3, 3 -query = torch.randn(n, t, d, names=('N', 'T', 'D')) -mask = torch.ones(n, t, names=('N', 'T')) -attn = MultiHeadAttention(h, d) -output = attn(query, mask=mask) -# works as expected! -print(output.names) - -``` - -出: - -```py -('N', 'T', 'D') - -``` - -以上工作正常。 此外,请注意,在代码中我们根本没有提到批量维度的名称。 实际上,我们的`MultiHeadAttention`模块与批量尺寸的存在无关。 - -```py -query = torch.randn(t, d, names=('T', 'D')) -mask = torch.ones(t, names=('T',)) -output = attn(query, mask=mask) -print(output.names) - -``` - -出: - -```py -('T', 'D') - -``` - -### 总结 - -感谢您的阅读! 
命名张量仍在发展中。 如果您有反馈和/或改进建议,请通过创建 [ISSUE](https://github.com/pytorch/pytorch/issues) 来通知我们。 - -**脚本的总运行时间**:(0 分钟 0.094 秒) - -[下载 Python 源码:`named_tensor_tutorial.py`](../_downloads/1e94d0ce96a0c8097f002bcbe94c35d7/named_tensor_tutorial.py) - -[下载 Jupyter 笔记本:`named_tensor_tutorial.ipynb`](../_downloads/90d6df7aa4b65bb035e19943c6f92ea0/named_tensor_tutorial.ipynb) - -[由 Sphinx 画廊](https://sphinx-gallery.readthedocs.io)生成的画廊 \ No newline at end of file diff --git a/pytorch/官方教程/63.md b/pytorch/官方教程/43 Pytorch分布式应用.md similarity index 100% rename from pytorch/官方教程/63.md rename to pytorch/官方教程/43 Pytorch分布式应用.md diff --git a/pytorch/官方教程/43.md b/pytorch/官方教程/43.md deleted file mode 100644 index 6a73d07d..00000000 --- a/pytorch/官方教程/43.md +++ /dev/null @@ -1,416 +0,0 @@ -# PyTorch 中通道在最后的内存格式(beta) - -> 原文: - -**作者**: [Vitaly Fedyunin](https://github.com/VitalyFedyunin) - -## 什么是通道在最后 - -通道在最后的内存格式是在保留内存尺寸的顺序中对 NCHW 张量进行排序的另一种方法。 通道最后一个张量的排序方式使通道成为最密集的维度(又称为每像素存储图像)。 - -例如,NCHW 张量的经典(连续)存储(在我们的示例中是具有 3 个颜色通道的两个`2x2`图像)如下所示: - -![classic_memory_format](img/77e0660b596f377125122a2409288181.png) - -通道最后的存储格式对数据的排序方式不同: - -![channels_last_memory_format](img/462373919a0dfe17cd816fa0d8af140c.png) - -Pytorch 通过使用现有的跨步结构支持内存格式(并提供与现有模型(包括 eager,JIT 和 TorchScript)的向后兼容性)。 例如,通道在最后的格式中的`10x3x16x16`批量的步幅等于`(768, 1, 48, 3)`。 - -通道最后一个存储格式仅适用于 4D NCWH 张量。 - -```py -import torch -N, C, H, W = 10, 3, 32, 32 - -``` - -## 内存格式 API - -这是在连续和通道最后存储格式之间转换张量的方法。 - -经典 PyTorch 连续张量 - -```py -x = torch.empty(N, C, H, W) -print(x.stride()) # Ouputs: (3072, 1024, 32, 1) - -``` - -出: - -```py -(3072, 1024, 32, 1) - -``` - -转换运算符 - -```py -x = x.contiguous(memory_format=torch.channels_last) -print(x.shape) # Outputs: (10, 3, 32, 32) as dimensions order preserved -print(x.stride()) # Outputs: (3072, 1, 96, 3) - -``` - -出: - -```py -torch.Size([10, 3, 32, 32]) -(3072, 1, 96, 3) - -``` - -返回连续 - -```py -x = x.contiguous(memory_format=torch.contiguous_format) -print(x.stride()) # Outputs: (3072, 1024, 32, 1) - -``` - -出: - -```py -(3072, 1024, 32, 1) - -``` - -替代选择 - -```py -x = x.to(memory_format=torch.channels_last) -print(x.stride()) # Ouputs: (3072, 1, 96, 3) - -``` - -出: - -```py -(3072, 1, 96, 3) - -``` - -格式检查 - -```py -print(x.is_contiguous(memory_format=torch.channels_last)) # Ouputs: True - -``` - -出: - -```py -True - -``` - -最后创建为渠道 - -```py -x = torch.empty(N, C, H, W, memory_format=torch.channels_last) -print(x.stride()) # Ouputs: (3072, 1, 96, 3) - -``` - -出: - -```py -(3072, 1, 96, 3) - -``` - -`clone`保留内存格式 - -```py -y = x.clone() -print(y.stride()) # Ouputs: (3072, 1, 96, 3) - -``` - -出: - -```py -(3072, 1, 96, 3) - -``` - -`to`,`cuda`,`float`…保留内存格式 - -```py -if torch.cuda.is_available(): - y = x.cuda() - print(y.stride()) # Ouputs: (3072, 1, 96, 3) - -``` - -出: - -```py -(3072, 1, 96, 3) - -``` - -`empty_like`和`*_like`运算符保留内存格式 - -```py -y = torch.empty_like(x) -print(y.stride()) # Ouputs: (3072, 1, 96, 3) - -``` - -出: - -```py -(3072, 1, 96, 3) - -``` - -点向运算符保留内存格式 - -```py -z = x + y -print(z.stride()) # Ouputs: (3072, 1, 96, 3) - -``` - -出: - -```py -(3072, 1, 96, 3) - -``` - -转换,`Batchnorm`模块支持通道在最后(仅适用于`CudNN >= 7.6`) - -```py -if torch.backends.cudnn.version() >= 7603: - input = torch.randint(1, 10, (2, 8, 4, 4), dtype=torch.float32, device="cuda", requires_grad=True) - model = torch.nn.Conv2d(8, 4, 3).cuda().float() - - input = input.contiguous(memory_format=torch.channels_last) - model = model.to(memory_format=torch.channels_last) # Module parameters need to be Channels Last - - out = model(input) - 
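    # cuDNN 卷积会保留 channels_last 内存格式:当输入和模块参数都是 channels_last 时,输出同样是 channels_last,因此下面的检查会输出 True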
print(out.is_contiguous(memory_format=torch.channels_last)) # Ouputs: True - -``` - -出: - -```py -True - -``` - -## 性能提升 - -在具有张量核心支持的 Nvidia 硬件上观察到了最大的性能提升。 在运行 [Nvidia](https://github.com/NVIDIA/apex) 提供的 AMP(自动混合精度)训练脚本时,我们可以将性能提高 22% 以上。 - -`python main_amp.py -a resnet50 --b 200 --workers 16 --opt-level O2  ./data` - -```py -# opt_level = O2 -# keep_batchnorm_fp32 = None -# loss_scale = None -# CUDNN VERSION: 7603 -# => creating model 'resnet50' -# Selected optimization level O2: FP16 training with FP32 batchnorm and FP32 master weights. -# Defaults for this optimization level are: -# enabled : True -# opt_level : O2 -# cast_model_type : torch.float16 -# patch_torch_functions : False -# keep_batchnorm_fp32 : True -# master_weights : True -# loss_scale : dynamic -# Processing user overrides (additional kwargs that are not None)... -# After processing overrides, optimization options are: -# enabled : True -# opt_level : O2 -# cast_model_type : torch.float16 -# patch_torch_functions : False -# keep_batchnorm_fp32 : True -# master_weights : True -# loss_scale : dynamic -# Epoch: [0][10/125] Time 0.866 (0.866) Speed 230.949 (230.949) Loss 0.6735125184 (0.6735) Prec@1 61.000 (61.000) Prec@5 100.000 (100.000) -# Epoch: [0][20/125] Time 0.259 (0.562) Speed 773.481 (355.693) Loss 0.6968704462 (0.6852) Prec@1 55.000 (58.000) Prec@5 100.000 (100.000) -# Epoch: [0][30/125] Time 0.258 (0.461) Speed 775.089 (433.965) Loss 0.7877287269 (0.7194) Prec@1 51.500 (55.833) Prec@5 100.000 (100.000) -# Epoch: [0][40/125] Time 0.259 (0.410) Speed 771.710 (487.281) Loss 0.8285319805 (0.7467) Prec@1 48.500 (54.000) Prec@5 100.000 (100.000) -# Epoch: [0][50/125] Time 0.260 (0.380) Speed 770.090 (525.908) Loss 0.7370464802 (0.7447) Prec@1 56.500 (54.500) Prec@5 100.000 (100.000) -# Epoch: [0][60/125] Time 0.258 (0.360) Speed 775.623 (555.728) Loss 0.7592862844 (0.7472) Prec@1 51.000 (53.917) Prec@5 100.000 (100.000) -# Epoch: [0][70/125] Time 0.258 (0.345) Speed 774.746 (579.115) Loss 1.9698858261 (0.9218) Prec@1 49.500 (53.286) Prec@5 100.000 (100.000) -# Epoch: [0][80/125] Time 0.260 (0.335) Speed 770.324 (597.659) Loss 2.2505953312 (1.0879) Prec@1 50.500 (52.938) Prec@5 100.000 (100.000) - -``` - -传递`--channels-last true`允许以通道在最后的格式运行模型,观察到 22% 的表现增益。 - -`python main_amp.py -a resnet50 --b 200 --workers 16 --opt-level O2 --channels-last true ./data` - -```py -# opt_level = O2 -# keep_batchnorm_fp32 = None -# loss_scale = None -# -# CUDNN VERSION: 7603 -# -# => creating model 'resnet50' -# Selected optimization level O2: FP16 training with FP32 batchnorm and FP32 master weights. -# -# Defaults for this optimization level are: -# enabled : True -# opt_level : O2 -# cast_model_type : torch.float16 -# patch_torch_functions : False -# keep_batchnorm_fp32 : True -# master_weights : True -# loss_scale : dynamic -# Processing user overrides (additional kwargs that are not None)... 
-# After processing overrides, optimization options are: -# enabled : True -# opt_level : O2 -# cast_model_type : torch.float16 -# patch_torch_functions : False -# keep_batchnorm_fp32 : True -# master_weights : True -# loss_scale : dynamic -# -# Epoch: [0][10/125] Time 0.767 (0.767) Speed 260.785 (260.785) Loss 0.7579724789 (0.7580) Prec@1 53.500 (53.500) Prec@5 100.000 (100.000) -# Epoch: [0][20/125] Time 0.198 (0.482) Speed 1012.135 (414.716) Loss 0.7007197738 (0.7293) Prec@1 49.000 (51.250) Prec@5 100.000 (100.000) -# Epoch: [0][30/125] Time 0.198 (0.387) Speed 1010.977 (516.198) Loss 0.7113101482 (0.7233) Prec@1 55.500 (52.667) Prec@5 100.000 (100.000) -# Epoch: [0][40/125] Time 0.197 (0.340) Speed 1013.023 (588.333) Loss 0.8943189979 (0.7661) Prec@1 54.000 (53.000) Prec@5 100.000 (100.000) -# Epoch: [0][50/125] Time 0.198 (0.312) Speed 1010.541 (641.977) Loss 1.7113249302 (0.9551) Prec@1 51.000 (52.600) Prec@5 100.000 (100.000) -# Epoch: [0][60/125] Time 0.198 (0.293) Speed 1011.163 (683.574) Loss 5.8537774086 (1.7716) Prec@1 50.500 (52.250) Prec@5 100.000 (100.000) -# Epoch: [0][70/125] Time 0.198 (0.279) Speed 1011.453 (716.767) Loss 5.7595844269 (2.3413) Prec@1 46.500 (51.429) Prec@5 100.000 (100.000) -# Epoch: [0][80/125] Time 0.198 (0.269) Speed 1011.827 (743.883) Loss 2.8196096420 (2.4011) Prec@1 47.500 (50.938) Prec@5 100.000 (100.000) - -``` - -以下模型列表完全支持通道在最后,并在 Volta 设备上显示了 8%-35% 的表现增益:`alexnet`,`mnasnet0_5`,`mnasnet0_75`,`mnasnet1_0`,`mnasnet1_3`,`mobilenet_v2`,`resnet101`,`resnet152`,`resnet18`,`resnet34`,`resnet50`,`resnext50_32x4d`,`shufflenet_v2_x0_5`,`shufflenet_v2_x1_0`,`shufflenet_v2_x1_5`,`shufflenet_v2_x2_0`,`squeezenet1_0`,`squeezenet1_1`,`vgg11` ,`vgg11_bn`,`vgg13`,`vgg13_bn`,`vgg16`,`vgg16_bn`,`vgg19`,`vgg19_bn`,`wide_resnet101_2`,`wide_resnet50_2` - -## 转换现有模型 - -通道在最后支持不受现有模型的限制,因为只要输入格式正确,任何模型都可以转换为通道在最后,并通过图传播格式。 - -```py -# Need to be done once, after model initialization (or load) -model = model.to(memory_format=torch.channels_last) # Replace with your model - -# Need to be done for every input -input = input.to(memory_format=torch.channels_last) # Replace with your input -output = model(input) - -``` - -但是,并非所有运算符都完全转换为支持通道在最后(通常返回连续输出)。 这意味着您需要根据[支持的运算符列表](https://github.com/pytorch/pytorch/wiki/Operators-with-Channels-Last-support)来验证已使用运算符的列表,或将内存格式检查引入急切的执行模式并运行模型。 - -运行以下代码后,如果运算符的输出与输入的存储格式不匹配,运算符将引发异常。 - -```py -def contains_cl(args): - for t in args: - if isinstance(t, torch.Tensor): - if t.is_contiguous(memory_format=torch.channels_last) and not t.is_contiguous(): - return True - elif isinstance(t, list) or isinstance(t, tuple): - if contains_cl(list(t)): - return True - return False - -def print_inputs(args, indent=''): - for t in args: - if isinstance(t, torch.Tensor): - print(indent, t.stride(), t.shape, t.device, t.dtype) - elif isinstance(t, list) or isinstance(t, tuple): - print(indent, type(t)) - print_inputs(list(t), indent=indent + ' ') - else: - print(indent, t) - -def check_wrapper(fn): - name = fn.__name__ - - def check_cl(*args, **kwargs): - was_cl = contains_cl(args) - try: - result = fn(*args, **kwargs) - except Exception as e: - print("`{}` inputs are:".format(name)) - print_inputs(args) - print('-------------------') - raise e - failed = False - if was_cl: - if isinstance(result, torch.Tensor): - if result.dim() == 4 and not result.is_contiguous(memory_format=torch.channels_last): - print("`{}` got channels_last input, but output is not channels_last:".format(name), - result.shape, result.stride(), result.device, result.dtype) - 
failed = True - if failed and True: - print("`{}` inputs are:".format(name)) - print_inputs(args) - raise Exception( - 'Operator `{}` lost channels_last property'.format(name)) - return result - return check_cl - -old_attrs = dict() - -def attribute(m): - old_attrs[m] = dict() - for i in dir(m): - e = getattr(m, i) - exclude_functions = ['is_cuda', 'has_names', 'numel', - 'stride', 'Tensor', 'is_contiguous', '__class__'] - if i not in exclude_functions and not i.startswith('_') and '__call__' in dir(e): - try: - old_attrs[m][i] = e - setattr(m, i, check_wrapper(e)) - except Exception as e: - print(i) - print(e) - -attribute(torch.Tensor) -attribute(torch.nn.functional) -attribute(torch) - -``` - -出: - -```py -Optional -'_Optional' object has no attribute '__name__' - -``` - -如果您发现不支持通道在最后的张量的运算符并且想要贡献力量,请随时使用[以下开发人员指南](https://github.com/pytorch/pytorch/wiki/Writing-memory-format-aware-operators)。 - -下面的代码是恢复火炬的属性。 - -```py -for (m, attrs) in old_attrs.items(): - for (k,v) in attrs.items(): - setattr(m, k, v) - -``` - -## 要做的工作 - -仍有许多事情要做,例如: - -* 解决 N1HW 和 NC11 张量的歧义; -* 测试分布式训练支持; -* 提高运算符覆盖率。 - -如果您有反馈和/或改进建议,请通过创建 [ISSUE](https://github.com/pytorch/pytorch/issues) 来通知我们。 - -**脚本的总运行时间**:(0 分钟 2.300 秒) - -[下载 Python 源码:`memory_format_tutorial.py`](../_downloads/6c290a5fc635f734b10bbca97b52d2f1/memory_format_tutorial.py) - -[下载 Jupyter 笔记本:`memory_format_tutorial.ipynb`](../_downloads/6f7327daa2a9b857365f893069d0bace/memory_format_tutorial.ipynb) - -[由 Sphinx 画廊](https://sphinx-gallery.readthedocs.io)生成的画廊 \ No newline at end of file diff --git a/pytorch/官方教程/64.md b/pytorch/官方教程/44 分布式RPC框架.md similarity index 100% rename from pytorch/官方教程/64.md rename to pytorch/官方教程/44 分布式RPC框架.md diff --git a/pytorch/官方教程/44.md b/pytorch/官方教程/44.md deleted file mode 100644 index e34a4a5a..00000000 --- a/pytorch/官方教程/44.md +++ /dev/null @@ -1,965 +0,0 @@ -# 使用 PyTorch C++ 前端 - -> 原文: - -PyTorch C++ 前端是 PyTorch 机器学习框架的纯 C++ 接口。 虽然 PyTorch 的主要接口自然是 Python,但此 Python API 位于强大的 C++ 代码库之上,提供基本的数据结构和功能,例如张量和自动微分。 C++ 前端公开了纯 C++ 11 API,该 API 使用机器学习训练和推理所需的工具扩展了此基础 C++ 代码库。 这包括用于神经网络建模的通用组件的内置集合; 使用自定义模块扩展此集合的 API; 一个流行的优化算法库,例如随机梯度下降; 具有 API 的并行数据加载器,用于定义和加载数据集; 序列化例程等。 - -本教程将引导您完成使用 C++ 前端训练模型的端到端示例。 具体来说,我们将训练 [DCGAN](https://arxiv.org/abs/1511.06434) (一种生成模型),以生成 MNIST 数字的图像。 虽然从概念上讲是一个简单的示例,但它足以使您对 PyTorch C++ 前端有个大概的了解,并且可以满足您训练更复杂模型的需求。 我们将从一些鼓舞人心的词开始,说明您为什么要使用 C++ 前端,然后直接深入定义和训练我们的模型。 - -小费 - -观看[来自 CppCon 2018 的简短演讲](https://www.youtube.com/watch?v=auRPXMMHJzc),获得有关 C++ 前端的快速(幽默)演示。 - -小费 - -[本笔记](https://pytorch.org/cppdocs/frontend.html)概述了 C++ 前端的组件和设计原理。 - -小费 - -有关 PyTorch C++ 生态系统的文档,请访问[这个页面](https://pytorch.org/cppdocs)。 您可以在此处找到高级描述以及 API 级文档。 - -## 动机 - -在我们开始 GAN 和 MNIST 数字的激动人心的旅程之前,让我们退后一步,讨论为什么要使用 C++ 前端而不是 Python。 我们(PyTorch 团队)创建了 C++ 前端,以便能够在无法使用 Python 或根本不适合该工具的环境中进行研究。 此类环境的示例包括: - -* **低延迟系统**:您可能希望在具有高每秒帧数和低延迟要求的纯 C++ 游戏引擎中进行强化学习研究。 与 Python 库相比,使用纯 C++ 库更适合这种环境。 由于 Python 解释器的缓慢性,Python 可能根本无法处理。 -* **高度多线程环境**:由于全局解释器锁定(GIL),Python 一次不能运行多个系统线程。 多处理是一种替代方法,但可伸缩性却不如它,并且存在很多缺点。 C++ 没有这样的约束,线程易于使用和创建。 需要重型并行化的模型,例如[深度神经演化](https://eng.uber.com/deep-neuroevolution/)中使用的模型,可以从中受益。 -* **现有 C++ 代码库**:您可能是现有 C++ 应用的所有者,该应用从事从后端服务器中的网页服务到照片编辑软件中的 3D 图形渲染等所有工作,并且希望将机器学习方法集成到您的系统中。 C++ 前端使您可以继续使用 C++,并避免在 Python 和 C++ 之间来回绑定的麻烦,同时保留了传统 PyTorch(Python)体验的大部分灵活性和直观性。 - -C++ 前端无意与 Python 前端竞争。 它是对它的补充。 我们知道研究人员和工程师都喜欢 PyTorch,因为它具有简单,灵活和直观的 API。 我们的目标是确保您可以在所有可能的环境(包括上述环境)中利用这些核心设计原则。 如果这些场景中的一种很好地描述了您的用例,或者您只是感兴趣或好奇,请在以下段落中继续研究 C++ 前端。 - -小费 - -C++ 前端试图提供一个与 Python 前端尽可能接近的 API。 如果您对 
Python 前端有丰富的经验,并且问过自己“我如何使用 C++ 前端做 X?”,那么请像在 Python 中那样编写代码:大多数情况下,同样的函数和方法在 C++ 中也可用,就像在 Python 中一样(只要记得把点替换成双冒号)。 - -## 编写基本应用 - -首先,编写一个最小的 C++ 应用,以验证我们对设置和构建环境的理解是一致的。 为此,您需要获取 *LibTorch* 发行版的副本:这是我们现成的 zip 归档文件,其中打包了使用 C++ 前端所需的所有相关头文件、库和 CMake 构建文件。 LibTorch 发行版可从 [PyTorch 网站](https://pytorch.org/get-started/locally/)下载,适用于 Linux、MacOS 和 Windows。 本教程的其余部分将假定基本的 Ubuntu Linux 环境,但您也可以在 MacOS 或 Windows 上跟着操作。 - -提示 - -有关[安装 PyTorch 的 C++ 发行版](https://pytorch.org/cppdocs/installing.html)的说明更详细地描述了以下步骤。 - -提示 - -在 Windows 上,调试和发行版本不兼容 ABI。 如果计划以调试模式构建项目,请尝试使用 LibTorch 的调试版本。 另外,请确保在下面的`cmake --build .`行中指定正确的配置。 - -第一步是通过从 PyTorch 网站获取的链接在本地下载 LibTorch 发行版。 对于普通的 Ubuntu Linux 环境,这意味着运行: - -```py -# If you need e.g. CUDA 9.0 support, please replace "cpu" with "cu90" in the URL below. -wget https://download.pytorch.org/libtorch/nightly/cpu/libtorch-shared-with-deps-latest.zip -unzip libtorch-shared-with-deps-latest.zip - -``` - -接下来,让我们编写一个名为`dcgan.cpp`的小型 C++ 文件,其中包含`torch/torch.h`,现在只需打印出一个三乘三的单位矩阵即可: - -```py -#include <torch/torch.h> -#include <iostream> - -int main() { - torch::Tensor tensor = torch::eye(3); - std::cout << tensor << std::endl; -} - -``` - -稍后,为了构建这个小应用以及我们完整的训练脚本,我们将使用以下`CMakeLists.txt`文件: - -```py -cmake_minimum_required(VERSION 3.0 FATAL_ERROR) -project(dcgan) - -find_package(Torch REQUIRED) - -add_executable(dcgan dcgan.cpp) -target_link_libraries(dcgan "${TORCH_LIBRARIES}") -set_property(TARGET dcgan PROPERTY CXX_STANDARD 14) - -``` - -注意 - -虽然 CMake 是 LibTorch 的推荐构建系统,但这并不是硬性要求。 您还可以使用 Visual Studio 项目文件、QMake、普通 Makefile 或您认为合适的任何其他构建环境。 但是,我们不为此提供现成的支持。 - -请注意上面 CMake 文件中的第 4 行:`find_package(Torch REQUIRED)`。 这指示 CMake 查找 LibTorch 库的构建配置。 为了让 CMake 知道在哪里找到这些文件,调用`cmake`时必须设置`CMAKE_PREFIX_PATH`。 在此之前,让我们先就`dcgan`应用的以下目录结构达成一致: - -```py -dcgan/ - CMakeLists.txt - dcgan.cpp - -``` - -此外,我会将解压缩后的 LibTorch 发行版所在的路径称为`/path/to/libtorch`。 注意,它**必须是绝对路径**。 特别是,把`CMAKE_PREFIX_PATH`设置为`../../libtorch`之类的相对路径会以意想不到的方式出错。 请改写`$PWD/../../libtorch`以获取相应的绝对路径。 现在,我们准备构建我们的应用: - -```py -root@fa350df05ecf:/home# mkdir build -root@fa350df05ecf:/home# cd build -root@fa350df05ecf:/home/build# cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. --- The C compiler identification is GNU 5.4.0 --- The CXX compiler identification is GNU 5.4.0 --- Check for working C compiler: /usr/bin/cc --- Check for working C compiler: /usr/bin/cc -- works --- Detecting C compiler ABI info --- Detecting C compiler ABI info - done --- Detecting C compile features --- Detecting C compile features - done --- Check for working CXX compiler: /usr/bin/c++ --- Check for working CXX compiler: /usr/bin/c++ -- works --- Detecting CXX compiler ABI info --- Detecting CXX compiler ABI info - done --- Detecting CXX compile features --- Detecting CXX compile features - done --- Looking for pthread.h --- Looking for pthread.h - found --- Looking for pthread_create --- Looking for pthread_create - not found --- Looking for pthread_create in pthreads --- Looking for pthread_create in pthreads - not found --- Looking for pthread_create in pthread --- Looking for pthread_create in pthread - found --- Found Threads: TRUE --- Found torch: /path/to/libtorch/lib/libtorch.so --- Configuring done --- Generating done --- Build files have been written to: /home/build -root@fa350df05ecf:/home/build# cmake --build . 
--config Release -Scanning dependencies of target dcgan -[ 50%] Building CXX object CMakeFiles/dcgan.dir/dcgan.cpp.o -[100%] Linking CXX executable dcgan -[100%] Built target dcgan - -``` - -上面,我们首先在`dcgan`目录内创建一个`build`文件夹,进入该文件夹,运行`cmake`命令以生成必要的构建(Make)文件,最后通过运行`cmake --build . --config Release`成功编译该项目。 现在我们准备执行最小的二进制文件并完成有关基本项目配置的这一部分: - -```py -root@fa350df05ecf:/home/build# ./dcgan -1 0 0 -0 1 0 -0 0 1 -[ Variable[CPUFloatType]{3,3} ] - -``` - -在我看来就像一个身份矩阵! - -## 定义神经网络模型 - -现在我们已经配置了基本环境,我们可以深入研究本教程中更有趣的部分。 首先,我们将讨论如何在 C++ 前端中定义模块并与之交互。 我们将从基本的小规模示例模块开始,然后使用 C++ 前端提供的广泛的内置模块库来实现全面的 GAN。 - -### 模块 API 基础 - -与 Python 接口一致,基于 C++ 前端的神经网络由称为*模块*的可重用构建块组成。 有一个基础模块类,所有其他模块都从该基础类派生。 在 Python 中,此类为`torch.nn.Module`,在 C++ 中为`torch::nn::Module`。 除了实现模块封装的算法的`forward()`方法之外,模块通常还包含以下三种子对象中的任何一种:参数,缓冲区和子模块。 - -参数和缓冲区以张量的形式存储状态。 参数记录梯度,而缓冲区不记录。 参数通常是神经网络的可训练权重。 缓冲区的示例包括批量标准化的均值和方差。 为了重用特定的逻辑和状态块,PyTorch API 允许嵌套模块。 嵌套模块称为*子模块*。 - -参数,缓冲区和子模块必须显式注册。 注册后,可以使用`parameters()`或`buffers()`之类的方法来检索整个(嵌套)模块层次结构中所有参数的容器。 类似地,使用`to(...)`之类的方法,例如 `to(torch::kCUDA)`将所有参数和缓冲区从 CPU 移到 CUDA 内存,在整个模块层次结构上工作。 - -#### 定义模块和注册参数 - -为了将这些词写成代码,让我们考虑一下用 Python 接口编写的简单模块: - -```py -import torch - -class Net(torch.nn.Module): - def __init__(self, N, M): - super(Net, self).__init__() - self.W = torch.nn.Parameter(torch.randn(N, M)) - self.b = torch.nn.Parameter(torch.randn(M)) - - def forward(self, input): - return torch.addmm(self.b, input, self.W) - -``` - -在 C++ 中,它看起来像这样: - -```py -#include - -struct Net : torch::nn::Module { - Net(int64_t N, int64_t M) { - W = register_parameter("W", torch::randn({N, M})); - b = register_parameter("b", torch::randn(M)); - } - torch::Tensor forward(torch::Tensor input) { - return torch::addmm(b, input, W); - } - torch::Tensor W, b; -}; - -``` - -就像在 Python 中一样,我们定义了一个名为`Net`的类(为简单起见,这里是`struct`而不是`class`),然后从模块基类派生它。 在构造器内部,我们使用`torch::randn`创建张量,就像在 Python 中使用`torch.randn`一样。 一个有趣的区别是我们如何注册参数。 在 Python 中,我们用`torch.nn.Parameter`类包装了张量,而在 C++ 中,我们不得不通过`register_parameter`方法传递张量。 这样做的原因是 Python API 可以检测到属性为`torch.nn.Parameter`类型并自动注册此类张量。 在 C++ 中,反射非常受限制,因此提供了一种更传统(且不太神奇)的方法。 - -#### 注册子模块并遍历模块层次结构 - -以相同的方式我们可以注册参数,我们也可以注册子模块。 在 Python 中,将子模块分配为模块的属性时,会自动检测并注册这些子模块: - -```py -class Net(torch.nn.Module): - def __init__(self, N, M): - super(Net, self).__init__() - # Registered as a submodule behind the scenes - self.linear = torch.nn.Linear(N, M) - self.another_bias = torch.nn.Parameter(torch.rand(M)) - - def forward(self, input): - return self.linear(input) + self.another_bias - -``` - -例如,这允许使用`parameters()`方法来递归访问模块层次结构中的所有参数: - -```py ->>> net = Net(4, 5) ->>> print(list(net.parameters())) -[Parameter containing: -tensor([0.0808, 0.8613, 0.2017, 0.5206, 0.5353], requires_grad=True), Parameter containing: -tensor([[-0.3740, -0.0976, -0.4786, -0.4928], - [-0.1434, 0.4713, 0.1735, -0.3293], - [-0.3467, -0.3858, 0.1980, 0.1986], - [-0.1975, 0.4278, -0.1831, -0.2709], - [ 0.3730, 0.4307, 0.3236, -0.0629]], requires_grad=True), Parameter containing: -tensor([ 0.2038, 0.4638, -0.2023, 0.1230, -0.0516], requires_grad=True)] - -``` - -要在 C++ 中注册子模块,请使用恰当命名的`register_module()`方法注册类似`torch::nn::Linear`的模块: - -```py -struct Net : torch::nn::Module { - Net(int64_t N, int64_t M) - : linear(register_module("linear", torch::nn::Linear(N, M))) { - another_bias = register_parameter("b", torch::randn(M)); - } - torch::Tensor forward(torch::Tensor input) { - return linear(input) + another_bias; - } - torch::nn::Linear linear; - torch::Tensor another_bias; -}; - -``` - -小费 - 
-您可以在[`torch::nn`命名空间的文档](https://pytorch.org/cppdocs/api/namespace_torch__nn.html)中找到可用的内置模块的完整列表,例如`torch::nn::Linear`,`torch::nn::Dropout`或`torch::nn::Conv2d`。 - -关于上述代码的一个微妙之处在于,为什么在构造器的初始值设定项列表中创建子模块,而在构造器的主体内部创建参数。 这是有充分的理由的,我们将在下面有关“C++ 前端所有权模型”的部分中对此进行介绍。 但是,最终结果是,就像 Python 中一样,我们可以递归访问模块树的参数。 调用`parameters()`返回一个`std::vector`,我们可以对其进行迭代: - -```py -int main() { - Net net(4, 5); - for (const auto& p : net.parameters()) { - std::cout << p << std::endl; - } -} - -``` - -打印: - -```py -root@fa350df05ecf:/home/build# ./dcgan -0.0345 -1.4456 --0.6313 --0.3585 --0.4008 -[ Variable[CPUFloatType]{5} ] --0.1647 0.2891 0.0527 -0.0354 -0.3084 0.2025 0.0343 0.1824 --0.4630 -0.2862 0.2500 -0.0420 -0.3679 -0.1482 -0.0460 0.1967 -0.2132 -0.1992 0.4257 0.0739 -[ Variable[CPUFloatType]{5,4} ] -0.01 * -3.6861 --10.1166 --45.0333 -7.9983 --20.0705 -[ Variable[CPUFloatType]{5} ] - -``` - -具有三个参数,就像在 Python 中一样。 为了也查看这些参数的名称,C++ API 提供了`named_parameters()`方法,该方法返回`OrderedDict`就像在 Python 中一样: - -```py -Net net(4, 5); -for (const auto& pair : net.named_parameters()) { - std::cout << pair.key() << ": " << pair.value() << std::endl; -} - -``` - -我们可以再次执行以查看输出: - -```py -root@fa350df05ecf:/home/build# make && ./dcgan 11:13:48 -Scanning dependencies of target dcgan -[ 50%] Building CXX object CMakeFiles/dcgan.dir/dcgan.cpp.o -[100%] Linking CXX executable dcgan -[100%] Built target dcgan -b: -0.1863 --0.8611 --0.1228 -1.3269 -0.9858 -[ Variable[CPUFloatType]{5} ] -linear.weight: 0.0339 0.2484 0.2035 -0.2103 --0.0715 -0.2975 -0.4350 -0.1878 --0.3616 0.1050 -0.4982 0.0335 --0.1605 0.4963 0.4099 -0.2883 -0.1818 -0.3447 -0.1501 -0.0215 -[ Variable[CPUFloatType]{5,4} ] -linear.bias: -0.0250 -0.0408 -0.3756 --0.2149 --0.3636 -[ Variable[CPUFloatType]{5} ] - -``` - -注意 - -[`torch::nn::Module`的文档](https://pytorch.org/cppdocs/api/classtorch_1_1nn_1_1_module.html#exhale-class-classtorch-1-1nn-1-1-module)包含在模块层次结构上运行的方法的完整列表。 - -#### 在正向模式下运行网络 - -要使用 C++ 执行网络,我们只需调用我们自己定义的`forward()`方法: - -```py -int main() { - Net net(4, 5); - std::cout << net.forward(torch::ones({2, 4})) << std::endl; -} - -``` - -打印类似: - -```py -root@fa350df05ecf:/home/build# ./dcgan -0.8559 1.1572 2.1069 -0.1247 0.8060 -0.8559 1.1572 2.1069 -0.1247 0.8060 -[ Variable[CPUFloatType]{2,5} ] - -``` - -#### 模块所有权 - -至此,我们知道了如何使用 C++ 定义模块,注册参数,注册子模块,通过`parameters()`之类的方法遍历模块层次结构并最终运行模块的`forward()`方法。 尽管在 C++ API 中还有很多方法,类和主题需要使用,但我将为您提供完整菜单的[文档](https://pytorch.org/cppdocs/api/namespace_torch__nn.html)。 我们将在稍后实现 DCGAN 模型和端到端训练管道的过程中,涉及更多概念。 在我们这样做之前,让我简要介绍一下 C++ 前端为`torch::nn::Module`的子类提供的*所有权模型*。 - -在本次讨论中,所有权模型是指模块的存储和传递方式-确定特定模块实例的所有者或所有者。 在 Python 中,对象始终是动态分配的(在堆上),并且具有引用语义。 这是非常容易使用且易于理解的。 实际上,在 Python 中,您可以很大程度上忽略对象的位置以及如何引用它们,而将精力集中在完成事情上。 - -C++ 是一种较低级的语言,它在此领域提供了更多选择。 这增加了复杂性,并严重影响了 C++ 前端的设计和人体工程学。 特别是,对于 C++ 前端中的模块,我们可以选择使用*值语义*或*引用语义*。 第一种情况是最简单的,并且在到目前为止的示例中已进行了展示:模块对象在栈上分配,并在传递给函数时可以被复制,移动(使用`std::move`)或通过引用或指针获取: - -```py -struct Net : torch::nn::Module { }; - -void a(Net net) { } -void b(Net& net) { } -void c(Net* net) { } - -int main() { - Net net; - a(net); - a(std::move(net)); - b(net); - c(&net); -} - -``` - -对于第二种情况-引用语义-我们可以使用`std::shared_ptr`。 引用语义的优势在于,就像在 Python 中一样,它减少了思考如何将模块传递给函数以及如何声明参数的认知开销(假设您在任何地方都使用`shared_ptr`)。 - -```py -struct Net : torch::nn::Module {}; - -void a(std::shared_ptr net) { } - -int main() { - auto net = std::make_shared(); - a(net); -} - -``` - -根据我们的经验,来自动态语言的研究人员非常喜欢引用语义而不是值语义,即使后者比 C++ 更“原生”。 同样重要的是要注意,`torch::nn::Module`的设计要与 Python API 的人体工程学保持紧密联系,因此要依靠共享所有权。 例如,采用我们较早的(此处为缩短的)`Net`定义: - -```py 
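// 缩短后的 Net 定义:linear 子模块在构造器初始化列表中创建并注册,
// 因此派生类 Net 与基类 torch::nn::Module 都持有对同一子模块的引用,
// 这正是下文所述“共享所有权”需求的来源。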
-struct Net : torch::nn::Module { - Net(int64_t N, int64_t M) - : linear(register_module("linear", torch::nn::Linear(N, M))) - { } - torch::nn::Linear linear; -}; - -``` - -为了使用`linear`子模块,我们想将其直接存储在我们的类中。 但是,我们还希望模块基类了解并有权访问此子模块。 为此,它必须存储对此子模块的引用。 至此,我们已经达到了共享所有权的需要。 `torch::nn::Module`类和具体的`Net`类都需要引用该子模块。 因此,基类将模块存储为`shared_ptr`,因此具体类也必须存储。 - -可是等等! 在上面的代码中我没有提到`shared_ptr`! 这是为什么? 好吧,因为`std::shared_ptr`实在令人难受。 为了保持研究人员的生产力,我们提出了一个精心设计的方案,以隐藏`shared_ptr`的提法-通常保留给值语义的好处-同时保留引用语义。 要了解它是如何工作的,我们可以看一下核心库中`torch::nn::Linear`模块的简化定义([完整定义在此处](https://github.com/pytorch/pytorch/blob/master/torch/csrc/api/include/torch/nn/modules/linear.h)): - -```py -struct LinearImpl : torch::nn::Module { - LinearImpl(int64_t in, int64_t out); - - Tensor forward(const Tensor& input); - - Tensor weight, bias; -}; - -TORCH_MODULE(Linear); - -``` - -简而言之:该模块不是`Linear`,而是`LinearImpl`。 然后,宏`TORCH_MODULE`定义了实际的`Linear`类。 这个“生成的”类实际上是`std::shared_ptr`的包装。 它是一个包装器,而不是简单的`typedef`,因此,除其他事项外,构造器仍可按预期工作,即,您仍然可以编写`torch::nn::Linear(3, 4)`而不是`std::make_shared(3, 4)`。 我们将由宏创建的类称为模块*所有者*。 与(共享)指针一样,您可以使用箭头运算符(例如`model->forward(...)`)访问基础对象。 最终结果是一个所有权模型,该模型非常类似于 Python API。 引用语义成为默认语义,但是没有额外输入`std::shared_ptr`或`std::make_shared`。 对于我们的`Net`,使用模块持有人 API 如下所示: - -```py -struct NetImpl : torch::nn::Module {}; -TORCH_MODULE(Net); - -void a(Net net) { } - -int main() { - Net net; - a(net); -} - -``` - -这里有一个微妙的问题值得一提。 默认构造的`std::shared_ptr`为“空”,即包含空指针。 什么是默认构造的`Linear`或`Net`? 好吧,这是一个棘手的选择。 我们可以说它应该是一个空(`null`)`std::shared_ptr`。 但是,请记住`Linear(3, 4)`与`std::make_shared(3, 4)`相同。 这意味着如果我们已确定`Linear linear;`应该为空指针,则将无法构造不采用任何构造器参数或都不使用所有缺省构造器的模块。 因此,在当前的 API 中,默认构造的模块持有人(如`Linear()`)将调用基础模块的默认构造器(`LinearImpl()`)。 如果基础模块没有默认构造器,则会出现编译器错误。 要构造空持有人,可以将`nullptr`传递给持有人的构造器。 - -实际上,这意味着您可以使用如先前所示的子模块,在*初始化器列表*中注册并构造该模块: - -```py -struct Net : torch::nn::Module { - Net(int64_t N, int64_t M) - : linear(register_module("linear", torch::nn::Linear(N, M))) - { } - torch::nn::Linear linear; -}; - -``` - -或者,您可以先使用空指针构造持有人,然后在构造器中为其分配值(Python 爱好者更熟悉): - -```py -struct Net : torch::nn::Module { - Net(int64_t N, int64_t M) { - linear = register_module("linear", torch::nn::Linear(N, M)); - } - torch::nn::Linear linear{nullptr}; // construct an empty holder -}; - -``` - -结论:您应该使用哪种所有权模型–哪种语义? C++ 前端的 API 最能支持模块所有者提供的所有权模型。 这种机制的唯一缺点是在模块声明下方多了一行样板。 也就是说,最简单的模型仍然是 C++ 模块简介中显示的值语义模型。 对于小的,简单的脚本,您也可以摆脱它。 但是,由于技术原因,您迟早会发现它并不总是受支持。 例如,序列化 API(`torch::save`和`torch::load`)仅支持模块支架(或普通`shared_ptr`)。 因此,建议使用模块持有人 API 和 C++ 前端定义模块,此后我们将在本教程中使用此 API。 - -### 定义 DCGAN 模块 - -现在,我们有必要的背景和简介来定义我们要在本文中解决的机器学习任务的模块。 回顾一下:我们的任务是从 [MNIST 数据集](http://yann.lecun.com/exdb/mnist/)生成数字图像。 我们想使用[生成对抗网络(GAN)](https://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf)解决此任务。 特别是,我们将使用 [DCGAN 架构](https://arxiv.org/abs/1511.06434),这是同类中最早,最简单的架构之一,但完全可以完成此任务。 - -小费 - -[您可以在存储库中找到本教程中提供的完整源代码](https://github.com/pytorch/examples/tree/master/cpp/dcgan)。 - -#### 什么是 GAN aGAN? 
-
-GAN 由两个不同的神经网络模型组成:*生成器*和*判别器*。生成器从噪声分布中接收样本,其目标是把每个噪声样本变换为与目标分布(在我们的例子中是 MNIST 数据集)相似的图像。判别器则接收来自 MNIST 数据集的*真实*图像,或来自生成器的*伪造*图像,并被要求给出一个概率,判断特定图像有多真实(接近`1`)或多伪造(接近`0`)。判别器对生成器所产图像真实程度的反馈用于训练生成器;判别器对真实性判断得有多准的反馈则用于优化判别器。从理论上讲,生成器与判别器之间的微妙博弈会使二者共同提升,最终生成器生成的图像与目标分布无法区分,而那时判别器的火眼金睛对真图和假图都只能给出`0.5`的真实概率。对我们来说,最终结果是一台以噪声为输入、输出逼真数字图像的机器。
-
-#### 生成器模块
-
-我们首先定义生成器模块,它由一系列转置 2D 卷积、批量归一化和 ReLU 激活单元组成。我们在自己定义的模块的`forward()`方法中,以函数式的方式在各模块之间显式传递输入:
-
-```py
-struct DCGANGeneratorImpl : nn::Module {
-  DCGANGeneratorImpl(int kNoiseSize)
-      : conv1(nn::ConvTranspose2dOptions(kNoiseSize, 256, 4)
-                  .bias(false)),
-        batch_norm1(256),
-        conv2(nn::ConvTranspose2dOptions(256, 128, 3)
-                  .stride(2)
-                  .padding(1)
-                  .bias(false)),
-        batch_norm2(128),
-        conv3(nn::ConvTranspose2dOptions(128, 64, 4)
-                  .stride(2)
-                  .padding(1)
-                  .bias(false)),
-        batch_norm3(64),
-        conv4(nn::ConvTranspose2dOptions(64, 1, 4)
-                  .stride(2)
-                  .padding(1)
-                  .bias(false))
-  {
-    // register_module() is needed if we want to use the parameters() method later on
-    register_module("conv1", conv1);
-    register_module("conv2", conv2);
-    register_module("conv3", conv3);
-    register_module("conv4", conv4);
-    register_module("batch_norm1", batch_norm1);
-    register_module("batch_norm2", batch_norm2);
-    register_module("batch_norm3", batch_norm3);
-  }
-
-  torch::Tensor forward(torch::Tensor x) {
-    x = torch::relu(batch_norm1(conv1(x)));
-    x = torch::relu(batch_norm2(conv2(x)));
-    x = torch::relu(batch_norm3(conv3(x)));
-    x = torch::tanh(conv4(x));
-    return x;
-  }
-
-  nn::ConvTranspose2d conv1, conv2, conv3, conv4;
-  nn::BatchNorm2d batch_norm1, batch_norm2, batch_norm3;
-};
-TORCH_MODULE(DCGANGenerator);
-
-DCGANGenerator generator(kNoiseSize);
-
-```
-
-现在我们可以在`DCGANGenerator`上调用`forward()`,把噪声样本映射为图像。
-
-这里选择的特定模块,例如`nn::ConvTranspose2d`和`nn::BatchNorm2d`,遵循前面概述的结构。`kNoiseSize`常数决定输入噪声向量的大小,设置为`100`。当然,这些超参数是通过“研究生梯度下降”(grad student descent)找到的。
-
-注意
-
-在发现这些超参数的过程中,没有研究生受到伤害。我们定期给他们喂食代餐。
-
-注意
-
-简要说明一下 C++ 前端向`Conv2d`之类的内置模块传递选项的方式:每个模块都有一些必需的选项,例如`BatchNorm2d`的特征数。如果只需要配置必需的选项,可以把它们直接传给模块的构造器,例如`BatchNorm2d(128)`、`Dropout(0.5)`或`Conv2d(8, 4, 2)`(分别为输入通道数、输出通道数和内核大小)。但是,如果需要修改其他通常带有默认值的选项,例如`Conv2d`的`bias`,则需要构造并传递一个*选项*对象。C++ 前端中的每个模块都有一个对应的选项结构体,名为`ModuleOptions`,其中`Module`是模块名,例如`Linear`对应`LinearOptions`。上面的`Conv2d`模块就是这样做的。
-
-#### 判别器模块
-
-判别器同样是一系列卷积、批量归一化和激活函数。不过,现在的卷积是常规卷积而非转置卷积,并且我们用 alpha 值为 0.2 的 LeakyReLU 代替普通的 ReLU。此外,最后的激活函数换成了 Sigmoid,它把值压缩到 0 到 1 之间,我们可以把这些压缩后的值解释为判别器赋予图像为真实图像的概率。
-
-为了构建判别器,我们来尝试一些不同的做法:`Sequential`模块。与 Python 中一样,PyTorch 在这里提供了两种模型定义 API:一种是函数式的,输入依次通过各个函数(例如生成器模块的例子);另一种更面向对象,我们构建一个`Sequential`模块,把整个模型作为子模块包含进去。使用`Sequential`,判别器看起来像这样:
-
-```py
-nn::Sequential discriminator(
-    // Layer 1
-    nn::Conv2d(
-        nn::Conv2dOptions(1, 64, 4).stride(2).padding(1).bias(false)),
-    nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
-    // Layer 2
-    nn::Conv2d(
-        nn::Conv2dOptions(64, 128, 4).stride(2).padding(1).bias(false)),
-    nn::BatchNorm2d(128),
-    nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
-    // Layer 3
-    nn::Conv2d(
-        nn::Conv2dOptions(128, 256, 4).stride(2).padding(1).bias(false)),
-    nn::BatchNorm2d(256),
-    nn::LeakyReLU(nn::LeakyReLUOptions().negative_slope(0.2)),
-    // Layer 4
-    nn::Conv2d(
-        nn::Conv2dOptions(256, 1, 3).stride(1).padding(0).bias(false)),
-    nn::Sigmoid());
-
-```
-
-提示
-
-`Sequential`模块只是执行函数组合:第一个子模块的输出成为第二个子模块的输入,第二个的输出成为第三个的输入,依此类推。
-
-## 加载数据
-
-现在我们已经定义了生成器和判别器模型,还需要一些数据来训练它们。与 Python 前端一样,C++ 前端也带有一个强大的并行数据加载器。该数据加载器可以从数据集(您可以自己定义)中读取成批数据,并提供许多配置选项。
-
-注意
-
-Python 数据加载器使用多进程,而 C++ 数据加载器是真正多线程的,不会启动任何新进程。
-
-数据加载器是 C++ 前端`data` API 的一部分,位于`torch::data::`命名空间中。该 API 由几个不同的组件组成:
-
-* 数据加载器类,
-* 用于定义数据集的 API,
-* 用于定义*转换*的 API,可以将其应用于数据集,
-* 用于定义*采样器*的 API,该采样器会生成用于对数据集建立索引的索引,
-* 现有数据集,变换和采样器的库。
-
-在本教程中,我们可以使用 C++ 前端自带的`MNIST`数据集。让我们为此实例化一个`torch::data::datasets::MNIST`,并应用两个变换:首先,对图像进行归一化,使其处于`-1`到`+1`的范围内(原始范围为`0`到`1`);其次,应用`Stack`*归类*(collation),它接收一批张量并沿第一个维度把它们堆叠成单个张量:
-
-```py
-auto dataset = torch::data::datasets::MNIST("./mnist")
-  .map(torch::data::transforms::Normalize<>(0.5, 0.5))
-  .map(torch::data::transforms::Stack<>());
-
-```
-
-请注意,MNIST 数据集应位于相对于训练二进制文件执行位置的`./mnist`目录中。您可以使用[此脚本](https://gist.github.com/goldsborough/6dd52a5e01ed73a642c1e772084bcd03)下载 MNIST 数据集。
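-
-作为一个小小的健全性检查(编者补充的示意代码,非教程原文),可以验证`Normalize(0.5, 0.5)`所做的事情就是逐元素计算`(x - 0.5) / 0.5`,从而把`0`到`1`的范围映射到`-1`到`+1`:
-
-```py
-#include <torch/torch.h>
-#include <iostream>
-
-int main() {
-  // Normalize(mean, stddev) 逐元素计算 (x - mean) / stddev。
-  auto x = torch::tensor({0.0, 0.5, 1.0});
-  auto y = (x - 0.5) / 0.5;
-  std::cout << y << std::endl; // 期望输出:-1, 0, 1
-}
-
-```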
-
-接下来,我们创建一个数据加载器,并把该数据集传给它。要创建新的数据加载器,我们使用`torch::data::make_data_loader`,它返回正确类型的`std::unique_ptr`(类型取决于数据集的类型、采样器的类型以及其他一些实现细节):
-
-```py
-auto data_loader = torch::data::make_data_loader(std::move(dataset));
-
-```
-
-数据加载器提供了很多选项,[完整列表可以在这里查看](https://github.com/pytorch/pytorch/blob/master/torch/csrc/api/include/torch/data/dataloader_options.h)。例如,为了加快数据加载速度,我们可以增加工作线程的数量。默认值为零,表示使用主线程。如果把`workers`设置为`2`,将产生两个线程并发加载数据。我们还应该把批量大小从默认值`1`增加到更合理的值,例如`64`(即`kBatchSize`的值)。因此,让我们创建一个`DataLoaderOptions`对象并设置相应的属性:
-
-```py
-auto data_loader = torch::data::make_data_loader(
-    std::move(dataset),
-    torch::data::DataLoaderOptions().batch_size(kBatchSize).workers(2));
-
-```
-
-现在,我们可以编写一个循环来加载批量数据,目前我们仅把它打印到控制台:
-
-```py
-for (torch::data::Example<>& batch : *data_loader) {
-  std::cout << "Batch size: " << batch.data.size(0) << " | Labels: ";
-  for (int64_t i = 0; i < batch.data.size(0); ++i) {
-    std::cout << batch.target[i].item<int64_t>() << " ";
-  }
-  std::cout << std::endl;
-}
-
-```
-
-在这种情况下,数据加载器返回的类型为`torch::data::Example`。该类型是一个简单的结构体,其`data`字段存放数据,`target`字段存放标签。因为我们之前应用了`Stack`归类,所以数据加载器只返回单个这样的示例。如果我们没有应用归类,数据加载器将改为产生`std::vector<torch::data::Example<>>`,批量中每个示例对应其中一个元素。
-
-如果重新构建并运行此代码,应该会看到类似以下的内容:
-
-```py
-root@fa350df05ecf:/home/build# make
-Scanning dependencies of target dcgan
-[ 50%] Building CXX object CMakeFiles/dcgan.dir/dcgan.cpp.o
-[100%] Linking CXX executable dcgan
-[100%] Built target dcgan
-root@fa350df05ecf:/home/build# make
-[100%] Built target dcgan
-root@fa350df05ecf:/home/build# ./dcgan
-Batch size: 64 | Labels: 5 2 6 7 2 1 6 7 0 1 6 2 3 6 9 1 8 4 0 6 5 3 3 0 4 6 6 6 4 0 8 6 0 6 9 2 4 0 2 8 6 3 3 2 9 2 0 1 4 2 3 4 8 2 9 9 3 5 8 0 0 7 9 9
-Batch size: 64 | Labels: 2 2 4 7 1 2 8 8 6 9 0 2 2 9 3 6 1 3 8 0 4 4 8 8 8 9 2 6 4 7 1 5 0 9 7 5 4 3 5 4 1 2 8 0 7 1 9 6 1 6 5 3 4 4 1 2 3 2 3 5 0 1 6 2
-Batch size: 64 | Labels: 4 5 4 2 1 4 8 3 8 3 6 1 5 4 3 6 2 2 5 1 3 1 5 0 8 2 1 5 3 2 4 4 5 9 7 2 8 9 2 0 6 7 4 3 8 3 5 8 8 3 0 5 8 0 8 7 8 5 5 6 1 7 8 0
-Batch size: 64 | Labels: 3 3 7 1 4 1 6 1 0 3 6 4 0 2 5 4 0 4 2 8 1 9 6 5 1 6 3 2 8 9 2 3 8 7 4 5 9 6 0 8 3 0 0 6 4 8 2 5 4 1 8 3 7 8 0 0 8 9 6 7 2 1 4 7
-Batch size: 64 | Labels: 3 0 5 5 9 8 3 9 8 9 5 9 5 0 4 1 2 7 7 2 0 0 5 4 8 7 7 6 1 0 7 9 3 0 6 3 2 6 2 7 6 3 3 4 0 5 8 8 9 1 9 2 1 9 4 4 9 2 4 6 2 9 4 0
-Batch size: 64 | Labels: 9 6 7 5 3 5 9 0 8 6 6 7 8 2 1 9 8 8 1 1 8 2 0 7 1 4 1 6 7 5 1 7 7 4 0 3 2 9 0 6 6 3 4 4 8 1 2 8 6 9 2 0 3 1 2 8 5 6 4 8 5 8 6 2
-Batch size: 64 | Labels: 9 3 0 3 6 5 1 8 6 0 1 9 9 1 6 1 7 7 4 4 4 7 8 8 6 7 8 2 6 0 4 6 8 2 5 3 9 8 4 0 9 9 3 7 0 5 8 2 4 5 6 2 8 2 5 3 7 1 9 1 8 2 2 7
-Batch size: 64 | Labels: 9 1 9 2 7 2 6 0 8 6 8 7 7 4 8 6 1 1 6 8 5 7 9 1 3 2 0 5 1 7 3 1 6 1 0 8 6 0 8 1 0 5 4 9 3 8 5 8 4 8 0 1 2 6 2 4 2 7 7 3 7 4 5 3
-Batch size: 64 | Labels: 8 8 3 1 8 6 4 2 9 5 8 0 2 8 6 6 7 0 9 8 3 8 7 1 6 6 2 7 7 4 5 5 2 1 7 9 5 4 9 1 0 3 1 9 3 9 8 8 5 3 7 5 3 6 8 9 4 2 0 1 2 5 4 7
-Batch size: 64 | Labels: 9 2 7 0 8 4 4 2 7 5 0 0 6 2 0 5 9 5 9 8 8 9 3 5 7 5 4 7 3 0 5 7 6 5 7 1 6 2 8 7 6 3 2 6 5 6 1 2 7 7 0 0 5 9 0 0 9 1 7 8 3 2 9 4
-Batch size: 64 | Labels: 7 6 5 7 7 5 2 2 4 9 9 4 8 7 4 8 9 4 5 7 1 2 6 9 8 5 1 2 3 6 7 8 1 1 3 9 8 7 9 5 0 8 5 1 8 7 2 6 5 1 2 0 9 7 4 0 9 0 4 6 0 0 8 6
-...
-
-```
-
-这意味着我们能够成功地从 MNIST 数据集中加载数据。
-
-## 编写训练循环
-
-现在,让我们完成示例的算法部分,实现生成器和判别器之间的精妙博弈。首先,我们将创建两个优化器,一个用于生成器,一个用于判别器。我们使用的优化器实现了 [Adam](https://arxiv.org/pdf/1412.6980.pdf) 算法:
-
-```py
-torch::optim::Adam generator_optimizer(
-    generator->parameters(), torch::optim::AdamOptions(2e-4).beta1(0.5));
-torch::optim::Adam discriminator_optimizer(
-    discriminator->parameters(), torch::optim::AdamOptions(5e-4).beta1(0.5));
-
-```
-
-注意
-
-在撰写本文时,C++ 前端提供了实现 Adagrad、Adam、LBFGS、RMSprop 和 SGD 的优化器。[文档](https://pytorch.org/cppdocs/api/namespace_torch__optim.html)中有最新列表。
-
-接下来,我们需要更新训练循环。我们将添加一个外层循环,在每个周期中遍历完数据加载器,然后编写 GAN 训练代码:
-
-```py
-for (int64_t epoch = 1; epoch <= kNumberOfEpochs; ++epoch) {
-  int64_t batch_index = 0;
-  for (torch::data::Example<>& batch : *data_loader) {
-    // Train discriminator with real images.
-    discriminator->zero_grad();
-    torch::Tensor real_images = batch.data;
-    torch::Tensor real_labels = torch::empty(batch.data.size(0)).uniform_(0.8, 1.0);
-    torch::Tensor real_output = discriminator->forward(real_images);
-    torch::Tensor d_loss_real = torch::binary_cross_entropy(real_output, real_labels);
-    d_loss_real.backward();
-
-    // Train discriminator with fake images.
-    torch::Tensor noise = torch::randn({batch.data.size(0), kNoiseSize, 1, 1});
-    torch::Tensor fake_images = generator->forward(noise);
-    torch::Tensor fake_labels = torch::zeros(batch.data.size(0));
-    torch::Tensor fake_output = discriminator->forward(fake_images.detach());
-    torch::Tensor d_loss_fake = torch::binary_cross_entropy(fake_output, fake_labels);
-    d_loss_fake.backward();
-
-    torch::Tensor d_loss = d_loss_real + d_loss_fake;
-    discriminator_optimizer.step();
-
-    // Train generator.
-    generator->zero_grad();
-    fake_labels.fill_(1);
-    fake_output = discriminator->forward(fake_images);
-    torch::Tensor g_loss = torch::binary_cross_entropy(fake_output, fake_labels);
-    g_loss.backward();
-    generator_optimizer.step();
-
-    std::printf(
-        "\r[%2ld/%2ld][%3ld/%3ld] D_loss: %.4f | G_loss: %.4f",
-        epoch,
-        kNumberOfEpochs,
-        ++batch_index,
-        batches_per_epoch,
-        d_loss.item<float>(),
-        g_loss.item<float>());
-  }
-}
-
-```
-
-上面,我们首先在真实图像上评估判别器,判别器应给这些图像分配较高的概率。为此,我们使用`torch::empty(batch.data.size(0)).uniform_(0.8, 1.0)`作为目标概率。
-
-注意
-
-我们选择在 0.8 到 1.0 之间均匀分布的随机值,而不是处处为 1.0,以使判别器训练更稳定。这一技巧称为*标签平滑*。
-
-在评估判别器之前,我们把其参数的梯度归零。计算完损失后,我们通过调用`d_loss.backward()`在网络中反向传播,计算新的梯度。对伪造图像我们重复这一步骤:这次不使用数据集中的图像,而是给生成器输入一批随机噪声,让它创建伪造图像,然后把这些伪造图像转发给判别器。这一次,我们希望判别器给出低概率,最好全为零。一旦计算出一批真实图像和一批伪造图像的判别器损失,我们就让判别器的优化器前进一步,以更新其参数。
-
-为了训练生成器,我们同样先把其梯度归零,然后在伪造图像上重新评估判别器。但是这一次,我们希望判别器给出非常接近 1 的概率,这表明生成器能够生成让判别器误以为真(即来自数据集)的图像。为此,我们把`fake_labels`张量全部填充为 1。最后,我们让生成器的优化器前进一步,以更新其参数。
-
-现在,我们应该可以在 CPU 上训练模型了。我们还没有任何捕获状态或采样输出的代码,稍后会添加。现在,让我们先观察到模型正在*做某事*,稍后再根据生成的图像验证这些事情是否有意义。重新构建并运行后应打印如下内容:
-
-```py
-root@3c0711f20896:/home/build# make && ./dcgan
-Scanning dependencies of target dcgan
-[ 50%] Building CXX object CMakeFiles/dcgan.dir/dcgan.cpp.o
-[100%] Linking CXX executable dcgan
-[100%] Built target dcgan
-[ 1/10][100/938] D_loss: 0.6876 | G_loss: 4.1304
-[ 1/10][200/938] D_loss: 0.3776 | G_loss: 4.3101
-[ 1/10][300/938] D_loss: 0.3652 | G_loss: 4.6626
-[ 1/10][400/938] D_loss: 0.8057 | G_loss: 2.2795
-[ 1/10][500/938] D_loss: 0.3531 | G_loss: 4.4452
-[ 1/10][600/938] D_loss: 0.3501 | G_loss: 5.0811
-[ 1/10][700/938] D_loss: 0.3581 | G_loss: 4.5623
-[ 1/10][800/938] D_loss: 0.6423 | G_loss: 1.7385
-[ 1/10][900/938] D_loss: 0.3592 | G_loss: 4.7333
-[ 2/10][100/938] D_loss: 0.4660 | G_loss: 2.5242
-[ 2/10][200/938] D_loss: 0.6364 | G_loss: 2.0886
-[ 2/10][300/938] D_loss: 0.3717 | G_loss: 3.8103
-[ 2/10][400/938] D_loss: 1.0201 | G_loss: 1.3544
-[ 2/10][500/938] D_loss: 0.4522 | G_loss: 2.6545
-...
-
-```
-
-## 移至 GPU
-
-虽然我们当前的脚本完全可以在 CPU 上运行,但我们都知道卷积在 GPU 上要快得多。让我们快速讨论一下如何把训练转移到 GPU 上。为此需要做两件事:把 GPU 设备说明传给我们自己分配的张量,并通过`to()`方法(C++ 前端中所有张量和模块都有该方法)把其他张量显式复制到 GPU 上。实现这两者的最简单方法是在训练脚本的顶层创建一个`torch::Device`实例,然后把该设备传给`torch::zeros`之类的张量工厂函数以及`to()`方法。我们可以先从 CPU 设备开始:
-
-```py
-// Place this somewhere at the top of your training script.
-torch::Device device(torch::kCPU);
-
-```
-
-新的张量分配,例如
-
-```py
-torch::Tensor fake_labels = torch::zeros(batch.data.size(0));
-
-```
-
-应该更新为以`device`作为最后一个参数:
-
-```py
-torch::Tensor fake_labels = torch::zeros(batch.data.size(0), device);
-
-```
-
-对于那些并非由我们亲手创建的张量,例如来自 MNIST 数据集的张量,我们必须插入显式的`to()`调用。这意味着
-
-```py
-torch::Tensor real_images = batch.data;
-
-```
-
-变成
-
-```py
-torch::Tensor real_images = batch.data.to(device);
-
-```
-
-并且我们的模型参数也应该移到正确的设备上:
-
-```py
-generator->to(device);
-discriminator->to(device);
-
-```
-
-注意
-
-如果张量已经位于传给`to()`的设备上,则该调用是空操作(no-op),不会发生多余的复制。
-
-至此,我们只是让之前的 CPU 代码更加明确了。不过,现在把设备改成 CUDA 设备也非常容易:
-
-```py
-torch::Device device(torch::kCUDA);
-
-```
-
-现在,所有张量都将驻留在 GPU 上,所有操作都会调用快速的 CUDA 内核,而无需更改任何下游代码。如果我们想指定特定的设备索引,可以把它作为第二个参数传给`Device`构造器。如果希望不同的张量驻留在不同的设备上,可以传递不同的设备实例(例如一个在 CUDA 设备 0 上,另一个在 CUDA 设备 1 上)。我们甚至可以动态地进行此配置,这通常有助于提高训练脚本的可移植性:
-
-```py
-torch::Device device = torch::kCPU;
-if (torch::cuda::is_available()) {
-  std::cout << "CUDA is available! Training on GPU." << std::endl;
-  device = torch::kCUDA;
-}
-
-```
-
-甚至
-
-```py
-torch::Device device(torch::cuda::is_available() ? torch::kCUDA : torch::kCPU);
-
-```
-
-## 检查点和恢复训练状态
-
-我们应该对训练脚本进行的最后一项增强,是定期保存模型参数的状态、优化器的状态以及一些生成的图像样本。如果我们的计算机在训练过程中崩溃,前两者能让我们恢复训练状态。对于长时间的训练,这是绝对必要的。幸运的是,C++ 前端提供了一个 API,用于序列化和反序列化模型与优化器的状态以及单个张量。
-
-其核心 API 是`torch::save(thing,filename)`和`torch::load(thing,filename)`,其中`thing`可以是`torch::nn::Module`的子类,也可以是优化器实例,例如我们训练脚本中的`Adam`对象。让我们更新训练循环,以一定间隔保存模型和优化器状态的检查点:
-
-```py
-if (batch_index % kCheckpointEvery == 0) {
-  // Checkpoint the model and optimizer state.
-  torch::save(generator, "generator-checkpoint.pt");
-  torch::save(generator_optimizer, "generator-optimizer-checkpoint.pt");
-  torch::save(discriminator, "discriminator-checkpoint.pt");
-  torch::save(discriminator_optimizer, "discriminator-optimizer-checkpoint.pt");
-  // Sample the generator and save the images.
-  torch::Tensor samples = generator->forward(torch::randn({8, kNoiseSize, 1, 1}, device));
-  torch::save((samples + 1.0) / 2.0, torch::str("dcgan-sample-", checkpoint_counter, ".pt"));
-  std::cout << "\n-> checkpoint " << ++checkpoint_counter << '\n';
-}
-
-```
-
-其中`kCheckpointEvery`是一个整数,例如设为`100`,表示每`100`个批量保存一次检查点;`checkpoint_counter`是一个计数器,每次保存检查点时递增。
-
-要恢复训练状态,可以在创建所有模型和优化器之后、训练循环之前添加如下代码:
-
-```py
-torch::optim::Adam generator_optimizer(
-    generator->parameters(), torch::optim::AdamOptions(2e-4).beta1(0.5));
-torch::optim::Adam discriminator_optimizer(
-    discriminator->parameters(), torch::optim::AdamOptions(2e-4).beta1(0.5));
-
-if (kRestoreFromCheckpoint) {
-  torch::load(generator, "generator-checkpoint.pt");
-  torch::load(generator_optimizer, "generator-optimizer-checkpoint.pt");
-  torch::load(discriminator, "discriminator-checkpoint.pt");
-  torch::load(
-      discriminator_optimizer, "discriminator-optimizer-checkpoint.pt");
-}
-
-int64_t checkpoint_counter = 0;
-for (int64_t epoch = 1; epoch <= kNumberOfEpochs; ++epoch) {
-  int64_t batch_index = 0;
-  for (torch::data::Example<>& batch : *data_loader) {
-
-```
-
-## 检查生成的图像
-
-我们的训练脚本现已完成。我们已准备好在 CPU 或 GPU 上训练 GAN。为了检查训练过程的中间输出(我们为此添加了把图像样本定期保存到`"dcgan-sample-xxx.pt"`文件的代码),我们可以编写一个小的 Python 脚本来加载这些张量并用 matplotlib 显示:
-
-```py
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import argparse
-
-import matplotlib.pyplot as plt
-import torch
-
-parser = argparse.ArgumentParser()
-parser.add_argument("-i", "--sample-file", required=True)
-parser.add_argument("-o", "--out-file", default="out.png")
-parser.add_argument("-d", "--dimension", type=int, default=3)
-options = parser.parse_args()
-
-module = torch.jit.load(options.sample_file)
-images = list(module.parameters())[0]
-
-for index in range(options.dimension * options.dimension):
-    image = images[index].detach().cpu().reshape(28, 28).mul(255).to(torch.uint8)
-    array = image.numpy()
-    axis = plt.subplot(options.dimension, options.dimension, 1 + index)
-    plt.imshow(array, cmap="gray")
-    axis.get_xaxis().set_visible(False)
-    axis.get_yaxis().set_visible(False)
-
-plt.savefig(options.out_file)
-print("Saved ", options.out_file)
-
-```
-
-现在,让我们训练模型约 30 个周期:
-
-```py
-root@3c0711f20896:/home/build# make && ./dcgan                                  10:17:57
-Scanning dependencies of target dcgan
-[ 50%] Building CXX object CMakeFiles/dcgan.dir/dcgan.cpp.o
-[100%] Linking CXX executable dcgan
-[100%] Built target dcgan
-CUDA is available! Training on GPU.
-[ 1/30][200/938] D_loss: 0.4953 | G_loss: 4.0195
--> checkpoint 1
-[ 1/30][400/938] D_loss: 0.3610 | G_loss: 4.8148
--> checkpoint 2
-[ 1/30][600/938] D_loss: 0.4072 | G_loss: 4.36760
--> checkpoint 3
-[ 1/30][800/938] D_loss: 0.4444 | G_loss: 4.0250
--> checkpoint 4
-[ 2/30][200/938] D_loss: 0.3761 | G_loss: 3.8790
--> checkpoint 5
-[ 2/30][400/938] D_loss: 0.3977 | G_loss: 3.3315
-...
--> checkpoint 120
-[30/30][938/938] D_loss: 0.3610 | G_loss: 3.8084
-
-```
-
-并在图中显示图像:
-
-```py
-root@3c0711f20896:/home/build# python display.py -i dcgan-sample-100.pt
-Saved out.png
-
-```
-
-应该看起来像这样:
-
-![digits](img/931dea1655c975ec616a9e22c80c242f.png)
-
-数字!万岁!现在轮到您了:您能改进模型,让这些数字看起来更好吗?
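-
-作为第一个尝试方向(编者补充的示意改动,非教程原文),可以在检查点处采样更多图像,例如 64 张而不是 8 张,这样就能用上面脚本的`-d 8`参数画出 8x8 的网格,更直观地观察训练进度:
-
-```py
-// 与前文的检查点采样代码相同,只是把采样数量从 8 改为 64。
-torch::Tensor samples = generator->forward(torch::randn({64, kNoiseSize, 1, 1}, device));
-torch::save((samples + 1.0) / 2.0, torch::str("dcgan-sample-", checkpoint_counter, ".pt"));
-
-```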
-
-## 总结
-
-希望本教程让您对 PyTorch C++ 前端有了一个易于消化的概览。像 PyTorch 这样的机器学习库必然具有非常广泛的 API,因此有许多概念我们没有时间或篇幅在此讨论。不过,我建议您亲自试用这个 API,并在遇到困难时查阅[我们的文档](https://pytorch.org/cppdocs/),尤其是[库 API](https://pytorch.org/cppdocs/api/library_root.html) 部分。另外请记住,只要我们能够做到,C++ 前端就会遵循 Python 前端的设计和语义,您可以利用这一点加快自己的学习速度。
-
-提示
-
-[您可以在存储库中找到本教程中提供的完整源代码](https://github.com/pytorch/examples/tree/master/cpp/dcgan)。
-
-与往常一样,如果您遇到任何问题或疑问,可以使用我们的[论坛](https://discuss.pytorch.org/)或 [GitHub Issue](https://github.com/pytorch/pytorch/issues) 与我们联系。
\ No newline at end of file
diff --git a/pytorch/官方教程/65.md b/pytorch/官方教程/45 分布式RPC框架-参数服务器.md
similarity index 100%
rename from pytorch/官方教程/65.md
rename to pytorch/官方教程/45 分布式RPC框架-参数服务器.md
diff --git a/pytorch/官方教程/45.md b/pytorch/官方教程/45.md
deleted file mode 100644
index b2a58869..00000000
--- a/pytorch/官方教程/45.md
+++ /dev/null
@@ -1,996 +0,0 @@
-# 自定义 C++ 和 CUDA 扩展
-
-> 原文:
-
-**作者**: [Peter Goldsborough](https://www.goldsborough.me/)
-
-PyTorch 提供了大量与神经网络、任意张量代数、数据处理及其他用途相关的操作。但是,您可能仍然会发现自己需要更多的自定义操作。例如,您可能想使用在某篇论文中发现的新颖激活函数,或者实现您在研究过程中开发的操作。
-
-在 PyTorch 中集成这种自定义操作的最简单方法,是通过扩展[此处](https://pytorch.org/docs/master/notes/extending.html)概述的`Function`和`Module`,用 Python 来编写它。这为您提供了自动微分的全部功能(使您不必编写导数函数)以及 Python 惯有的表达能力。但是,有时您的操作用 C++ 实现会更好。例如,您的代码可能*确实*需要快,因为它在模型中被非常频繁地调用,或者即使调用次数不多,单次开销也很高。另一个合理的原因是它依赖其他 C 或 C++ 库,或需要与之交互。为了解决这类情况,PyTorch 提供了一种非常简单的方式来编写自定义 *C++ 扩展*。
-
-C++ 扩展是我们开发的一种机制,允许用户(您)创建在源码树之外(out-of-source)定义的 PyTorch 运算符,即与 PyTorch 后端分开。该方法不同于本机 PyTorch 操作的实现方式。C++ 扩展旨在为您省去将操作与 PyTorch 后端集成所需的大量样板代码,同时为基于 PyTorch 的项目提供高度的灵活性。不过,一旦您把操作定义为 C++ 扩展,把它转换为本机 PyTorch 函数在很大程度上只是代码组织问题;如果您决定把该操作贡献到上游,可以届时再处理。
-
-## 动机和示例
-
-本说明的其余部分将逐步介绍编写和使用 C++(和 CUDA)扩展的一个实际示例。如果您赶时间,或者今天之内写不完这个算子就会被解雇,可以跳过本节,直接进入下一部分的实现细节。
-
-假设您想出了一种新型的循环单元,发现它与现有技术相比具有更好的性能。该循环单元类似于 LSTM,但不同之处在于它没有*遗忘门*,并使用*指数线性单元*(ELU)作为内部激活函数。由于这个单元永远不会遗忘,我们将其称为 *LLTM*,即*长长期记忆*(Long-Long-Term-Memory)单元。
-
-LLTM 与普通 LSTM 的这两点区别足够重要,以至于我们无法通过配置 PyTorch 的`LSTMCell`来达到目的,因此必须创建一个自定义单元。第一种也是最简单的方法(而且在所有情况下都可能是很好的第一步)是用 Python 在纯 PyTorch 中实现所需的功能。为此,我们需要继承`torch.nn.Module`并实现 LLTM 的正向传播。看起来像这样:
-
-```py
-class LLTM(torch.nn.Module):
-    def __init__(self, input_features, state_size):
-        super(LLTM, self).__init__()
-        self.input_features = input_features
-        self.state_size = state_size
-        # 3 * state_size for input gate, output gate and candidate cell gate.
-        # input_features + state_size because we will multiply with [input, h].
-        self.weights = torch.nn.Parameter(
-            torch.empty(3 * state_size, input_features + state_size))
-        self.bias = torch.nn.Parameter(torch.empty(3 * state_size))
-        self.reset_parameters()
-
-    def reset_parameters(self):
-        stdv = 1.0 / math.sqrt(self.state_size)
-        for weight in self.parameters():
-            weight.data.uniform_(-stdv, +stdv)
-
-    def forward(self, input, state):
-        old_h, old_cell = state
-        X = torch.cat([old_h, input], dim=1)
-
-        # Compute the input, output and candidate cell gates with one MM.
-        gate_weights = F.linear(X, self.weights, self.bias)
-        # Split the combined gate weight matrix into its components.
-        gates = gate_weights.chunk(3, dim=1)
-
-        input_gate = torch.sigmoid(gates[0])
-        output_gate = torch.sigmoid(gates[1])
-        # Here we use an ELU instead of the usual tanh.
-        candidate_cell = F.elu(gates[2])
-
-        # Compute the new cell state.
-        new_cell = old_cell + candidate_cell * input_gate
-        # Compute the new hidden state and output.
-        new_h = torch.tanh(new_cell) * output_gate
-
-        return new_h, new_cell
-
-```
-
-然后我们可以按预期使用:
-
-```py
-import torch
-
-X = torch.randn(batch_size, input_features)
-h = torch.randn(batch_size, state_size)
-C = torch.randn(batch_size, state_size)
-
-rnn = LLTM(input_features, state_size)
-
-new_h, new_C = rnn(X, (h, C))
-
-```
-
-自然,如果可能的话,您应该使用这种方法来扩展 PyTorch。由于 PyTorch 为 CPU 和 GPU 的操作提供了高度优化的实现,并由 [NVIDIA cuDNN](https://developer.nvidia.com/cudnn)、[Intel MKL](https://software.intel.com/en-us/mkl) 或 [NNPACK](https://github.com/Maratyszcza/NNPACK) 等库提供支持,上面的 PyTorch 代码通常已经足够快。但是,我们也能看出为什么在某些情况下仍有进一步提升性能的空间。最明显的原因是 PyTorch 不了解您要实现的*算法*,它只知道您用来组成算法的各个单独操作。因此,PyTorch 必须一个接一个地执行您的操作。由于对操作实现(或*内核*)的每次单独调用(可能涉及 CUDA 内核的启动)都有一定的开销,这一开销在大量函数调用中可能变得显著。此外,运行我们代码的 Python 解释器本身也可能拖慢程序。
-
-因此,一种确定的加速方法是用 C++(或 CUDA)重写部分代码,并*融合*特定的操作组。融合是指把许多函数的实现合并到一个函数中,这样既能减少内核启动次数,又能借助对全局数据流更好的可见性执行其他优化。
-
-让我们看看如何使用 C++ 扩展来实现 LLTM 的*融合*版本。首先,我们使用 [ATen](https://github.com/zdevito/ATen) 库以普通的 C++ 编写代码,该库为 PyTorch 的许多后端提供支持,并看看它能让我们多么轻松地转换 Python 代码。然后,我们将模型的某些部分移至 CUDA 内核,以受益于 GPU 提供的大规模并行,从而进一步加快速度。
-
-## 编写 C++ 扩展
-
-C++ 扩展有两种形式:可以使用`setuptools`“提前”构建,也可以通过`torch.utils.cpp_extension.load()`“即时”(JIT)构建。我们将从第一种方法开始,稍后再讨论后者。
-
-### 使用`setuptools`构建
-
-对于“提前”构建这种方式,我们通过编写一个`setup.py`脚本来构建 C++ 扩展,该脚本使用`setuptools`编译我们的 C++ 代码。对于 LLTM,它简单得就像这样:
-
-```py
-from setuptools import setup, Extension
-from torch.utils import cpp_extension
-
-setup(name='lltm_cpp',
-      ext_modules=[cpp_extension.CppExtension('lltm_cpp', ['lltm.cpp'])],
-      cmdclass={'build_ext': cpp_extension.BuildExtension})
-
-```
-
-在此代码中,`CppExtension`是`setuptools.Extension`的便利包装,它传递正确的包含路径并把扩展的语言设置为 C++。等效的原始`setuptools`代码如下:
-
-```py
-Extension(
-   name='lltm_cpp',
-   sources=['lltm.cpp'],
-   include_dirs=cpp_extension.include_paths(),
-   language='c++')
-
-```
-
-`BuildExtension`执行许多必需的配置步骤和检查,并在混合 C++/CUDA 扩展的情况下管理混合编译。这就是我们目前需要了解的有关构建 C++ 扩展的全部内容!现在让我们看一下`lltm.cpp`中 C++ 扩展的实现。
-
-### 编写 C++ 操作
-
-让我们开始用 C++ 实现 LLTM!
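-
-在进入代码之前,先补充一个下文会用到的数学事实(此推导为编者补充,非教程原文):sigmoid 函数 $\sigma(z) = 1/(1+e^{-z})$ 的导数可以写成它自身的函数:
-
-$$\sigma'(z) = \sigma(z)\,(1 - \sigma(z))$$
-
-这正是下面 C++ 代码中`(1 - s) * s`的来历。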
-反向传播需要用到的函数之一是 sigmoid 的导数。这段很小的代码足以讨论编写 C++ 扩展时可供我们使用的总体环境:
-
-```py
-#include <torch/extension.h>
-
-#include <iostream>
-
-torch::Tensor d_sigmoid(torch::Tensor z) {
-  auto s = torch::sigmoid(z);
-  return (1 - s) * s;
-}
-
-```
-
-`<torch/extension.h>`是一站式头文件,包含了编写 C++ 扩展所需的全部 PyTorch 组件。这包括:
-
-* ATen 库,这是我们用于张量计算的主要 API,
-* [`pybind11`](https://github.com/pybind/pybind11),这是我们为 C++ 代码创建 Python 绑定的方式,
-* 用于管理 ATen 与`pybind11`之间交互细节的头文件。
-
-`d_sigmoid()`的实现展示了如何使用 ATen API。PyTorch 的张量和变量接口是从 ATen 库自动生成的,因此我们可以或多或少地把 Python 实现 1:1 地转换为 C++。我们所有计算的主要数据类型都是`torch::Tensor`,其完整 API 可以在 PyTorch 的 C++ 文档中查看。还要注意,我们可以包含`<iostream>`或*任何其他 C 或 C++ 头文件*,我们拥有 C++11 的全部功能。
-
-#### 正向传播
-
-接下来,我们可以把整个正向传播移植到 C++:
-
-```py
-#include <vector>
-
-std::vector<torch::Tensor> lltm_forward(
-    torch::Tensor input,
-    torch::Tensor weights,
-    torch::Tensor bias,
-    torch::Tensor old_h,
-    torch::Tensor old_cell) {
-  auto X = torch::cat({old_h, input}, /*dim=*/1);
-
-  auto gate_weights = torch::addmm(bias, X, weights.transpose(0, 1));
-  auto gates = gate_weights.chunk(3, /*dim=*/1);
-
-  auto input_gate = torch::sigmoid(gates[0]);
-  auto output_gate = torch::sigmoid(gates[1]);
-  auto candidate_cell = torch::elu(gates[2], /*alpha=*/1.0);
-
-  auto new_cell = old_cell + candidate_cell * input_gate;
-  auto new_h = torch::tanh(new_cell) * output_gate;
-
-  return {new_h,
-          new_cell,
-          input_gate,
-          output_gate,
-          candidate_cell,
-          X,
-          gate_weights};
-}
-
-```
-
-#### 反向传播
-
-C++ 扩展 API 目前不提供自动为我们生成反向函数的方法。因此,我们还必须实现 LLTM 的反向传播,它计算损失相对于正向传播每个输入的导数。最终,我们会把正向和反向函数一起放入`torch.autograd.Function`中,以创建一个漂亮的 Python 绑定。反向函数稍微复杂一些,因此我们不深入剖析代码(如果您有兴趣,[Alex Graves 的论文](https://www.cs.toronto.edu/~graves/phd.pdf)有这方面的更多信息):
-
-```py
-// tanh'(z) = 1 - tanh^2(z)
-torch::Tensor d_tanh(torch::Tensor z) {
-  return 1 - z.tanh().pow(2);
-}
-
-// elu'(z) = relu'(z) + { alpha * exp(z) if (alpha * (exp(z) - 1)) < 0, else 0}
-torch::Tensor d_elu(torch::Tensor z, torch::Scalar alpha = 1.0) {
-  auto e = z.exp();
-  auto mask = (alpha * (e - 1)) < 0;
-  return (z > 0).type_as(z) + mask.type_as(z) * (alpha * e);
-}
-
-std::vector<torch::Tensor> lltm_backward(
-    torch::Tensor grad_h,
-    torch::Tensor grad_cell,
-    torch::Tensor new_cell,
-    torch::Tensor input_gate,
-    torch::Tensor output_gate,
-    torch::Tensor candidate_cell,
-    torch::Tensor X,
-    torch::Tensor gate_weights,
-    torch::Tensor weights) {
-  auto d_output_gate = torch::tanh(new_cell) * grad_h;
-  auto d_tanh_new_cell = output_gate * grad_h;
-  auto d_new_cell = d_tanh(new_cell) * d_tanh_new_cell + grad_cell;
-
-  auto d_old_cell = d_new_cell;
-  auto d_candidate_cell = input_gate * d_new_cell;
-  auto d_input_gate = candidate_cell * d_new_cell;
-
-  auto gates = gate_weights.chunk(3, /*dim=*/1);
-  d_input_gate *= d_sigmoid(gates[0]);
-  d_output_gate *= d_sigmoid(gates[1]);
-  d_candidate_cell *= d_elu(gates[2]);
-
-  auto d_gates =
-      torch::cat({d_input_gate, d_output_gate, d_candidate_cell}, /*dim=*/1);
-
-  auto d_weights = d_gates.t().mm(X);
-  auto d_bias = d_gates.sum(/*dim=*/0, /*keepdim=*/true);
-
-  auto d_X = d_gates.mm(weights);
-  const auto state_size = grad_h.size(1);
-  auto d_old_h = d_X.slice(/*dim=*/1, 0, state_size);
-  auto d_input = d_X.slice(/*dim=*/1, state_size);
-
-  return {d_old_h, d_input, d_weights, d_bias, d_old_cell};
-}
-
-```
-
-### 绑定到 Python
-
-一旦用 C++ 和 ATen 编写好了操作,就可以使用`pybind11`以非常简单的方式把 C++ 函数或类绑定到 Python 中。关于这部分的疑问或问题,大多可以在[`pybind11`文档](https://pybind11.readthedocs.io/en/master/)中找到答案。
-
-对于我们的扩展,必要的绑定代码只有四行:
-
-```py
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("forward", &lltm_forward, "LLTM forward");
-  m.def("backward", &lltm_backward, "LLTM backward");
"LLTM backward"); -} - -``` - -这里要注意的一点是宏`TORCH_EXTENSION_NAME`。 火炬扩展程序构建会将其定义为您在`setup.py`脚本中为扩展程序指定的名称。 在这种情况下,`TORCH_EXTENSION_NAME`的值为`lltm`。 这是为了避免在两个位置(构建脚本和 C++ 代码)都保留扩展名,因为两者之间的不匹配会导致令人讨厌且难以跟踪的问题。 - -### 使用扩展程序 - -现在,我们准备将扩展名导入 PyTorch 中。 此时,您的目录结构可能如下所示: - -```py -pytorch/ - lltm-extension/ - lltm.cpp - setup.py - -``` - -现在,运行`python setup.py install`来构建和安装扩展程序。 看起来应该像这样: - -```py -running install -running bdist_egg -running egg_info -creating lltm_cpp.egg-info -writing lltm_cpp.egg-info/PKG-INFO -writing dependency_links to lltm_cpp.egg-info/dependency_links.txt -writing top-level names to lltm_cpp.egg-info/top_level.txt -writing manifest file 'lltm_cpp.egg-info/SOURCES.txt' -reading manifest file 'lltm_cpp.egg-info/SOURCES.txt' -writing manifest file 'lltm_cpp.egg-info/SOURCES.txt' -installing library code to build/bdist.linux-x86_64/egg -running install_lib -running build_ext -building 'lltm_cpp' extension -creating build -creating build/temp.linux-x86_64-3.7 -gcc -pthread -B ~/local/miniconda/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I~/local/miniconda/lib/python3.7/site-packages/torch/include -I~/local/miniconda/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I~/local/miniconda/lib/python3.7/site-packages/torch/include/TH -I~/local/miniconda/lib/python3.7/site-packages/torch/include/THC -I~/local/miniconda/include/python3.7m -c lltm.cpp -o build/temp.linux-x86_64-3.7/lltm.o -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=lltm_cpp -D_GLIBCXX_USE_CXX11_ABI=1 -std=c++11 -cc1plus: warning: command line option '-Wstrict-prototypes' is valid for C/ObjC but not for C++ -creating build/lib.linux-x86_64-3.7 -g++ -pthread -shared -B ~/local/miniconda/compiler_compat -L~/local/miniconda/lib -Wl,-rpath=~/local/miniconda/lib -Wl,--no-as-needed -Wl,--sysroot=/ build/temp.linux-x86_64-3.7/lltm.o -o build/lib.linux-x86_64-3.7/lltm_cpp.cpython-37m-x86_64-linux-gnu.so -creating build/bdist.linux-x86_64 -creating build/bdist.linux-x86_64/egg -copying build/lib.linux-x86_64-3.7/lltm_cpp.cpython-37m-x86_64-linux-gnu.so -> build/bdist.linux-x86_64/egg -creating stub loader for lltm_cpp.cpython-37m-x86_64-linux-gnu.so -byte-compiling build/bdist.linux-x86_64/egg/lltm_cpp.py to lltm_cpp.cpython-37.pyc -creating build/bdist.linux-x86_64/egg/EGG-INFO -copying lltm_cpp.egg-info/PKG-INFO -> build/bdist.linux-x86_64/egg/EGG-INFO -copying lltm_cpp.egg-info/SOURCES.txt -> build/bdist.linux-x86_64/egg/EGG-INFO -copying lltm_cpp.egg-info/dependency_links.txt -> build/bdist.linux-x86_64/egg/EGG-INFO -copying lltm_cpp.egg-info/top_level.txt -> build/bdist.linux-x86_64/egg/EGG-INFO -writing build/bdist.linux-x86_64/egg/EGG-INFO/native_libs.txt -zip_safe flag not set; analyzing archive contents... 
-__pycache__.lltm_cpp.cpython-37: module references __file__
-creating 'dist/lltm_cpp-0.0.0-py3.7-linux-x86_64.egg' and adding 'build/bdist.linux-x86_64/egg' to it
-removing 'build/bdist.linux-x86_64/egg' (and everything under it)
-Processing lltm_cpp-0.0.0-py3.7-linux-x86_64.egg
-removing '~/local/miniconda/lib/python3.7/site-packages/lltm_cpp-0.0.0-py3.7-linux-x86_64.egg' (and everything under it)
-creating ~/local/miniconda/lib/python3.7/site-packages/lltm_cpp-0.0.0-py3.7-linux-x86_64.egg
-Extracting lltm_cpp-0.0.0-py3.7-linux-x86_64.egg to ~/local/miniconda/lib/python3.7/site-packages
-lltm-cpp 0.0.0 is already the active version in easy-install.pth
-
-Installed ~/local/miniconda/lib/python3.7/site-packages/lltm_cpp-0.0.0-py3.7-linux-x86_64.egg
-Processing dependencies for lltm-cpp==0.0.0
-Finished processing dependencies for lltm-cpp==0.0.0
-
-```
-
-关于编译器的一点说明:由于 ABI 版本问题,用于构建 C++ 扩展的编译器必须与构建 PyTorch 所用的编译器 ABI 兼容。实际上,这意味着在 Linux 上必须使用 GCC 4.9 或更高版本。对于 Ubuntu 16.04 和其他较新的 Linux 发行版,这应该已经是默认编译器了。在 MacOS 上,必须使用 clang(它没有任何 ABI 版本问题)。在最坏的情况下,您可以用您的编译器从源代码构建 PyTorch,然后用同一编译器构建扩展。
-
-扩展构建完成后,您就可以用`setup.py`脚本中指定的名称在 Python 中导入它。只需确保先`import torch`,因为这会解析动态链接器必须看到的一些符号:
-
-```py
-In [1]: import torch
-In [2]: import lltm_cpp
-In [3]: lltm_cpp.forward
-Out[3]: <function lltm_cpp.PyCapsule.forward>
-
-```
-
-如果我们对函数或模块调用`help()`,可以看到其签名与我们的 C++ 代码匹配:
-
-```py
-In[4] help(lltm_cpp.forward)
-forward(...) method of builtins.PyCapsule instance
-    forward(arg0: torch::Tensor, arg1: torch::Tensor, arg2: torch::Tensor, arg3: torch::Tensor, arg4: torch::Tensor) -> List[torch::Tensor]
-
-    LLTM forward
-
-```
-
-由于现在我们可以从 Python 调用 C++ 函数,可以把它们包装进`torch.autograd.Function`和`torch.nn.Module`,使其成为 PyTorch 的一等公民:
-
-```py
-import math
-import torch
-
-# Our module!
-import lltm_cpp
-
-class LLTMFunction(torch.autograd.Function):
-    @staticmethod
-    def forward(ctx, input, weights, bias, old_h, old_cell):
-        outputs = lltm_cpp.forward(input, weights, bias, old_h, old_cell)
-        new_h, new_cell = outputs[:2]
-        variables = outputs[1:] + [weights]
-        ctx.save_for_backward(*variables)
-
-        return new_h, new_cell
-
-    @staticmethod
-    def backward(ctx, grad_h, grad_cell):
-        outputs = lltm_cpp.backward(
-            grad_h.contiguous(), grad_cell.contiguous(), *ctx.saved_variables)
-        d_old_h, d_input, d_weights, d_bias, d_old_cell = outputs
-        return d_input, d_weights, d_bias, d_old_h, d_old_cell
-
-class LLTM(torch.nn.Module):
-    def __init__(self, input_features, state_size):
-        super(LLTM, self).__init__()
-        self.input_features = input_features
-        self.state_size = state_size
-        self.weights = torch.nn.Parameter(
-            torch.empty(3 * state_size, input_features + state_size))
-        self.bias = torch.nn.Parameter(torch.empty(3 * state_size))
-        self.reset_parameters()
-
-    def reset_parameters(self):
-        stdv = 1.0 / math.sqrt(self.state_size)
-        for weight in self.parameters():
-            weight.data.uniform_(-stdv, +stdv)
-
-    def forward(self, input, state):
-        return LLTMFunction.apply(input, self.weights, self.bias, *state)
-
-```
-
-#### 性能比较
-
-现在我们已经能够构建并从 PyTorch 调用我们的 C++ 代码,可以运行一个小型基准测试,看看用 C++ 重写操作能带来多少性能提升。我们将多次运行 LLTM 的正向和反向传播并测量耗时:
-
-```py
-import time
-
-import torch
-
-batch_size = 16
-input_features = 32
-state_size = 128
-
-X = torch.randn(batch_size, input_features)
-h = torch.randn(batch_size, state_size)
-C = torch.randn(batch_size, state_size)
-
-rnn = LLTM(input_features, state_size)
-
-forward = 0
-backward = 0
-for _ in range(100000):
-    start = time.time()
-    new_h, new_C = rnn(X, (h, C))
-    forward += time.time() - start
-
-    start = time.time()
-    (new_h.sum() + new_C.sum()).backward()
-    backward += time.time() - start
-
-print('Forward: {:.3f} us | Backward {:.3f} us'.format(forward * 1e6/1e5, backward * 1e6/1e5))
-
-```
-
-如果用本文开头那个纯 Python 编写的原始 LLTM 运行这段代码,会得到以下数字(在我的机器上):
-
-```py
-Forward: 506.480 us | Backward 444.694 us
-
-```
-
-以及我们的新 C++ 版本:
-
-```py
-Forward: 349.335 us | Backward 443.523 us
-
-```
-
-我们已经可以看到正向函数的明显提速(超过 30%)。反向函数也有加速,尽管幅度不大。我上面写的反向传播并没有特别优化,肯定还有改进空间。此外,PyTorch 的自动微分引擎可以自动并行化计算图,整体上可能使用更高效的操作流程,而且它本身也是用 C++ 实现的,因此速度可期。不过,这是一个良好的开始。
-
-#### GPU 设备上的性能
-
-关于 PyTorch 的 *ATen* 后端的一个奇妙事实是,它抽象了您正在运行的计算设备。这意味着我们为 CPU 编写的同一份代码*也可以*在 GPU 上运行,并且各个操作会相应地分派到针对 GPU 优化的实现。对于某些操作,例如矩阵乘法(如`mm`或`addmm`),这是一个很大的优势。让我们看看用 CUDA 张量运行 C++ 代码能获得怎样的性能。无需更改实现,只需从 Python 把张量放入 GPU 内存:在创建时添加`device=cuda_device`参数,或在创建后使用`.to(cuda_device)`:
-
-```py
-import torch
-
-assert torch.cuda.is_available()
-cuda_device = torch.device("cuda")  # device object representing GPU
-
-batch_size = 16
-input_features = 32
-state_size = 128
-
-# Note the device=cuda_device arguments here
-X = torch.randn(batch_size, input_features, device=cuda_device)
-h = torch.randn(batch_size, state_size, device=cuda_device)
-C = torch.randn(batch_size, state_size, device=cuda_device)
-
-rnn = LLTM(input_features, state_size).to(cuda_device)
-
-forward = 0
-backward = 0
-for _ in range(100000):
-    start = time.time()
-    new_h, new_C = rnn(X, (h, C))
-    torch.cuda.synchronize()
-    forward += time.time() - start
-
-    start = time.time()
-    (new_h.sum() + new_C.sum()).backward()
-    torch.cuda.synchronize()
-    backward += time.time() - start
-
-print('Forward: {:.3f} us | Backward {:.3f} us'.format(forward * 1e6/1e5, backward * 1e6/1e5))
-
-```
-
-再次把普通的 PyTorch 代码与 C++ 版本(现在都运行在 CUDA 设备上)进行比较,我们再次看到了性能提升。对于 Python/PyTorch:
-
-```py
-Forward: 187.719 us | Backward 410.815 us
-
-```
-
-和 C++/ATen:
-
-```py
-Forward: 149.802 us | Backward 393.458 us
-
-```
-
-与非 CUDA 代码相比,这是全面的大幅提速。但是,通过编写自定义 CUDA 内核,我们还能从 C++ 代码中榨取更多性能,我们很快就会深入探讨。在此之前,让我们讨论构建 C++ 扩展的另一种方法。
-
-### JIT 编译扩展
-
-之前我提到过,构建 C++ 扩展有两种方法:使用`setuptools`或即时(JIT)编译。介绍完前者之后,让我们详细说说后者。JIT 编译机制通过调用 PyTorch API 中一个名为`torch.utils.cpp_extension.load()`的简单函数,为您提供动态编译和加载扩展的方式。对于 LLTM,它简单得就像这样:
-
-```py
-from torch.utils.cpp_extension import load
-
-lltm_cpp = load(name="lltm_cpp", sources=["lltm.cpp"])
-
-```
-
-在这里,我们为该函数提供与`setuptools`相同的信息。在后台,它会执行以下操作:
-
-1. 创建一个临时目录`/tmp/torch_extensions/lltm_cpp`,
-2. 在该临时目录中生成一个 [Ninja](https://ninja-build.org/) 构建文件,
-3. 把您的源文件编译为共享库,
-4. 把该共享库导入为 Python 模块。
-
-实际上,如果把`verbose=True`传给`cpp_extension.load()`,该过程的相关信息会打印出来:
-
-```py
-Using /tmp/torch_extensions as PyTorch extensions root...
-Emitting ninja build file /tmp/torch_extensions/lltm_cpp/build.ninja...
-Building extension module lltm_cpp...
-Loading extension module lltm_cpp...
-
-```
-
-生成的 Python 模块与`setuptools`生成的完全相同,但免去了维护单独的`setup.py`构建文件的要求。如果您的设置更复杂,确实需要`setuptools`的全部功能,您*可以*编写自己的`setup.py`;但在许多情况下,这种 JIT 技术就足够了。第一次运行这行代码时会花费一些时间,因为扩展在后台编译。由于我们使用 Ninja 构建系统来构建源代码,重新编译是增量式的,因此第二次运行 Python 模块时重新加载扩展非常快;如果没有改动扩展的源文件,开销也很低。
-
-## 编写混合的 C++/CUDA 扩展
-
-为了把实现真正提升到一个新的水平,我们可以用自定义 CUDA 内核手写正向和反向传播的一部分。对于 LLTM 来说,这前景尤其好,因为它按顺序执行大量逐点运算,这些运算都可以在单个 CUDA 内核中融合并行化。让我们看看如何编写这样的 CUDA 内核,并用这一扩展机制把它与 PyTorch 集成。
-
-编写 CUDA 扩展的一般策略是:先写一个 C++ 文件,定义将从 Python 调用的函数,并用`pybind11`把这些函数绑定到 Python;此外,该文件还声明在 CUDA(`.cu`)文件中定义的函数。C++ 函数随后做一些检查,最终把调用转发给 CUDA 函数。在 CUDA 文件中,我们编写实际的 CUDA 内核。然后`cpp_extension`包会负责用`gcc`之类的 C++ 编译器编译 C++ 源代码,用 NVIDIA 的`nvcc`编译器编译 CUDA 源代码。这样可以确保由最了解相应文件的编译器来编译它。最终,它们会被链接进一个共享库,供 Python 代码使用。
-
-我们从 C++ 文件开始,将其称为`lltm_cuda.cpp`,例如:
-
-```py
-#include <torch/extension.h>
-
-#include <vector>
-
-// CUDA forward declarations
-
-std::vector<torch::Tensor> lltm_cuda_forward(
-    torch::Tensor input,
-    torch::Tensor weights,
-    torch::Tensor bias,
-    torch::Tensor old_h,
-    torch::Tensor old_cell);
-
-std::vector<torch::Tensor> lltm_cuda_backward(
-    torch::Tensor grad_h,
-    torch::Tensor grad_cell,
-    torch::Tensor new_cell,
-    torch::Tensor input_gate,
-    torch::Tensor output_gate,
-    torch::Tensor candidate_cell,
-    torch::Tensor X,
-    torch::Tensor gate_weights,
-    torch::Tensor weights);
-
-// C++ interface
-
-#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
-#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
-#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
-
-std::vector<torch::Tensor> lltm_forward(
-    torch::Tensor input,
-    torch::Tensor weights,
-    torch::Tensor bias,
-    torch::Tensor old_h,
-    torch::Tensor old_cell) {
-  CHECK_INPUT(input);
-  CHECK_INPUT(weights);
-  CHECK_INPUT(bias);
-  CHECK_INPUT(old_h);
-  CHECK_INPUT(old_cell);
-
-  return lltm_cuda_forward(input, weights, bias, old_h, old_cell);
-}
-
-std::vector<torch::Tensor> lltm_backward(
-    torch::Tensor grad_h,
-    torch::Tensor grad_cell,
-    torch::Tensor new_cell,
-    torch::Tensor input_gate,
-    torch::Tensor output_gate,
-    torch::Tensor candidate_cell,
-    torch::Tensor X,
-    torch::Tensor gate_weights,
-    torch::Tensor weights) {
-  CHECK_INPUT(grad_h);
-  CHECK_INPUT(grad_cell);
-  CHECK_INPUT(input_gate);
-  CHECK_INPUT(output_gate);
-  CHECK_INPUT(candidate_cell);
-  CHECK_INPUT(X);
-  CHECK_INPUT(gate_weights);
-  CHECK_INPUT(weights);
-
-  return lltm_cuda_backward(
-      grad_h,
-      grad_cell,
-      new_cell,
-      input_gate,
-      output_gate,
-      candidate_cell,
-      X,
-      gate_weights,
-      weights);
-}
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("forward", &lltm_forward, "LLTM forward (CUDA)");
-  m.def("backward", &lltm_backward, "LLTM backward (CUDA)");
-}
-
-```
-
-如您所见,它大部分是样板代码:做检查,然后转发给我们将在 CUDA 文件中定义的函数。我们把 CUDA 文件命名为`lltm_cuda_kernel.cu`(注意`.cu`扩展名!)。NVCC 能够较好地编译 C++11,因此我们仍然可以使用 ATen 和 C++ 标准库(但不能使用`torch.h`)。请注意,`setuptools`无法处理名称相同但扩展名不同的文件,因此如果您使用`setup.py`方法而不是 JIT 方法,就必须给 CUDA 文件起一个与 C++ 文件不同的名称(对于 JIT 方法,`lltm.cpp`和`lltm.cu`可以正常工作)。让我们看一下该文件的样子:
-
-```py
-#include <torch/extension.h>
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-#include <vector>
-
-template <typename scalar_t>
-__device__ __forceinline__ scalar_t sigmoid(scalar_t z) {
-  return 1.0 / (1.0 + exp(-z));
-}
-
-```
-
-在这里,我们看到了刚刚描述的头文件,以及我们在使用`__device__`和`__forceinline__`之类的 CUDA 特有声明和`exp`之类的函数。让我们继续编写几个需要的辅助函数:
-
-```py
-template <typename scalar_t>
-__device__ __forceinline__ scalar_t d_sigmoid(scalar_t z) {
-  const auto s = sigmoid(z);
-  return (1.0 - s) * s;
-}
-
-template <typename scalar_t>
-__device__ __forceinline__ scalar_t d_tanh(scalar_t z) {
-  const auto t = tanh(z);
-  return 1 - (t * t);
-}
-
-template <typename scalar_t>
-__device__ __forceinline__ scalar_t elu(scalar_t z, scalar_t alpha = 1.0) {
-  return fmax(0.0, z) + fmin(0.0, alpha * (exp(z) - 1.0));
-}
-
-template <typename scalar_t>
-__device__ __forceinline__ scalar_t d_elu(scalar_t z, scalar_t alpha = 1.0) {
-  const auto e = exp(z);
-  const auto d_relu = z < 0.0 ? 0.0 : 1.0;
-  return d_relu + (((alpha * (e - 1.0)) < 0.0) ? (alpha * e) : 0.0);
-}
-
-```
-
-现在,要真正实现一个运算,我们还需要两样东西:一个主机函数,执行那些我们不想手写的操作并调用 CUDA 内核;以及针对想要加速的部分的实际 CUDA 内核。对于正向传播,主机函数应如下所示:
-
-```py
-std::vector<torch::Tensor> lltm_cuda_forward(
-    torch::Tensor input,
-    torch::Tensor weights,
-    torch::Tensor bias,
-    torch::Tensor old_h,
-    torch::Tensor old_cell) {
-  auto X = torch::cat({old_h, input}, /*dim=*/1);
-  auto gates = torch::addmm(bias, X, weights.transpose(0, 1));
-
-  const auto batch_size = old_cell.size(0);
-  const auto state_size = old_cell.size(1);
-
-  auto new_h = torch::zeros_like(old_cell);
-  auto new_cell = torch::zeros_like(old_cell);
-  auto input_gate = torch::zeros_like(old_cell);
-  auto output_gate = torch::zeros_like(old_cell);
-  auto candidate_cell = torch::zeros_like(old_cell);
-
-  const int threads = 1024;
-  const dim3 blocks((state_size + threads - 1) / threads, batch_size);
-
-  AT_DISPATCH_FLOATING_TYPES(gates.type(), "lltm_forward_cuda", ([&] {
-    lltm_cuda_forward_kernel<scalar_t><<<blocks, threads>>>(
-        gates.data<scalar_t>(),
-        old_cell.data<scalar_t>(),
-        new_h.data<scalar_t>(),
-        new_cell.data<scalar_t>(),
-        input_gate.data<scalar_t>(),
-        output_gate.data<scalar_t>(),
-        candidate_cell.data<scalar_t>(),
-        state_size);
-  }));
-
-  return {new_h, new_cell, input_gate, output_gate, candidate_cell, X, gates};
-}
-
-```
-
-这里的主要关注点是`AT_DISPATCH_FLOATING_TYPES`宏和内核启动(由`<<<...>>>`指示)。尽管 ATen 抽象了我们所处理张量的设备和数据类型,但在运行时,张量仍然由具体设备上具体类型的内存支持。因此,我们需要一种方法在运行时确定张量的类型,然后有选择地调用具有相应正确类型签名的函数。如果手动完成,(概念上)大致如下:
-
-```py
-switch (tensor.type().scalarType()) {
-  case torch::ScalarType::Double:
-    return function<double>(tensor.data<double>());
-  case torch::ScalarType::Float:
-    return function<float>(tensor.data<float>());
-  ...
-}
-
-```
-
-`AT_DISPATCH_FLOATING_TYPES`的目的就是为我们处理这一分派。它接受一个类型(在我们的例子中为`gates.type()`)、一个名称(用于错误消息)和一个 lambda 函数。在该 lambda 函数内部,类型别名`scalar_t`可用,并且被定义为张量在该上下文中运行时的实际类型。这样,如果我们有一个模板函数(我们的 CUDA 内核就是),就可以用这个`scalar_t`别名实例化它,从而调用正确的函数。在这种情况下,我们还想以`scalar_t`类型指针的形式获取张量的数据指针。如果您想对所有类型而不仅仅是浮点类型(`Float`和`Double`)进行分派,可以使用`AT_DISPATCH_ALL_TYPES`。
-
-请注意,我们用普通的 ATen 执行了一些操作。这些操作仍会在 GPU 上运行,但使用 ATen 的默认实现。这是有道理的,因为 ATen 对矩阵乘法(例如`addmm`)或卷积会使用高度优化的例程,这些例程很难由我们自己实现并超越。
-
-至于内核启动本身,我们在这里指定每个 CUDA 块有 1024 个线程,并把整个 GPU 网格划分为足够多的`1 x 1024`线程块,使矩阵的每个分量由一个线程处理。例如,如果状态大小为 2048、批量大小为 4,我们总共会启动`4 x 2 = 8`个块,每块 1024 个线程。如果您以前从未听说过 CUDA 的“块”(block)或“网格”(grid),那么[这篇 CUDA 入门](https://devblogs.nvidia.com/even-easier-introduction-cuda)可能会有帮助。
-
-实际的 CUDA 内核非常简单(如果您写过 GPU 程序的话):
-
-```py
-template <typename scalar_t>
-__global__ void lltm_cuda_forward_kernel(
-    const scalar_t* __restrict__ gates,
-    const scalar_t* __restrict__ old_cell,
-    scalar_t* __restrict__ new_h,
-    scalar_t* __restrict__ new_cell,
-    scalar_t* __restrict__ input_gate,
-    scalar_t* __restrict__ output_gate,
-    scalar_t* __restrict__ candidate_cell,
-    size_t state_size) {
-  const int column = blockIdx.x * blockDim.x + threadIdx.x;
-  const int index = blockIdx.y * state_size + column;
-  const int gates_row = blockIdx.y * (state_size * 3);
-  if (column < state_size) {
-    input_gate[index] = sigmoid(gates[gates_row + column]);
-    output_gate[index] = sigmoid(gates[gates_row + state_size + column]);
-    candidate_cell[index] = elu(gates[gates_row + 2 * state_size + column]);
-    new_cell[index] =
-        old_cell[index] + candidate_cell[index] * input_gate[index];
-    new_h[index] = tanh(new_cell[index]) * output_gate[index];
-  }
-}
-
-```
-
-这里最有趣的地方在于,我们能够为门矩阵中的每个分量完全并行地计算所有这些逐点运算。试想如果必须用一个巨大的串行`for`循环逐个处理一百万个元素,您就会明白为什么这样更快。
-
-### 使用访问器
-
-您可以看到,在 CUDA 内核中我们直接使用具有正确类型的指针进行操作。事实上,在 CUDA 内核内部直接使用高级的、类型无关的张量会非常低效。
-
-然而,这是以易用性和可读性为代价的,对高维数据尤其如此。在我们的示例中,我们知道连续的`gates`张量有 3 个维度:
-
-1. 批量维,大小为`batch_size`,步幅为`3*state_size`
-2. 行维,大小为`3`,步幅为`state_size`
-3. 索引维,大小为`state_size`,步幅为`1`
-
-那么我们如何在内核中访问元素`gates[n][row][column]`呢?事实证明,只需要用步幅做一些简单的算术运算:
-
-```py
-gates.data<scalar_t>()[n*3*state_size + row*state_size + column]
-
-```
-
-除了冗长之外,该表达式还要求显式知道步幅,并把它们作为参数传给内核函数。可以想见,当内核函数接受多个大小不同的张量时,参数列表会变得非常长。
-
-幸运的是,ATen 提供了访问器(accessor),它在一次动态检查确认张量的类型和维数之后创建。此后,访问器暴露出一个可以高效访问张量元素的 API,而无需转换为单个指针:
-
-```py
-torch::Tensor foo = torch::rand({12, 12});
-
-// assert foo is 2-dimensional and holds floats.
-auto foo_a = foo.accessor<float,2>();
-float trace = 0;
-
-for(int i = 0; i < foo_a.size(0); i++) {
-  // use the accessor foo_a to get tensor data.
-  trace += foo_a[i][i];
-}
-
-```
-
-访问器对象具有较高层的接口:`.size()`和`.stride()`方法以及多维索引。`.accessor<>`接口旨在高效访问 CPU 张量上的数据。CUDA 张量的对应物是`packed_accessor64<>`和`packed_accessor32<>`,它们产生带有 64 位或 32 位整数索引的打包访问器。
-
-与普通访问器的根本区别在于,打包访问器把大小和步幅数据复制到自身结构内部,而不是指向它们。这使我们可以把它传给 CUDA 内核函数,并在内核中使用它的接口。
-
-我们可以设计一个使用打包访问器而不是裸指针的函数:
-
-```py
-__global__ void lltm_cuda_forward_kernel(
-    const torch::PackedTensorAccessor32<scalar_t,3,torch::RestrictPtrTraits> gates,
-    const torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> old_cell,
-    torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> new_h,
-    torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> new_cell,
-    torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> input_gate,
-    torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> output_gate,
-    torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> candidate_cell)
-
-```
-
-让我们分解这里使用的模板。前两个模板参数(`scalar_t`和维数,例如`2`)与常规访问器相同。参数`torch::RestrictPtrTraits`表示必须使用`__restrict__`关键字。另请注意,我们使用的是`PackedAccessor32`变体,它把大小和步幅存储为`int32_t`。这很重要,因为使用 64 位变体(`PackedAccessor64`)可能会使内核变慢。
-
-函数声明变为
-
-```py
-template <typename scalar_t>
-__global__ void lltm_cuda_forward_kernel(
-    const torch::PackedTensorAccessor32<scalar_t,3,torch::RestrictPtrTraits> gates,
-    const torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> old_cell,
-    torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> new_h,
-    torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> new_cell,
-    torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> input_gate,
-    torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> output_gate,
-    torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> candidate_cell) {
-  //batch index
-  const int n = blockIdx.y;
-  // column index
-  const int c = blockIdx.x * blockDim.x + threadIdx.x;
-  if (c < gates.size(2)){
-    input_gate[n][c] = sigmoid(gates[n][0][c]);
-    output_gate[n][c] = sigmoid(gates[n][1][c]);
-    candidate_cell[n][c] = elu(gates[n][2][c]);
-    new_cell[n][c] =
-        old_cell[n][c] + candidate_cell[n][c] * input_gate[n][c];
-    new_h[n][c] = tanh(new_cell[n][c]) * output_gate[n][c];
-  }
-}
-
-```
-
-该实现的可读性高多了!然后,在主机函数内用`.packed_accessor32<>`方法创建打包访问器来调用此函数:
-
-```py
-std::vector<torch::Tensor> lltm_cuda_forward(
-    torch::Tensor input,
-    torch::Tensor weights,
-    torch::Tensor bias,
-    torch::Tensor old_h,
-    torch::Tensor old_cell) {
-  auto X = torch::cat({old_h, input}, /*dim=*/1);
-  auto gate_weights = torch::addmm(bias, X, weights.transpose(0, 1));
-
-  const auto batch_size = old_cell.size(0);
-  const auto state_size = old_cell.size(1);
-
-  auto gates = gate_weights.reshape({batch_size, 3, state_size});
-  auto new_h = torch::zeros_like(old_cell);
-  auto new_cell = torch::zeros_like(old_cell);
-  auto input_gate = torch::zeros_like(old_cell);
-  auto output_gate = torch::zeros_like(old_cell);
-  auto candidate_cell = torch::zeros_like(old_cell);
-
-  const int threads = 1024;
-  const dim3 blocks((state_size + threads - 1) / threads, batch_size);
-
-  AT_DISPATCH_FLOATING_TYPES(gates.type(), "lltm_forward_cuda", ([&] {
-    lltm_cuda_forward_kernel<scalar_t><<<blocks, threads>>>(
-        gates.packed_accessor32<scalar_t,3,torch::RestrictPtrTraits>(),
-        old_cell.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        new_h.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        new_cell.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        input_gate.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        output_gate.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        candidate_cell.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>());
-  }));
-
-  return {new_h, new_cell, input_gate, output_gate, candidate_cell, X, gates};
-}
-
-```
-
-反向传播遵循相同的模式,这里不再赘述:
-
-```py
-template <typename scalar_t>
-__global__ void lltm_cuda_backward_kernel(
-    torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> d_old_cell,
-    torch::PackedTensorAccessor32<scalar_t,3,torch::RestrictPtrTraits> d_gates,
-    const torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> grad_h,
-    const torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> grad_cell,
-    const torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> new_cell,
-    const torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> input_gate,
-    const torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> output_gate,
-    const torch::PackedTensorAccessor32<scalar_t,2,torch::RestrictPtrTraits> candidate_cell,
-    const torch::PackedTensorAccessor32<scalar_t,3,torch::RestrictPtrTraits> gate_weights) {
-  //batch index
-  const int n = blockIdx.y;
-  // column index
-  const int c = blockIdx.x * blockDim.x + threadIdx.x;
-  if (c < d_gates.size(2)){
-    const auto d_output_gate = tanh(new_cell[n][c]) * grad_h[n][c];
-    const auto d_tanh_new_cell = output_gate[n][c] * grad_h[n][c];
-    const auto d_new_cell =
-        d_tanh(new_cell[n][c]) * d_tanh_new_cell + grad_cell[n][c];
-
-    d_old_cell[n][c] = d_new_cell;
-    const auto d_candidate_cell = input_gate[n][c] * d_new_cell;
-    const auto d_input_gate = candidate_cell[n][c] * d_new_cell;
-
-    d_gates[n][0][c] =
-        d_input_gate * d_sigmoid(gate_weights[n][0][c]);
-    d_gates[n][1][c] =
-        d_output_gate * d_sigmoid(gate_weights[n][1][c]);
-    d_gates[n][2][c] =
-        d_candidate_cell * d_elu(gate_weights[n][2][c]);
-  }
-}
-
-std::vector<torch::Tensor> lltm_cuda_backward(
-    torch::Tensor grad_h,
-    torch::Tensor grad_cell,
-    torch::Tensor new_cell,
-    torch::Tensor input_gate,
-    torch::Tensor output_gate,
-    torch::Tensor candidate_cell,
-    torch::Tensor X,
-    torch::Tensor gates,
-    torch::Tensor weights) {
-  auto d_old_cell = torch::zeros_like(new_cell);
-  auto d_gates = torch::zeros_like(gates);
-
-  const auto batch_size = new_cell.size(0);
-  const auto state_size = new_cell.size(1);
-
-  const int threads = 1024;
-  const dim3 blocks((state_size + threads - 1) / threads, batch_size);
-
-  AT_DISPATCH_FLOATING_TYPES(X.type(), "lltm_backward_cuda", ([&] {
-    lltm_cuda_backward_kernel<scalar_t><<<blocks, threads>>>(
-        d_old_cell.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        d_gates.packed_accessor32<scalar_t,3,torch::RestrictPtrTraits>(),
-        grad_h.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        grad_cell.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        new_cell.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        input_gate.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        output_gate.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        candidate_cell.packed_accessor32<scalar_t,2,torch::RestrictPtrTraits>(),
-        gates.packed_accessor32<scalar_t,3,torch::RestrictPtrTraits>());
-  }));
-
-  auto d_gate_weights = d_gates.reshape({batch_size, 3*state_size});
-  auto d_weights = d_gate_weights.t().mm(X);
-  auto d_bias = d_gate_weights.sum(/*dim=*/0, /*keepdim=*/true);
-
-  auto d_X = d_gate_weights.mm(weights);
-  auto d_old_h = d_X.slice(/*dim=*/1, 0, state_size);
-  auto d_input = d_X.slice(/*dim=*/1, state_size);
-
-  return {d_old_h, d_input, d_weights, d_bias, d_old_cell, d_gates};
-}
-
-```
-
-### 将 C++/CUDA 操作与 PyTorch 集成
-
-同样,把支持 CUDA 的操作与 PyTorch 集成非常简单。如果要编写`setup.py`脚本,它可能如下所示:
-
-```py
-from setuptools import setup
-from torch.utils.cpp_extension import BuildExtension, CUDAExtension
-
-setup(
-    name='lltm',
-    ext_modules=[
-        CUDAExtension('lltm_cuda', [
-            'lltm_cuda.cpp',
-            'lltm_cuda_kernel.cu',
-        ])
-    ],
-    cmdclass={
-        'build_ext': BuildExtension
-    })
-
-```
-
-现在,我们使用`CUDAExtension()`代替`CppExtension()`。只需指定`.cu`文件和`.cpp`文件即可,该库会为您处理所有麻烦。JIT 机制甚至更简单:
-
-```py
-from torch.utils.cpp_extension import load
-
-lltm = load(name='lltm', sources=['lltm_cuda.cpp', 'lltm_cuda_kernel.cu'])
-
-```
-
-#### 性能比较
-
-我们的期望是:用 CUDA 并行化并融合代码中的逐点运算,可以提升 LLTM 的性能。让我们看看这是否成立。我们可以运行前面列出的代码来做基准测试。之前最快的版本是基于 CUDA 的 C++ 代码:
-
-```py
-Forward: 149.802 us | Backward 393.458 us
-
-```
-
-现在使用我们的自定义 CUDA 内核:
-
-```py
-Forward: 129.431 us | Backward 304.641 us
-
-```
-
-更多的性能提升!
-
-## 总结
-
-现在,您应该对 PyTorch 的 C++ 扩展机制有了很好的总体了解,也清楚了使用它们的动机。[您可以在此处找到本说明中展示的代码示例](https://github.com/pytorch/extension-cpp)。如有疑问,请使用[论坛](https://discuss.pytorch.org)。如果遇到问题,也请务必查看我们的[常见问题解答](https://pytorch.org/cppdocs/notes/faq.html)。
\ No newline at end of file
diff --git a/pytorch/官方教程/66.md b/pytorch/官方教程/46 分布式RPC框架-管道并行化.md
similarity index 100%
rename from pytorch/官方教程/66.md
rename to pytorch/官方教程/46 分布式RPC框架-管道并行化.md
diff --git a/pytorch/官方教程/46.md b/pytorch/官方教程/46.md
deleted file mode 100644
index 6ec1d1a3..00000000
--- a/pytorch/官方教程/46.md
+++ /dev/null
@@ -1,782 +0,0 @@
-# 使用自定义 C++ 运算符扩展 TorchScript
-
-> 原文:
-
-PyTorch 1.0 版本为 PyTorch 引入了一种新的编程模型,称为 [TorchScript](https://pytorch.org/docs/master/jit.html)。TorchScript 是 Python 编程语言的一个子集,可以由 TorchScript 编译器解析、编译和优化。此外,已编译的 TorchScript 模型可以选择序列化为磁盘文件格式,随后可以从纯 C++(以及 Python)加载并运行,以进行推理。
-
-TorchScript 支持`torch`包提供的相当大的操作子集,使您可以把多种复杂模型纯粹表示为取自 PyTorch“标准库”的一系列张量操作。不过,有时您可能需要用自定义的 C++ 或 CUDA 函数来扩展 TorchScript。虽然我们建议只有在无法(简单且高效地)把想法表达为普通 Python 函数时才使用该选项,但我们确实提供了一个非常友好且简单的接口,用于使用 [ATen](https://pytorch.org/cppdocs/#aten)(PyTorch 的高性能 C++ 张量库)定义自定义 C++ 和 CUDA 内核。绑定到 TorchScript 后,您可以把这些自定义内核(或“操作”)嵌入 TorchScript 模型中,并在 Python 中执行它们,也可以在其序列化形式上直接用 C++ 执行。
-
-以下段落给出了一个编写 TorchScript 自定义操作来调用 [OpenCV](https://www.opencv.org)(一个用 C++ 编写的计算机视觉库)的示例。我们将讨论如何在 C++ 中使用张量,如何高效地把它们转换为第三方张量格式(在本例中为 OpenCV 的`Mat`),如何在 TorchScript 运行时中注册运算符,以及最后如何编译该运算符并在 Python 和 C++ 中使用它。
-
-## 在 C++ 中实现自定义运算符
-
-在本教程中,我们将把 OpenCV 的[`warpPerspective`](https://docs.opencv.org/2.4/modules/imgproc/doc/geometric_transformations.html#warpperspective)函数(它对图像应用透视变换)作为自定义运算符暴露给 TorchScript。第一步是用 C++ 编写自定义运算符的实现。让我们把实现文件称为`op.cpp`,内容如下:
-
-```py
-torch::Tensor warp_perspective(torch::Tensor image, torch::Tensor warp) {
-  // BEGIN image_mat
-  cv::Mat image_mat(/*rows=*/image.size(0),
-                    /*cols=*/image.size(1),
-                    /*type=*/CV_32FC1,
-                    /*data=*/image.data_ptr<float>());
-  // END image_mat
-
-  // BEGIN warp_mat
-  cv::Mat warp_mat(/*rows=*/warp.size(0),
-                   /*cols=*/warp.size(1),
-                   /*type=*/CV_32FC1,
-                   /*data=*/warp.data_ptr<float>());
-  // END warp_mat
-
-  // BEGIN output_mat
-  cv::Mat output_mat;
-  cv::warpPerspective(image_mat, output_mat, warp_mat, /*dsize=*/{8, 8});
-  // END output_mat
-
-  // BEGIN output_tensor
-  torch::Tensor output = torch::from_blob(output_mat.ptr<float>(), /*sizes=*/{8, 8});
-  return output.clone();
-  // END output_tensor
-}
-
-```
-
-该运算符的代码很短。在文件顶部,我们包含了 OpenCV 头文件`opencv2/opencv.hpp`和`torch/script.h`头文件,后者暴露了 PyTorch C++ API 中编写自定义 TorchScript 运算符所需的全部功能。我们的函数`warp_perspective`接受两个参数:输入`image`和我们希望应用于图像的`warp`变换矩阵。这些输入的类型是`torch::Tensor`,即 C++ 中 PyTorch 的张量类型(也是 Python 中所有张量的底层类型)。我们的`warp_perspective`函数的返回类型同样是`torch::Tensor`。
-
-提示
-
-有关 ATen(为 PyTorch 提供`Tensor`类的库)的更多信息,请参见[本说明](https://pytorch.org/cppdocs/notes/tensor_basics.html)。此外,[本教程](https://pytorch.org/cppdocs/notes/tensor_creation.html)描述了如何在 C++ 中分配和初始化新的张量对象(此运算符不需要)。
-
-注意
-
-TorchScript 编译器只认识固定数量的类型。只有这些类型可以用作自定义运算符的参数。当前这些类型是:`torch::Tensor`、`torch::Scalar`、`double`、`int64_t`以及这些类型的`std::vector`。请注意,支持的是`double`而*不是*`float`,是`int64_t`而*不是*`int`、`short`或`long`之类的其他整数类型。
-
-在函数内部,我们要做的第一件事是把 PyTorch 张量转换为 OpenCV 矩阵,因为 OpenCV 的`warpPerspective`期望`cv::Mat`对象作为输入。幸运的是,有一种无需复制任何数据的做法。在前几行中
-
-```py
-  cv::Mat image_mat(/*rows=*/image.size(0),
-                    /*cols=*/image.size(1),
-                    /*type=*/CV_32FC1,
-                    /*data=*/image.data_ptr<float>());
-
-```
-
-我们调用 [OpenCV `Mat`类的构造器](https://docs.opencv.org/trunk/d3/d63/classcv_1_1Mat.html#a922de793eabcec705b3579c5f95a643e)把张量转换为`Mat`对象。
-我们向它传递原始`image`张量的行数和列数、数据类型(在此示例中固定为`float32`),以及最后一个参数:指向底层数据的原始指针,即`float*`。`Mat`类这个构造器的特殊之处在于它不会复制输入数据,而是简单地引用这块内存来执行`Mat`上的所有操作。如果在`image_mat`上执行原地操作,这会反映在原始`image`张量中(反之亦然)。这样,即使数据实际上存放在 PyTorch 张量中,我们也能用库的本机矩阵类型调用后续的 OpenCV 例程。我们重复此过程,把`warp` PyTorch 张量转换为`warp_mat` OpenCV 矩阵:
-
-```py
-  cv::Mat warp_mat(/*rows=*/warp.size(0),
-                   /*cols=*/warp.size(1),
-                   /*type=*/CV_32FC1,
-                   /*data=*/warp.data_ptr<float>());
-
-```
-
-接下来,我们就可以调用那个我们迫切想在 TorchScript 中使用的 OpenCV 函数了:`warpPerspective`。为此,我们把`image_mat`和`warp_mat`矩阵以及一个名为`output_mat`的空输出矩阵传给 OpenCV 函数。我们还通过`dsize`指定了希望输出矩阵(图像)的大小。在本示例中,它被硬编码为`8 x 8`:
-
-```py
-  cv::Mat output_mat;
-  cv::warpPerspective(image_mat, output_mat, warp_mat, /*dsize=*/{8, 8});
-
-```
-
-自定义运算符实现的最后一步是把`output_mat`转换回 PyTorch 张量,以便在 PyTorch 中进一步使用。这与我们之前反方向的转换极为相似。对于这个方向,PyTorch 提供了`torch::from_blob`方法。*blob* 在这里指一个不透明的、扁平的内存指针,我们希望把它解释为 PyTorch 张量。对`torch::from_blob`的调用如下所示:
-
-```py
-  torch::Tensor output = torch::from_blob(output_mat.ptr<float>(), /*sizes=*/{8, 8});
-  return output.clone();
-
-```
-
-我们在 OpenCV 的`Mat`类上使用`.ptr<float>()`方法获取指向底层数据的原始指针(就像之前 PyTorch 张量的`.data_ptr<float>()`一样)。我们还指定了张量的输出形状,这里硬编码为`8 x 8`。`torch::from_blob`的输出是一个`torch::Tensor`,指向由 OpenCV 矩阵拥有的内存。
-
-在从运算符实现返回此张量之前,我们必须在张量上调用`.clone()`,对底层数据做一次内存复制。原因是`torch::from_blob`返回的张量并不拥有其数据,此时数据仍归 OpenCV 矩阵所有。而这个 OpenCV 矩阵会在函数末尾超出作用域并被释放。如果我们原样返回`output`张量,那么在函数外部使用它时,它将指向无效的内存。调用`.clone()`返回一个新张量,它拥有原始数据的一份自己的副本,因此可以安全地返回给外部世界。
-
-## 使用 TorchScript 注册自定义运算符
-
-现在已经用 C++ 实现了自定义运算符,我们需要在 TorchScript 运行时和编译器中注册它。这将使 TorchScript 编译器能够在 TorchScript 代码中解析对自定义运算符的引用。如果您曾经使用过`pybind11`库,我们的注册语法与`pybind11`语法非常相似。要注册一个函数,我们编写:
-
-```py
-TORCH_LIBRARY(my_ops, m) {
-  m.def("warp_perspective", warp_perspective);
-}
-
-```
-
-放在`op.cpp`文件顶层的某个位置。`TORCH_LIBRARY`宏创建一个在程序启动时被调用的函数。库的名称(`my_ops`)作为第一个参数给出(不应加引号)。第二个参数(`m`)定义了一个`torch::Library`类型的变量,它是注册运算符的主要接口。方法`Library::def`实际创建了一个名为`warp_perspective`的运算符,把它同时暴露给 Python 和 TorchScript。您可以多次调用`def`来定义任意数量的运算符。
-
-在幕后,`def`函数实际上做了大量工作:它使用模板元编程检查函数的类型签名,并把它转换为运算符模式(schema),用于在 TorchScript 的类型系统中描述该运算符的类型。
-
-## 构建自定义运算符
-
-现在我们已经用 C++ 实现了自定义运算符并编写了注册代码,是时候把它构建成一个(共享)库了,这样就可以把它加载到 Python 中进行研究和实验,或者加载到 C++ 中,在无 Python 的环境中进行推理。可以使用纯 CMake,或者`setuptools`之类的 Python 替代方案等多种方式来构建我们的运算符。为简洁起见,以下段落仅讨论 CMake 方法。本教程的附录将深入探讨其他替代方案。
-
-### 环境设置
-
-我们需要安装 PyTorch 和 OpenCV。获取这两者的最简单、最跨平台的方法是通过 Conda:
-
-```py
-conda install -c pytorch pytorch
-conda install opencv
-
-```
-
-### 将 CMake 用于构建
-
-为了用 [CMake](https://cmake.org) 构建系统把自定义运算符构建为共享库,我们需要编写一个简短的`CMakeLists.txt`文件,并把它与之前的`op.cpp`文件放在一起。为此,让我们约定如下的目录结构:
-
-```py
-warp-perspective/
-  op.cpp
-  CMakeLists.txt
-
-```
-
-我们的`CMakeLists.txt`文件内容如下:
-
-```py
-cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
-project(warp_perspective)
-
-find_package(Torch REQUIRED)
-find_package(OpenCV REQUIRED)
-
-# Define our library target
-add_library(warp_perspective SHARED op.cpp)
-# Enable C++14
-target_compile_features(warp_perspective PRIVATE cxx_std_14)
-# Link against LibTorch
-target_link_libraries(warp_perspective "${TORCH_LIBRARIES}")
-# Link against OpenCV
-target_link_libraries(warp_perspective opencv_core opencv_imgproc)
-
-```
-
-现在要构建我们的运算符,可以在`warp_perspective`文件夹中运行以下命令:
-
-```py
-$ mkdir build
-$ cd build
-$ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" ..
--- The C compiler identification is GNU 5.4.0
--- The CXX compiler identification is GNU 5.4.0
--- Check for working C compiler: /usr/bin/cc
--- Check for working C compiler: /usr/bin/cc -- works
--- Detecting C compiler ABI info
--- Detecting C compiler ABI info - done
--- Detecting C compile features
--- Detecting C compile features - done
--- Check for working CXX compiler: /usr/bin/c++
--- Check for working CXX compiler: /usr/bin/c++ -- works
--- Detecting CXX compiler ABI info
--- Detecting CXX compiler ABI info - done
--- Detecting CXX compile features
--- Detecting CXX compile features - done
--- Looking for pthread.h
--- Looking for pthread.h - found
--- Looking for pthread_create
--- Looking for pthread_create - not found
--- Looking for pthread_create in pthreads
--- Looking for pthread_create in pthreads - not found
--- Looking for pthread_create in pthread
--- Looking for pthread_create in pthread - found
--- Found Threads: TRUE
--- Found torch: /libtorch/lib/libtorch.so
--- Configuring done
--- Generating done
--- Build files have been written to: /warp_perspective/build
-$ make -j
-Scanning dependencies of target warp_perspective
-[ 50%] Building CXX object CMakeFiles/warp_perspective.dir/op.cpp.o
-[100%] Linking CXX shared library libwarp_perspective.so
-[100%] Built target warp_perspective
-
-```
-
-这会将`libwarp_perspective.so`共享库文件放置在`build`文件夹中。 在上面的`cmake`命令中,我们使用帮助程序变量`torch.utils.cmake_prefix_path`方便地告诉我们 PyTorch 安装的 cmake 文件在哪里。
-
-我们将在下面进一步探讨如何使用和调用我们的运算符,但为了先体验一下成功的滋味,我们可以尝试在 Python 中运行以下代码:
-
-```py
-import torch
-torch.ops.load_library("build/libwarp_perspective.so")
-print(torch.ops.my_ops.warp_perspective)
-
-```
-
-如果一切顺利,则应打印类似如下的内容(对象地址因机器而异):
-
-```py
-<built-in method my_ops::warp_perspective of PyObject object at 0x...>
-
-```
-
-这是我们稍后将用来调用自定义运算符的 Python 函数。
-
-## 在 Python 中使用 TorchScript 自定义运算符
-
-将我们的自定义运算符构建到共享库后,我们就可以在 Python 的 TorchScript 模型中使用此运算符了。 这有两个部分:首先将运算符加载到 Python 中,其次在 TorchScript 代码中使用运算符。
-
-您已经了解了如何将运算符导入 Python:`torch.ops.load_library()`。 此函数采用包含自定义运算符的共享库的路径,并将其加载到当前进程中。 加载共享库也将执行`TORCH_LIBRARY`块。 这将在 TorchScript 编译器中注册我们的自定义运算符,并允许我们在 TorchScript 代码中使用该运算符。
-
-您可以将已加载的运算符调用为`torch.ops.<namespace>.<function>`,其中`<namespace>`是运算符名称的名称空间部分,而`<function>`是运算符的函数名称。 对于我们上面编写的运算符,名称空间为`my_ops`,函数名称为`warp_perspective`,这意味着我们的运算符可以作为`torch.ops.my_ops.warp_perspective`使用。 虽然可以在脚本化或跟踪的 TorchScript 模块中使用此函数,但我们也可以仅在即时执行(eager)模式的 PyTorch 中使用它,并将其传递给常规 PyTorch 张量:
-
-```py
-import torch
-torch.ops.load_library("build/libwarp_perspective.so")
-print(torch.ops.my_ops.warp_perspective(torch.randn(32, 32), torch.rand(3, 3)))
-
-```
-
-输出:
-
-```py
-tensor([[0.0000, 0.3218, 0.4611,  ..., 0.4636, 0.4636, 0.4636],
-        [0.3746, 0.0978, 0.5005,  ..., 0.4636, 0.4636, 0.4636],
-        [0.3245, 0.0169, 0.0000,  ..., 0.4458, 0.4458, 0.4458],
-        ...,
-        [0.1862, 0.1862, 0.1692,  ..., 0.0000, 0.0000, 0.0000],
-        [0.1862, 0.1862, 0.1692,  ..., 0.0000, 0.0000, 0.0000],
-        [0.1862, 0.1862, 0.1692,  ..., 0.0000, 0.0000, 0.0000]])
-
-```
-
-注意
-
-幕后发生的事情是,您第一次使用 Python 访问`torch.ops.namespace.function`时,TorchScript 编译器(在 C++ 领域)将查看是否已注册函数`namespace::function`,如果已注册,则将一个 Python 句柄返回给该函数,我们随后可以使用它从 Python 调用我们的 C++ 运算符实现。 这是 TorchScript 自定义运算符和 C++ 扩展之间的一个值得注意的区别:C++ 扩展是使用`pybind11`手动绑定的,而 TorchScript 自定义操作则是由 PyTorch 自己动态绑定的。`pybind11`在绑定到 Python 的类型和类方面为您提供了更大的灵活性,因此建议将其用于纯 eager 模式的代码,但 TorchScript 操作不支持它。
-
-从这里开始,您可以在脚本或跟踪代码中使用自定义运算符,就像`torch`包中的其他函数一样。 实际上,诸如`torch.matmul`之类的“标准库”函数在很大程度上与自定义运算符使用相同的注册路径,这使得自定义运算符在 TorchScript 中的使用方式和位置方面真正成为一流公民。 (但是,区别之一是标准库函数具有自定义的 Python 参数解析逻辑,与`torch.ops`的参数解析不同。)
-
-### 在跟踪中使用自定义运算符
-
-首先,将我们的运算符嵌入到跟踪函数中。 回想一下,为了进行跟踪,我们从一些原始的 PyTorch 代码开始:
-
-```py
-def compute(x, y, z):
-    return 
x.matmul(y) + torch.relu(z)
-
-```
-
-然后在其上调用`torch.jit.trace`。 我们还向`torch.jit.trace`传递了一些示例输入,它会将这些输入馈入我们的实现,以记录输入流经时所发生的操作序列。 这样做的结果实际上是 eager 模式 PyTorch 程序的一个“冻结”版本,TorchScript 编译器可以对其进行进一步的分析,优化和序列化:
-
-```py
-inputs = [torch.randn(4, 8), torch.randn(8, 5), torch.randn(4, 5)]
-trace = torch.jit.trace(compute, inputs)
-print(trace.graph)
-
-```
-
-输出:
-
-```py
-graph(%x : Float(4:8, 8:1),
-      %y : Float(8:5, 5:1),
-      %z : Float(4:5, 5:1)):
-  %3 : Float(4:5, 5:1) = aten::matmul(%x, %y) # test.py:10:0
-  %4 : Float(4:5, 5:1) = aten::relu(%z) # test.py:10:0
-  %5 : int = prim::Constant[value=1]() # test.py:10:0
-  %6 : Float(4:5, 5:1) = aten::add(%3, %4, %5) # test.py:10:0
-  return (%6)
-
-```
-
-现在,令人兴奋的是,我们可以简单地把自定义运算符放进 PyTorch 跟踪中,就好像它是`torch.relu`或任何其他`torch`函数一样:
-
-```py
-def compute(x, y, z):
-    x = torch.ops.my_ops.warp_perspective(x, torch.eye(3))
-    return x.matmul(y) + torch.relu(z)
-
-```
-
-然后像以前一样跟踪它:
-
-```py
-inputs = [torch.randn(4, 8), torch.randn(8, 5), torch.randn(8, 5)]
-trace = torch.jit.trace(compute, inputs)
-print(trace.graph)
-
-```
-
-输出:
-
-```py
-graph(%x.1 : Float(4:8, 8:1),
-      %y : Float(8:5, 5:1),
-      %z : Float(8:5, 5:1)):
-  %3 : int = prim::Constant[value=3]() # test.py:25:0
-  %4 : int = prim::Constant[value=6]() # test.py:25:0
-  %5 : int = prim::Constant[value=0]() # test.py:25:0
-  %6 : Device = prim::Constant[value="cpu"]() # test.py:25:0
-  %7 : bool = prim::Constant[value=0]() # test.py:25:0
-  %8 : Float(3:3, 3:1) = aten::eye(%3, %4, %5, %6, %7) # test.py:25:0
-  %x : Float(8:8, 8:1) = my_ops::warp_perspective(%x.1, %8) # test.py:25:0
-  %10 : Float(8:5, 5:1) = aten::matmul(%x, %y) # test.py:26:0
-  %11 : Float(8:5, 5:1) = aten::relu(%z) # test.py:26:0
-  %12 : int = prim::Constant[value=1]() # test.py:26:0
-  %13 : Float(8:5, 5:1) = aten::add(%10, %11, %12) # test.py:26:0
-  return (%13)
-
-```
-
-将 TorchScript 自定义操作集成到被跟踪的 PyTorch 代码中就是这么简单!
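-
-作为一点补充(以下代码并非原教程内容):跟踪得到的对象与其他 TorchScript 函数一样,可以序列化到磁盘并在别处重新加载。下面是一个最小示意,假设 `trace` 来自上文的 `torch.jit.trace` 调用,且共享库位于上文的构建路径;文件名 `traced_compute.pt` 为任意选择。注意,加载方也必须先调用 `torch.ops.load_library`,否则反序列化时会找不到 `my_ops::warp_perspective`:
-
-```py
-import torch
-
-# 先加载包含自定义运算符的共享库(沿用上文的构建产物路径)
-torch.ops.load_library("build/libwarp_perspective.so")
-
-# 假设 `trace` 来自上文的 torch.jit.trace 调用
-trace.save("traced_compute.pt")
-
-# 在(可能是另一个)进程中重新加载并直接调用
-loaded = torch.jit.load("traced_compute.pt")
-print(loaded(torch.randn(4, 8), torch.randn(8, 5), torch.randn(8, 5)))
-
-```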
- -### 将自定义运算符与脚本一起使用 - -除了跟踪之外,获得 PyTorch 程序的 TorchScript 表示形式的另一种方法是直接在 TorchScript 中编写代码。 TorchScript 在很大程度上是 Python 语言的子集,它具有一些限制,使 TorchScript 编译器更容易推理程序。 您可以使用`@torch.jit.script`标记自由函数,使用`@torch.jit.script_method`标记类中的方法(也必须从`torch.jit.ScriptModule`派生),将常规 PyTorch 代码转换为 TorchScript。 有关 TorchScript 注解的更多详细信息,请参见[此处](https://pytorch.org/docs/master/jit.html)。 - -使用 TorchScript 而不是跟踪的一个特殊原因是,跟踪无法捕获 PyTorch 代码中的控制流。 因此,让我们考虑使用控制流的此函数: - -```py -def compute(x, y): - if bool(x[0][0] == 42): - z = 5 - else: - z = 10 - return x.matmul(y) + z - -``` - -要将此函数从原始 PyTorch 转换为 TorchScript,我们用`@torch.jit.script`对其进行注解: - -```py -@torch.jit.script -def compute(x, y): - if bool(x[0][0] == 42): - z = 5 - else: - z = 10 - return x.matmul(y) + z - -``` - -这将及时将`compute`函数编译成图形表示形式,我们可以在`compute.graph`属性中进行检查: - -```py ->>> compute.graph -graph(%x : Dynamic - %y : Dynamic) { - %14 : int = prim::Constant[value=1]() - %2 : int = prim::Constant[value=0]() - %7 : int = prim::Constant[value=42]() - %z.1 : int = prim::Constant[value=5]() - %z.2 : int = prim::Constant[value=10]() - %4 : Dynamic = aten::select(%x, %2, %2) - %6 : Dynamic = aten::select(%4, %2, %2) - %8 : Dynamic = aten::eq(%6, %7) - %9 : bool = prim::TensorToBool(%8) - %z : int = prim::If(%9) - block0() { - -> (%z.1) - } - block1() { - -> (%z.2) - } - %13 : Dynamic = aten::matmul(%x, %y) - %15 : Dynamic = aten::add(%13, %z, %14) - return (%15); -} - -``` - -现在,就像以前一样,我们可以像脚本代码中的任何其他函数一样使用自定义运算符: - -```py -torch.ops.load_library("libwarp_perspective.so") - -@torch.jit.script -def compute(x, y): - if bool(x[0] == 42): - z = 5 - else: - z = 10 - x = torch.ops.my_ops.warp_perspective(x, torch.eye(3)) - return x.matmul(y) + z - -``` - -当 TorchScript 编译器看到对`torch.ops.my_ops.warp_perspective`的引用时,它将找到我们通过 C++ 中的`TORCH_LIBRARY`函数注册的实现,并将其编译为图形表示形式: - -```py ->>> compute.graph -graph(%x.1 : Dynamic - %y : Dynamic) { - %20 : int = prim::Constant[value=1]() - %16 : int[] = prim::Constant[value=[0, -1]]() - %14 : int = prim::Constant[value=6]() - %2 : int = prim::Constant[value=0]() - %7 : int = prim::Constant[value=42]() - %z.1 : int = prim::Constant[value=5]() - %z.2 : int = prim::Constant[value=10]() - %13 : int = prim::Constant[value=3]() - %4 : Dynamic = aten::select(%x.1, %2, %2) - %6 : Dynamic = aten::select(%4, %2, %2) - %8 : Dynamic = aten::eq(%6, %7) - %9 : bool = prim::TensorToBool(%8) - %z : int = prim::If(%9) - block0() { - -> (%z.1) - } - block1() { - -> (%z.2) - } - %17 : Dynamic = aten::eye(%13, %14, %2, %16) - %x : Dynamic = my_ops::warp_perspective(%x.1, %17) - %19 : Dynamic = aten::matmul(%x, %y) - %21 : Dynamic = aten::add(%19, %z, %20) - return (%21); - } - -``` - -请特别注意图末尾对`my_ops::warp_perspective`的引用。 - -注意 - -TorchScript 图形表示仍可能更改。 不要依靠它看起来像这样。 - -在 Python 中使用自定义运算符时,确实如此。 简而言之,您可以使用`torch.ops.load_library`导入包含您的运算符的库,并像其他任何`torch`运算符一样,从跟踪或编写脚本的 TorchScript 代码中调用自定义操作。 - -## 在 C++ 中使用 TorchScript 自定义运算符 - -TorchScript 的一项有用功能是能够将模型序列化到磁盘文件中。 该文件可以通过有线方式发送,存储在文件系统中,或者更重要的是,可以动态反序列化和执行,而无需保留原始源代码。 这在 Python 中是可能的,但在 C++ 中也是可能的。 为此,PyTorch [提供了纯 C++ API](https://pytorch.org/cppdocs/),用于反序列化以及执行 TorchScript 模型。 如果您还没有的话,请阅读[在 C++ 中加载和运行序列化 TorchScript 模型](https://pytorch.org/tutorials/advanced/cpp_export.html)的教程,接下来的几段将基于该教程构建。 - -简而言之,即使从文件反序列化并以 C++ 运行,也可以像常规`torch`运算符一样执行自定义运算符。 唯一的要求是将我们先前构建的自定义运算符共享库与执行模型的 C++ 应用链接。 在 Python 中,只需调用`torch.ops.load_library`即可。 在 C++ 中,您需要在使用的任何构建系统中将共享库与主应用链接。 下面的示例将使用 CMake 展示这一点。 - -注意 - -从技术上讲,您还可以在运行时将共享库动态加载到 C++ 应用中,就像在 Python 中一样。 在 Linux 上,可以使用`dlopen`来执行此操作。 在其他平台上也存在等效项。 - -在上面链接的 C++ 
执行教程的基础上,让我们从一个最小的 C++ 应用开始:它位于与自定义运算符不同的文件夹中的`main.cpp`文件里,负责加载并执行序列化的 TorchScript 模型:
-
-```py
-#include <torch/script.h> // One-stop header.
-
-#include <iostream>
-#include <memory>
-
-int main(int argc, const char* argv[]) {
-  if (argc != 2) {
-    std::cerr << "usage: example-app <path-to-exported-script-module>\n";
-    return -1;
-  }
-
-  // Deserialize the ScriptModule from a file using torch::jit::load().
-  std::shared_ptr<torch::jit::script::Module> module = torch::jit::load(argv[1]);
-
-  std::vector<torch::jit::IValue> inputs;
-  inputs.push_back(torch::randn({4, 8}));
-  inputs.push_back(torch::randn({8, 5}));
-
-  torch::Tensor output = module->forward(std::move(inputs)).toTensor();
-
-  std::cout << output << std::endl;
-}
-
-```
-
-以及一个小的`CMakeLists.txt`文件:
-
-```py
-cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
-project(example_app)
-
-find_package(Torch REQUIRED)
-
-add_executable(example_app main.cpp)
-target_link_libraries(example_app "${TORCH_LIBRARIES}")
-target_compile_features(example_app PRIVATE cxx_range_for)
-
-```
-
-在这一点上,我们应该能够构建应用:
-
-并在尚未传入模型的情况下运行它:
-
-接下来,让我们序列化我们之前编写的使用自定义运算符的脚本函数:
-
-```py
-torch.ops.load_library("libwarp_perspective.so")
-
-@torch.jit.script
-def compute(x, y):
-  if bool(x[0][0] == 42):
-      z = 5
-  else:
-      z = 10
-  x = torch.ops.my_ops.warp_perspective(x, torch.eye(3))
-  return x.matmul(y) + z
-
-compute.save("example.pt")
-
-```
-
-最后一行将脚本函数序列化为一个名为`example.pt`的文件。 如果我们随后将此序列化模型传递给 C++ 应用,则可以立即运行它:
-
-或者……可能不行。 也许暂时还不行。 当然了!我们尚未将自定义运算符库与我们的应用链接起来。 让我们现在就来做这件事,并且把它做对:先稍微更新一下文件组织,如下所示:
-
-```py
-example_app/
-  CMakeLists.txt
-  main.cpp
-  warp_perspective/
-    CMakeLists.txt
-    op.cpp
-
-```
-
-这样,我们就可以把`warp_perspective`库的 CMake 目标作为子目录加入应用目标。 `example_app`文件夹中的顶级`CMakeLists.txt`应该如下所示:
-
-```py
-cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
-project(example_app)
-
-find_package(Torch REQUIRED)
-
-add_subdirectory(warp_perspective)
-
-add_executable(example_app main.cpp)
-target_link_libraries(example_app "${TORCH_LIBRARIES}")
-target_link_libraries(example_app -Wl,--no-as-needed warp_perspective)
-target_compile_features(example_app PRIVATE cxx_range_for)
-
-```
-
-基本的 CMake 配置与以前非常相似,只是我们将`warp_perspective`的 CMake 构建添加为子目录。 一旦其 CMake 代码运行,我们就将`example_app`应用与`warp_perspective`共享库链接。
-
-注意
-
-上面的示例中嵌入了一个关键细节:`warp_perspective`链接行的`-Wl,--no-as-needed`前缀。 这是必需的,因为我们实际上不会在应用代码中调用`warp_perspective`共享库中的任何函数。 我们只需要运行`TORCH_LIBRARY`函数。 麻烦的是,这会使链接器感到困惑,并使其认为可以完全跳过与该库的链接。 在 Linux 上,`-Wl,--no-as-needed`标志会强制进行链接(注意:此标志特定于 Linux!)。 还有其他解决方法。 最简单的方法是在运算符库中定义*某个函数*,并在主应用中调用它。 这可能就像在某个标头中声明一个函数`void init();`,然后在运算符库中将其定义为`void init() { }`一样简单。 在主应用中调用此`init()`函数会给链接器留下“这是一个值得链接的库”的印象。 不幸的是,这超出了我们的控制范围,我们宁愿让您知道其原因和简单的解决方法,也不愿塞给您一些不透明的宏放进代码里。
-
-现在,由于我们已经在顶层找到了`Torch`包,因此`warp_perspective`子目录中的`CMakeLists.txt`文件可以缩短一些。 它看起来应该像这样:
-
-```py
-find_package(OpenCV REQUIRED)
-add_library(warp_perspective SHARED op.cpp)
-target_compile_features(warp_perspective PRIVATE cxx_range_for)
-target_link_libraries(warp_perspective PRIVATE "${TORCH_LIBRARIES}")
-target_link_libraries(warp_perspective PRIVATE opencv_core opencv_photo)
-
-```
-
-让我们重新构建示例应用,该应用还将与自定义运算符库链接。 在顶层`example_app`目录中:
-
-```py
-$ mkdir build
-$ cd build
-$ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" .. 
--- The C compiler identification is GNU 5.4.0 --- The CXX compiler identification is GNU 5.4.0 --- Check for working C compiler: /usr/bin/cc --- Check for working C compiler: /usr/bin/cc -- works --- Detecting C compiler ABI info --- Detecting C compiler ABI info - done --- Detecting C compile features --- Detecting C compile features - done --- Check for working CXX compiler: /usr/bin/c++ --- Check for working CXX compiler: /usr/bin/c++ -- works --- Detecting CXX compiler ABI info --- Detecting CXX compiler ABI info - done --- Detecting CXX compile features --- Detecting CXX compile features - done --- Looking for pthread.h --- Looking for pthread.h - found --- Looking for pthread_create --- Looking for pthread_create - not found --- Looking for pthread_create in pthreads --- Looking for pthread_create in pthreads - not found --- Looking for pthread_create in pthread --- Looking for pthread_create in pthread - found --- Found Threads: TRUE --- Found torch: /libtorch/lib/libtorch.so --- Configuring done --- Generating done --- Build files have been written to: /warp_perspective/example_app/build -$ make -j -Scanning dependencies of target warp_perspective -[ 25%] Building CXX object warp_perspective/CMakeFiles/warp_perspective.dir/op.cpp.o -[ 50%] Linking CXX shared library libwarp_perspective.so -[ 50%] Built target warp_perspective -Scanning dependencies of target example_app -[ 75%] Building CXX object CMakeFiles/example_app.dir/main.cpp.o -[100%] Linking CXX executable example_app -[100%] Built target example_app - -``` - -如果现在运行`example_app`二进制文件并将其交给序列化模型,我们应该得出一个圆满的结局: - -```py -$ ./example_app example.pt -11.4125 5.8262 9.5345 8.6111 12.3997 - 7.4683 13.5969 9.0850 11.0698 9.4008 - 7.4597 15.0926 12.5727 8.9319 9.0666 - 9.4834 11.1747 9.0162 10.9521 8.6269 -10.0000 10.0000 10.0000 10.0000 10.0000 -10.0000 10.0000 10.0000 10.0000 10.0000 -10.0000 10.0000 10.0000 10.0000 10.0000 -10.0000 10.0000 10.0000 10.0000 10.0000 -[ Variable[CPUFloatType]{8,5} ] - -``` - -成功! 
您现在可以推断了。 - -## 总结 - -本教程向您介绍了如何在 C++ 中实现自定义 TorchScript 运算符,如何将其构建到共享库中,如何在 Python 中使用它来定义 TorchScript 模型以及如何将其加载到 C++ 应用中以进行推理工作负载。 现在,您可以使用与第三方 C++ 库进行接口的 C++ 运算符扩展 TorchScript 模型,编写自定义的高性能 CUDA 内核,或实现任何其他需要 Python,TorchScript 和 C++ 之间的界线才能平稳融合的用例。 - -与往常一样,如果您遇到任何问题或疑问,可以使用我们的[论坛](https://discuss.pytorch.org/)或 [GitHub ISSUE](https://github.com/pytorch/pytorch/issues) 进行联系。 另外,我们的[常见问题解答(FAQ)页面](https://pytorch.org/cppdocs/notes/faq.html)可能包含有用的信息。 - -## 附录 A:建立自定义运算符的更多方法 - -“构建自定义运算符”一节介绍了如何使用 CMake 将自定义运算符构建到共享库中。 本附录概述了两种进一步的编译方法。 他们俩都使用 Python 作为编译过程的“驱动程序”或“接口”。 此外,两者都重用了[现有基础结构](https://pytorch.org/docs/stable/cpp_extension.html)。 PyTorch 提供了 [C++ 扩展](https://pytorch.org/tutorials/advanced/cpp_extension.html),它们依赖于[`pybind11`](https://github.com/pybind/pybind11)用于将函数从 C++ “显式”绑定到 Python。 - -第一种方法是使用 C++ 扩展程序的[方便的即时(JIT)编译接口](https://pytorch.org/docs/stable/cpp_extension.html#torch.utils.cpp_extension.load)在您首次运行 PyTorch 脚本时在后台编译代码。 第二种方法依赖于古老的`setuptools`包,并涉及编写单独的`setup.py`文件。 这样可以进行更高级的配置,并与其他基于`setuptools`的项目集成。 我们将在下面详细探讨这两种方法。 - -### 使用 JIT 编译的构建 - -PyTorch C++ 扩展工具包提供的 JIT 编译功能可将自定义运算符的编译直接嵌入到您的 Python 代码中,例如在训练脚本的顶部。 - -注意 - -这里的“ JIT 编译”与 TorchScript 编译器中用于优化程序的 JIT 编译无关。 这只是意味着您的自定义运算符 C++ 代码将在您首次导入时在系统`/tmp`目录下的文件夹中编译,就像您自己事先对其进行编译一样。 - -此 JIT 编译功能有两种形式。 首先,您仍然将运算符实现保留在单独的文件(`op.cpp`)中,然后使用`torch.utils.cpp_extension.load()`编译扩展名。 通常,此函数将返回暴露您的 C++ 扩展的 Python 模块。 但是,由于我们没有将自定义运算符编译到其自己的 Python 模块中,因此我们只想编译一个普通的共享库。 幸运的是,`torch.utils.cpp_extension.load()`有一个参数`is_python_module`,可以将其设置为`False`,以表明我们仅对构建共享库感兴趣,而对 Python 模块不感兴趣。 然后`torch.utils.cpp_extension.load()`将会编译并将共享库也加载到当前进程中,就像`torch.ops.load_library`之前所做的那样: - -```py -import torch.utils.cpp_extension - -torch.utils.cpp_extension.load( - name="warp_perspective", - sources=["op.cpp"], - extra_ldflags=["-lopencv_core", "-lopencv_imgproc"], - is_python_module=False, - verbose=True -) - -print(torch.ops.my_ops.warp_perspective) - -``` - -这应该大致打印: - -```py - - -``` - -JIT 编译的第二种形式使您可以将自定义 TorchScript 运算符的源代码作为字符串传递。 为此,请使用`torch.utils.cpp_extension.load_inline`: - -```py -import torch -import torch.utils.cpp_extension - -op_source = """ -#include -#include - -torch::Tensor warp_perspective(torch::Tensor image, torch::Tensor warp) { - cv::Mat image_mat(/*rows=*/image.size(0), - /*cols=*/image.size(1), - /*type=*/CV_32FC1, - /*data=*/image.data()); - cv::Mat warp_mat(/*rows=*/warp.size(0), - /*cols=*/warp.size(1), - /*type=*/CV_32FC1, - /*data=*/warp.data()); - - cv::Mat output_mat; - cv::warpPerspective(image_mat, output_mat, warp_mat, /*dsize=*/{64, 64}); - - torch::Tensor output = - torch::from_blob(output_mat.ptr(), /*sizes=*/{64, 64}); - return output.clone(); -} - -TORCH_LIBRARY(my_ops, m) { - m.def("warp_perspective", &warp_perspective); -} -""" - -torch.utils.cpp_extension.load_inline( - name="warp_perspective", - cpp_sources=op_source, - extra_ldflags=["-lopencv_core", "-lopencv_imgproc"], - is_python_module=False, - verbose=True, -) - -print(torch.ops.my_ops.warp_perspective) - -``` - -自然,最佳实践是仅在源代码相当短的情况下才使用`torch.utils.cpp_extension.load_inline`。 - -请注意,如果您在 Jupyter 笔记本中使用此功能,则不应多次执行单元格的注册,因为每次执行都会注册一个新库并重新注册自定义运算符。 如果需要重新执行它,请事先重新启动笔记本的 Python 内核。 - -### 使用`setuptools`构建 - -从 Python 专门构建自定义运算符的第二种方法是使用`setuptools`。 这样做的好处是`setuptools`具有相当强大而广泛的接口,可以用来构建用 C++ 编写的 Python 模块。 但是,由于`setuptools`实际上是用于构建 Python 模块而不是普通的共享库(它们没有 Python 期望从模块中获得的必要入口点),因此这种方法可能有点古怪。 也就是说,您需要的是一个`setup.py`文件来代替`CMakeLists.txt`,该文件看起来像这样: - -```py -from setuptools import setup -from torch.utils.cpp_extension import BuildExtension, 
CppExtension - -setup( - name="warp_perspective", - ext_modules=[ - CppExtension( - "warp_perspective", - ["example_app/warp_perspective/op.cpp"], - libraries=["opencv_core", "opencv_imgproc"], - ) - ], - cmdclass={"build_ext": BuildExtension.with_options(no_python_abi_suffix=True)}, -) - -``` - -请注意,我们在底部的`BuildExtension`中启用了`no_python_abi_suffix`选项。 这指示`setuptools`在产生的共享库的名称中省略任何特定于 Python-3 的 ABI 后缀。 否则,例如在 Python 3.7 上,该库可能被称为`warp_perspective.cpython-37m-x86_64-linux-gnu.so`,其中`cpython-37m-x86_64-linux-gnu`是 ABI 标签,但我们确实只是希望将其称为`warp_perspective.so` - -如果现在从`setup.py`所在的文件夹中的终端中运行`python setup.py build develop`,我们应该会看到类似以下内容的内容: - -```py -$ python setup.py build develop -running build -running build_ext -building 'warp_perspective' extension -creating build -creating build/temp.linux-x86_64-3.7 -gcc -pthread -B /root/local/miniconda/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/root/local/miniconda/lib/python3.7/site-packages/torch/lib/include -I/root/local/miniconda/lib/python3.7/site-packages/torch/lib/include/torch/csrc/api/include -I/root/local/miniconda/lib/python3.7/site-packages/torch/lib/include/TH -I/root/local/miniconda/lib/python3.7/site-packages/torch/lib/include/THC -I/root/local/miniconda/include/python3.7m -c op.cpp -o build/temp.linux-x86_64-3.7/op.o -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=warp_perspective -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -cc1plus: warning: command line option '-Wstrict-prototypes' is valid for C/ObjC but not for C++ -creating build/lib.linux-x86_64-3.7 -g++ -pthread -shared -B /root/local/miniconda/compiler_compat -L/root/local/miniconda/lib -Wl,-rpath=/root/local/miniconda/lib -Wl,--no-as-needed -Wl,--sysroot=/ build/temp.linux-x86_64-3.7/op.o -lopencv_core -lopencv_imgproc -o build/lib.linux-x86_64-3.7/warp_perspective.so -running develop -running egg_info -creating warp_perspective.egg-info -writing warp_perspective.egg-info/PKG-INFO -writing dependency_links to warp_perspective.egg-info/dependency_links.txt -writing top-level names to warp_perspective.egg-info/top_level.txt -writing manifest file 'warp_perspective.egg-info/SOURCES.txt' -reading manifest file 'warp_perspective.egg-info/SOURCES.txt' -writing manifest file 'warp_perspective.egg-info/SOURCES.txt' -running build_ext -copying build/lib.linux-x86_64-3.7/warp_perspective.so -> -Creating /root/local/miniconda/lib/python3.7/site-packages/warp-perspective.egg-link (link to .) 
-Adding warp-perspective 0.0.0 to easy-install.pth file
-
-Installed /warp_perspective
-Processing dependencies for warp-perspective==0.0.0
-Finished processing dependencies for warp-perspective==0.0.0
-
-```
-
-这将产生一个名为`warp_perspective.so`的共享库,我们可以像之前那样将其传递给`torch.ops.load_library`,以使我们的运算符对 TorchScript 可见:
-
-```py
->>> import torch
->>> torch.ops.load_library("warp_perspective.so")
->>> print(torch.ops.custom.warp_perspective)
-<built-in method custom::warp_perspective of PyObject object at 0x...>
-
-```
\ No newline at end of file
diff --git a/pytorch/官方教程/67.md b/pytorch/官方教程/47 异步执行批量RPC处理.md
similarity index 100%
rename from pytorch/官方教程/67.md
rename to pytorch/官方教程/47 异步执行批量RPC处理.md
diff --git a/pytorch/官方教程/47.md b/pytorch/官方教程/47.md
deleted file mode 100644
index 42fc08a2..00000000
--- a/pytorch/官方教程/47.md
+++ /dev/null
@@ -1,529 +0,0 @@
-# 使用自定义 C++ 类扩展 TorchScript
-
-> 原文:
-
-本教程是[自定义运算符](torch_script_custom_ops.html)教程的后续教程,并介绍了我们为将 C++ 类同时绑定到 TorchScript 和 Python 而构建的 API。 该 API 与[`pybind11`](https://github.com/pybind/pybind11)非常相似,如果您熟悉该系统,则大多数概念都将转移过来。
-
-## 用 C++ 实现和绑定类
-
-在本教程中,我们将定义一个简单的 C++ 类,该类在成员变量中保持持久状态。
-
-```py
-// This header is all you need to do the C++ portions of this
-// tutorial
-#include <torch/script.h>
-// This header is what defines the custom class registration
-// behavior specifically. script.h already includes this, but
-// we include it here so you know it exists in case you want
-// to look at the API or implementation.
-#include <torch/custom_class.h>
-
-#include <string>
-#include <vector>
-
-template <class T>
-struct MyStackClass : torch::CustomClassHolder {
-  std::vector<T> stack_;
-  MyStackClass(std::vector<T> init) : stack_(init.begin(), init.end()) {}
-
-  void push(T x) {
-    stack_.push_back(x);
-  }
-  T pop() {
-    auto val = stack_.back();
-    stack_.pop_back();
-    return val;
-  }
-
-  c10::intrusive_ptr<MyStackClass> clone() const {
-    return c10::make_intrusive<MyStackClass>(stack_);
-  }
-
-  void merge(const c10::intrusive_ptr<MyStackClass>& c) {
-    for (auto& elem : c->stack_) {
-      push(elem);
-    }
-  }
-};
-
-```
-
-有几件事要注意:
-
-* `torch/custom_class.h`是您需要使用自定义类扩展 TorchScript 的标头。
-* 注意,无论何时使用自定义类的实例,我们都通过`c10::intrusive_ptr<>`的实例来进行。 可以将`intrusive_ptr`视为类似于`std::shared_ptr`的智能指针,但是引用计数直接存储在对象中,而不是像`std::shared_ptr`那样存放在单独的元数据块中。`torch::Tensor`内部使用同样的指针类型;自定义类也必须使用该指针类型,以便我们可以一致地管理不同的对象类型。
-* 要注意的第二件事是,用户定义的类必须继承`torch::CustomClassHolder`。 这样可以确保自定义类具有存储引用计数的空间。
-
-现在让我们看一下如何使该类对 TorchScript 可见,该过程称为*绑定*该类:
-
-```py
-// Notice a few things:
-// - We pass the class to be registered as a template parameter to
-//   `torch::class_`. In this instance, we've passed the
-//   specialization of the MyStackClass class ``MyStackClass<std::string>``.
-//   In general, you cannot register a non-specialized template
-//   class. For non-templated classes, you can just pass the
-//   class name directly as the template parameter.
-// - The arguments passed to the constructor make up the "qualified name"
-//   of the class. In this case, the registered class will appear in
-//   Python and C++ as `torch.classes.my_classes.MyStackClass`. We call
-//   the first argument the "namespace" and the second argument the
-//   actual class name.
-TORCH_LIBRARY(my_classes, m) {
-  m.class_<MyStackClass<std::string>>("MyStackClass")
-    // The following line registers the constructor of our MyStackClass
-    // class that takes a single `std::vector<std::string>` argument,
-    // i.e. it exposes the C++ method `MyStackClass(std::vector<std::string> init)`.
-    // Currently, we do not support registering overloaded
-    // constructors, so for now you can only `def()` one instance of
-    // `torch::init`.
-    .def(torch::init<std::vector<std::string>>())
-    // The next line registers a stateless (i.e. no captures) C++ lambda
-    // function as a method. Note that a lambda function must take a
-    // `c10::intrusive_ptr<YourClass>` (or some const/ref version of that)
-    // as the first argument. Other arguments can be whatever you want.
-    .def("top", [](const c10::intrusive_ptr<MyStackClass<std::string>>& self) {
-      return self->stack_.back();
-    })
-    // The following four lines expose methods of the MyStackClass
-    // class as-is. `torch::class_` will automatically examine the
-    // argument and return types of the passed-in method pointers and
-    // expose these to Python and TorchScript accordingly. Finally, notice
-    // that we must take the *address* of the fully-qualified method name,
-    // i.e. use the unary `&` operator, due to C++ typing rules.
-    .def("push", &MyStackClass<std::string>::push)
-    .def("pop", &MyStackClass<std::string>::pop)
-    .def("clone", &MyStackClass<std::string>::clone)
-    .def("merge", &MyStackClass<std::string>::merge)
-  ;
-}
-
-```
-
-## 使用 CMake 将示例构建为 C++ 项目
-
-现在,我们将使用 [CMake](https://cmake.org) 构建系统来构建上述 C++ 代码。 首先,将到目前为止介绍的所有 C++ 代码放入`class.cpp`文件中。 然后,编写一个简单的`CMakeLists.txt`文件并将其放在同一目录中。 `CMakeLists.txt`应该是这样的:
-
-```py
-cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
-project(custom_class)
-
-find_package(Torch REQUIRED)
-
-# Define our library target
-add_library(custom_class SHARED class.cpp)
-set(CMAKE_CXX_STANDARD 14)
-# Link against LibTorch
-target_link_libraries(custom_class "${TORCH_LIBRARIES}")
-
-```
-
-另外,创建一个`build`目录。 您的文件树应如下所示:
-
-```py
-custom_class_project/
-  class.cpp
-  CMakeLists.txt
-  build/
-
-```
-
-我们假设您已经按照[上一教程](torch_script_custom_ops.html)中所述的相同方式设置了环境。 继续调用`cmake`,然后构建该项目:
-
-```py
-$ cd build
-$ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" ..
-  -- The C compiler identification is GNU 7.3.1
-  -- The CXX compiler identification is GNU 7.3.1
-  -- Check for working C compiler: /opt/rh/devtoolset-7/root/usr/bin/cc
-  -- Check for working C compiler: /opt/rh/devtoolset-7/root/usr/bin/cc -- works
-  -- Detecting C compiler ABI info
-  -- Detecting C compiler ABI info - done
-  -- Detecting C compile features
-  -- Detecting C compile features - done
-  -- Check for working CXX compiler: /opt/rh/devtoolset-7/root/usr/bin/c++
-  -- Check for working CXX compiler: /opt/rh/devtoolset-7/root/usr/bin/c++ -- works
-  -- Detecting CXX compiler ABI info
-  -- Detecting CXX compiler ABI info - done
-  -- Detecting CXX compile features
-  -- Detecting CXX compile features - done
-  -- Looking for pthread.h
-  -- Looking for pthread.h - found
-  -- Looking for pthread_create
-  -- Looking for pthread_create - not found
-  -- Looking for pthread_create in pthreads
-  -- Looking for pthread_create in pthreads - not found
-  -- Looking for pthread_create in pthread
-  -- Looking for pthread_create in pthread - found
-  -- Found Threads: TRUE
-  -- Found torch: /torchbind_tutorial/libtorch/lib/libtorch.so
-  -- Configuring done
-  -- Generating done
-  -- Build files have been written to: /torchbind_tutorial/build
-$ make -j
-  Scanning dependencies of target custom_class
-  [ 50%] Building CXX object CMakeFiles/custom_class.dir/class.cpp.o
-  [100%] Linking CXX shared library libcustom_class.so
-  [100%] Built target custom_class
-
-```
-
-您会发现,构建目录中现在有一个动态库文件。 在 Linux 上,它可能名为`libcustom_class.so`。 因此,文件树应如下所示:
-
-```py
-custom_class_project/
-  class.cpp
-  CMakeLists.txt
-  build/
-    libcustom_class.so
-
-```
-
-## 从 Python 和 TorchScript 使用 C++ 类
-
-现在我们已经将我们的类及其注册编译为`.so`文件,我们可以将该`.so`加载到 Python 中并进行尝试。 这是一个演示其用法的示例脚本:
-
-```py
-import torch
-
-# `torch.classes.load_library()` allows you to pass the path to your .so file
-# to load it in and make the custom C++ classes 
available to both Python and -# TorchScript -torch.classes.load_library("build/libcustom_class.so") -# You can query the loaded libraries like this: -print(torch.classes.loaded_libraries) -# prints {'/custom_class_project/build/libcustom_class.so'} - -# We can find and instantiate our custom C++ class in python by using the -# `torch.classes` namespace: -# -# This instantiation will invoke the MyStackClass(std::vector init) -# constructor we registered earlier -s = torch.classes.my_classes.MyStackClass(["foo", "bar"]) - -# We can call methods in Python -s.push("pushed") -assert s.pop() == "pushed" - -# Returning and passing instances of custom classes works as you'd expect -s2 = s.clone() -s.merge(s2) -for expected in ["bar", "foo", "bar", "foo"]: - assert s.pop() == expected - -# We can also use the class in TorchScript -# For now, we need to assign the class's type to a local in order to -# annotate the type on the TorchScript function. This may change -# in the future. -MyStackClass = torch.classes.my_classes.MyStackClass - -@torch.jit.script -def do_stacks(s: MyStackClass): # We can pass a custom class instance - # We can instantiate the class - s2 = torch.classes.my_classes.MyStackClass(["hi", "mom"]) - s2.merge(s) # We can call a method on the class - # We can also return instances of the class - # from TorchScript function/methods - return s2.clone(), s2.top() - -stack, top = do_stacks(torch.classes.my_classes.MyStackClass(["wow"])) -assert top == "wow" -for expected in ["wow", "mom", "hi"]: - assert stack.pop() == expected - -``` - -## 使用自定义类保存,加载和运行 TorchScript 代码 - -我们还可以在使用 libtorch 的 C++ 进程中使用自定义注册的 C++ 类。 举例来说,让我们定义一个简单的`nn.Module`,它实例化并调用`MyStackClass`类上的方法: - -```py -import torch - -torch.classes.load_library('build/libcustom_class.so') - -class Foo(torch.nn.Module): - def __init__(self): - super().__init__() - - def forward(self, s: str) -> str: - stack = torch.classes.my_classes.MyStackClass(["hi", "mom"]) - return stack.pop() + s - -scripted_foo = torch.jit.script(Foo()) -print(scripted_foo.graph) - -scripted_foo.save('foo.pt') - -``` - -我们文件系统中的`foo.pt`现在包含我们刚刚定义的序列化 TorchScript 程序。 - -现在,我们将定义一个新的 CMake 项目,以展示如何加载此模型及其所需的`.so`文件。 有关如何执行此操作的完整说明,请查看[在 C++ 中加载 TorchScript 模型](https://pytorch.org/tutorials/advanced/cpp_export.html)的教程。 - -与之前类似,让我们创建一个包含以下内容的文件结构: - -```py -cpp_inference_example/ - infer.cpp - CMakeLists.txt - foo.pt - build/ - custom_class_project/ - class.cpp - CMakeLists.txt - build/ - -``` - -请注意,我们已经复制了序列化的`foo.pt`文件以及上面`custom_class_project`的源代码树。 我们将把`custom_class_project`作为依赖项添加到此 C++ 项目中,以便可以将自定义类构建到二进制文件中。 - -让我们用以下内容填充`infer.cpp`: - -```py -#include - -#include -#include - -int main(int argc, const char* argv[]) { - torch::jit::Module module; - try { - // Deserialize the ScriptModule from a file using torch::jit::load(). - module = torch::jit::load("foo.pt"); - } - catch (const c10::Error& e) { - std::cerr << "error loading the model\n"; - return -1; - } - - std::vector inputs = {"foobarbaz"}; - auto output = module.forward(inputs).toString(); - std::cout << output->string() << std::endl; -} - -``` - -同样,让我们​​定义`CMakeLists.txt`文件: - -```py -cmake_minimum_required(VERSION 3.1 FATAL_ERROR) -project(infer) - -find_package(Torch REQUIRED) - -add_subdirectory(custom_class_project) - -# Define our library target -add_executable(infer infer.cpp) -set(CMAKE_CXX_STANDARD 14) -# Link against LibTorch -target_link_libraries(infer "${TORCH_LIBRARIES}") -# This is where we link in our libcustom_class code, making our -# custom class available in our binary. 
-target_link_libraries(infer -Wl,--no-as-needed custom_class) - -``` - -您知道练习:`cd build`,`cmake`和`make`: - -```py -$ cd build -$ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" .. - -- The C compiler identification is GNU 7.3.1 - -- The CXX compiler identification is GNU 7.3.1 - -- Check for working C compiler: /opt/rh/devtoolset-7/root/usr/bin/cc - -- Check for working C compiler: /opt/rh/devtoolset-7/root/usr/bin/cc -- works - -- Detecting C compiler ABI info - -- Detecting C compiler ABI info - done - -- Detecting C compile features - -- Detecting C compile features - done - -- Check for working CXX compiler: /opt/rh/devtoolset-7/root/usr/bin/c++ - -- Check for working CXX compiler: /opt/rh/devtoolset-7/root/usr/bin/c++ -- works - -- Detecting CXX compiler ABI info - -- Detecting CXX compiler ABI info - done - -- Detecting CXX compile features - -- Detecting CXX compile features - done - -- Looking for pthread.h - -- Looking for pthread.h - found - -- Looking for pthread_create - -- Looking for pthread_create - not found - -- Looking for pthread_create in pthreads - -- Looking for pthread_create in pthreads - not found - -- Looking for pthread_create in pthread - -- Looking for pthread_create in pthread - found - -- Found Threads: TRUE - -- Found torch: /local/miniconda3/lib/python3.7/site-packages/torch/lib/libtorch.so - -- Configuring done - -- Generating done - -- Build files have been written to: /cpp_inference_example/build -$ make -j - Scanning dependencies of target custom_class - [ 25%] Building CXX object custom_class_project/CMakeFiles/custom_class.dir/class.cpp.o - [ 50%] Linking CXX shared library libcustom_class.so - [ 50%] Built target custom_class - Scanning dependencies of target infer - [ 75%] Building CXX object CMakeFiles/infer.dir/infer.cpp.o - [100%] Linking CXX executable infer - [100%] Built target infer - -``` - -现在我们可以运行令人兴奋的 C++ 二进制文件: - -```py -$ ./infer - momfoobarbaz - -``` - -难以置信! - -## 将自定义类移入或移出`IValue` - -也可能需要将自定义类从自定义 C++ 类实例移入或移出`IValue`, such as when you take or return IValues from TorchScript methods or you want to instantiate a custom class attribute in C++. For creating an IValue: - -* `torch::make_custom_class()`提供类似于`c10::intrusive_ptr`的 API,因为它将采用您提供给它的任何参数集,调用与该参数集匹配的`T`的构造器,并包装该实例,然后返回。 但是,它不仅返回指向自定义类对象的指针,还返回包装对象的`IValue`。 然后,您可以将此`IValue`直接传递给 TorchScript。 -* 如果您已经有一个指向类的`intrusive_ptr`,则可以使用构造器`IValue(intrusive_ptr)`直接从其构造`IValue`。 - -要将`IValue`转换回自定义类: - -* `IValue::toCustomClass()`将返回一个`intrusive_ptr`,指向`IValue`包含的自定义类。 在内部,此函数正在检查`T`是否已注册为自定义类,并且`IValue`实际上确实包含一个自定义类。 您可以通过调用`isCustomClass()`来手动检查`IValue`是否包含自定义类。 - -## 为自定义 C++ 类定义序列化/反序列化方法 - -如果您尝试将具有自定义绑定 C++ 类的`ScriptModule`保存为属性,则会出现以下错误: - -```py -# export_attr.py -import torch - -torch.classes.load_library('build/libcustom_class.so') - -class Foo(torch.nn.Module): - def __init__(self): - super().__init__() - self.stack = torch.classes.my_classes.MyStackClass(["just", "testing"]) - - def forward(self, s: str) -> str: - return self.stack.pop() + s - -scripted_foo = torch.jit.script(Foo()) - -scripted_foo.save('foo.pt') -loaded = torch.jit.load('foo.pt') - -print(loaded.stack.pop()) - -``` - -```py -$ python export_attr.py -RuntimeError: Cannot serialize custom bound C++ class __torch__.torch.classes.my_classes.MyStackClass. Please define serialization methods via def_pickle for this class. 
(pushIValueImpl at ../torch/csrc/jit/pickler.cpp:128) - -``` - -这是因为 TorchScript 无法自动找出 C++ 类中保存的信息。 您必须手动指定。 这样做的方法是使用`class_`上的特殊`def_pickle`方法在类上定义`__getstate__`和`__setstate__`方法。 - -注意 - -TorchScript 中`__getstate__`和`__setstate__`的语义与 Python `pickle`模块的语义相同。 您可以[阅读更多](https://github.com/pytorch/pytorch/blob/master/torch/csrc/jit/docs/serialization.md#getstate-and-setstate)有关如何使用这些方法的信息。 - -这是`def_pickle`调用的示例,我们可以将其添加到`MyStackClass`的注册中以包括序列化方法: - -```py - // class_<>::def_pickle allows you to define the serialization - // and deserialization methods for your C++ class. - // Currently, we only support passing stateless lambda functions - // as arguments to def_pickle - .def_pickle( - // __getstate__ - // This function defines what data structure should be produced - // when we serialize an instance of this class. The function - // must take a single `self` argument, which is an intrusive_ptr - // to the instance of the object. The function can return - // any type that is supported as a return value of the TorchScript - // custom operator API. In this instance, we've chosen to return - // a std::vector as the salient data to preserve - // from the class. - [](const c10::intrusive_ptr>& self) - -> std::vector { - return self->stack_; - }, - // __setstate__ - // This function defines how to create a new instance of the C++ - // class when we are deserializing. The function must take a - // single argument of the same type as the return value of - // `__getstate__`. The function must return an intrusive_ptr - // to a new instance of the C++ class, initialized however - // you would like given the serialized state. - [](std::vector state) - -> c10::intrusive_ptr> { - // A convenient way to instantiate an object and get an - // intrusive_ptr to it is via `make_intrusive`. We use - // that here to allocate an instance of MyStackClass - // and call the single-argument std::vector - // constructor with the serialized state. 
- return c10::make_intrusive>(std::move(state)); - }); - -``` - -注意 - -我们在 Pickle API 中采用与`pybind11`不同的方法。`pybind11`作为传递给`class_::def()`的特殊函数`pybind11::pickle()`,为此我们有一个单独的方法`def_pickle`。 这是因为`torch::jit::pickle`这个名称已经被使用了,我们不想引起混淆。 - -以这种方式定义(反)序列化行为后,脚本现在可以成功运行: - -```py -$ python ../export_attr.py -testing - -``` - -## 定义接受或返回绑定 C++ 类的自定义运算符 - -定义自定义 C++ 类后,您还可以将该类用作自变量或从自定义运算符返回(即自由函数)。 假设您具有以下自由函数: - -```py -c10::intrusive_ptr> manipulate_instance(const c10::intrusive_ptr>& instance) { - instance->pop(); - return instance; -} - -``` - -您可以在`TORCH_LIBRARY`块中运行以下代码来注册它: - -```py - m.def( - "foo::manipulate_instance(__torch__.torch.classes.my_classes.MyStackClass x) -> __torch__.torch.classes.my_classes.MyStackClass Y", - manipulate_instance - ); - -``` - -有关注册 API 的更多详细信息,请参见[自定义操作教程](https://pytorch.org/tutorials/advanced/torch_script_custom_ops.html)。 - -完成此操作后,您可以像以下示例一样使用操作: - -```py -class TryCustomOp(torch.nn.Module): - def __init__(self): - super(TryCustomOp, self).__init__() - self.f = torch.classes.my_classes.MyStackClass(["foo", "bar"]) - - def forward(self): - return torch.ops.foo.manipulate_instance(self.f) - -``` - -注意 - -注册使用 C++ 类作为参数的运算符时,要求已注册自定义类。 您可以通过确保自定义类注册和您的自由函数定义在同一`TORCH_LIBRARY`块中,并确保自定义类注册位于第一位来强制实现此操作。 将来,我们可能会放宽此要求,以便可以按任何顺序进行注册。 - -## 总结 - -本教程向您介绍了如何向 TorchScript(以及扩展为 Python)公开 C++ 类,如何注册其方法,如何从 Python 和 TorchScript 使用该类以及如何使用该类保存和加载代码以及运行该代码。 在独立的 C++ 过程中。 现在,您可以使用与第三方 C++ 库连接的 C++ 类扩展 TorchScript 模型,或实现需要 Python,TorchScript 和 C++ 之间的界线平滑融合的任何其他用例。 - -与往常一样,如果您遇到任何问题或疑问,可以使用我们的[论坛](https://discuss.pytorch.org/)或 [GitHub ISSUE](https://github.com/pytorch/pytorch/issues) 进行联系。 另外,我们的[常见问题解答(FAQ)页面](https://pytorch.org/cppdocs/notes/faq.html)可能包含有用的信息。 \ No newline at end of file diff --git a/pytorch/官方教程/68.md b/pytorch/官方教程/48 分布式DataParallel与分布式RPC.md similarity index 100% rename from pytorch/官方教程/68.md rename to pytorch/官方教程/48 分布式DataParallel与分布式RPC.md diff --git a/pytorch/官方教程/48.md b/pytorch/官方教程/48.md deleted file mode 100644 index 6c9882e5..00000000 --- a/pytorch/官方教程/48.md +++ /dev/null @@ -1,233 +0,0 @@ -# TorchScript 中的动态并行性 - -> 原文: - -在本教程中,我们介绍在 TorchScript 中执行*动态互操作并行化*的语法。 此并行性具有以下属性: - -* 动态-创建的并行任务的数量及其工作量可能取决于程序的控制流。 -* 互操作-并行性与并行运行 TorchScript 程序片段有关。 这与*运算内部并行化*不同,后者涉及拆分单个运算符并并行运行运算符工作的子集。 - -## 基本语法 - -动态并行的两个重要 API 是: - -* `torch.jit.fork(fn : Callable[..., T], *args, **kwargs) -> torch.jit.Future[T]` -* `torch.jit.wait(fut : torch.jit.Future[T]) -> T` - -通过示例来演示这些工作原理的好方法: - -```py -import torch - -def foo(x): - return torch.neg(x) - -@torch.jit.script -def example(x): - # Call `foo` using parallelism: - # First, we "fork" off a task. This task will run `foo` with argument `x` - future = torch.jit.fork(foo, x) - - # Call `foo` normally - x_normal = foo(x) - - # Second, we "wait" on the task. Since the task may be running in - # parallel, we have to "wait" for its result to become available. - # Notice that by having lines of code between the "fork()" and "wait()" - # call for a given Future, we can overlap computations so that they - # run in parallel. - x_parallel = torch.jit.wait(future) - - return x_normal, x_parallel - -print(example(torch.ones(1))) # (-1., -1.) 
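-
-# 补充说明(非原教程内容):若 fork 出的异步任务在执行中抛出异常,
-# 该异常会在对相应的 Future 调用 torch.jit.wait() 时被重新抛出,
-# 因此错误处理可以集中放在 wait 调用处。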
- -``` - -`fork()`接受可调用`fn`以及该可调用`args`和`kwargs`的参数,并创建异步任务来执行`fn`。 `fn`可以是函数,方法或模块实例。 `fork()`返回对此执行结果的值的引用,称为`Future`。 因为`fork`在创建异步任务后立即返回,所以在执行`fork()`调用之后的代码行时可能尚未执行`fn`。 因此,`wait()`用于等待异步任务完成并返回值。 - -这些结构可用于重叠函数内语句的执行(如工作示例部分所示),或与其他语言结构(如循环)组合在一起: - -```py -import torch -from typing import List - -def foo(x): - return torch.neg(x) - -@torch.jit.script -def example(x): - futures : List[torch.jit.Future[torch.Tensor]] = [] - for _ in range(100): - futures.append(torch.jit.fork(foo, x)) - - results = [] - for future in futures: - results.append(torch.jit.wait(future)) - - return torch.sum(torch.stack(results)) - -print(example(torch.ones([]))) - -``` - -注意 - -当我们初始化一个空的期货列表时,我们需要在`futures`上添加一个显式类型注解。 在 TorchScript 中,空容器默认假定它们包含张量值,因此我们将列表构造器`#`注解为`List[torch.jit.Future[torch.Tensor]]`类型 - -本示例使用`fork()`启动函数`foo`的 100 个实例,等待 100 个任务完成,然后对结果求和,返回`-100.0`。 - -## 应用示例:双向 LSTM 的集成 - -让我们尝试将并行性应用于一个更现实的示例,看看我们可以从中获得什么样的性能。 首先,让我们定义基准模型:双向 LSTM 层的集合。 - -```py -import torch, time - -# In RNN parlance, the dimensions we care about are: -# # of time-steps (T) -# Batch size (B) -# Hidden size/number of "channels" (C) -T, B, C = 50, 50, 1024 - -# A module that defines a single "bidirectional LSTM". This is simply two -# LSTMs applied to the same sequence, but one in reverse -class BidirectionalRecurrentLSTM(torch.nn.Module): - def __init__(self): - super().__init__() - self.cell_f = torch.nn.LSTM(input_size=C, hidden_size=C) - self.cell_b = torch.nn.LSTM(input_size=C, hidden_size=C) - - def forward(self, x : torch.Tensor) -> torch.Tensor: - # Forward layer - output_f, _ = self.cell_f(x) - - # Backward layer. Flip input in the time dimension (dim 0), apply the - # layer, then flip the outputs in the time dimension - x_rev = torch.flip(x, dims=[0]) - output_b, _ = self.cell_b(torch.flip(x, dims=[0])) - output_b_rev = torch.flip(output_b, dims=[0]) - - return torch.cat((output_f, output_b_rev), dim=2) - -# An "ensemble" of `BidirectionalRecurrentLSTM` modules. The modules in the -# ensemble are run one-by-one on the same input then their results are -# stacked and summed together, returning the combined result. -class LSTMEnsemble(torch.nn.Module): - def __init__(self, n_models): - super().__init__() - self.n_models = n_models - self.models = torch.nn.ModuleList([ - BidirectionalRecurrentLSTM() for _ in range(self.n_models)]) - - def forward(self, x : torch.Tensor) -> torch.Tensor: - results = [] - for model in self.models: - results.append(model(x)) - return torch.stack(results).sum(dim=0) - -# For a head-to-head comparison to what we're going to do with fork/wait, let's -# instantiate the model and compile it with TorchScript -ens = torch.jit.script(LSTMEnsemble(n_models=4)) - -# Normally you would pull this input out of an embedding table, but for the -# purpose of this demo let's just use random data. -x = torch.rand(T, B, C) - -# Let's run the model once to warm up things like the memory allocator -ens(x) - -x = torch.rand(T, B, C) - -# Let's see how fast it runs! -s = time.time() -ens(x) -print('Inference took', time.time() - s, ' seconds') - -``` - -在我的机器上,该网络运行时间为`2.05`秒。 我们可以做得更好! - -## 并行化前向和后向层 - -我们可以做的一个非常简单的事情是在`BidirectionalRecurrentLSTM`中并行化前进和后退层。 为此,计算结构是静态的,因此我们实际上甚至不需要任何循环。 像这样重写`BidirectionalRecurrentLSTM`的`forward`方法: - -```py -def forward(self, x : torch.Tensor) -> torch.Tensor: - # Forward layer - fork() so this can run in parallel to the backward - # layer - future_f = torch.jit.fork(self.cell_f, x) - - # Backward layer. 
Flip input in the time dimension (dim 0), apply the - # layer, then flip the outputs in the time dimension - x_rev = torch.flip(x, dims=[0]) - output_b, _ = self.cell_b(torch.flip(x, dims=[0])) - output_b_rev = torch.flip(output_b, dims=[0]) - - # Retrieve the output from the forward layer. Note this needs to happen - # *after* the stuff we want to parallelize with - output_f, _ = torch.jit.wait(future_f) - - return torch.cat((output_f, output_b_rev), dim=2) - -``` - -在此示例中,`forward()`将`cell_f`的执行委派给另一个线程,而它继续执行`cell_b`。 这导致两个单元的执行彼此重叠。 - -通过简单的修改再次运行脚本会产生`1.71`秒的运行时间,从而改进了`17%`! - -## 旁注:可视化并行性 - -我们还没有完成模型的优化,但是值得介绍一下用于可视化性能的工具。 一种重要的工具是 [PyTorch 分析器](https://pytorch.org/docs/stable/autograd.html#profiler)。 - -让我们将分析器与 Chrome 跟踪导出功能一起使用,以可视化并行模型的性能: - -此代码段将写出名为`parallel.json`的文件。 如果您将 Google Chrome 浏览器导航到`chrome://tracing`,单击`Load`按钮,然后加载该 JSON 文件,则应该看到类似以下的时间轴: - -![](img/6b495cb0cd4336a2469d9f07696faa3e.png) - -时间轴的横轴表示时间,纵轴表示执行线程。 如我们所见,我们一次运行两个`lstm`实例。 这是我们辛勤工作使双向层平行的结果! - -## 集成中的并行化模型 - -您可能已经注意到,我们的代码中还有更多的并行化机会:我们还可以并行运行`LSTMEnsemble`中包含的模型。 做到这一点的方法很简单,这就是我们应该更改`LSTMEnsemble`的`forward`方法的方式: - -```py -def forward(self, x : torch.Tensor) -> torch.Tensor: - # Launch tasks for each model - futures : List[torch.jit.Future[torch.Tensor]] = [] - for model in self.models: - futures.append(torch.jit.fork(model, x)) - - # Collect the results from the launched tasks - results : List[torch.Tensor] = [] - for future in futures: - results.append(torch.jit.wait(future)) - - return torch.stack(results).sum(dim=0) - -``` - -或者,如果您重视简洁性,我们可以使用列表推导: - -```py -def forward(self, x : torch.Tensor) -> torch.Tensor: - futures = [torch.jit.fork(model, x) for model in self.models] - results = [torch.jit.wait(fut) for fut in futures] - return torch.stack(results).sum(dim=0) - -``` - -如介绍中所述,我们使用循环为集合中的每个模型分派任务。 然后,我们使用了另一个循环来等待所有任务完成。 这提供了更多的计算重叠。 - -通过此小更新,脚本将在`1.4`秒内运行,总速度为`32%`! 
两行代码相当不错。 - -我们还可以再次使用 Chrome 跟踪器来查看运行情况: - -![](img/ac8752539498c11001a65c1ff470d696.png) - -现在我们可以看到所有`LSTM`实例都在完全并行运行。 - -## 总结 - -在本教程中,我们学习了`fork()`和`wait()`,这是在 TorchScript 中执行动态,互操作并行的基本 API。 我们看到了一些典型的使用模式,这些模式使用这些函数并行执行 TorchScript 代码中的函数,方法或`Modules`的执行。 最后,我们通过一个使用该技术优化模型的示例进行了研究,并探索了 PyTorch 中可用的性能测量和可视化工具。 \ No newline at end of file diff --git a/pytorch/官方教程/49.md b/pytorch/官方教程/49.md deleted file mode 100644 index f2776e85..00000000 --- a/pytorch/官方教程/49.md +++ /dev/null @@ -1,414 +0,0 @@ -# C++ 前端中的 Autograd - -> 原文: - -`autograd`包对于在 PyTorch 中构建高度灵活和动态的神经网络至关重要。 PyTorch Python 前端中的大多数 autograd API 也可以在 C++ 前端中使用,从而可以轻松地将 Autograd 代码从 Python 转换为 C++。 - -在本教程中,我们将看几个在 PyTorch C++ 前端中进行 Autograd 的示例。 请注意,本教程假定您已经对 Python 前端中的 Autograd 有基本的了解。 如果不是这种情况,请先阅读 [Autograd:自动微分](https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html)。 - -## 基本的 Autograd 操作 - -(改编自[本教程](https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html#autograd-automatic-differentiation)) - -创建一个张量并设置`torch::requires_grad()`以跟踪它的计算 - -```py -auto x = torch::ones({2, 2}, torch::requires_grad()); -std::cout << x << std::endl; - -``` - -出: - -```py -1 1 -1 1 -[ CPUFloatType{2,2} ] - -``` - -进行张量运算: - -```py -auto y = x + 2; -std::cout << y << std::endl; - -``` - -出: - -```py - 3 3 - 3 3 -[ CPUFloatType{2,2} ] - -``` - -`y`是由于操作而创建的,因此具有`grad_fn`。 - -```py -std::cout << y.grad_fn()->name() << std::endl; - -``` - -出: - -```py -AddBackward1 - -``` - -在`y`上执行更多操作 - -```py -auto z = y * y * 3; -auto out = z.mean(); - -std::cout << z << std::endl; -std::cout << z.grad_fn()->name() << std::endl; -std::cout << out << std::endl; -std::cout << out.grad_fn()->name() << std::endl; - -``` - -出: - -```py - 27 27 - 27 27 -[ CPUFloatType{2,2} ] -MulBackward1 -27 -[ CPUFloatType{} ] -MeanBackward0 - -``` - -`.requires_grad_( ... 
)`原地更改现有张量的`requires_grad`标志。
-
-```py
-auto a = torch::randn({2, 2});
-a = ((a * 3) / (a - 1));
-std::cout << a.requires_grad() << std::endl;
-
-a.requires_grad_(true);
-std::cout << a.requires_grad() << std::endl;
-
-auto b = (a * a).sum();
-std::cout << b.grad_fn()->name() << std::endl;
-
-```
-
-出:
-
-```py
-false
-true
-SumBackward0
-
-```
-
-现在让我们反向传播。 因为`out`包含单个标量,所以`out.backward()`等效于`out.backward(torch::tensor(1.))`。
-
-```py
-out.backward();
-
-```
-
-打印梯度`d(out) / dx`:
-
-```py
-std::cout << x.grad() << std::endl;
-
-```
-
-出:
-
-```py
- 4.5000 4.5000
- 4.5000 4.5000
-[ CPUFloatType{2,2} ]
-
-```
-
-您应该得到一个元素全为`4.5`的矩阵。 有关如何得到此值的说明,请参见[本教程](https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html#gradients)中的相应部分。
-
-现在,让我们来看一个向量-雅可比积(vector-Jacobian product)的示例:
-
-```py
-x = torch::randn(3, torch::requires_grad());
-
-y = x * 2;
-while (y.norm().item<double>() < 1000) {
-  y = y * 2;
-}
-
-std::cout << y << std::endl;
-std::cout << y.grad_fn()->name() << std::endl;
-
-```
-
-出:
-
-```py
--1021.4020
-  314.6695
- -613.4944
-[ CPUFloatType{3} ]
-MulBackward1
-
-```
-
-如果我们想要向量-雅可比积,请将该向量作为参数传递给`backward`:
-
-```py
-auto v = torch::tensor({0.1, 1.0, 0.0001}, torch::kFloat);
-y.backward(v);
-
-std::cout << x.grad() << std::endl;
-
-```
-
-出:
-
-```py
-  102.4000
- 1024.0000
-    0.1024
-[ CPUFloatType{3} ]
-
-```
-
-您也可以通过在代码块中放置`torch::NoGradGuard`,让 Autograd 停止跟踪需要梯度的张量的历史:
-
-```py
-std::cout << x.requires_grad() << std::endl;
-std::cout << x.pow(2).requires_grad() << std::endl;
-
-{
-  torch::NoGradGuard no_grad;
-  std::cout << x.pow(2).requires_grad() << std::endl;
-}
-
-```
-
-出:
-
-```py
-true
-true
-false
-
-```
-
-或者使用`.detach()`获得具有相同内容但不需要梯度的新张量:
-
-```py
-std::cout << x.requires_grad() << std::endl;
-y = x.detach();
-std::cout << y.requires_grad() << std::endl;
-std::cout << x.eq(y).all().item<bool>() << std::endl;
-
-```
-
-出:
-
-```py
-true
-false
-true
-
-```
-
-有关 C++ 张量 Autograd API 的更多信息,例如`grad`/`requires_grad`/`is_leaf`/`backward`/`detach`/`detach_`/`register_hook`/`retain_grad`,请参见[相应的 C++ API 文档](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html)。
-
-## 用 C++ 计算高阶梯度
-
-高阶梯度的应用之一是计算梯度惩罚(gradient penalty)。 我们来看看使用`torch::autograd::grad`的示例:
-
-```py
-#include <torch/torch.h>
-
-auto model = torch::nn::Linear(4, 3);
-
-auto input = torch::randn({3, 4}).requires_grad_(true);
-auto output = model(input);
-
-// Calculate loss
-auto target = torch::randn({3, 3});
-auto loss = torch::nn::MSELoss()(output, target);
-
-// Use norm of gradients as penalty
-auto grad_output = torch::ones_like(output);
-auto gradient = torch::autograd::grad({output}, {input}, /*grad_outputs=*/{grad_output}, /*create_graph=*/true)[0];
-auto gradient_penalty = torch::pow((gradient.norm(2, /*dim=*/1) - 1), 2).mean();
-
-// Add gradient penalty to loss
-auto combined_loss = loss + gradient_penalty;
-combined_loss.backward();
-
-std::cout << input.grad() << std::endl;
-
-```
-
-出:
-
-```py
--0.1042 -0.0638  0.0103  0.0723
--0.2543 -0.1222  0.0071  0.0814
--0.1683 -0.1052  0.0355  0.1024
-[ CPUFloatType{3,4} ]
-
-```
-
-有关如何使用它们的更多信息,请参见[`torch::autograd::backward`](https://pytorch.org/cppdocs/api/function_namespacetorch_1_1autograd_1afa9b5d4329085df4b6b3d4b4be48914b.html)和[`torch::autograd::grad`](https://pytorch.org/cppdocs/api/function_namespacetorch_1_1autograd_1a1e03c42b14b40c306f9eb947ef842d9c.html)的文档。
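-
-作为补充(以下示例并非原教程内容):`torch::autograd::grad`还可以串联起来计算更高阶的导数。只要在第一次调用时设置`create_graph=true`,返回的梯度本身就带有计算图,可以再次对其求导。下面是一个最小示意,数值仅为说明(对 y = x^3,在 x = 2 处一阶导数为 3 * 2^2 = 12,二阶导数为 6 * 2 = 12):
-
-```py
-auto x = torch::tensor({2.0}, torch::requires_grad());
-auto y = x.pow(3);
-
-// 一阶导数;create_graph=true 使返回的 dy 本身可以继续求导
-auto dy = torch::autograd::grad({y}, {x}, /*grad_outputs=*/{torch::ones_like(y)},
-                                /*retain_graph=*/c10::nullopt, /*create_graph=*/true)[0];
-
-// 对一阶导数再求导,得到二阶导数
-auto d2y = torch::autograd::grad({dy}, {x}, /*grad_outputs=*/{torch::ones_like(dy)})[0];
-
-std::cout << dy << std::endl;  // 12
-std::cout << d2y << std::endl; // 12
-
-```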
-
-## 在 C++ 中使用自定义 Autograd 函数
-
-(改编自[本教程](https://pytorch.org/docs/stable/notes/extending.html#extending-torch-autograd))
-
-向`torch::autograd`添加新的基本操作需要为每个操作实现一个新的`torch::autograd::Function`子类。 `torch::autograd::Function`是`torch::autograd`用来计算结果和梯度、并对操作历史进行编码的机制。 每个新函数都需要您实现两个方法:`forward`和`backward`,有关详细要求,请参见[此链接](https://pytorch.org/cppdocs/api/structtorch_1_1autograd_1_1_function.html)。
-
-在下面,您可以找到来自`torch::nn`的`Linear`函数的代码:
-
-```py
-#include <torch/torch.h>
-
-using namespace torch::autograd;
-
-// Inherit from Function
-class LinearFunction : public Function<LinearFunction> {
- public:
-  // Note that both forward and backward are static functions
-
-  // bias is an optional argument
-  static torch::Tensor forward(
-      AutogradContext *ctx, torch::Tensor input, torch::Tensor weight, torch::Tensor bias = torch::Tensor()) {
-    ctx->save_for_backward({input, weight, bias});
-    auto output = input.mm(weight.t());
-    if (bias.defined()) {
-      output += bias.unsqueeze(0).expand_as(output);
-    }
-    return output;
-  }
-
-  static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
-    auto saved = ctx->get_saved_variables();
-    auto input = saved[0];
-    auto weight = saved[1];
-    auto bias = saved[2];
-
-    auto grad_output = grad_outputs[0];
-    auto grad_input = grad_output.mm(weight);
-    auto grad_weight = grad_output.t().mm(input);
-    auto grad_bias = torch::Tensor();
-    if (bias.defined()) {
-      grad_bias = grad_output.sum(0);
-    }
-
-    return {grad_input, grad_weight, grad_bias};
-  }
-};
-
-```
-
-然后,我们可以通过以下方式使用`LinearFunction`:
-
-```py
-auto x = torch::randn({2, 3}).requires_grad_();
-auto weight = torch::randn({4, 3}).requires_grad_();
-auto y = LinearFunction::apply(x, weight);
-y.sum().backward();
-
-std::cout << x.grad() << std::endl;
-std::cout << weight.grad() << std::endl;
-
-```
-
-出:
-
-```py
- 0.5314 1.2807 1.4864
- 0.5314 1.2807 1.4864
-[ CPUFloatType{2,3} ]
- 3.7608 0.9101 0.0073
- 3.7608 0.9101 0.0073
- 3.7608 0.9101 0.0073
- 3.7608 0.9101 0.0073
-[ CPUFloatType{4,3} ]
-
-```
-
-在这里,我们再给出一个通过非张量参数进行参数化的函数示例:
-
-```py
-#include <torch/torch.h>
-
-using namespace torch::autograd;
-
-class MulConstant : public Function<MulConstant> {
- public:
-  static torch::Tensor forward(AutogradContext *ctx, torch::Tensor tensor, double constant) {
-    // ctx is a context object that can be used to stash information
-    // for backward computation
-    ctx->saved_data["constant"] = constant;
-    return tensor * constant;
-  }
-
-  static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
-    // We return as many input gradients as there were arguments.
-    // Gradients of non-tensor arguments to forward must be `torch::Tensor()`. 
- return {grad_outputs[0] * ctx->saved_data["constant"].toDouble(), torch::Tensor()}; - } -}; - -``` - -然后,我们可以通过以下方式使用`MulConstant`: - -```py -auto x = torch::randn({2}).requires_grad_(); -auto y = MulConstant::apply(x, 5.5); -y.sum().backward(); - -std::cout << x.grad() << std::endl; - -``` - -出: - -```py - 5.5000 - 5.5000 -[ CPUFloatType{2} ] - -``` - -有关`torch::autograd::Function`的更多信息,请参见[其文档](https://pytorch.org/cppdocs/api/structtorch_1_1autograd_1_1_function.html)。 - -## 将 Autograd 代码从 Python 转换为 C++ - -在较高的层次上,在 C++ 中使用 Autograd 的最简单方法是先在 Python 中拥有可用的 Autograd 代码,然后使用下表将您的 Autograd 代码从 Python 转换为 C++: - -| Python | C++ | -| --- | --- | -| `torch.autograd.backward` | [`torch::autograd::backward`](https://pytorch.org/cppdocs/api/function_namespacetorch_1_1autograd_1afa9b5d4329085df4b6b3d4b4be48914b.html)) | -| `torch.autograd.grad` | [`torch::autograd::grad`](https://pytorch.org/cppdocs/api/function_namespacetorch_1_1autograd_1a1e03c42b14b40c306f9eb947ef842d9c.html)) | -| `torch.Tensor.detach` | [`torch::Tensor::detach`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor6detachEv)) | -| `torch.Tensor.detach_` | [`torch::Tensor::detach_`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor7detach_Ev)) | -| `torch.Tensor.backward` | [`torch::Tensor::backward`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor8backwardERK6Tensorbb)) | -| `torch.Tensor.register_hook` | [`torch::Tensor::register_hook`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4I0ENK2at6Tensor13register_hookE18hook_return_void_tI1TERR1T)) | -| `torch.Tensor.requires_grad` | [`torch::Tensor::requires_grad_`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor14requires_grad_Eb)) | -| `torch.Tensor.retain_grad` | [`torch::Tensor::retain_grad`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor11retain_gradEv)) | -| `torch.Tensor.grad` | [`torch::Tensor::grad`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor4gradEv)) | -| `torch.Tensor.grad_fn` | [`torch::Tensor::grad_fn`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor7grad_fnEv)) | -| `torch.Tensor.set_data` | [`torch::Tensor::set_data`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor8set_dataERK6Tensor)) | -| `torch.Tensor.data` | [`torch::Tensor::data`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor4dataEv)) | -| `torch.Tensor.output_nr` | [`torch::Tensor::output_nr`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor9output_nrEv)) | -| `torch.Tensor.is_leaf` | [`torch::Tensor::is_leaf`](https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor7is_leafEv)) | - -翻译后,您的大多数 Python Autograd 代码都应仅在 C++ 中工作。 如果不是这种情况,请在 [GitHub ISSUE](https://github.com/pytorch/pytorch/issues) 中提交错误报告,我们将尽快对其进行修复。 - -## 总结 - -现在,您应该对 PyTorch 的 C++ autograd API 有了一个很好的了解。 [您可以在此处找到本说明中显示的代码示例](https://github.com/pytorch/examples/tree/master/cpp/autograd)。 与往常一样,如果您遇到任何问题或疑问,可以使用我们的[论坛](https://discuss.pytorch.org/)或 [GitHub ISSUE](https://github.com/pytorch/pytorch/issues) 进行联系。 \ No newline at end of file diff --git a/pytorch/官方教程/50 Android应用.md b/pytorch/官方教程/50 Android应用.md new file mode 100644 index 00000000..e69de29b diff --git a/pytorch/官方教程/50.md b/pytorch/官方教程/50.md deleted file mode 100644 index 8328c266..00000000 --- a/pytorch/官方教程/50.md +++ /dev/null @@ -1,244 +0,0 @@ -# 在 C++ 中注册调度运算符 - -> 原文: - 
-调度器是 PyTorch 的内部组件,负责确定调用`torch::add`之类的函数时应实际运行哪些代码。 这并非小事,因为 PyTorch 操作需要处理许多相互“层叠”的横切关注点。 以下是其处理的一些示例:
-
-* 根据输入张量的设备,在运算符的 CPU 和 CUDA 实现之间切换。
-* 在运算符的自动微分和后端实现之间切换,这取决于是否需要自动微分处理。
-* 必要时应用自动类型转换(autocast)来实现自动混合精度。
-* 当运算符在`vmap`调用下运行时,应用批量规则。
-* 如果要跟踪导出的模型,则跟踪操作的执行。
-
-如果发现自己在[自定义运算符代码](torch_script_custom_ops)中手动编写`if`语句来处理这些情况,那么调度器 API 可以帮助您组织代码。 (相反,如果您的自定义运算符非常简单并且仅用于 CPU 推断,则可能不需要使用调度器,只需使用基本 API。)
-
-在本教程中,我们将描述如何构造自定义运算符注册以使用调度器来组织各种组件。 我们假设您熟悉如何[注册运算符](torch_script_custom_ops)以及如何编写[自定义自动微分函数](cpp_autograd)。
-
-## 定义模式和后端实现
-
-调度器背后的一般原理是将一个运算符的实现分为多个内核,每个内核为特定的*调度键*实现功能;例如,CPU,CUDA 或 Autograd。 调度器在您调用运算符时确定最高优先级的调度键是什么(这通过查看张量参数和某些线程本地状态来完成),并将控制权交给针对该调度键注册的内核。 最终结果是,当您调用运算符时,我们首先执行 Autograd 内核,然后根据传入的张量的设备类型,重新分派到 CPU 或 CUDA 内核。
-
-让我们看一下实现这一目标所涉及的各个部分。 首先,我们必须为所讨论的运算符定义模式(schema)。 与简单的`pybind11`样式的运算符注册不同,我们此时实际上并未提供运算符的实现;我们只提供一个模式字符串,指定所有其他内核都将遵守的运算符类型签名:
-
-```py
-TORCH_LIBRARY(myops, m) {
-  m.def("myadd(Tensor self, Tensor other) -> Tensor");
-}
-
-```
-
-接下来,我们需要实际提供此运算符的一些实现。 具体来说,这是一个非常简单的 CPU 实现:
-
-```py
-Tensor myadd_cpu(const Tensor& self_, const Tensor& other_) {
-  TORCH_CHECK(self_.sizes() == other_.sizes());
-  TORCH_INTERNAL_ASSERT(self_.device().type() == DeviceType::CPU);
-  TORCH_INTERNAL_ASSERT(other_.device().type() == DeviceType::CPU);
-  Tensor self = self_.contiguous();
-  Tensor other = other_.contiguous();
-  Tensor result = torch::empty(self.sizes(), self.options());
-  const float* self_ptr = self.data_ptr<float>();
-  const float* other_ptr = other.data_ptr<float>();
-  float* result_ptr = result.data_ptr<float>();
-  for (int64_t i = 0; i < result.numel(); i++) {
-    result_ptr[i] = self_ptr[i] + other_ptr[i];
-  }
-  return result;
-}
-
-```
-
-我们想将此函数注册为`myops::myadd`的实现。 但是,简单的注册方法(`def("myadd", myadd_cpu)`)会注册一个在所有情况下都会运行的内核,即使张量不是 CPU 张量! (在内部,我们将它们称为“全部捕获”内核,因为它们捕获所有情况。)为确保仅针对 CPU 张量运行`myadd_cpu`,我们可以使用`TORCH_LIBRARY_IMPL`宏:
-
-```py
-TORCH_LIBRARY_IMPL(myops, CPU, m) {
-  m.impl("myadd", myadd_cpu);
-}
-
-```
-
-通过`TORCH_LIBRARY_IMPL`,我们可以在特定的调度键(在本例中为 CPU)上为运算符注册实现。 每次对`impl`的调用都会将 CPU 内核与相应的运算符(我们先前在`TORCH_LIBRARY`块中定义)相关联。 如果我们还有 CUDA 实现`myadd_cuda`,我们可以将其注册在单独的`TORCH_LIBRARY_IMPL`块中:
-
-```py
-TORCH_LIBRARY_IMPL(myops, CUDA, m) {
-  m.impl("myadd", myadd_cuda);
-}
-
-```
-
-这些注册可以跨文件甚至跨库边界拆分;因此,例如,您可以将这两个`TORCH_LIBRARY_IMPL`块编译为单独的`myops_cpu`和`myops_cuda`动态库。 一般来说,您的注册结构如下所示:
-
-1. 单个`TORCH_LIBRARY`在集中位置列出名称空间中的每个自定义运算符。
-2. 每个调度键一个`TORCH_LIBRARY_IMPL`,用于注册该键的实现(例如,CPU 或 CUDA)。 如果愿意,还可以进一步细分,为每个运算符使用一个`TORCH_LIBRARY_IMPL`块。 如果每个运算符的实现都有一个单独的文件,但又不想在头文件中暴露这些运算符,这会很方便。 您只需将注册内容放入定义您的运算符的 cpp 文件中。
-
-注意
-
-您知道吗,您还可以为 PyTorch 中的现有核心运算符编写`TORCH_LIBRARY_IMPL`块? 这就是实现 XLA 对 PyTorch 的支持的方式:`torch_xla`库包含一个`TORCH_LIBRARY_IMPL`,为 XLA 调度键上的所有基本运算符提供实现。
-
-## 添加 Autograd 支持
-
-至此,我们有了一个同时具有 CPU 和 CUDA 实现的运算符。 我们如何为它添加 Autograd 支持? 您可能会猜到,我们将注册一个 Autograd 内核(类似于[自定义 Autograd 函数](cpp_autograd)教程中描述的内容)! 
-
-## Adding autograd support
-
-At this point, we have an operator with both CPU and CUDA implementations. How can we add autograd support to it? As you might guess, we will register an autograd kernel (similar to what's described in the [custom autograd function](cpp_autograd) tutorial)! However, there is a twist: unlike the CPU and CUDA kernels, the autograd kernel needs to *redispatch*: it needs to call back into the dispatcher to get to the final CPU or CUDA implementation.
-
-Thus, before we write the autograd kernel, let's write a *dispatching function* which calls into the dispatcher to find the right kernel for your operator. This function constitutes the public C++ API for your operators; in fact, all of the tensor functions in PyTorch's C++ API call the dispatcher in exactly the same way under the hood. Here is what the dispatching function looks like:
-
-```py
-Tensor myadd(const Tensor& self, const Tensor& other) {
-  static auto op = torch::Dispatcher::singleton()
-    .findSchemaOrThrow("myops::myadd", "")
-    .typed<decltype(myadd)>();
-  return op.call(self, other);
-}
-
-```
-
-Let's break this down:
-
-* In the first line, we look up a typed operator handle from the dispatcher corresponding to the operator that we are going to dispatch to. `findSchemaOrThrow` takes two arguments: the (namespace-qualified) name of the operator, and the overload name of the operator (typically just the empty string). `typed` casts the dynamically typed handle into a statically typed handle (doing a runtime test to make sure you've given the correct C++ type), so that we can make a normal C++ call on it. We pass it `decltype(myadd)` since the type of the dispatching function is the same as the type of the underlying kernels registered to the dispatcher.
-
-  For performance, this computation is done in a static variable, so that we only need to do the (slow) lookup once. If you misspelled the name of the operator you want to call, this lookup will raise an error the first time this function is called.
-
-* In the second line, we simply `call` the operator handle with all of the arguments passed into the dispatching function. This will actually invoke the dispatcher, and in the end control will be transferred to whatever kernel is appropriate for this call.
-
-With the dispatching function in hand, we can now write the autograd kernel:
-
-```py
-class MyAddFunction : public torch::autograd::Function<MyAddFunction> {
- public:
-  static Tensor forward(
-      AutogradContext *ctx, torch::Tensor self, torch::Tensor other) {
-    at::AutoNonVariableTypeMode g;
-    return myadd(self, other);
-  }
-
-  static tensor_list backward(AutogradContext *ctx, tensor_list grad_outputs) {
-    auto grad_output = grad_outputs[0];
-    return {grad_output, grad_output};
-  }
-};
-
-Tensor myadd_autograd(const Tensor& self, const Tensor& other) {
-  return MyAddFunction::apply(self, other)[0];
-}
-
-```
-
-The autograd function is written as usual using `torch::autograd::Function`, except that instead of directly writing the implementation in `forward()`, we:
-
-1. turn off autograd handling with the `at::AutoNonVariableTypeMode` RAII guard, and then
-2. call the dispatching function `myadd` to call back into the dispatcher.
-
-Without (1), your calls would loop infinitely (and overflow the stack), because `myadd` would send you right back to this function (as the highest-priority dispatch key would still be autograd). With (1), autograd is excluded from the set of dispatch keys under consideration, and we go to the next handlers, which are either CPU or CUDA.
-
-We can now register this function in the same way we registered the CPU/CUDA functions:
-
-```py
-TORCH_LIBRARY_IMPL(myops, Autograd, m) {
-  m.impl("myadd", myadd_autograd);
-}
-
-```
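-
-To see the redispatch in action end to end, here is a Python sketch that checks gradients flow through the custom op (again assuming the placeholder library path from before; `backward` returns the upstream gradient unchanged for both inputs, so both gradients come out as ones):
-
-```py
-import torch
-
-torch.ops.load_library("build/libmyops.so")  # placeholder path
-
-x = torch.randn(3, requires_grad=True)
-y = torch.randn(3, requires_grad=True)
-
-# The Autograd kernel records MyAddFunction, then redispatches to CPU.
-torch.ops.myops.myadd(x, y).sum().backward()
-
-print(x.grad)  # tensor([1., 1., 1.])
-print(y.grad)  # tensor([1., 1., 1.])
-```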
-
-## Going beyond autograd
-
-In some sense, the dispatcher isn't doing all that much: all it does is implement a glorified if-statement, along these lines:
-
-```py
-class MyAddFunction : ... {
-public:
-  static Tensor forward(
-      AutogradContext *ctx, torch::Tensor self, torch::Tensor other) {
-
-    if (self.device().type() == DeviceType::CPU) {
-      return add_cpu(self, other);
-    } else if (self.device().type() == DeviceType::CUDA) {
-      return add_cuda(self, other);
-    } else {
-      TORCH_CHECK(0, "Unsupported device ", self.device().type());
-    }
-  }
-  ...
-}
-
-```
-
-So why use the dispatcher? There are a few reasons:
-
-1. It is decentralized. You can assemble all the pieces of an operator (CPU, CUDA, Autograd) without having to write a single, centralized `if` statement that refers to all of them. Importantly, third parties can register extra implementations for other aspects without having to patch the original definition of the operator.
-2. It supports more dispatch keys than CPU, CUDA, and Autograd. You can see the full list of dispatch keys currently implemented in PyTorch in `c10/core/DispatchKey.h`. These dispatch keys implement a variety of optional functionality for operators, and if you decide you want your custom operator to support that functionality, all you have to do is register a kernel for the appropriate key.
-3. The dispatcher implements support for boxed fallback functions, which are functions that can be implemented once and applied to all operators in the system. Boxed fallbacks can be used to provide default behavior for a dispatch key; if you use the dispatcher to implement your operator, you also opt into fallbacks for all of these operations.
-
-Here are some specific dispatch keys you may need to define an operator for.
-
-### Autocast
-
-The Autocast dispatch key implements support for [automatic mixed precision (AMP)](https://pytorch.org/docs/stable/amp.html). An autocast wrapper kernel typically casts incoming `float16` or `float32` CUDA tensors to some preferred precision before running the op. For example, matmuls and convolutions on floating-point CUDA tensors usually run faster and use less memory in `float16` without impairing convergence. Autocast wrappers only have an effect in [autocast-enabled contexts](https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast).
-
-Here is an autocast wrapper for a hypothetical custom `mymatmul`, along with its registration:
-
-```py
-// Autocast-specific helper functions
-#include <ATen/autocast_mode.h>
-
-Tensor mymatmul_autocast(const Tensor& self, const Tensor& other) {
-  c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast);
-  return mymatmul(at::autocast::cached_cast(at::kHalf, self),
-                  at::autocast::cached_cast(at::kHalf, other));
-}
-
-TORCH_LIBRARY_IMPL(myops, Autocast, m) {
-  m.impl("mymatmul", mymatmul_autocast);
-}
-
-```
-
-`cached_cast(kHalf, tensor)` casts `tensor` to `float16` if `tensor` is CUDA and `float32`; otherwise it leaves `tensor` unchanged (see the [eligibility policy](https://pytorch.org/docs/stable/amp.html#op-eligibility) for natively autocasted ops). This ensures that whether the network calls `mymatmul` on any mixture of `float16` and `float32` CUDA tensors, `mymatmul` runs in `float16`. Meanwhile, calls to `mymatmul` with non-CUDA, integer-type, or `float64` inputs are unaffected. Using `cached_cast` to follow the native eligibility policy in your own autocast wrapper is recommended, but not required. For example, if you wanted to force `float16` execution for all input types, you could `return mymatmul(self.half(), other.half());` instead of using `cached_cast`.
-
-Notice that, just like with our autograd kernel, we exclude the `Autocast` key from dispatch before redispatching.
-
-By default, if no autocast wrapper is provided, we fall through directly to the regular operator implementation (no autocasting occurs). (We didn't use `myadd` for this example, since pointwise addition doesn't need autocasting and should just fall through.)
-
-When should an autocast wrapper be registered? Unfortunately, there aren't cut-and-dried rules for an op's preferred precision. You can get a sense of some native operators' preferred precisions by looking at the [cast lists](https://pytorch.org/docs/master/amp.html#op-specific-behavior). General guidance:
-
-* Ops that do reductions should probably execute in `float32`,
-* any op that does a convolution or GEMM under the hood should probably execute in `float16`, and
-* other ops with multiple floating-point tensor inputs should standardize them to a common precision (unless the implementation supports inputs with different precisions).
-
-If your custom op falls into the third category, the `promote_type` template helps figure out the widest floating-point type present among its input tensors, which is the safest choice for the execution type:
-
-```py
-#include <ATen/autocast_mode.h>
-
-Tensor my_multiple_input_op_autocast(const Tensor& t0, const Tensor& t1) {
-  c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast);
-  // The required at::kHalf argument is an optimistic initial guess.
-  auto exec_type = at::autocast::promote_type(at::kHalf, t0, t1);
-  return my_multiple_input_op(at::autocast::cached_cast(exec_type, t0),
-                              at::autocast::cached_cast(exec_type, t1));
-}
-
-```
-
-If your custom op is [autograd-enabled](#autograd-support), you only need to write and register an autocast wrapper under the same name the autograd wrapper is registered under. For example, if you wanted an autocast wrapper for the `myadd` function shown in the autograd section, all you'd need is:
-
-```py
-Tensor myadd_autocast(const Tensor& self, const Tensor& other) {
-  c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast);
-  return myadd(at::autocast::cached_cast(<desired dtype>, self),
-               at::autocast::cached_cast(<desired dtype>, other));
-}
-
-TORCH_LIBRARY_IMPL(myops, Autocast, m) {
-  m.impl("myadd", myadd_autocast);
-}
-
-```
-
-No separate gymnastics are necessary to make the backward method autocast compatible. However, the backward method defined in your custom autograd function runs in the same dtype autocast chose for the forward method, so you should pick a `<desired dtype>` suitable for both your forward and your backward method.
-
-### Batched
-
-Batched tensors allow you to write your code in a per-example manner, and have it automatically batched when run under a `vmap` call. The API for writing batching rules is currently under development, but once it is stabilized, you can add support for `vmap` to your operators by registering a kernel at the Batched dispatch key.
-
-### Tracer
-
-The Tracer dispatch key implements support for recording invocations of operators into a trace when you run `torch.jit.trace`. We intend to provide a boxed fallback that will implement tracing for arbitrary operations; see [issue #41478](https://github.com/pytorch/pytorch/issues/41478) to track progress.
\ No newline at end of file
diff --git a/pytorch/官方教程/51.md b/pytorch/官方教程/51.md
deleted file mode 100644
index e47eefb2..00000000
--- a/pytorch/官方教程/51.md
+++ /dev/null
@@ -1 +0,0 @@
-# Model Optimization
\ No newline at end of file
diff --git a/pytorch/官方教程/52.md b/pytorch/官方教程/52.md
deleted file mode 100644
index 0c8c7991..00000000
--- a/pytorch/官方教程/52.md
+++ /dev/null
@@ -1,285 +0,0 @@
-# Profiling your PyTorch Module
-
-> Source: 
-
-**Author:** [Suraj Subramanian](https://github.com/suraj813)
-
-PyTorch includes a profiler API that is useful to identify the time and memory costs of various PyTorch operations in your code. The profiler can be easily integrated into your code, and the results can be printed as a table or displayed in a JSON trace file.
-
-Note
-
-Profiler supports multithreaded models. The profiler runs in the same thread as the operation, but it will also profile child operators that might run in another thread. Concurrently-running profilers are scoped to their own thread to prevent mixing of results.
-
-Head over to [this recipe](https://pytorch.org/tutorials/recipes/recipes/profiler.html) for a quicker walkthrough of the Profiler API.
-
-* * *
-
-```py
-import torch
-import numpy as np
-from torch import nn
-import torch.autograd.profiler as profiler
-
-```
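-
-Before the full example, here is the profiler loop in its smallest form, to show the shape of the API (the tensor size and the op are arbitrary):
-
-```py
-import torch
-import torch.autograd.profiler as profiler
-
-x = torch.randn(1000, 1000)
-
-# Record every op executed inside the context manager.
-with profiler.profile() as prof:
-    torch.mm(x, x)
-
-# Aggregate the recorded events by operator name and print a summary table.
-print(prof.key_averages().table(sort_by="self_cpu_time_total", row_limit=5))
-```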
-
-## Performance debugging using Profiler
-
-Profiler can be useful to identify performance bottlenecks in your models. In this example, we build a custom module that performs two sub-tasks:
-
-* a linear transformation on the input, and
-* use of the transformation result to get indices on a mask tensor.
-
-We wrap the code for each sub-task in separate labelled context managers using `profiler.record_function("label")`. In the profiler output, the aggregate performance metrics of all operations in a sub-task will show up under its corresponding label.
-
-Note that using Profiler incurs some overhead, and it is best used only for investigating code. Remember to remove it if you are benchmarking runtimes.
-
-```py
-class MyModule(nn.Module):
-    def __init__(self, in_features: int, out_features: int, bias: bool = True):
-        super(MyModule, self).__init__()
-        self.linear = nn.Linear(in_features, out_features, bias)
-
-    def forward(self, input, mask):
-        with profiler.record_function("LINEAR PASS"):
-            out = self.linear(input)
-
-        with profiler.record_function("MASK INDICES"):
-            threshold = out.sum(axis=1).mean().item()
-            hi_idx = np.argwhere(mask.cpu().numpy() > threshold)
-            hi_idx = torch.from_numpy(hi_idx).cuda()
-
-        return out, hi_idx
-
-```
-
-## Profile the forward pass
-
-We initialize random input and mask tensors, and the model.
-
-Before we run the profiler, we warm up CUDA to ensure accurate performance benchmarking. We wrap the forward pass of our module in the `profiler.profile` context manager. The `with_stack=True` parameter appends the file and line number of the operation in the trace.
-
-Warning
-
-`with_stack=True` incurs an additional overhead, and is better suited for investigating code. Remember to remove it if you are benchmarking performance.
-
-```py
-model = MyModule(500, 10).cuda()
-input = torch.rand(128, 500).cuda()
-mask = torch.rand((500, 500, 500), dtype=torch.double).cuda()
-
-# warm-up
-model(input, mask)
-
-with profiler.profile(with_stack=True, profile_memory=True) as prof:
-    out, idx = model(input, mask)
-
-```
-
-## Print profiler results
-
-Finally, we print the profiler results. `profiler.key_averages` aggregates the results by operator name, and optionally by input shapes and/or stack trace events. Grouping by input shapes is useful to identify which tensor shapes are utilized by the model.
-
-Here, we use `group_by_stack_n=5`, which aggregates runtimes by the operation and its traceback (truncated to the most recent 5 events), and displays the events in the order they are registered. The table can also be sorted by passing a `sort_by` argument (refer to the [docs](https://pytorch.org/docs/stable/autograd.html#profiler) for valid sorting keys).
-
-Note
-
-When running Profiler in a notebook, you might see entries like `(13): forward` instead of filenames in the stack trace. These correspond to `(line number): calling-function`.
-
-```py
-print(prof.key_averages(group_by_stack_n=5).table(sort_by='self_cpu_time_total', row_limit=5))
-
-"""
-(Some columns are omitted)
-
--------------  ------------  ------------  ------------  ---------------------------------
-         Name    Self CPU %      Self CPU  Self CPU Mem   Source Location
--------------  ------------  ------------  ------------  ---------------------------------
- MASK INDICES        87.88%        5.212s    -953.67 Mb  /mnt/xarfuse/.../torch/au
-                                                         (10): forward
-                                                         /mnt/xarfuse/.../torch/nn
-                                                         (9): <module>
-                                                         /mnt/xarfuse/.../IPython/
-
-  aten::copy_        12.07%     715.848ms           0 b  (12): forward
-                                                         /mnt/xarfuse/.../torch/nn
-                                                         (9): <module>
-                                                         /mnt/xarfuse/.../IPython/
-                                                         /mnt/xarfuse/.../IPython/
-
-  LINEAR PASS         0.01%     350.151us         -20 b  /mnt/xarfuse/.../torch/au
-                                                         (7): forward
-                                                         /mnt/xarfuse/.../torch/nn
-                                                         (9): <module>
-                                                         /mnt/xarfuse/.../IPython/
-
-  aten::addmm         0.00%     293.342us           0 b  /mnt/xarfuse/.../torch/nn
-                                                         /mnt/xarfuse/.../torch/nn
-                                                         /mnt/xarfuse/.../torch/nn
-                                                         (8): forward
-                                                         /mnt/xarfuse/.../torch/nn
-
-   aten::mean         0.00%     235.095us           0 b  (11): forward
-                                                         /mnt/xarfuse/.../torch/nn
-                                                         (9): <module>
-                                                         /mnt/xarfuse/.../IPython/
-                                                         /mnt/xarfuse/.../IPython/
-
------------------------------  ------------  ----------  ----------------------------------
-Self CPU time total: 5.931s
-
-"""
-
-```
-
-## Improve memory performance
-
-Note that the most expensive operations, in terms of both memory and time, are at `forward (10)`, representing the operations within MASK INDICES. Let's tackle the memory consumption first. We can see that the `.to()` operation at line 12 consumes 953.67 Mb. This operation copies `mask` to the CPU. `mask` is initialized with a `torch.double` datatype. Can we reduce the memory footprint by casting it to `torch.float` instead?
-
-```py
-model = MyModule(500, 10).cuda()
-input = torch.rand(128, 500).cuda()
-mask = torch.rand((500, 500, 500), dtype=torch.float).cuda()
-
-# warm-up
-model(input, mask)
-
-with profiler.profile(with_stack=True, profile_memory=True) as prof:
-    out, idx = model(input, mask)
-
-print(prof.key_averages(group_by_stack_n=5).table(sort_by='self_cpu_time_total', row_limit=5))
-
-"""
-(Some columns are omitted)
-
------------------  ------------  ------------  ------------  --------------------------------
-             Name    Self CPU %      Self CPU  Self CPU Mem   Source Location
------------------  ------------  ------------  ------------  --------------------------------
-     MASK INDICES        93.61%        5.006s    -476.84 Mb  /mnt/xarfuse/.../torch/au
-                                                             (10): forward
-                                                             /mnt/xarfuse/ /torch/nn
-                                                             (9): <module>
-                                                             /mnt/xarfuse/.../IPython/
-
-      aten::copy_         6.34%     338.759ms           0 b  (12): forward
-                                                             /mnt/xarfuse/.../torch/nn
-                                                             (9): <module>
-                                                             /mnt/xarfuse/.../IPython/
-                                                             /mnt/xarfuse/.../IPython/
-
- aten::as_strided         0.01%     281.808us           0 b  (11): forward
-                                                             /mnt/xarfuse/.../torch/nn
-                                                             (9): <module>
-                                                             /mnt/xarfuse/.../IPython/
-                                                             /mnt/xarfuse/.../IPython/
-
-      aten::addmm         0.01%     275.721us           0 b  /mnt/xarfuse/.../torch/nn
-                                                             /mnt/xarfuse/.../torch/nn
-                                                             /mnt/xarfuse/.../torch/nn
-                                                             (8): forward
-                                                             /mnt/xarfuse/.../torch/nn
-
-     aten::_local         0.01%     268.650us           0 b  (11): forward
-    _scalar_dense                                            /mnt/xarfuse/.../torch/nn
-                                                             (9): <module>
-                                                             /mnt/xarfuse/.../IPython/
-                                                             /mnt/xarfuse/.../IPython/
-
------------------  ------------  ------------  ------------  --------------------------------
-Self CPU time total: 5.347s
-
-"""
-
-```
-
-The CPU memory footprint for this operation has halved.
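-
-As an aside, the same profile can also be inspected visually: as mentioned in the introduction, the collected events can be written out as JSON. A minimal sketch (`trace.json` is an arbitrary output path):
-
-```py
-# Export the events recorded in `prof` to a Chrome trace file; open it
-# at chrome://tracing (or in Perfetto) to see the op timeline.
-prof.export_chrome_trace("trace.json")
-```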
-
-## Improve time performance
-
-While the time consumed has also reduced a bit, it is still too high. It turns out that copying a matrix from CUDA to CPU is pretty expensive! The `aten::copy_` operator in `forward (12)` copies `mask` to the CPU so that it can use the NumPy `argwhere` function. The `aten::copy_` at `forward (13)` copies the array back to CUDA as a tensor. We can eliminate both of these if we use the `torch` function `nonzero()` here instead.
-
-```py
-class MyModule(nn.Module):
-    def __init__(self, in_features: int, out_features: int, bias: bool = True):
-        super(MyModule, self).__init__()
-        self.linear = nn.Linear(in_features, out_features, bias)
-
-    def forward(self, input, mask):
-        with profiler.record_function("LINEAR PASS"):
-            out = self.linear(input)
-
-        with profiler.record_function("MASK INDICES"):
-            threshold = out.sum(axis=1).mean()
-            hi_idx = (mask > threshold).nonzero(as_tuple=True)
-
-        return out, hi_idx
-
-model = MyModule(500, 10).cuda()
-input = torch.rand(128, 500).cuda()
-mask = torch.rand((500, 500, 500), dtype=torch.float).cuda()
-
-# warm-up
-model(input, mask)
-
-with profiler.profile(with_stack=True, profile_memory=True) as prof:
-    out, idx = model(input, mask)
-
-print(prof.key_averages(group_by_stack_n=5).table(sort_by='self_cpu_time_total', row_limit=5))
-
-"""
-(Some columns are omitted)
-
---------------  ------------  ------------  ------------  ---------------------------------
-          Name    Self CPU %      Self CPU  Self CPU Mem   Source Location
---------------  ------------  ------------  ------------  ---------------------------------
-      aten::gt        57.17%     129.089ms           0 b  (12): forward
-                                                          /mnt/xarfuse/.../torch/nn
-                                                          (25): <module>
-                                                          /mnt/xarfuse/.../IPython/
-                                                          /mnt/xarfuse/.../IPython/
-
- aten::nonzero        37.38%      84.402ms           0 b  (12): forward
-                                                          /mnt/xarfuse/.../torch/nn
-                                                          (25): <module>
-                                                          /mnt/xarfuse/.../IPython/
-                                                          /mnt/xarfuse/.../IPython/
-
-   INDEX SCORE         3.32%       7.491ms    -119.21 Mb  /mnt/xarfuse/.../torch/au
-                                                          (10): forward
-                                                          /mnt/xarfuse/.../torch/nn
-                                                          (25): <module>
-                                                          /mnt/xarfuse/.../IPython/
-
-aten::as_strided         0.20%     441.587us          0 b  (12): forward
-                                                          /mnt/xarfuse/.../torch/nn
-                                                          (25): <module>
-                                                          /mnt/xarfuse/.../IPython/
-                                                          /mnt/xarfuse/.../IPython/
-
- aten::nonzero
-        _numpy          0.18%     395.602us          0 b  (12): forward
-                                                          /mnt/xarfuse/.../torch/nn
-                                                          (25): <module>
-                                                          /mnt/xarfuse/.../IPython/
-                                                          /mnt/xarfuse/.../IPython/
---------------  ------------  ------------  ------------  ---------------------------------
-Self CPU time total: 225.801ms
-
-"""
-
-```
-
-## Further reading
-
-We have seen how Profiler can be used to investigate time and memory bottlenecks in PyTorch models. Read more about Profiler here:
-
-* [Profiler usage recipe](https://pytorch.org/tutorials/recipes/recipes/profiler.html)
-* [Profiling RPC-based workloads](https://pytorch.org/tutorials/recipes/distributed_rpc_profiling.html)
-* [Profiler API docs](https://pytorch.org/docs/stable/autograd.html?highlight=profiler#profiler)
-
-**Total running time of the script:** (0 minutes 0.000 seconds)
-
-[Download Python source code: `profiler.py`](../_downloads/390e82110dc76e71b26225b3f9020e14/profiler.py)
-
-[Download Jupyter notebook: `profiler.ipynb`](../_downloads/28071a0f69f5106129ad8a68a47af061/profiler.ipynb)
-
-[Gallery generated by Sphinx-Gallery](https://sphinx-gallery.readthedocs.io)
\ No newline at end of file
diff --git a/pytorch/官方教程/53.md b/pytorch/官方教程/53.md
deleted file mode 100644
index b3c631b5..00000000
--- a/pytorch/官方教程/53.md
+++ /dev/null
@@ -1,1711 +0,0 @@
-# Hyperparameter Tuning with Ray Tune
-
-> Source: 
-
-Hyperparameter tuning can make the difference between an average model and a highly accurate one. Often simple things like choosing a different learning rate or changing a network layer size can have a dramatic impact on your model performance.
-
-Fortunately, there are tools that help with finding the best combination of parameters. [Ray Tune](https://docs.ray.io/en/latest/tune.html) is an industry-standard tool for distributed hyperparameter tuning. Ray Tune includes the latest hyperparameter search algorithms, integrates with TensorBoard and other analysis libraries, and natively supports distributed training through [Ray's distributed machine learning engine](https://ray.io/).
-
-In this tutorial, we will show you how to integrate Ray Tune into your PyTorch training workflow. We will extend [this tutorial](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html) from the PyTorch documentation for training a CIFAR10 image classifier.
-
-As you will see, we only need to add some slight modifications (a minimal end-to-end sketch of the resulting loop follows this list). In particular, we need to
-
-1. wrap data loading and training in functions,
-2. make some network parameters configurable,
-3. add checkpointing (optional),
-4. and define the search space for the model tuning.
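-
-Before diving into the CIFAR10 example, here is what that loop looks like in its smallest form. The trainable function, its config, and the quadratic stand-in "loss" are purely illustrative:
-
-```py
-from ray import tune
-
-def trainable(config):
-    # config holds one sampled hyperparameter combination per trial.
-    for step in range(10):
-        loss = (config["lr"] - 0.01) ** 2  # stand-in for a real training step
-        tune.report(loss=loss)
-
-analysis = tune.run(
-    trainable,
-    config={"lr": tune.loguniform(1e-4, 1e-1)},
-    num_samples=5)
-
-print(analysis.get_best_config(metric="loss", mode="min"))
-```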
-
-To run this tutorial, please make sure the following packages are installed:
-
-* `ray[tune]`: the distributed hyperparameter tuning library
-* `torchvision`: for the data transformers
-
-## Setup / Imports
-
-Let's start with the imports:
-
-```py
-from functools import partial
-import numpy as np
-import os
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.optim as optim
-from torch.utils.data import random_split
-import torchvision
-import torchvision.transforms as transforms
-from ray import tune
-from ray.tune import CLIReporter
-from ray.tune.schedulers import ASHAScheduler
-
-```
-
-Most of the imports are needed for building the PyTorch model. Only the last three are for Ray Tune.
-
-## Data loaders
-
-We wrap the data loaders in their own function and pass a global data directory. This way we can share a data directory between different trials.
-
-```py
-def load_data(data_dir="./data"):
-    transform = transforms.Compose([
-        transforms.ToTensor(),
-        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
-    ])
-
-    trainset = torchvision.datasets.CIFAR10(
-        root=data_dir, train=True, download=True, transform=transform)
-
-    testset = torchvision.datasets.CIFAR10(
-        root=data_dir, train=False, download=True, transform=transform)
-
-    return trainset, testset
-
-```
-
-## Configurable neural network
-
-We can only tune those parameters that are configurable. In this example, we can specify the layer sizes of the fully connected layers:
-
-```py
-class Net(nn.Module):
-    def __init__(self, l1=120, l2=84):
-        super(Net, self).__init__()
-        self.conv1 = nn.Conv2d(3, 6, 5)
-        self.pool = nn.MaxPool2d(2, 2)
-        self.conv2 = nn.Conv2d(6, 16, 5)
-        self.fc1 = nn.Linear(16 * 5 * 5, l1)
-        self.fc2 = nn.Linear(l1, l2)
-        self.fc3 = nn.Linear(l2, 10)
-
-    def forward(self, x):
-        x = self.pool(F.relu(self.conv1(x)))
-        x = self.pool(F.relu(self.conv2(x)))
-        x = x.view(-1, 16 * 5 * 5)
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
-        x = self.fc3(x)
-        return x
-
-```
-
-## The train function
-
-Now it gets interesting, because we introduce some changes to the [example from the PyTorch documentation](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html).
-
-We wrap the training script in a function `train_cifar(config, checkpoint_dir=None, data_dir=None)`. As you can guess, the `config` parameter will receive the hyperparameters we would like to train with. The `checkpoint_dir` parameter is used to restore checkpoints. The `data_dir` specifies the directory where we load and store the data, so that multiple runs can share the same data source.
-
-```py
-net = Net(config["l1"], config["l2"])
-
-if checkpoint_dir:
-    model_state, optimizer_state = torch.load(
-        os.path.join(checkpoint_dir, "checkpoint"))
-    net.load_state_dict(model_state)
-    optimizer.load_state_dict(optimizer_state)
-
-```
-
-The learning rate of the optimizer is made configurable, too:
-
-```py
-optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9)
-
-```
-
-We also split the training data into a training and validation subset. We thus train on 80% of the data and calculate the validation loss on the remaining 20%. The batch sizes with which we iterate through the training and test sets are configurable as well.
-
-### Adding (multi) GPU support with DataParallel
-
-Image classification benefits largely from GPUs. Luckily, we can continue to use PyTorch's abstractions in Ray Tune. Thus, we can wrap our model in `nn.DataParallel` to support data-parallel training on multiple GPUs:
-
-```py
-device = "cpu"
-if torch.cuda.is_available():
-    device = "cuda:0"
-    if torch.cuda.device_count() > 1:
-        net = nn.DataParallel(net)
-net.to(device)
-
-```
-
-By using a `device` variable, we make sure that training also works when no GPU is available. PyTorch requires us to send our data to the GPU memory explicitly, like this:
-
-```py
-for i, data in enumerate(trainloader, 0):
-    inputs, labels = data
-    inputs, labels = inputs.to(device), labels.to(device)
-
-```
-
-The code now supports training on CPUs, on a single GPU, and on multiple GPUs. Notably, Ray also supports [fractional GPUs](https://docs.ray.io/en/master/using-ray-with-gpus.html#fractional-gpus), so we can share GPUs among trials, as long as the model still fits in GPU memory. We'll come back to that later.
-
-### Communicating with Ray Tune
-
-The most interesting part is the communication with Ray Tune:
-
-```py
-with tune.checkpoint_dir(epoch) as checkpoint_dir:
-    path = os.path.join(checkpoint_dir, "checkpoint")
-    torch.save((net.state_dict(), optimizer.state_dict()), path)
-
-tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
-
-```
-
-Here we first save a checkpoint and then report some metrics back to Ray Tune. Specifically, we send the validation loss and accuracy back to Ray Tune. Ray Tune can then use these metrics to decide which hyperparameter configuration leads to the best results. These metrics can also be used to stop badly performing trials early, in order to avoid wasting resources on those trials.
-
-Saving the checkpoint is optional; however, it is necessary if we want to use advanced schedulers like [Population Based Training](https://docs.ray.io/en/master/tune/tutorials/tune-advanced-tutorial.html). Also, by saving the checkpoint we can later load the trained models and validate them on a test set.
-
-### Full training function
-
-The full code example looks like this:
-
-```py
-def train_cifar(config, checkpoint_dir=None, data_dir=None):
-    net = Net(config["l1"], config["l2"])
-
-    device = "cpu"
-    if torch.cuda.is_available():
-        device = "cuda:0"
-        if torch.cuda.device_count() > 1:
-            net = nn.DataParallel(net)
-    net.to(device)
-
-    criterion = nn.CrossEntropyLoss()
-    optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9)
-
-    if checkpoint_dir:
-        model_state, optimizer_state = torch.load(
-            os.path.join(checkpoint_dir, "checkpoint"))
-        net.load_state_dict(model_state)
-        optimizer.load_state_dict(optimizer_state)
-
-    trainset, testset = load_data(data_dir)
-
-    test_abs = int(len(trainset) * 0.8)
-    train_subset, val_subset = random_split(
-        trainset, [test_abs, len(trainset) - test_abs])
-
-    trainloader = torch.utils.data.DataLoader(
-        train_subset,
-        batch_size=int(config["batch_size"]),
-        shuffle=True,
-        num_workers=8)
-    valloader = torch.utils.data.DataLoader(
-        val_subset,
-        batch_size=int(config["batch_size"]),
-        shuffle=True,
-        num_workers=8)
-
-    for epoch in range(10):  # loop over the dataset multiple times
-        running_loss = 0.0
-        epoch_steps = 0
-        for i, data in enumerate(trainloader, 0):
-            # get the inputs; data is a list of [inputs, labels]
-            inputs, labels = data
-            inputs, labels = inputs.to(device), labels.to(device)
-
-            # zero the parameter gradients
-            optimizer.zero_grad()
-
-            # forward + backward + optimize
-            outputs = net(inputs)
-            loss = criterion(outputs, labels)
-            loss.backward()
-            optimizer.step()
-
-            # print statistics
-            running_loss += loss.item()
-            epoch_steps += 1
-            if i % 2000 == 1999:  # print every 2000 mini-batches
-                print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1,
-                                                running_loss / epoch_steps))
-                running_loss = 0.0
-
-        # Validation loss
-        val_loss = 0.0
-        val_steps = 0
-        total = 0
-        correct = 0
-        for i, data in enumerate(valloader, 0):
-            with torch.no_grad():
-                inputs, labels = data
-                inputs, labels = inputs.to(device), labels.to(device)
-
-                outputs = net(inputs)
-                _, predicted = torch.max(outputs.data, 1)
-                total += labels.size(0)
-                correct += (predicted == labels).sum().item()
-
-                loss = criterion(outputs, labels)
-                val_loss += loss.cpu().numpy()
-                val_steps += 1
-
-        with tune.checkpoint_dir(epoch) as checkpoint_dir:
-            path = os.path.join(checkpoint_dir, "checkpoint")
-            torch.save((net.state_dict(), optimizer.state_dict()), path)
-
-        tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
-    print("Finished Training")
-
-```
-
-As you can see, most of the code is adapted directly from the original example.
-
-## Test set accuracy
-
-Commonly, the performance of a machine learning model is tested on a hold-out test set with data that has not been used for training the model. We also wrap this in a function:
-
-```py
-def test_accuracy(net, device="cpu"):
-    trainset, testset = load_data()
-
-    testloader = torch.utils.data.DataLoader(
-        testset, batch_size=4, shuffle=False, num_workers=2)
-
-    correct = 0
-    total = 0
-    with torch.no_grad():
-        for data in testloader:
-            images, labels = data
-            images, labels = images.to(device), labels.to(device)
-            outputs = net(images)
-            _, predicted = torch.max(outputs.data, 1)
-            total += labels.size(0)
-            correct += (predicted == labels).sum().item()
-
-    return correct / total
-
-```
-
-The function also expects a `device` parameter, so we can do the test set validation on a GPU.
-
-## Configuring the search space
-
-Lastly, we need to define Ray Tune's search space. Here is an example:
-
-```py
-config = {
-    "l1": tune.sample_from(lambda _: 2**np.random.randint(2, 9)),
-    "l2": tune.sample_from(lambda _: 2**np.random.randint(2, 9)),
-    "lr": tune.loguniform(1e-4, 1e-1),
-    "batch_size": tune.choice([2, 4, 8, 16])
-}
-
-```
-
-The `tune.sample_from()` function makes it possible to define your own sample methods to obtain hyperparameters. In this example, the `l1` and `l2` parameters should be powers of 2 between 4 and 256, so either 4, 8, 16, 32, 64, 128, or 256. The `lr` (learning rate) should be log-uniformly sampled between 0.0001 and 0.1. Lastly, the batch size is a choice among 2, 4, 8, and 16.
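-
-As a quick sanity check on the sampling (the loop count is arbitrary), note that `np.random.randint(2, 9)` draws exponents from 2 to 8 inclusive, which gives exactly the layer sizes listed above:
-
-```py
-import numpy as np
-
-# Collect the distinct values the l1/l2 lambdas can produce.
-sizes = sorted({2 ** np.random.randint(2, 9) for _ in range(1000)})
-print(sizes)  # [4, 8, 16, 32, 64, 128, 256]
-```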
-
-In each trial, Ray Tune will now randomly sample a combination of parameters from these search spaces. It will then train a number of models in parallel and find the best-performing one among them. We also use the `ASHAScheduler`, which will terminate badly performing trials early.
-
-We wrap the `train_cifar` function with `functools.partial` to set the constant `data_dir` parameter. We can also tell Ray Tune what resources should be available for each trial:
-
-```py
-gpus_per_trial = 2
-# ...
-result = tune.run(
-    partial(train_cifar, data_dir=data_dir),
-    resources_per_trial={"cpu": 8, "gpu": gpus_per_trial},
-    config=config,
-    num_samples=num_samples,
-    scheduler=scheduler,
-    progress_reporter=reporter,
-    checkpoint_at_end=True)
-
-```
-
-You can specify the number of CPUs, which are then available, e.g., to increase the `num_workers` of the PyTorch `DataLoader` instances. The selected number of GPUs is made visible to PyTorch in each trial. Trials do not have access to GPUs that haven't been requested for them, so you don't have to care about two trials using the same set of resources.
-
-Here we can also specify fractional GPUs, so something like `gpus_per_trial=0.5` is completely valid. The trials will then share GPUs among each other. You just have to make sure that the models still fit in GPU memory.
-
-After training the models, we will find the best-performing one and load the trained network from the checkpoint file. We then obtain the test set accuracy and report everything by printing.
-
-The full main function looks like this:
-
-```py
-def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
-    data_dir = os.path.abspath("./data")
-    load_data(data_dir)
-    config = {
-        "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
-        "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
-        "lr": tune.loguniform(1e-4, 1e-1),
-        "batch_size": tune.choice([2, 4, 8, 16])
-    }
-    scheduler = ASHAScheduler(
-        metric="loss",
-        mode="min",
-        max_t=max_num_epochs,
-        grace_period=1,
-        reduction_factor=2)
-    reporter = CLIReporter(
-        # parameter_columns=["l1", "l2", "lr", "batch_size"],
-        metric_columns=["loss", "accuracy", "training_iteration"])
-    result = tune.run(
-        partial(train_cifar, data_dir=data_dir),
-        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
-        config=config,
-        num_samples=num_samples,
-        scheduler=scheduler,
-        progress_reporter=reporter)
-
-    best_trial = result.get_best_trial("loss", "min", "last")
-    print("Best trial config: {}".format(best_trial.config))
-    print("Best trial final validation loss: {}".format(
-        best_trial.last_result["loss"]))
-    print("Best trial final validation accuracy: {}".format(
-        best_trial.last_result["accuracy"]))
-
-    best_trained_model = Net(best_trial.config["l1"], best_trial.config["l2"])
-    device = "cpu"
-    if torch.cuda.is_available():
-        device = "cuda:0"
-        if gpus_per_trial > 1:
-            best_trained_model = nn.DataParallel(best_trained_model)
-    best_trained_model.to(device)
-
-    best_checkpoint_dir = best_trial.checkpoint.value
-    model_state, optimizer_state = torch.load(os.path.join(
-        best_checkpoint_dir, "checkpoint"))
-    best_trained_model.load_state_dict(model_state)
-
-    test_acc = test_accuracy(best_trained_model, device)
-    print("Best trial test set accuracy: {}".format(test_acc))
-
-if __name__ == "__main__":
-    # You can change the number of GPUs per trial here:
-    main(num_samples=10, max_num_epochs=10, gpus_per_trial=0)
-
-```
-
-Out:
-
-```py
-Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /var/lib/jenkins/workspace/beginner_source/data/cifar-10-python.tar.gz
-Extracting /var/lib/jenkins/workspace/beginner_source/data/cifar-10-python.tar.gz to /var/lib/jenkins/workspace/beginner_source/data
-Files already downloaded and verified
-== Status ==
-Memory usage on this node: 4.0/240.1 GiB
-Using AsyncHyperBand: num_stopped=0
-Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
-Resources requested: 2/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects
-Result logdir: 
/var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 1/10 (1 RUNNING) -+---------------------+----------+-------+--------------+------+------+-------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | -|---------------------+----------+-------+--------------+------+------+-------------| -| DEFAULT_d3304_00000 | RUNNING | | 2 | 4 | 16 | 0.000111924 | -+---------------------+----------+-------+--------------+------+------+-------------+ - -[2m[36m(pid=1588)[0m Files already downloaded and verified -[2m[36m(pid=1568)[0m Files already downloaded and verified -[2m[36m(pid=1504)[0m Files already downloaded and verified -[2m[36m(pid=1575)[0m Files already downloaded and verified -[2m[36m(pid=1494)[0m Files already downloaded and verified -[2m[36m(pid=1572)[0m Files already downloaded and verified -[2m[36m(pid=1567)[0m Files already downloaded and verified -[2m[36m(pid=1585)[0m Files already downloaded and verified -[2m[36m(pid=1565)[0m Files already downloaded and verified -[2m[36m(pid=1505)[0m Files already downloaded and verified -[2m[36m(pid=1588)[0m Files already downloaded and verified -[2m[36m(pid=1568)[0m Files already downloaded and verified -[2m[36m(pid=1504)[0m Files already downloaded and verified -[2m[36m(pid=1575)[0m Files already downloaded and verified -[2m[36m(pid=1494)[0m Files already downloaded and verified -[2m[36m(pid=1572)[0m Files already downloaded and verified -[2m[36m(pid=1567)[0m Files already downloaded and verified -[2m[36m(pid=1565)[0m Files already downloaded and verified -[2m[36m(pid=1585)[0m Files already downloaded and verified -[2m[36m(pid=1505)[0m Files already downloaded and verified -[2m[36m(pid=1585)[0m [1, 2000] loss: 2.307 -[2m[36m(pid=1568)[0m [1, 2000] loss: 2.226 -[2m[36m(pid=1565)[0m [1, 2000] loss: 2.141 -[2m[36m(pid=1505)[0m [1, 2000] loss: 2.339 -[2m[36m(pid=1504)[0m [1, 2000] loss: 2.042 -[2m[36m(pid=1572)[0m [1, 2000] loss: 2.288 -[2m[36m(pid=1567)[0m [1, 2000] loss: 2.047 -[2m[36m(pid=1575)[0m [1, 2000] loss: 2.316 -[2m[36m(pid=1494)[0m [1, 2000] loss: 2.322 -[2m[36m(pid=1588)[0m [1, 2000] loss: 2.289 -[2m[36m(pid=1585)[0m [1, 4000] loss: 1.154 -[2m[36m(pid=1505)[0m [1, 4000] loss: 1.170 -[2m[36m(pid=1565)[0m [1, 4000] loss: 0.939 -[2m[36m(pid=1568)[0m [1, 4000] loss: 1.102 -[2m[36m(pid=1504)[0m [1, 4000] loss: 0.916 -[2m[36m(pid=1572)[0m [1, 4000] loss: 1.156 -Result for DEFAULT_d3304_00003: - accuracy: 0.226 - date: 2021-01-05_20-23-37 - done: false - experiment_id: d4b00469893d498ea65a729df202882a - experiment_tag: 3_batch_size=16,l1=32,l2=4,lr=0.0012023 - hostname: 1a844a452371 - iterations_since_restore: 1 - loss: 2.083958268547058 - node_ip: 172.17.0.2 - pid: 1588 - should_checkpoint: true - time_since_restore: 27.169169902801514 - time_this_iter_s: 27.169169902801514 - time_total_s: 27.169169902801514 - timestamp: 1609878217 - timesteps_since_restore: 0 - training_iteration: 1 - trial_id: d3304_00003 - -== Status == -Memory usage on this node: 9.2/240.1 GiB -Using AsyncHyperBand: num_stopped=0 -Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -2.083958268547058 -Resources requested: 20/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (10 RUNNING) -+---------------------+----------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | 
training_iteration | -|---------------------+----------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | | 2 | 4 | 16 | 0.000111924 | | | | -| DEFAULT_d3304_00001 | RUNNING | | 8 | 16 | 32 | 0.077467 | | | | -| DEFAULT_d3304_00002 | RUNNING | | 4 | 8 | 128 | 0.00436986 | | | | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 2.08396 | 0.226 | 1 | -| DEFAULT_d3304_00004 | RUNNING | | 4 | 16 | 32 | 0.016474 | | | | -| DEFAULT_d3304_00005 | RUNNING | | 4 | 128 | 64 | 0.00757252 | | | | -| DEFAULT_d3304_00006 | RUNNING | | 2 | 64 | 256 | 0.00177236 | | | | -| DEFAULT_d3304_00007 | RUNNING | | 8 | 8 | 8 | 0.000155891 | | | | -| DEFAULT_d3304_00008 | RUNNING | | 2 | 16 | 64 | 0.0310199 | | | | -| DEFAULT_d3304_00009 | RUNNING | | 4 | 4 | 32 | 0.0175239 | | | | -+---------------------+----------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1567)[0m [1, 4000] loss: 0.943 -[2m[36m(pid=1494)[0m [1, 4000] loss: 1.155 -[2m[36m(pid=1575)[0m [1, 4000] loss: 1.162 -[2m[36m(pid=1585)[0m [1, 6000] loss: 0.768 -[2m[36m(pid=1505)[0m [1, 6000] loss: 0.780 -[2m[36m(pid=1565)[0m [1, 6000] loss: 0.582 -[2m[36m(pid=1504)[0m [1, 6000] loss: 0.587 -[2m[36m(pid=1568)[0m [1, 6000] loss: 0.770 -[2m[36m(pid=1572)[0m [1, 6000] loss: 0.771 -[2m[36m(pid=1567)[0m [1, 6000] loss: 0.615 -Result for DEFAULT_d3304_00007: - accuracy: 0.1011 - date: 2021-01-05_20-23-51 - done: true - experiment_id: 947614a8c2a74533be128b929f363bd1 - experiment_tag: 7_batch_size=8,l1=8,l2=8,lr=0.00015589 - hostname: 1a844a452371 - iterations_since_restore: 1 - loss: 2.3038805620193483 - node_ip: 172.17.0.2 - pid: 1494 - should_checkpoint: true - time_since_restore: 41.69914960861206 - time_this_iter_s: 41.69914960861206 - time_total_s: 41.69914960861206 - timestamp: 1609878231 - timesteps_since_restore: 0 - training_iteration: 1 - trial_id: d3304_00007 - -== Status == -Memory usage on this node: 9.1/240.1 GiB -Using AsyncHyperBand: num_stopped=1 -Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -2.193919415283203 -Resources requested: 20/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (10 RUNNING) -+---------------------+----------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+----------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | | 2 | 4 | 16 | 0.000111924 | | | | -| DEFAULT_d3304_00001 | RUNNING | | 8 | 16 | 32 | 0.077467 | | | | -| DEFAULT_d3304_00002 | RUNNING | | 4 | 8 | 128 | 0.00436986 | | | | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 2.08396 | 0.226 | 1 | -| DEFAULT_d3304_00004 | RUNNING | | 4 | 16 | 32 | 0.016474 | | | | -| DEFAULT_d3304_00005 | RUNNING | | 4 | 128 | 64 | 0.00757252 | | | | -| DEFAULT_d3304_00006 | RUNNING | | 2 | 64 | 256 | 0.00177236 | | | | -| DEFAULT_d3304_00007 | RUNNING | 172.17.0.2:1494 | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | RUNNING | | 2 | 16 | 64 | 0.0310199 | | | | -| DEFAULT_d3304_00009 | RUNNING | | 4 | 4 | 32 | 
0.0175239 | | | | -+---------------------+----------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -Result for DEFAULT_d3304_00001: - accuracy: 0.1017 - date: 2021-01-05_20-23-51 - done: true - experiment_id: 26ac228b4b454584869f8490742cf253 - experiment_tag: 1_batch_size=8,l1=16,l2=32,lr=0.077467 - hostname: 1a844a452371 - iterations_since_restore: 1 - loss: 2.321864831352234 - node_ip: 172.17.0.2 - pid: 1575 - should_checkpoint: true - time_since_restore: 42.09821367263794 - time_this_iter_s: 42.09821367263794 - time_total_s: 42.09821367263794 - timestamp: 1609878231 - timesteps_since_restore: 0 - training_iteration: 1 - trial_id: d3304_00001 - -[2m[36m(pid=1588)[0m [2, 2000] loss: 1.916 -[2m[36m(pid=1585)[0m [1, 8000] loss: 0.576 -[2m[36m(pid=1505)[0m [1, 8000] loss: 0.584 -[2m[36m(pid=1565)[0m [1, 8000] loss: 0.422 -[2m[36m(pid=1504)[0m [1, 8000] loss: 0.433 -[2m[36m(pid=1572)[0m [1, 8000] loss: 0.578 -[2m[36m(pid=1568)[0m [1, 8000] loss: 0.580 -Result for DEFAULT_d3304_00003: - accuracy: 0.3762 - date: 2021-01-05_20-24-00 - done: false - experiment_id: d4b00469893d498ea65a729df202882a - experiment_tag: 3_batch_size=16,l1=32,l2=4,lr=0.0012023 - hostname: 1a844a452371 - iterations_since_restore: 2 - loss: 1.7041921138763427 - node_ip: 172.17.0.2 - pid: 1588 - should_checkpoint: true - time_since_restore: 50.74612545967102 - time_this_iter_s: 23.576955556869507 - time_total_s: 50.74612545967102 - timestamp: 1609878240 - timesteps_since_restore: 0 - training_iteration: 2 - trial_id: d3304_00003 - -== Status == -Memory usage on this node: 8.0/240.1 GiB -Using AsyncHyperBand: num_stopped=2 -Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.3038805620193483 -Resources requested: 16/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (8 RUNNING, 2 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | | 2 | 4 | 16 | 0.000111924 | | | | -| DEFAULT_d3304_00002 | RUNNING | | 4 | 8 | 128 | 0.00436986 | | | | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 1.70419 | 0.3762 | 2 | -| DEFAULT_d3304_00004 | RUNNING | | 4 | 16 | 32 | 0.016474 | | | | -| DEFAULT_d3304_00005 | RUNNING | | 4 | 128 | 64 | 0.00757252 | | | | -| DEFAULT_d3304_00006 | RUNNING | | 2 | 64 | 256 | 0.00177236 | | | | -| DEFAULT_d3304_00008 | RUNNING | | 2 | 16 | 64 | 0.0310199 | | | | -| DEFAULT_d3304_00009 | RUNNING | | 4 | 4 | 32 | 0.0175239 | | | | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1567)[0m [1, 8000] loss: 0.458 -[2m[36m(pid=1585)[0m [1, 10000] loss: 0.461 -[2m[36m(pid=1505)[0m [1, 10000] loss: 0.467 -[2m[36m(pid=1565)[0m [1, 10000] loss: 0.329 -[2m[36m(pid=1504)[0m [1, 10000] loss: 0.344 -[2m[36m(pid=1572)[0m 
[1, 10000] loss: 0.463 -[2m[36m(pid=1568)[0m [1, 10000] loss: 0.464 -[2m[36m(pid=1567)[0m [1, 10000] loss: 0.360 -[2m[36m(pid=1588)[0m [3, 2000] loss: 1.663 -Result for DEFAULT_d3304_00002: - accuracy: 0.3791 - date: 2021-01-05_20-24-18 - done: false - experiment_id: eaf4d25c9a0e46219afb226ed323095b - experiment_tag: 2_batch_size=4,l1=8,l2=128,lr=0.0043699 - hostname: 1a844a452371 - iterations_since_restore: 1 - loss: 1.6690538251161575 - node_ip: 172.17.0.2 - pid: 1504 - should_checkpoint: true - time_since_restore: 68.1856791973114 - time_this_iter_s: 68.1856791973114 - time_total_s: 68.1856791973114 - timestamp: 1609878258 - timesteps_since_restore: 0 - training_iteration: 1 - trial_id: d3304_00002 - -== Status == -Memory usage on this node: 8.0/240.1 GiB -Using AsyncHyperBand: num_stopped=2 -Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.193919415283203 -Resources requested: 16/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (8 RUNNING, 2 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | | 2 | 4 | 16 | 0.000111924 | | | | -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.66905 | 0.3791 | 1 | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 1.70419 | 0.3762 | 2 | -| DEFAULT_d3304_00004 | RUNNING | | 4 | 16 | 32 | 0.016474 | | | | -| DEFAULT_d3304_00005 | RUNNING | | 4 | 128 | 64 | 0.00757252 | | | | -| DEFAULT_d3304_00006 | RUNNING | | 2 | 64 | 256 | 0.00177236 | | | | -| DEFAULT_d3304_00008 | RUNNING | | 2 | 16 | 64 | 0.0310199 | | | | -| DEFAULT_d3304_00009 | RUNNING | | 4 | 4 | 32 | 0.0175239 | | | | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1585)[0m [1, 12000] loss: 0.384 -[2m[36m(pid=1505)[0m [1, 12000] loss: 0.390 -Result for DEFAULT_d3304_00009: - accuracy: 0.101 - date: 2021-01-05_20-24-19 - done: true - experiment_id: 471eb6134c2a45509b005af46861c602 - experiment_tag: 9_batch_size=4,l1=4,l2=32,lr=0.017524 - hostname: 1a844a452371 - iterations_since_restore: 1 - loss: 2.310983589553833 - node_ip: 172.17.0.2 - pid: 1572 - should_checkpoint: true - time_since_restore: 69.29919123649597 - time_this_iter_s: 69.29919123649597 - time_total_s: 69.29919123649597 - timestamp: 1609878259 - timesteps_since_restore: 0 - training_iteration: 1 - trial_id: d3304_00009 - -Result for DEFAULT_d3304_00004: - accuracy: 0.102 - date: 2021-01-05_20-24-19 - done: true - experiment_id: bd1f438c1fdd4a9ba98074d1cfd573fe - experiment_tag: 4_batch_size=4,l1=16,l2=32,lr=0.016474 - hostname: 1a844a452371 - iterations_since_restore: 1 - loss: 2.313420217037201 - node_ip: 172.17.0.2 - pid: 1568 - should_checkpoint: true - time_since_restore: 69.48366618156433 - time_this_iter_s: 69.48366618156433 - time_total_s: 
69.48366618156433 - timestamp: 1609878259 - timesteps_since_restore: 0 - training_iteration: 1 - trial_id: d3304_00004 - -[2m[36m(pid=1565)[0m [1, 12000] loss: 0.267 -Result for DEFAULT_d3304_00005: - accuracy: 0.3301 - date: 2021-01-05_20-24-22 - done: false - experiment_id: 738b3d315db548a7956646b2c07f1b0c - experiment_tag: 5_batch_size=4,l1=128,l2=64,lr=0.0075725 - hostname: 1a844a452371 - iterations_since_restore: 1 - loss: 1.8058318739891053 - node_ip: 172.17.0.2 - pid: 1567 - should_checkpoint: true - time_since_restore: 72.0806794166565 - time_this_iter_s: 72.0806794166565 - time_total_s: 72.0806794166565 - timestamp: 1609878262 - timesteps_since_restore: 0 - training_iteration: 1 - trial_id: d3304_00005 - -Result for DEFAULT_d3304_00003: - accuracy: 0.4242 - date: 2021-01-05_20-24-23 - done: false - experiment_id: d4b00469893d498ea65a729df202882a - experiment_tag: 3_batch_size=16,l1=32,l2=4,lr=0.0012023 - hostname: 1a844a452371 - iterations_since_restore: 3 - loss: 1.5498835063934326 - node_ip: 172.17.0.2 - pid: 1588 - should_checkpoint: true - time_since_restore: 73.29849410057068 - time_this_iter_s: 22.552368640899658 - time_total_s: 73.29849410057068 - timestamp: 1609878263 - timesteps_since_restore: 0 - training_iteration: 3 - trial_id: d3304_00003 - -== Status == -Memory usage on this node: 6.9/240.1 GiB -Using AsyncHyperBand: num_stopped=4 -Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.3038805620193483 -Resources requested: 12/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (6 RUNNING, 4 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | | 2 | 4 | 16 | 0.000111924 | | | | -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.66905 | 0.3791 | 1 | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 1.54988 | 0.4242 | 3 | -| DEFAULT_d3304_00005 | RUNNING | 172.17.0.2:1567 | 4 | 128 | 64 | 0.00757252 | 1.80583 | 0.3301 | 1 | -| DEFAULT_d3304_00006 | RUNNING | | 2 | 64 | 256 | 0.00177236 | | | | -| DEFAULT_d3304_00008 | RUNNING | | 2 | 16 | 64 | 0.0310199 | | | | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1585)[0m [1, 14000] loss: 0.329 -[2m[36m(pid=1504)[0m [2, 2000] loss: 1.708 -[2m[36m(pid=1565)[0m [1, 14000] loss: 0.225 -[2m[36m(pid=1505)[0m [1, 14000] loss: 0.334 -[2m[36m(pid=1567)[0m [2, 2000] loss: 1.803 -[2m[36m(pid=1585)[0m [1, 16000] loss: 0.288 -[2m[36m(pid=1588)[0m [4, 2000] loss: 1.541 -[2m[36m(pid=1504)[0m [2, 4000] loss: 0.840 -[2m[36m(pid=1565)[0m [1, 16000] loss: 0.198 -[2m[36m(pid=1505)[0m [1, 16000] loss: 0.292 
-[2m[36m(pid=1567)[0m [2, 4000] loss: 0.912 -Result for DEFAULT_d3304_00003: - accuracy: 0.4494 - date: 2021-01-05_20-24-44 - done: false - experiment_id: d4b00469893d498ea65a729df202882a - experiment_tag: 3_batch_size=16,l1=32,l2=4,lr=0.0012023 - hostname: 1a844a452371 - iterations_since_restore: 4 - loss: 1.4720179980278014 - node_ip: 172.17.0.2 - pid: 1588 - should_checkpoint: true - time_since_restore: 94.81268787384033 - time_this_iter_s: 21.514193773269653 - time_total_s: 94.81268787384033 - timestamp: 1609878284 - timesteps_since_restore: 0 - training_iteration: 4 - trial_id: d3304_00003 - -== Status == -Memory usage on this node: 6.9/240.1 GiB -Using AsyncHyperBand: num_stopped=4 -Bracket: Iter 8.000: None | Iter 4.000: -1.4720179980278014 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.3038805620193483 -Resources requested: 12/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (6 RUNNING, 4 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | | 2 | 4 | 16 | 0.000111924 | | | | -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.66905 | 0.3791 | 1 | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 1.47202 | 0.4494 | 4 | -| DEFAULT_d3304_00005 | RUNNING | 172.17.0.2:1567 | 4 | 128 | 64 | 0.00757252 | 1.80583 | 0.3301 | 1 | -| DEFAULT_d3304_00006 | RUNNING | | 2 | 64 | 256 | 0.00177236 | | | | -| DEFAULT_d3304_00008 | RUNNING | | 2 | 16 | 64 | 0.0310199 | | | | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1585)[0m [1, 18000] loss: 0.256 -[2m[36m(pid=1565)[0m [1, 18000] loss: 0.173 -[2m[36m(pid=1504)[0m [2, 6000] loss: 0.572 -[2m[36m(pid=1505)[0m [1, 18000] loss: 0.259 -[2m[36m(pid=1567)[0m [2, 6000] loss: 0.611 -[2m[36m(pid=1585)[0m [1, 20000] loss: 0.230 -[2m[36m(pid=1565)[0m [1, 20000] loss: 0.156 -[2m[36m(pid=1505)[0m [1, 20000] loss: 0.234 -[2m[36m(pid=1504)[0m [2, 8000] loss: 0.417 -[2m[36m(pid=1588)[0m [5, 2000] loss: 1.452 -[2m[36m(pid=1567)[0m [2, 8000] loss: 0.461 -Result for DEFAULT_d3304_00003: - accuracy: 0.4839 - date: 2021-01-05_20-25-06 - done: false - experiment_id: d4b00469893d498ea65a729df202882a - experiment_tag: 3_batch_size=16,l1=32,l2=4,lr=0.0012023 - hostname: 1a844a452371 - iterations_since_restore: 5 - loss: 1.4083827662467956 - node_ip: 172.17.0.2 - pid: 1588 - should_checkpoint: true - time_since_restore: 116.5817449092865 - time_this_iter_s: 21.769057035446167 - time_total_s: 116.5817449092865 - timestamp: 1609878306 - timesteps_since_restore: 0 - training_iteration: 5 - trial_id: d3304_00003 - -== Status == -Memory usage on this node: 6.9/240.1 GiB 
-Using AsyncHyperBand: num_stopped=4 -Bracket: Iter 8.000: None | Iter 4.000: -1.4720179980278014 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.3038805620193483 -Resources requested: 12/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (6 RUNNING, 4 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | | 2 | 4 | 16 | 0.000111924 | | | | -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.66905 | 0.3791 | 1 | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 1.40838 | 0.4839 | 5 | -| DEFAULT_d3304_00005 | RUNNING | 172.17.0.2:1567 | 4 | 128 | 64 | 0.00757252 | 1.80583 | 0.3301 | 1 | -| DEFAULT_d3304_00006 | RUNNING | | 2 | 64 | 256 | 0.00177236 | | | | -| DEFAULT_d3304_00008 | RUNNING | | 2 | 16 | 64 | 0.0310199 | | | | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1504)[0m [2, 10000] loss: 0.339 -Result for DEFAULT_d3304_00000: - accuracy: 0.1104 - date: 2021-01-05_20-25-10 - done: false - experiment_id: 454624d453954d46b33a1eb496e7ec53 - experiment_tag: 0_batch_size=2,l1=4,l2=16,lr=0.00011192 - hostname: 1a844a452371 - iterations_since_restore: 1 - loss: 2.2988875378131866 - node_ip: 172.17.0.2 - pid: 1585 - should_checkpoint: true - time_since_restore: 120.59520411491394 - time_this_iter_s: 120.59520411491394 - time_total_s: 120.59520411491394 - timestamp: 1609878310 - timesteps_since_restore: 0 - training_iteration: 1 - trial_id: d3304_00000 - -Result for DEFAULT_d3304_00008: - accuracy: 0.0983 - date: 2021-01-05_20-25-11 - done: true - experiment_id: 381603b190bc47a9b794321f7692695f - experiment_tag: 8_batch_size=2,l1=16,l2=64,lr=0.03102 - hostname: 1a844a452371 - iterations_since_restore: 1 - loss: 2.336980807876587 - node_ip: 172.17.0.2 - pid: 1505 - should_checkpoint: true - time_since_restore: 121.36707901954651 - time_this_iter_s: 121.36707901954651 - time_total_s: 121.36707901954651 - timestamp: 1609878311 - timesteps_since_restore: 0 - training_iteration: 1 - trial_id: d3304_00008 - -Result for DEFAULT_d3304_00006: - accuracy: 0.4586 - date: 2021-01-05_20-25-11 - done: false - experiment_id: d8bae0fc87134e6398fd0341279c1a1a - experiment_tag: 6_batch_size=2,l1=64,l2=256,lr=0.0017724 - hostname: 1a844a452371 - iterations_since_restore: 1 - loss: 1.5124113649010658 - node_ip: 172.17.0.2 - pid: 1565 - should_checkpoint: true - time_since_restore: 121.536208152771 - time_this_iter_s: 121.536208152771 - time_total_s: 121.536208152771 - timestamp: 1609878311 - timesteps_since_restore: 0 - training_iteration: 1 - trial_id: d3304_00006 - -== Status == -Memory usage on this node: 
6.6/240.1 GiB -Using AsyncHyperBand: num_stopped=5 -Bracket: Iter 8.000: None | Iter 4.000: -1.4720179980278014 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 10/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (5 RUNNING, 5 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | 172.17.0.2:1585 | 2 | 4 | 16 | 0.000111924 | 2.29889 | 0.1104 | 1 | -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.66905 | 0.3791 | 1 | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 1.40838 | 0.4839 | 5 | -| DEFAULT_d3304_00005 | RUNNING | 172.17.0.2:1567 | 4 | 128 | 64 | 0.00757252 | 1.80583 | 0.3301 | 1 | -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.51241 | 0.4586 | 1 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -Result for DEFAULT_d3304_00002: - accuracy: 0.4078 - date: 2021-01-05_20-25-16 - done: false - experiment_id: eaf4d25c9a0e46219afb226ed323095b - experiment_tag: 2_batch_size=4,l1=8,l2=128,lr=0.0043699 - hostname: 1a844a452371 - iterations_since_restore: 2 - loss: 1.6191314194440842 - node_ip: 172.17.0.2 - pid: 1504 - should_checkpoint: true - time_since_restore: 126.61185264587402 - time_this_iter_s: 58.42617344856262 - time_total_s: 126.61185264587402 - timestamp: 1609878316 - timesteps_since_restore: 0 - training_iteration: 2 - trial_id: d3304_00002 - -[2m[36m(pid=1567)[0m [2, 10000] loss: 0.371 -[2m[36m(pid=1585)[0m [2, 2000] loss: 2.298 -[2m[36m(pid=1565)[0m [2, 2000] loss: 1.466 -[2m[36m(pid=1588)[0m [6, 2000] loss: 1.383 -Result for DEFAULT_d3304_00005: - accuracy: 0.3647 - date: 2021-01-05_20-25-24 - done: true - experiment_id: 738b3d315db548a7956646b2c07f1b0c - experiment_tag: 5_batch_size=4,l1=128,l2=64,lr=0.0075725 - hostname: 1a844a452371 - iterations_since_restore: 2 - loss: 1.7739140236496926 - node_ip: 172.17.0.2 - pid: 1567 - should_checkpoint: true - time_since_restore: 134.1462869644165 - time_this_iter_s: 62.06560754776001 - time_total_s: 134.1462869644165 - timestamp: 1609878324 - timesteps_since_restore: 0 - training_iteration: 2 - trial_id: d3304_00005 - -== Status == -Memory usage on this node: 6.3/240.1 GiB -Using AsyncHyperBand: num_stopped=6 -Bracket: Iter 8.000: None | Iter 4.000: -1.4720179980278014 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 10/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: 
/var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (5 RUNNING, 5 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | 172.17.0.2:1585 | 2 | 4 | 16 | 0.000111924 | 2.29889 | 0.1104 | 1 | -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.61913 | 0.4078 | 2 | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 1.40838 | 0.4839 | 5 | -| DEFAULT_d3304_00005 | RUNNING | 172.17.0.2:1567 | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.51241 | 0.4586 | 1 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1504)[0m [3, 2000] loss: 1.656 -Result for DEFAULT_d3304_00003: - accuracy: 0.5061 - date: 2021-01-05_20-25-27 - done: false - experiment_id: d4b00469893d498ea65a729df202882a - experiment_tag: 3_batch_size=16,l1=32,l2=4,lr=0.0012023 - hostname: 1a844a452371 - iterations_since_restore: 6 - loss: 1.3623717227935792 - node_ip: 172.17.0.2 - pid: 1588 - should_checkpoint: true - time_since_restore: 137.95851016044617 - time_this_iter_s: 21.376765251159668 - time_total_s: 137.95851016044617 - timestamp: 1609878327 - timesteps_since_restore: 0 - training_iteration: 6 - trial_id: d3304_00003 - -[2m[36m(pid=1585)[0m [2, 4000] loss: 1.147 -[2m[36m(pid=1565)[0m [2, 4000] loss: 0.749 -[2m[36m(pid=1504)[0m [3, 4000] loss: 0.838 -[2m[36m(pid=1585)[0m [2, 6000] loss: 0.760 -[2m[36m(pid=1565)[0m [2, 6000] loss: 0.498 -[2m[36m(pid=1588)[0m [7, 2000] loss: 1.326 -[2m[36m(pid=1504)[0m [3, 6000] loss: 0.560 -[2m[36m(pid=1585)[0m [2, 8000] loss: 0.561 -Result for DEFAULT_d3304_00003: - accuracy: 0.5209 - date: 2021-01-05_20-25-48 - done: false - experiment_id: d4b00469893d498ea65a729df202882a - experiment_tag: 3_batch_size=16,l1=32,l2=4,lr=0.0012023 - hostname: 1a844a452371 - iterations_since_restore: 7 - loss: 1.316757419013977 - node_ip: 172.17.0.2 - pid: 1588 - should_checkpoint: true - time_since_restore: 158.4953932762146 - time_this_iter_s: 20.536883115768433 - time_total_s: 158.4953932762146 - timestamp: 1609878348 - timesteps_since_restore: 0 - training_iteration: 7 - trial_id: d3304_00003 - -== Status == -Memory usage on this node: 5.8/240.1 GiB -Using AsyncHyperBand: num_stopped=6 -Bracket: Iter 8.000: None | Iter 4.000: -1.4720179980278014 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 8/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (4 RUNNING, 6 TERMINATED) 
-+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | 172.17.0.2:1585 | 2 | 4 | 16 | 0.000111924 | 2.29889 | 0.1104 | 1 | -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.61913 | 0.4078 | 2 | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 1.31676 | 0.5209 | 7 | -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.51241 | 0.4586 | 1 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1565)[0m [2, 8000] loss: 0.372 -[2m[36m(pid=1504)[0m [3, 8000] loss: 0.416 -[2m[36m(pid=1585)[0m [2, 10000] loss: 0.434 -[2m[36m(pid=1565)[0m [2, 10000] loss: 0.292 -[2m[36m(pid=1588)[0m [8, 2000] loss: 1.278 -[2m[36m(pid=1504)[0m [3, 10000] loss: 0.333 -[2m[36m(pid=1585)[0m [2, 12000] loss: 0.347 -[2m[36m(pid=1565)[0m [2, 12000] loss: 0.245 -Result for DEFAULT_d3304_00003: - accuracy: 0.5406 - date: 2021-01-05_20-26-08 - done: false - experiment_id: d4b00469893d498ea65a729df202882a - experiment_tag: 3_batch_size=16,l1=32,l2=4,lr=0.0012023 - hostname: 1a844a452371 - iterations_since_restore: 8 - loss: 1.267511115884781 - node_ip: 172.17.0.2 - pid: 1588 - should_checkpoint: true - time_since_restore: 179.13841199874878 - time_this_iter_s: 20.64301872253418 - time_total_s: 179.13841199874878 - timestamp: 1609878368 - timesteps_since_restore: 0 - training_iteration: 8 - trial_id: d3304_00003 - -== Status == -Memory usage on this node: 5.8/240.1 GiB -Using AsyncHyperBand: num_stopped=6 -Bracket: Iter 8.000: -1.267511115884781 | Iter 4.000: -1.4720179980278014 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 8/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (4 RUNNING, 6 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | 172.17.0.2:1585 | 2 | 4 | 16 | 0.000111924 | 2.29889 | 0.1104 | 1 | -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.61913 | 0.4078 | 2 | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 1.26751 | 0.5406 | 8 | -| 
DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.51241 | 0.4586 | 1 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -Result for DEFAULT_d3304_00002: - accuracy: 0.3997 - date: 2021-01-05_20-26-11 - done: false - experiment_id: eaf4d25c9a0e46219afb226ed323095b - experiment_tag: 2_batch_size=4,l1=8,l2=128,lr=0.0043699 - hostname: 1a844a452371 - iterations_since_restore: 3 - loss: 1.7084122330278158 - node_ip: 172.17.0.2 - pid: 1504 - should_checkpoint: true - time_since_restore: 182.02509140968323 - time_this_iter_s: 55.413238763809204 - time_total_s: 182.02509140968323 - timestamp: 1609878371 - timesteps_since_restore: 0 - training_iteration: 3 - trial_id: d3304_00002 - -[2m[36m(pid=1585)[0m [2, 14000] loss: 0.290 -[2m[36m(pid=1565)[0m [2, 14000] loss: 0.213 -[2m[36m(pid=1504)[0m [4, 2000] loss: 1.653 -[2m[36m(pid=1588)[0m [9, 2000] loss: 1.245 -[2m[36m(pid=1585)[0m [2, 16000] loss: 0.244 -[2m[36m(pid=1565)[0m [2, 16000] loss: 0.186 -Result for DEFAULT_d3304_00003: - accuracy: 0.5409 - date: 2021-01-05_20-26-29 - done: false - experiment_id: d4b00469893d498ea65a729df202882a - experiment_tag: 3_batch_size=16,l1=32,l2=4,lr=0.0012023 - hostname: 1a844a452371 - iterations_since_restore: 9 - loss: 1.2721123942375183 - node_ip: 172.17.0.2 - pid: 1588 - should_checkpoint: true - time_since_restore: 199.56540870666504 - time_this_iter_s: 20.42699670791626 - time_total_s: 199.56540870666504 - timestamp: 1609878389 - timesteps_since_restore: 0 - training_iteration: 9 - trial_id: d3304_00003 - -== Status == -Memory usage on this node: 5.8/240.1 GiB -Using AsyncHyperBand: num_stopped=6 -Bracket: Iter 8.000: -1.267511115884781 | Iter 4.000: -1.4720179980278014 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 8/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (4 RUNNING, 6 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | 172.17.0.2:1585 | 2 | 4 | 16 | 0.000111924 | 2.29889 | 0.1104 | 1 | -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.70841 | 0.3997 | 3 | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 1.27211 | 0.5409 | 9 | -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.51241 | 0.4586 | 1 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 
32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1504)[0m [4, 4000] loss: 0.842 -[2m[36m(pid=1585)[0m [2, 18000] loss: 0.214 -[2m[36m(pid=1565)[0m [2, 18000] loss: 0.159 -[2m[36m(pid=1504)[0m [4, 6000] loss: 0.561 -[2m[36m(pid=1585)[0m [2, 20000] loss: 0.191 -[2m[36m(pid=1588)[0m [10, 2000] loss: 1.210 -[2m[36m(pid=1565)[0m [2, 20000] loss: 0.143 -Result for DEFAULT_d3304_00003: - accuracy: 0.5619 - date: 2021-01-05_20-26-50 - done: true - experiment_id: d4b00469893d498ea65a729df202882a - experiment_tag: 3_batch_size=16,l1=32,l2=4,lr=0.0012023 - hostname: 1a844a452371 - iterations_since_restore: 10 - loss: 1.2222298237800597 - node_ip: 172.17.0.2 - pid: 1588 - should_checkpoint: true - time_since_restore: 220.31984639167786 - time_this_iter_s: 20.754437685012817 - time_total_s: 220.31984639167786 - timestamp: 1609878410 - timesteps_since_restore: 0 - training_iteration: 10 - trial_id: d3304_00003 - -== Status == -Memory usage on this node: 5.8/240.1 GiB -Using AsyncHyperBand: num_stopped=7 -Bracket: Iter 8.000: -1.267511115884781 | Iter 4.000: -1.4720179980278014 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 8/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (4 RUNNING, 6 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | 172.17.0.2:1585 | 2 | 4 | 16 | 0.000111924 | 2.29889 | 0.1104 | 1 | -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.70841 | 0.3997 | 3 | -| DEFAULT_d3304_00003 | RUNNING | 172.17.0.2:1588 | 16 | 32 | 4 | 0.00120234 | 1.22223 | 0.5619 | 10 | -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.51241 | 0.4586 | 1 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1504)[0m [4, 8000] loss: 0.422 -Result for DEFAULT_d3304_00000: - accuracy: 0.2724 - date: 2021-01-05_20-26-55 - done: true - experiment_id: 454624d453954d46b33a1eb496e7ec53 - experiment_tag: 
0_batch_size=2,l1=4,l2=16,lr=0.00011192 - hostname: 1a844a452371 - iterations_since_restore: 2 - loss: 1.8605026947617531 - node_ip: 172.17.0.2 - pid: 1585 - should_checkpoint: true - time_since_restore: 225.84529209136963 - time_this_iter_s: 105.25008797645569 - time_total_s: 225.84529209136963 - timestamp: 1609878415 - timesteps_since_restore: 0 - training_iteration: 2 - trial_id: d3304_00000 - -== Status == -Memory usage on this node: 5.3/240.1 GiB -Using AsyncHyperBand: num_stopped=8 -Bracket: Iter 8.000: -1.267511115884781 | Iter 4.000: -1.4720179980278014 | Iter 2.000: -1.7390530687630177 | Iter 1.000: -2.301384049916267 -Resources requested: 6/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (3 RUNNING, 7 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | RUNNING | 172.17.0.2:1585 | 2 | 4 | 16 | 0.000111924 | 1.8605 | 0.2724 | 2 | -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.70841 | 0.3997 | 3 | -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.51241 | 0.4586 | 1 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00003 | TERMINATED | | 16 | 32 | 4 | 0.00120234 | 1.22223 | 0.5619 | 10 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -Result for DEFAULT_d3304_00006: - accuracy: 0.5007 - date: 2021-01-05_20-26-57 - done: false - experiment_id: d8bae0fc87134e6398fd0341279c1a1a - experiment_tag: 6_batch_size=2,l1=64,l2=256,lr=0.0017724 - hostname: 1a844a452371 - iterations_since_restore: 2 - loss: 1.3979384284215048 - node_ip: 172.17.0.2 - pid: 1565 - should_checkpoint: true - time_since_restore: 227.80454421043396 - time_this_iter_s: 106.26833605766296 - time_total_s: 227.80454421043396 - timestamp: 1609878417 - timesteps_since_restore: 0 - training_iteration: 2 - trial_id: d3304_00006 - -[2m[36m(pid=1504)[0m [4, 10000] loss: 0.335 -Result for DEFAULT_d3304_00002: - accuracy: 0.3849 - date: 2021-01-05_20-27-06 - done: true - experiment_id: eaf4d25c9a0e46219afb226ed323095b - experiment_tag: 2_batch_size=4,l1=8,l2=128,lr=0.0043699 - hostname: 1a844a452371 - iterations_since_restore: 4 - loss: 1.720731588792801 - node_ip: 172.17.0.2 - pid: 1504 - should_checkpoint: true - time_since_restore: 236.71593952178955 - time_this_iter_s: 54.69084811210632 - time_total_s: 236.71593952178955 - timestamp: 1609878426 - timesteps_since_restore: 0 - training_iteration: 4 - trial_id: d3304_00002 - -== Status == -Memory usage on this node: 4.7/240.1 GiB -Using 
AsyncHyperBand: num_stopped=9 -Bracket: Iter 8.000: -1.267511115884781 | Iter 4.000: -1.5963747934103012 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 4/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (2 RUNNING, 8 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00002 | RUNNING | 172.17.0.2:1504 | 4 | 8 | 128 | 0.00436986 | 1.72073 | 0.3849 | 4 | -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.39794 | 0.5007 | 2 | -| DEFAULT_d3304_00000 | TERMINATED | | 2 | 4 | 16 | 0.000111924 | 1.8605 | 0.2724 | 2 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00003 | TERMINATED | | 16 | 32 | 4 | 0.00120234 | 1.22223 | 0.5619 | 10 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1565)[0m [3, 2000] loss: 1.373 -[2m[36m(pid=1565)[0m [3, 4000] loss: 0.696 -[2m[36m(pid=1565)[0m [3, 6000] loss: 0.466 -[2m[36m(pid=1565)[0m [3, 8000] loss: 0.357 -[2m[36m(pid=1565)[0m [3, 10000] loss: 0.283 -[2m[36m(pid=1565)[0m [3, 12000] loss: 0.241 -[2m[36m(pid=1565)[0m [3, 14000] loss: 0.203 -[2m[36m(pid=1565)[0m [3, 16000] loss: 0.178 -[2m[36m(pid=1565)[0m [3, 18000] loss: 0.160 -[2m[36m(pid=1565)[0m [3, 20000] loss: 0.142 -Result for DEFAULT_d3304_00006: - accuracy: 0.5095 - date: 2021-01-05_20-28-36 - done: false - experiment_id: d8bae0fc87134e6398fd0341279c1a1a - experiment_tag: 6_batch_size=2,l1=64,l2=256,lr=0.0017724 - hostname: 1a844a452371 - iterations_since_restore: 3 - loss: 1.4272501501079649 - node_ip: 172.17.0.2 - pid: 1565 - should_checkpoint: true - time_since_restore: 326.1525847911835 - time_this_iter_s: 98.34804058074951 - time_total_s: 326.1525847911835 - timestamp: 1609878516 - timesteps_since_restore: 0 - training_iteration: 3 - trial_id: d3304_00006 - -== Status == -Memory usage on this node: 4.2/240.1 GiB -Using AsyncHyperBand: num_stopped=9 -Bracket: Iter 8.000: -1.267511115884781 | Iter 4.000: -1.5963747934103012 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 2/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (1 RUNNING, 9 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | 
-|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.42725 | 0.5095 | 3 | -| DEFAULT_d3304_00000 | TERMINATED | | 2 | 4 | 16 | 0.000111924 | 1.8605 | 0.2724 | 2 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00002 | TERMINATED | | 4 | 8 | 128 | 0.00436986 | 1.72073 | 0.3849 | 4 | -| DEFAULT_d3304_00003 | TERMINATED | | 16 | 32 | 4 | 0.00120234 | 1.22223 | 0.5619 | 10 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1565)[0m [4, 2000] loss: 1.320 -[2m[36m(pid=1565)[0m [4, 4000] loss: 0.701 -[2m[36m(pid=1565)[0m [4, 6000] loss: 0.454 -[2m[36m(pid=1565)[0m [4, 8000] loss: 0.345 -[2m[36m(pid=1565)[0m [4, 10000] loss: 0.276 -[2m[36m(pid=1565)[0m [4, 12000] loss: 0.234 -[2m[36m(pid=1565)[0m [4, 14000] loss: 0.199 -[2m[36m(pid=1565)[0m [4, 16000] loss: 0.170 -[2m[36m(pid=1565)[0m [4, 18000] loss: 0.151 -[2m[36m(pid=1565)[0m [4, 20000] loss: 0.144 -Result for DEFAULT_d3304_00006: - accuracy: 0.4749 - date: 2021-01-05_20-30-15 - done: false - experiment_id: d8bae0fc87134e6398fd0341279c1a1a - experiment_tag: 6_batch_size=2,l1=64,l2=256,lr=0.0017724 - hostname: 1a844a452371 - iterations_since_restore: 4 - loss: 1.4950430885698218 - node_ip: 172.17.0.2 - pid: 1565 - should_checkpoint: true - time_since_restore: 425.3827154636383 - time_this_iter_s: 99.23013067245483 - time_total_s: 425.3827154636383 - timestamp: 1609878615 - timesteps_since_restore: 0 - training_iteration: 4 - trial_id: d3304_00006 - -== Status == -Memory usage on this node: 4.1/240.1 GiB -Using AsyncHyperBand: num_stopped=9 -Bracket: Iter 8.000: -1.267511115884781 | Iter 4.000: -1.4950430885698218 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 2/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (1 RUNNING, 9 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.49504 | 0.4749 | 4 | -| DEFAULT_d3304_00000 | TERMINATED | | 2 | 4 | 16 | 0.000111924 | 1.8605 | 0.2724 | 2 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00002 | TERMINATED | | 4 | 8 | 128 | 0.00436986 | 1.72073 | 0.3849 | 4 | -| DEFAULT_d3304_00003 | TERMINATED | | 16 | 32 | 4 | 0.00120234 | 1.22223 | 0.5619 | 10 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 
0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1565)[0m [5, 2000] loss: 1.314 -[2m[36m(pid=1565)[0m [5, 4000] loss: 0.663 -[2m[36m(pid=1565)[0m [5, 6000] loss: 0.453 -[2m[36m(pid=1565)[0m [5, 8000] loss: 0.341 -[2m[36m(pid=1565)[0m [5, 10000] loss: 0.278 -[2m[36m(pid=1565)[0m [5, 12000] loss: 0.235 -[2m[36m(pid=1565)[0m [5, 14000] loss: 0.197 -[2m[36m(pid=1565)[0m [5, 16000] loss: 0.173 -[2m[36m(pid=1565)[0m [5, 18000] loss: 0.155 -[2m[36m(pid=1565)[0m [5, 20000] loss: 0.137 -Result for DEFAULT_d3304_00006: - accuracy: 0.531 - date: 2021-01-05_20-31-56 - done: false - experiment_id: d8bae0fc87134e6398fd0341279c1a1a - experiment_tag: 6_batch_size=2,l1=64,l2=256,lr=0.0017724 - hostname: 1a844a452371 - iterations_since_restore: 5 - loss: 1.373500657767952 - node_ip: 172.17.0.2 - pid: 1565 - should_checkpoint: true - time_since_restore: 526.6667892932892 - time_this_iter_s: 101.28407382965088 - time_total_s: 526.6667892932892 - timestamp: 1609878716 - timesteps_since_restore: 0 - training_iteration: 5 - trial_id: d3304_00006 - -== Status == -Memory usage on this node: 4.1/240.1 GiB -Using AsyncHyperBand: num_stopped=9 -Bracket: Iter 8.000: -1.267511115884781 | Iter 4.000: -1.4950430885698218 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 2/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (1 RUNNING, 9 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.3735 | 0.531 | 5 | -| DEFAULT_d3304_00000 | TERMINATED | | 2 | 4 | 16 | 0.000111924 | 1.8605 | 0.2724 | 2 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00002 | TERMINATED | | 4 | 8 | 128 | 0.00436986 | 1.72073 | 0.3849 | 4 | -| DEFAULT_d3304_00003 | TERMINATED | | 16 | 32 | 4 | 0.00120234 | 1.22223 | 0.5619 | 10 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1565)[0m [6, 2000] loss: 1.325 -[2m[36m(pid=1565)[0m [6, 4000] loss: 0.668 -[2m[36m(pid=1565)[0m [6, 6000] loss: 
0.457 -[2m[36m(pid=1565)[0m [6, 8000] loss: 0.338 -[2m[36m(pid=1565)[0m [6, 10000] loss: 0.283 -[2m[36m(pid=1565)[0m [6, 12000] loss: 0.232 -[2m[36m(pid=1565)[0m [6, 14000] loss: 0.198 -[2m[36m(pid=1565)[0m [6, 16000] loss: 0.175 -[2m[36m(pid=1565)[0m [6, 18000] loss: 0.149 -[2m[36m(pid=1565)[0m [6, 20000] loss: 0.140 -Result for DEFAULT_d3304_00006: - accuracy: 0.4852 - date: 2021-01-05_20-33-55 - done: false - experiment_id: d8bae0fc87134e6398fd0341279c1a1a - experiment_tag: 6_batch_size=2,l1=64,l2=256,lr=0.0017724 - hostname: 1a844a452371 - iterations_since_restore: 6 - loss: 1.5015573524537555 - node_ip: 172.17.0.2 - pid: 1565 - should_checkpoint: true - time_since_restore: 645.3050956726074 - time_this_iter_s: 118.63830637931824 - time_total_s: 645.3050956726074 - timestamp: 1609878835 - timesteps_since_restore: 0 - training_iteration: 6 - trial_id: d3304_00006 - -== Status == -Memory usage on this node: 4.1/240.1 GiB -Using AsyncHyperBand: num_stopped=9 -Bracket: Iter 8.000: -1.267511115884781 | Iter 4.000: -1.4950430885698218 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 2/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (1 RUNNING, 9 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.50156 | 0.4852 | 6 | -| DEFAULT_d3304_00000 | TERMINATED | | 2 | 4 | 16 | 0.000111924 | 1.8605 | 0.2724 | 2 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00002 | TERMINATED | | 4 | 8 | 128 | 0.00436986 | 1.72073 | 0.3849 | 4 | -| DEFAULT_d3304_00003 | TERMINATED | | 16 | 32 | 4 | 0.00120234 | 1.22223 | 0.5619 | 10 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1565)[0m [7, 2000] loss: 1.295 -[2m[36m(pid=1565)[0m [7, 4000] loss: 0.662 -[2m[36m(pid=1565)[0m [7, 6000] loss: 0.452 -[2m[36m(pid=1565)[0m [7, 8000] loss: 0.339 -[2m[36m(pid=1565)[0m [7, 10000] loss: 0.270 -[2m[36m(pid=1565)[0m [7, 12000] loss: 0.235 -[2m[36m(pid=1565)[0m [7, 14000] loss: 0.193 -[2m[36m(pid=1565)[0m [7, 16000] loss: 0.169 -[2m[36m(pid=1565)[0m [7, 18000] loss: 0.154 -[2m[36m(pid=1565)[0m [7, 20000] loss: 0.137 -Result for DEFAULT_d3304_00006: - accuracy: 0.4696 - date: 2021-01-05_20-35-52 - done: false - experiment_id: d8bae0fc87134e6398fd0341279c1a1a - experiment_tag: 6_batch_size=2,l1=64,l2=256,lr=0.0017724 - hostname: 1a844a452371 - iterations_since_restore: 7 - loss: 1.5851255111492393 - node_ip: 172.17.0.2 - pid: 1565 - should_checkpoint: 
true - time_since_restore: 762.1866834163666 - time_this_iter_s: 116.88158774375916 - time_total_s: 762.1866834163666 - timestamp: 1609878952 - timesteps_since_restore: 0 - training_iteration: 7 - trial_id: d3304_00006 - -== Status == -Memory usage on this node: 4.1/240.1 GiB -Using AsyncHyperBand: num_stopped=9 -Bracket: Iter 8.000: -1.267511115884781 | Iter 4.000: -1.4950430885698218 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 2/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (1 RUNNING, 9 TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.58513 | 0.4696 | 7 | -| DEFAULT_d3304_00000 | TERMINATED | | 2 | 4 | 16 | 0.000111924 | 1.8605 | 0.2724 | 2 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00002 | TERMINATED | | 4 | 8 | 128 | 0.00436986 | 1.72073 | 0.3849 | 4 | -| DEFAULT_d3304_00003 | TERMINATED | | 16 | 32 | 4 | 0.00120234 | 1.22223 | 0.5619 | 10 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -[2m[36m(pid=1565)[0m [8, 2000] loss: 1.341 -[2m[36m(pid=1565)[0m [8, 4000] loss: 0.667 -[2m[36m(pid=1565)[0m [8, 6000] loss: 0.445 -[2m[36m(pid=1565)[0m [8, 8000] loss: 0.336 -[2m[36m(pid=1565)[0m [8, 10000] loss: 0.271 -[2m[36m(pid=1565)[0m [8, 12000] loss: 0.228 -[2m[36m(pid=1565)[0m [8, 14000] loss: 0.196 -[2m[36m(pid=1565)[0m [8, 16000] loss: 0.175 -[2m[36m(pid=1565)[0m [8, 18000] loss: 0.155 -[2m[36m(pid=1565)[0m [8, 20000] loss: 0.135 -Result for DEFAULT_d3304_00006: - accuracy: 0.467 - date: 2021-01-05_20-37-32 - done: true - experiment_id: d8bae0fc87134e6398fd0341279c1a1a - experiment_tag: 6_batch_size=2,l1=64,l2=256,lr=0.0017724 - hostname: 1a844a452371 - iterations_since_restore: 8 - loss: 1.6539037554110967 - node_ip: 172.17.0.2 - pid: 1565 - should_checkpoint: true - time_since_restore: 862.3724186420441 - time_this_iter_s: 100.18573522567749 - time_total_s: 862.3724186420441 - timestamp: 1609879052 - timesteps_since_restore: 0 - training_iteration: 8 - trial_id: d3304_00006 - -== Status == -Memory usage on this node: 4.1/240.1 GiB -Using AsyncHyperBand: num_stopped=10 -Bracket: Iter 8.000: -1.4607074356479388 | Iter 4.000: -1.4950430885698218 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 2/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (1 RUNNING, 9 
TERMINATED) -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00006 | RUNNING | 172.17.0.2:1565 | 2 | 64 | 256 | 0.00177236 | 1.6539 | 0.467 | 8 | -| DEFAULT_d3304_00000 | TERMINATED | | 2 | 4 | 16 | 0.000111924 | 1.8605 | 0.2724 | 2 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00002 | TERMINATED | | 4 | 8 | 128 | 0.00436986 | 1.72073 | 0.3849 | 4 | -| DEFAULT_d3304_00003 | TERMINATED | | 16 | 32 | 4 | 0.00120234 | 1.22223 | 0.5619 | 10 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-----------------+--------------+------+------+-------------+---------+------------+----------------------+ - -== Status == -Memory usage on this node: 4.0/240.1 GiB -Using AsyncHyperBand: num_stopped=10 -Bracket: Iter 8.000: -1.4607074356479388 | Iter 4.000: -1.4950430885698218 | Iter 2.000: -1.7041921138763427 | Iter 1.000: -2.301384049916267 -Resources requested: 0/32 CPUs, 0/2 GPUs, 0.0/157.71 GiB heap, 0.0/49.37 GiB objects -Result logdir: /var/lib/jenkins/ray_results/DEFAULT_2021-01-05_20-23-08 -Number of trials: 10/10 (10 TERMINATED) -+---------------------+------------+-------+--------------+------+------+-------------+---------+------------+----------------------+ -| Trial name | status | loc | batch_size | l1 | l2 | lr | loss | accuracy | training_iteration | -|---------------------+------------+-------+--------------+------+------+-------------+---------+------------+----------------------| -| DEFAULT_d3304_00000 | TERMINATED | | 2 | 4 | 16 | 0.000111924 | 1.8605 | 0.2724 | 2 | -| DEFAULT_d3304_00001 | TERMINATED | | 8 | 16 | 32 | 0.077467 | 2.32186 | 0.1017 | 1 | -| DEFAULT_d3304_00002 | TERMINATED | | 4 | 8 | 128 | 0.00436986 | 1.72073 | 0.3849 | 4 | -| DEFAULT_d3304_00003 | TERMINATED | | 16 | 32 | 4 | 0.00120234 | 1.22223 | 0.5619 | 10 | -| DEFAULT_d3304_00004 | TERMINATED | | 4 | 16 | 32 | 0.016474 | 2.31342 | 0.102 | 1 | -| DEFAULT_d3304_00005 | TERMINATED | | 4 | 128 | 64 | 0.00757252 | 1.77391 | 0.3647 | 2 | -| DEFAULT_d3304_00006 | TERMINATED | | 2 | 64 | 256 | 0.00177236 | 1.6539 | 0.467 | 8 | -| DEFAULT_d3304_00007 | TERMINATED | | 8 | 8 | 8 | 0.000155891 | 2.30388 | 0.1011 | 1 | -| DEFAULT_d3304_00008 | TERMINATED | | 2 | 16 | 64 | 0.0310199 | 2.33698 | 0.0983 | 1 | -| DEFAULT_d3304_00009 | TERMINATED | | 4 | 4 | 32 | 0.0175239 | 2.31098 | 0.101 | 1 | -+---------------------+------------+-------+--------------+------+------+-------------+---------+------------+----------------------+ - -Best trial config: {'l1': 32, 'l2': 4, 'lr': 0.0012023396319256663, 'batch_size': 16} -Best trial final validation loss: 1.2222298237800597 -Best trial final validation accuracy: 0.5619 -Files already downloaded and verified -Files already downloaded and verified -Best 
trial test set accuracy: 0.5537
-
-```
-
-以上就是运行这段代码时的示例输出。
-
-为了避免浪费资源,大多数试验都已提早停止。 效果最好的试验的验证准确率约为 58%,并且可以在测试集上得到确认。
-
-就是这样了! 您现在可以调整 PyTorch 模型的超参数了。
-
-**脚本的总运行时间**:(14 分钟 43.400 秒)
-
-[下载 Python 源码:`hyperparameter_tuning_tutorial.py`](../_downloads/95074cd7ce8c3e57a92e7a9c49182e6a/hyperparameter_tuning_tutorial.py)
-
-[下载 Jupyter 笔记本:`hyperparameter_tuning_tutorial.ipynb`](../_downloads/c24b93738bc036c1b66d0387555bf69a/hyperparameter_tuning_tutorial.ipynb)
-
-[由 Sphinx 画廊](https://sphinx-gallery.readthedocs.io)生成的画廊
\ No newline at end of file
diff --git a/pytorch/官方教程/54.md b/pytorch/官方教程/54.md
deleted file mode 100644
index e158b5e8..00000000
--- a/pytorch/官方教程/54.md
+++ /dev/null
@@ -1,781 +0,0 @@
-# 模型剪裁教程
-
-> 原文:
-
-**作者**: [Michela Paganini](https://github.com/mickypaganini)
-
-最新的深度学习技术依赖于难以部署的过参数化模型。 相反,已知生物神经网络使用高效的稀疏连接。 为了在不牺牲精度的前提下减少内存、电池和硬件消耗,从而在设备上部署轻量级模型,并通过设备端私有计算保证私密性,确定最佳的模型压缩技术(即减少模型中的参数数量)非常重要。 在研究方面,剪裁还被用于研究过参数化网络与欠参数化网络在学习动态上的差异,研究幸运稀疏子网络([“彩票”](https://arxiv.org/abs/1803.03635))与初始化的作用,以及作为一种破坏性的神经架构搜索技术,等等。
-
-在本教程中,您将学习如何使用`torch.nn.utils.prune`来稀疏化神经网络,以及如何扩展它以实现自己的自定义剪裁技术。
-
-## 要求
-
-`"torch>=1.4.0a0+8e8a5e0"`
-
-```py
-import torch
-from torch import nn
-import torch.nn.utils.prune as prune
-import torch.nn.functional as F
-
-```
-
-## 创建模型
-
-在本教程中,我们使用 LeCun 等人 1998 年提出的 [LeNet](http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf) 架构。
-
-```py
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-class LeNet(nn.Module):
-    def __init__(self):
-        super(LeNet, self).__init__()
-        # 1 input image channel, 6 output channels, 3x3 square conv kernel
-        self.conv1 = nn.Conv2d(1, 6, 3)
-        self.conv2 = nn.Conv2d(6, 16, 3)
-        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5x5 image dimension
-        self.fc2 = nn.Linear(120, 84)
-        self.fc3 = nn.Linear(84, 10)
-
-    def forward(self, x):
-        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
-        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
-        x = x.view(-1, int(x.nelement() / x.shape[0]))
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
-        x = self.fc3(x)
-        return x
-
-model = LeNet().to(device=device)
-
-```
-
-## 检查模块
-
-让我们检查一下 LeNet 模型中(未剪裁的)`conv1`层。 目前它包含`weight`和`bias`两个参数,并且没有缓冲区。
-
-```py
-module = model.conv1
-print(list(module.named_parameters()))
-
-```
-
-出:
-
-```py
-[('weight', Parameter containing:
-tensor([[[[ 0.1552,  0.0102, -0.1944],
-          [ 0.0263,  0.1374, -0.3139],
-          [ 0.2838,  0.1943,  0.0948]]],
-
-        [[[-0.0296, -0.2514,  0.1300],
-          [ 0.0756, -0.3155, -0.2900],
-          [-0.1840,  0.1143, -0.0120]]],
-
-        [[[-0.2383, -0.3022,  0.2295],
-          [-0.0050,  0.2485, -0.3230],
-          [-0.1317, -0.0054,  0.2659]]],
-
-        [[[-0.0932,  0.1316,  0.0670],
-          [ 0.0572, -0.1845,  0.0870],
-          [ 0.1372,  0.1080,  0.0324]]],
-
-        [[[ 0.0908, -0.3280,  0.0365],
-          [-0.3108,  0.2317, -0.2271],
-          [ 0.1171,  0.2113, -0.2259]]],
-
-        [[[ 0.0407,  0.0512,  0.0954],
-          [-0.0437,  0.0302, -0.1317],
-          [ 0.2573,  0.0626,  0.0883]]]], device='cuda:0', requires_grad=True)), ('bias', Parameter containing:
-tensor([-0.1803,  0.1331, -0.3267,  0.3173, -0.0349,  0.1828], device='cuda:0',
-       requires_grad=True))]
-
-```
-
-```py
-print(list(module.named_buffers()))
-
-```
-
-出:
-
-```py
-[]
-
-```
-
-## 剪裁模块
-
-要剪裁一个模块(在此示例中为 LeNet 架构的`conv1`层),请首先从`torch.nn.utils.prune`提供的剪裁技术中选择一种(或[通过子类化`BasePruningMethod`实现您自己的剪裁方法](#extending-torch-nn-utils-pruning-with-custom-pruning-functions))。 然后,指定模块以及该模块中要剪裁的参数的名称。 最后,通过所选剪裁技术所需的关键字参数,指定剪裁的具体参数。
-
-在此示例中,我们将随机剪裁`conv1`层中名为`weight`的参数的 30% 连接。 模块作为第一个参数传递给函数;`name`通过字符串标识符指定该模块中的参数;`amount`表示要剪裁的连接比例(介于 0 和 1 之间的浮点数),或要剪裁的连接的绝对数量(非负整数)。 在正式剪裁`conv1`之前,下面先用一个独立的小片段演示`amount`这两种取值方式的差别。
-
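-下面这个补充的小片段并非原教程内容,只是为了直观说明`amount`的两种取值方式;它使用上面已导入的`nn`与`prune`,层的形状是随意假设的,随后才是教程对`conv1`的正式剪裁:
-
-```py
-# 补充示意:amount 为浮点数时按比例剪裁,为整数时按绝对数量剪裁
-toy = nn.Linear(4, 3)  # weight 共 4*3=12 个连接
-prune.l1_unstructured(toy, name="weight", amount=0.5)
-print(int(toy.weight_mask.sum()))  # 6:剪掉 50%,保留 6 个连接
-
-toy2 = nn.Linear(4, 3)
-prune.l1_unstructured(toy2, name="weight", amount=2)
-print(int(toy2.weight_mask.sum()))  # 10:恰好剪掉 2 个连接
-
-```
-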
-```py
-prune.random_unstructured(module, name="weight", amount=0.3)
-
-```
-
-剪裁的做法是:从参数中删除`weight`,并用一个名为`weight_orig`的新参数取而代之(即在初始参数`name`后附加`"_orig"`)。 `weight_orig`存储张量的未剪裁版本。 `bias`未被剪裁,因此保持完整。
-
-```py
-print(list(module.named_parameters()))
-
-```
-
-出:
-
-```py
-[('bias', Parameter containing:
-tensor([-0.1803,  0.1331, -0.3267,  0.3173, -0.0349,  0.1828], device='cuda:0',
-       requires_grad=True)), ('weight_orig', Parameter containing:
-tensor([[[[ 0.1552,  0.0102, -0.1944],
-          [ 0.0263,  0.1374, -0.3139],
-          [ 0.2838,  0.1943,  0.0948]]],
-
-        [[[-0.0296, -0.2514,  0.1300],
-          [ 0.0756, -0.3155, -0.2900],
-          [-0.1840,  0.1143, -0.0120]]],
-
-        [[[-0.2383, -0.3022,  0.2295],
-          [-0.0050,  0.2485, -0.3230],
-          [-0.1317, -0.0054,  0.2659]]],
-
-        [[[-0.0932,  0.1316,  0.0670],
-          [ 0.0572, -0.1845,  0.0870],
-          [ 0.1372,  0.1080,  0.0324]]],
-
-        [[[ 0.0908, -0.3280,  0.0365],
-          [-0.3108,  0.2317, -0.2271],
-          [ 0.1171,  0.2113, -0.2259]]],
-
-        [[[ 0.0407,  0.0512,  0.0954],
-          [-0.0437,  0.0302, -0.1317],
-          [ 0.2573,  0.0626,  0.0883]]]], device='cuda:0', requires_grad=True))]
-
-```
-
-通过上面所选剪裁技术生成的剪裁掩码,将保存为名为`weight_mask`的模块缓冲区(即在初始参数`name`后附加`"_mask"`)。
-
-```py
-print(list(module.named_buffers()))
-
-```
-
-出:
-
-```py
-[('weight_mask', tensor([[[[1., 1., 0.],
-          [0., 0., 1.],
-          [1., 0., 1.]]],
-
-        [[[1., 1., 1.],
-          [1., 1., 1.],
-          [1., 1., 1.]]],
-
-        [[[1., 1., 0.],
-          [1., 0., 0.],
-          [1., 0., 1.]]],
-
-        [[[1., 1., 1.],
-          [1., 0., 1.],
-          [1., 1., 1.]]],
-
-        [[[1., 1., 1.],
-          [0., 0., 1.],
-          [1., 1., 1.]]],
-
-        [[[1., 0., 0.],
-          [1., 0., 1.],
-          [1., 0., 0.]]]], device='cuda:0'))]
-
-```
-
-为了使正向传播无需修改即可工作,`weight`属性需要继续存在。 `torch.nn.utils.prune`中实现的剪裁技术会计算权重的剪裁版本(通过将掩码与原始参数组合),并把它存储在属性`weight`中。 注意,它不再是`module`的参数,而只是一个普通属性。
-
-```py
-print(module.weight)
-
-```
-
-出:
-
-```py
-tensor([[[[ 0.1552,  0.0102, -0.0000],
-          [ 0.0000,  0.0000, -0.3139],
-          [ 0.2838,  0.0000,  0.0948]]],
-
-        [[[-0.0296, -0.2514,  0.1300],
-          [ 0.0756, -0.3155, -0.2900],
-          [-0.1840,  0.1143, -0.0120]]],
-
-        [[[-0.2383, -0.3022,  0.0000],
-          [-0.0050,  0.0000, -0.0000],
-          [-0.1317, -0.0000,  0.2659]]],
-
-        [[[-0.0932,  0.1316,  0.0670],
-          [ 0.0572, -0.0000,  0.0870],
-          [ 0.1372,  0.1080,  0.0324]]],
-
-        [[[ 0.0908, -0.3280,  0.0365],
-          [-0.0000,  0.0000, -0.2271],
-          [ 0.1171,  0.2113, -0.2259]]],
-
-        [[[ 0.0407,  0.0000,  0.0000],
-          [-0.0437,  0.0000, -0.1317],
-          [ 0.2573,  0.0000,  0.0000]]]], device='cuda:0',
-       grad_fn=<MulBackward0>)
-
-```
-
-最后,PyTorch 会通过`forward_pre_hooks`在每次前向传播之前应用剪裁。 具体来说,当对`module`进行剪裁时(如此处所做的那样),被剪裁的每个参数都会获得一个对应的`forward_pre_hook`。 这里由于目前只剪裁了名为`weight`的原始参数,因此只会出现一个钩子。
-
-```py
-print(module._forward_pre_hooks)
-
-```
-
-出:
-
-```py
-OrderedDict([(0, <torch.nn.utils.prune.RandomUnstructured object at ...>)])
-
-```
-
-为完整起见,我们现在也剪裁`bias`,看看`module`的参数、缓冲区、钩子和属性如何变化。 仅为尝试另一种剪裁技术,这里按 L1 范数剪裁偏置中最小的 3 个条目,正如`l1_unstructured`剪裁函数所实现的那样。
-
-```py
-prune.l1_unstructured(module, name="bias", amount=3)
-
-```
-
-现在,命名参数会同时包含`weight_orig`(来自前面的步骤)和`bias_orig`。 缓冲区将包括`weight_mask`和`bias_mask`。 两个张量的剪裁版本将作为模块属性存在,并且模块现在拥有两个`forward_pre_hooks`。
-
-```py
-print(list(module.named_parameters()))
-
-```
-
-出:
-
-```py
-[('weight_orig', Parameter containing:
-tensor([[[[ 0.1552,  0.0102, -0.1944],
-          [ 0.0263,  0.1374, -0.3139],
-          [ 0.2838,  0.1943,  0.0948]]],
-
-        [[[-0.0296, -0.2514,  0.1300],
-          [ 0.0756, -0.3155, -0.2900],
-          [-0.1840,  0.1143, -0.0120]]],
-
-        [[[-0.2383, -0.3022,  0.2295],
-          [-0.0050,  0.2485, -0.3230],
-          [-0.1317, -0.0054,  0.2659]]],
-
-        [[[-0.0932,  0.1316,  0.0670],
-          [ 0.0572, -0.1845,  0.0870],
-          [ 0.1372,  0.1080,  0.0324]]],
-
-        [[[ 0.0908, -0.3280,  0.0365],
-          [-0.3108,  0.2317, -0.2271],
-          [ 0.1171,  0.2113, -0.2259]]],
-
-        [[[ 0.0407,  0.0512,  0.0954],
-          [-0.0437,  0.0302, -0.1317],
-          [ 0.2573,  0.0626,  0.0883]]]], device='cuda:0', requires_grad=True)), ('bias_orig', Parameter containing:
-tensor([-0.1803,  0.1331, -0.3267,  0.3173, -0.0349,  0.1828], device='cuda:0',
-       requires_grad=True))]
-
-```
-
-```py
-print(list(module.named_buffers()))
-
-```
-
-出:
-
-```py
-[('weight_mask', tensor([[[[1., 1., 0.],
-          [0., 0., 1.],
-          [1., 0., 1.]]],
-
-        [[[1., 1., 1.],
-          [1., 1., 1.],
-          [1., 1., 1.]]],
-
-        [[[1., 1., 0.],
-          [1., 0., 0.],
-          [1., 0., 1.]]],
-
-        [[[1., 1., 1.],
-          [1., 0., 1.],
-          [1., 1., 1.]]],
-
-        [[[1., 1., 1.],
-          [0., 0., 1.],
-          [1., 1., 1.]]],
-
-        [[[1., 0., 0.],
-          [1., 0., 1.],
-          [1., 0., 0.]]]], device='cuda:0')), ('bias_mask', tensor([0., 0., 1., 1., 0., 1.], device='cuda:0'))]
-
-```
-
-```py
-print(module.bias)
-
-```
-
-出:
-
-```py
-tensor([-0.0000,  0.0000, -0.3267,  0.3173, -0.0000,  0.1828], device='cuda:0',
-       grad_fn=<MulBackward0>)
-
-```
-
-```py
-print(module._forward_pre_hooks)
-
-```
-
-出:
-
-```py
-OrderedDict([(0, <torch.nn.utils.prune.RandomUnstructured object at ...>), (1, <torch.nn.utils.prune.L1Unstructured object at ...>)])
-
-```
-
-## 迭代式剪裁
-
-模块中的同一个参数可以被多次剪裁,多次剪裁调用的效果等于依次应用的各个掩码的组合。 新掩码与旧掩码的组合由`PruningContainer`的`compute_mask`方法处理。
-
-例如,假设我们现在想进一步剪裁`module.weight`,这次沿张量的第 0 轴做结构化剪裁(第 0 轴对应卷积层的输出通道,对`conv1`而言维度为 6),并以各通道的 L2 范数为依据。 这可以通过`ln_structured`函数并指定`n=2`和`dim=0`来实现。
-
-```py
-prune.ln_structured(module, name="weight", amount=0.5, n=2, dim=0)
-
-# As we can verify, this will zero out all the connections corresponding to
-# 50% (3 out of 6) of the channels, while preserving the action of the
-# previous mask.
-print(module.weight)
-
-```
-
-出:
-
-```py
-tensor([[[[ 0.0000,  0.0000, -0.0000],
-          [ 0.0000,  0.0000, -0.0000],
-          [ 0.0000,  0.0000,  0.0000]]],
-
-        [[[-0.0296, -0.2514,  0.1300],
-          [ 0.0756, -0.3155, -0.2900],
-          [-0.1840,  0.1143, -0.0120]]],
-
-        [[[-0.2383, -0.3022,  0.0000],
-          [-0.0050,  0.0000, -0.0000],
-          [-0.1317, -0.0000,  0.2659]]],
-
-        [[[-0.0000,  0.0000,  0.0000],
-          [ 0.0000, -0.0000,  0.0000],
-          [ 0.0000,  0.0000,  0.0000]]],
-
-        [[[ 0.0908, -0.3280,  0.0365],
-          [-0.0000,  0.0000, -0.2271],
-          [ 0.1171,  0.2113, -0.2259]]],
-
-        [[[ 0.0000,  0.0000,  0.0000],
-          [-0.0000,  0.0000, -0.0000],
-          [ 0.0000,  0.0000,  0.0000]]]], device='cuda:0',
-       grad_fn=<MulBackward0>)
-
-```
-
-现在,对应的钩子将是`torch.nn.utils.prune.PruningContainer`类型,它存储了应用于`weight`参数的剪裁历史。
-
-```py
-for hook in module._forward_pre_hooks.values():
-    if hook._tensor_name == "weight":  # select out the correct hook
-        break
-
-print(list(hook))  # pruning history in the container
-
-```
-
-出:
-
-```py
-[<torch.nn.utils.prune.RandomUnstructured object at ...>, <torch.nn.utils.prune.LnStructured object at ...>]
-
-```
-
-## 序列化剪裁的模型
-
-所有相关的张量,包括掩码缓冲区以及用于计算剪裁张量的原始参数,都存储在模型的`state_dict`中,因此可以根据需要轻松地序列化和保存。
-
-```py
-print(model.state_dict().keys())
-
-```
-
-出:
-
-```py
-odict_keys(['conv1.weight_orig', 'conv1.bias_orig', 'conv1.weight_mask', 'conv1.bias_mask', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias'])
-
-```
-
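-作为补充,下面的小片段(非原教程内容,文件名`pruned_lenet.pth`为随意假设)示意如何保存并重新读取包含剪裁状态的`state_dict`;若要将其加载到新的模型实例,需要先对新实例施加同样的剪裁重新参数化,或先调用`prune.remove`再保存:
-
-```py
-# 补充示意:剪裁后的 state_dict 可以像普通模型一样保存和读取
-torch.save(model.state_dict(), "pruned_lenet.pth")
-state = torch.load("pruned_lenet.pth")
-print([k for k in state.keys() if "conv1" in k])
-# ['conv1.weight_orig', 'conv1.bias_orig', 'conv1.weight_mask', 'conv1.bias_mask']
-
-```
-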
-## 删除剪裁重新参数化
-
-要使剪裁永久生效,即移除基于`weight_orig`和`weight_mask`的重新参数化并去掉`forward_pre_hook`,可以使用`torch.nn.utils.prune`中的`remove`函数。 请注意,这并不会像剪裁从未发生过那样撤销剪裁; 它只是把参数`weight`重新赋值为其剪裁后的版本,从而使剪裁永久化。
-
-删除重新参数化之前:
-
-```py
-print(list(module.named_parameters()))
-
-```
-
-出:
-
-```py
-[('weight_orig', Parameter containing:
-tensor([[[[ 0.1552,  0.0102, -0.1944],
-          [ 0.0263,  0.1374, -0.3139],
-          [ 0.2838,  0.1943,  0.0948]]],
-
-        [[[-0.0296, -0.2514,  0.1300],
-          [ 0.0756, -0.3155, -0.2900],
-          [-0.1840,  0.1143, -0.0120]]],
-
-        [[[-0.2383, -0.3022,  0.2295],
-          [-0.0050,  0.2485, -0.3230],
-          [-0.1317, -0.0054,  0.2659]]],
-
-        [[[-0.0932,  0.1316,  0.0670],
-          [ 0.0572, -0.1845,  0.0870],
-          [ 0.1372,  0.1080,  0.0324]]],
-
-        [[[ 0.0908, -0.3280,  0.0365],
-          [-0.3108,  0.2317, -0.2271],
-          [ 0.1171,  0.2113, -0.2259]]],
-
-        [[[ 0.0407,  0.0512,  0.0954],
-          [-0.0437,  0.0302, -0.1317],
-          [ 0.2573,  0.0626,  0.0883]]]], device='cuda:0', requires_grad=True)), ('bias_orig', Parameter containing:
-tensor([-0.1803,  0.1331, -0.3267,  0.3173, -0.0349,  0.1828], device='cuda:0',
-       requires_grad=True))]
-
-```
-
-```py
-print(list(module.named_buffers()))
-
-```
-
-出:
-
-```py
-[('weight_mask', tensor([[[[0., 0., 0.],
-          [0., 0., 0.],
-          [0., 0., 0.]]],
-
-        [[[1., 1., 1.],
-          [1., 1., 1.],
-          [1., 1., 1.]]],
-
-        [[[1., 1., 0.],
-          [1., 0., 0.],
-          [1., 0., 1.]]],
-
-        [[[0., 0., 0.],
-          [0., 0., 0.],
-          [0., 0., 0.]]],
-
-        [[[1., 1., 1.],
-          [0., 0., 1.],
-          [1., 1., 1.]]],
-
-        [[[0., 0., 0.],
-          [0., 0., 0.],
-          [0., 0., 0.]]]], device='cuda:0')), ('bias_mask', tensor([0., 0., 1., 1., 0., 1.], device='cuda:0'))]
-
-```
-
-```py
-print(module.weight)
-
-```
-
-出:
-
-```py
-tensor([[[[ 0.0000,  0.0000, -0.0000],
-          [ 0.0000,  0.0000, -0.0000],
-          [ 0.0000,  0.0000,  0.0000]]],
-
-        [[[-0.0296, -0.2514,  0.1300],
-          [ 0.0756, -0.3155, -0.2900],
-          [-0.1840,  0.1143, -0.0120]]],
-
-        [[[-0.2383, -0.3022,  0.0000],
-          [-0.0050,  0.0000, -0.0000],
-          [-0.1317, -0.0000,  0.2659]]],
-
-        [[[-0.0000,  0.0000,  0.0000],
-          [ 0.0000, -0.0000,  0.0000],
-          [ 0.0000,  0.0000,  0.0000]]],
-
-        [[[ 0.0908, -0.3280,  0.0365],
-          [-0.0000,  0.0000, -0.2271],
-          [ 0.1171,  0.2113, -0.2259]]],
-
-        [[[ 0.0000,  0.0000,  0.0000],
-          [-0.0000,  0.0000, -0.0000],
-          [ 0.0000,  0.0000,  0.0000]]]], device='cuda:0',
-       grad_fn=<MulBackward0>)
-
-```
-
-删除重新参数化后:
-
-```py
-prune.remove(module, 'weight')
-print(list(module.named_parameters()))
-
-```
-
-出:
-
-```py
-[('bias_orig', Parameter containing:
-tensor([-0.1803,  0.1331, -0.3267,  0.3173, -0.0349,  0.1828], device='cuda:0',
-       requires_grad=True)), ('weight', Parameter containing:
-tensor([[[[ 0.0000,  0.0000, -0.0000],
-          [ 0.0000,  0.0000, -0.0000],
-          [ 0.0000,  0.0000,  0.0000]]],
-
-        [[[-0.0296, -0.2514,  0.1300],
-          [ 0.0756, -0.3155, -0.2900],
-          [-0.1840,  0.1143, -0.0120]]],
-
-        [[[-0.2383, -0.3022,  0.0000],
-          [-0.0050,  0.0000, -0.0000],
-          [-0.1317, -0.0000,  0.2659]]],
-
-        [[[-0.0000,  0.0000,  0.0000],
-          [ 0.0000, -0.0000,  0.0000],
-          [ 0.0000,  0.0000,  0.0000]]],
-
-        [[[ 0.0908, -0.3280,  0.0365],
-          [-0.0000,  0.0000, -0.2271],
-          [ 0.1171,  0.2113, -0.2259]]],
-
-        [[[ 0.0000,  0.0000,  0.0000],
-          [-0.0000,  0.0000, -0.0000],
-          [ 0.0000,  0.0000,  0.0000]]]], device='cuda:0', requires_grad=True))]
-
-```
-
-```py
-print(list(module.named_buffers()))
-
-```
-
-出:
-
-```py
-[('bias_mask', tensor([0., 0., 1., 1., 0., 1.], device='cuda:0'))]
-
-```
-
-## 剪裁模型中的多个参数
-
-通过指定所需的剪裁技术和参数,我们可以轻松地剪裁网络中的多个张量,例如按照层的类型选择不同的剪裁方式,如下例所示。
-
-```py
-new_model = LeNet()
-for name, module in new_model.named_modules():
-    # prune 20% of connections in all 2D-conv layers
-    if isinstance(module, torch.nn.Conv2d):
-        prune.l1_unstructured(module, name='weight', amount=0.2)
-    # prune 40% of connections in all linear layers
-    elif isinstance(module, torch.nn.Linear):
-        prune.l1_unstructured(module, name='weight', amount=0.4)
-
-print(dict(new_model.named_buffers()).keys())  # to verify that all masks exist
-
-```
-
-出:
-
-```py
-dict_keys(['conv1.weight_mask', 'conv2.weight_mask', 'fc1.weight_mask', 'fc2.weight_mask', 'fc3.weight_mask'])
-
-```
-
-## 全局剪裁
-
-到目前为止,我们只讨论了通常所说的“局部”剪裁,即逐个剪裁模型中的张量:每个条目的统计信息(权重大小、激活值、梯度等)只与同一张量中的其他条目比较。 然而,一种常见且可能更强大的技术是一次性剪裁整个模型,例如删除整个模型中最小的 20% 连接,而不是删除每一层中最小的 20% 连接。 这很可能导致每层的剪裁比例各不相同。
-让我们看看如何使用`torch.nn.utils.prune`中的`global_unstructured`来实现这一点。
-
-```py
-model = LeNet()
-
-parameters_to_prune = (
-    (model.conv1, 'weight'),
-    (model.conv2, 'weight'),
-    (model.fc1, 'weight'),
-    (model.fc2, 'weight'),
-    (model.fc3, 'weight'),
-)
-
-prune.global_unstructured(
-    parameters_to_prune,
-    pruning_method=prune.L1Unstructured,
-    amount=0.2,
-)
-
-```
-
-现在,我们可以检查每个被剪裁参数的稀疏度,它们不会都等于每层 20%。 但是,全局稀疏度将(大约)为 20%。
-
-```py
-print(
-    "Sparsity in conv1.weight: {:.2f}%".format(
-        100. * float(torch.sum(model.conv1.weight == 0))
-        / float(model.conv1.weight.nelement())
-    )
-)
-print(
-    "Sparsity in conv2.weight: {:.2f}%".format(
-        100. * float(torch.sum(model.conv2.weight == 0))
-        / float(model.conv2.weight.nelement())
-    )
-)
-print(
-    "Sparsity in fc1.weight: {:.2f}%".format(
-        100. * float(torch.sum(model.fc1.weight == 0))
-        / float(model.fc1.weight.nelement())
-    )
-)
-print(
-    "Sparsity in fc2.weight: {:.2f}%".format(
-        100. * float(torch.sum(model.fc2.weight == 0))
-        / float(model.fc2.weight.nelement())
-    )
-)
-print(
-    "Sparsity in fc3.weight: {:.2f}%".format(
-        100. * float(torch.sum(model.fc3.weight == 0))
-        / float(model.fc3.weight.nelement())
-    )
-)
-print(
-    "Global sparsity: {:.2f}%".format(
-        100. * float(
-            torch.sum(model.conv1.weight == 0)
-            + torch.sum(model.conv2.weight == 0)
-            + torch.sum(model.fc1.weight == 0)
-            + torch.sum(model.fc2.weight == 0)
-            + torch.sum(model.fc3.weight == 0)
-        )
-        / float(
-            model.conv1.weight.nelement()
-            + model.conv2.weight.nelement()
-            + model.fc1.weight.nelement()
-            + model.fc2.weight.nelement()
-            + model.fc3.weight.nelement()
-        )
-    )
-)
-
-```
-
-出:
-
-```py
-Sparsity in conv1.weight: 3.70%
-Sparsity in conv2.weight: 8.10%
-Sparsity in fc1.weight: 22.05%
-Sparsity in fc2.weight: 12.29%
-Sparsity in fc3.weight: 8.45%
-Global sparsity: 20.00%
-
-```
-
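-上面的逐层统计代码重复较多。 作为补充,下面给出一个等价的小工具函数(示意写法,函数名`sparsity`为自拟,非原教程内容):
-
-```py
-def sparsity(tensor):
-    """返回张量中取值为 0 的元素所占的百分比。"""
-    return 100. * float(torch.sum(tensor == 0)) / float(tensor.nelement())
-
-for name, layer in model.named_modules():
-    if isinstance(layer, (nn.Conv2d, nn.Linear)):
-        print("Sparsity in {}.weight: {:.2f}%".format(name, sparsity(layer.weight)))
-
-```
-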
-## 使用自定义剪裁函数扩展`torch.nn.utils.prune`
-
-要实现自己的剪裁函数,可以像所有其他剪裁方法一样,通过子类化`BasePruningMethod`基类来扩展`nn.utils.prune`模块。 基类已经为您实现了以下方法:`__call__`,`apply_mask`,`apply`,`prune`和`remove`。 除了一些特殊情况外,您无需为新的剪裁技术重新实现这些方法。 但是,您必须实现`__init__`(构造器)和`compute_mask`(根据剪裁技术的逻辑为给定张量计算掩码)。 另外,您必须指定该技术实现的剪裁类型(支持的选项为`global`,`structured`和`unstructured`)。 在迭代应用剪裁的情况下,需要据此确定如何组合掩码。 换句话说,当剪裁一个已被剪裁过的参数时,当前的剪裁技术应作用于参数中尚未剪裁的部分。 指定`PRUNING_TYPE`将使`PruningContainer`(负责迭代应用剪裁掩码)正确识别要剪裁的参数切片。
-
-例如,假设您要实现一种剪裁技术,剪裁张量中每隔一个的条目(或者,如果张量先前已被剪裁,则剪裁其余未剪裁部分中每隔一个的条目)。 这属于`PRUNING_TYPE='unstructured'`,因为它作用于层中的单个连接,而不是整个单元/通道(`'structured'`),也不作用于不同的参数(`'global'`)。
-
-```py
-class FooBarPruningMethod(prune.BasePruningMethod):
-    """Prune every other entry in a tensor
-    """
-    PRUNING_TYPE = 'unstructured'
-
-    def compute_mask(self, t, default_mask):
-        mask = default_mask.clone()
-        mask.view(-1)[::2] = 0
-        return mask
-
-```
-
-现在,要将其应用于`nn.Module`中的参数,还应提供一个简单的函数来实例化该方法并应用它。
-
-```py
-def foobar_unstructured(module, name):
-    """Prunes tensor corresponding to parameter called `name` in `module`
-    by removing every other entry in the tensors.
-    Modifies module in place (and also return the modified module)
-    by:
-    1) adding a named buffer called `name+'_mask'` corresponding to the
-    binary mask applied to the parameter `name` by the pruning method.
-    The parameter `name` is replaced by its pruned version, while the
-    original (unpruned) parameter is stored in a new parameter named
-    `name+'_orig'`.
-
-    Args:
-        module (nn.Module): module containing the tensor to prune
-        name (string): parameter name within `module` on which pruning
-            will act.
-
-    Returns:
-        module (nn.Module): modified (i.e. pruned) version of the input
-            module
-
-    Examples:
-        >>> m = nn.Linear(3, 4)
-        >>> foobar_unstructured(m, name='bias')
-    """
-    FooBarPruningMethod.apply(module, name)
-    return module
-
-```
-
-试试吧!
-
-```py
-model = LeNet()
-foobar_unstructured(model.fc3, name='bias')
-
-print(model.fc3.bias_mask)
-
-```
-
-出:
-
-```py
-tensor([0., 1., 0., 1., 0., 1., 0., 1., 0., 1.])
-
-```
-
-**脚本的总运行时间**:(0 分钟 0.135 秒)
-
-[下载 Python 源码:`pruning_tutorial.py`](../_downloads/8eb4a30bf66c6a1a0d1faba246c07bb3/pruning_tutorial.py)
-
-[下载 Jupyter 笔记本:`pruning_tutorial.ipynb`](../_downloads/f40ae04715cdb214ecba048c12f8dddf/pruning_tutorial.ipynb)
-
-[由 Sphinx 画廊](https://sphinx-gallery.readthedocs.io)生成的画廊
\ No newline at end of file
diff --git a/pytorch/官方教程/55.md b/pytorch/官方教程/55.md
deleted file mode 100644
index ad495c50..00000000
--- a/pytorch/官方教程/55.md
+++ /dev/null
@@ -1,398 +0,0 @@
-# LSTM 单词语言模型上的动态量化(beta)
-
-> 原文:
-
-**作者**: [James Reed](https://github.com/jamesr66a)
-
-**编辑**:[Seth Weidman](https://github.com/SethHWeidman/)
-
-## 简介
-
-量化涉及将模型的权重和激活从`float`转换为`int`,这可以使模型尺寸更小、推断速度更快,而对准确率的影响很小。
-
-在本教程中,我们将最简单的量化形式,即[动态量化](https://pytorch.org/docs/stable/quantization.html#torch.quantization.quantize_dynamic),应用于基于 LSTM 的下一个单词预测模型,紧密遵循 PyTorch 示例中的[单词语言模型](https://github.com/pytorch/examples/tree/master/word_language_model)。
-
-```py
-# imports
-import os
-from io import open
-import time
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-```
-
-## 1.定义模型
-
-在这里,我们根据单词语言模型示例中的[模型](https://github.com/pytorch/examples/blob/master/word_language_model/model.py)定义 LSTM 模型架构。
-
-```py
-class LSTMModel(nn.Module):
-    """Container module with an encoder, a recurrent module, and a decoder."""
-
-    def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5):
-        super(LSTMModel, self).__init__()
-        self.drop = nn.Dropout(dropout)
-        self.encoder = nn.Embedding(ntoken, ninp)
-        self.rnn = nn.LSTM(ninp, nhid, nlayers, dropout=dropout)
-        self.decoder = nn.Linear(nhid, ntoken)
-
-        self.init_weights()
-
-        self.nhid = nhid
-        self.nlayers = nlayers
-
-    def init_weights(self):
-        initrange = 0.1
-        self.encoder.weight.data.uniform_(-initrange, initrange)
-        self.decoder.bias.data.zero_()
-        self.decoder.weight.data.uniform_(-initrange, initrange)
-
-    def forward(self, input, hidden):
-        emb = self.drop(self.encoder(input))
-        output, hidden = self.rnn(emb, hidden)
-        output = self.drop(output)
-        decoded = self.decoder(output)
-        return decoded, hidden
-
-    def init_hidden(self, bsz):
-        weight = next(self.parameters())
-        return (weight.new_zeros(self.nlayers, bsz, self.nhid),
-                weight.new_zeros(self.nlayers, bsz, self.nhid))
-
-```
-
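-动态量化本身只需一行调用。 下面先给出其最小调用方式作为预览(示意片段,非原教程正文;这里假设`model`是一个已实例化并加载好权重的`LSTMModel`,实际的量化步骤在后文进行):
-
-```py
-import torch.quantization
-
-# 示意:将模型中的 nn.LSTM 与 nn.Linear 替换为动态量化版本
-quantized_model = torch.quantization.quantize_dynamic(
-    model, {nn.LSTM, nn.Linear}, dtype=torch.qint8
-)
-
-```
-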
-## 2.加载文本数据
-
-接下来,我们遵循单词语言模型示例中的[预处理](https://github.com/pytorch/examples/blob/master/word_language_model/data.py),将 [Wikitext-2 数据集](https://www.google.com/search?q=wikitext+2+data)加载到语料库中。
-
-```py
-class Dictionary(object):
-    def __init__(self):
-        self.word2idx = {}
-        self.idx2word = []
-
-    def add_word(self, word):
-        if word not in self.word2idx:
-            self.idx2word.append(word)
-            self.word2idx[word] = len(self.idx2word) - 1
-        return self.word2idx[word]
-
-    def __len__(self):
-        return len(self.idx2word)
-
-class Corpus(object):
-    def __init__(self, path):
-        self.dictionary = Dictionary()
-        self.train = self.tokenize(os.path.join(path, 'train.txt'))
-        self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
-        self.test = self.tokenize(os.path.join(path, 'test.txt'))
-
-    def tokenize(self, path):
-        """Tokenizes a text file."""
-        assert os.path.exists(path)
-        # Add words to the dictionary
-        with open(path, 'r', encoding="utf8") as f:
-            for line in f:
-                words = line.split() + ['<eos>']
-                for word in words:
-                    self.dictionary.add_word(word)
-
-        # Tokenize file content
-        with open(path, 'r', encoding="utf8") as f:
-            idss = []
-            for line in f:
-                words = line.split() + ['<eos>']
-                ids = []
-                for word in words:
-                    ids.append(self.dictionary.word2idx[word])
-                idss.append(torch.tensor(ids).type(torch.int64))
-            ids = torch.cat(idss)
-
-        return ids
-
-model_data_filepath = 'data/'
-
-corpus = Corpus(model_data_filepath + 'wikitext-2')
-
-```
-
-## 3.加载预先训练的模型
-
-这是一篇有关动态量化的教程,动态量化是在模型训练完成之后应用的一种量化技术。 因此,我们只需将一些预训练权重加载到此模型架构中;这些权重是使用单词语言模型示例中的默认设置训练五个周期后得到的。
-
-```py
-ntokens = len(corpus.dictionary)
-
-model = LSTMModel(
-    ntoken = ntokens,
-    ninp = 512,
-    nhid = 256,
-    nlayers = 5,
-)
-
-model.load_state_dict(
-    torch.load(
-        model_data_filepath + 'word_language_model_quantize.pth',
-        map_location=torch.device('cpu')
-        )
-    )
-
-model.eval()
-print(model)
-
-```
-
-出:
-
-```py
-LSTMModel(
-  (drop): Dropout(p=0.5, inplace=False)
-  (encoder): Embedding(33278, 512)
-  (rnn): LSTM(512, 256, num_layers=5, dropout=0.5)
-  (decoder): Linear(in_features=256, out_features=33278, bias=True)
-)
-
-```
-
-现在,我们生成一些文本,以确保预训练模型能够正常工作。与之前类似,我们在此处遵循单词语言模型示例中的做法。
-
-```py
-input_ = torch.randint(ntokens, (1, 1), dtype=torch.long)
-hidden = model.init_hidden(1)
-temperature = 1.0
-num_words = 1000
-
-with open(model_data_filepath + 'out.txt', 'w') as outf:
-    with torch.no_grad():  # no tracking history
-        for i in range(num_words):
-            output, hidden = model(input_, hidden)
-            word_weights = output.squeeze().div(temperature).exp().cpu()
-            word_idx = torch.multinomial(word_weights, 1)[0]
-            input_.fill_(word_idx)
-
-            word = corpus.dictionary.idx2word[word_idx]
-
-            outf.write(str(word.encode('utf-8')) + ('\n' if i % 20 == 19 else ' '))
-
-            if i % 100 == 0:
-                print('| Generated {}/{} words'.format(i, 1000))
-
-with open(model_data_filepath + 'out.txt', 'r') as outf:
-    all_output = outf.read()
-    print(all_output)
-
-```
-
-出:
-
-```py
-| Generated 0/1000 words
-| Generated 100/1000 words
-| Generated 200/1000 words
-| Generated 300/1000 words
-| Generated 400/1000 words
-| Generated 500/1000 words
-| Generated 600/1000 words
-| Generated 700/1000 words
-| Generated 800/1000 words
-| Generated 900/1000 words
-b'broadcaster' b'good' b',' b'which' b'provided' b'for' b'a' b'vignettes' b'socially' b'and' b'the' b'FIA' b"'s" b'ad' b'.' b'The' b'state' b'into' b'this' b'position'
-b'is' b'in' b'account' b'of' b'a' b'wide' b'Domonia' b'' b',' b'fallen' b'to' b'for' b'the' b'types' b'of' b'' b'developers' b'being' b'entertaining' b'.'
-b'' b'The' b'Claus' b'II' b'(' b'The' b'Book' b'of' b'Karnataka' b',' b'2' b'/' b'10' b')' b'was' b'released' b'by' b'British' b'@-@' b'Irish'
-b'ruler' b'arriving' b'on' b'the' b'winter' b'of' b'its' b'championship' b'orbit' b'.' b'In' b'early' b'spring' b'roles' b'dismay' b'when' b'he' b'replaced' b'by' b'a'
-b'religious' b'park' b',' b'when' b'it' b'features' b'flowers' b'they' b'do' b'populist' b'.' b'temperatures' b'attempted' b'to' b'have' b'trouble' b'met' b',' b'' b','
-b'and' b'karaoke' b'leads' b'to' b'some' b'return' b'up' b'as' b'or' b'seated' b'.' b'The' b'remainder' b'of' b'w' b'voltage' b'contains' b'Allah' b'in' b'the'
-b'series' b'to' b'infiltrate' b'disappeared' b'.'
b'Though' b'it' b'comes' b'into' b'his' b'Shinnok' b"'s" b'history' b',' b'they' b'may' b'sometimes' b'7' b'@-@' b'April' -b',' b'roughly' b'7' b'%' b'of' b'50' b'mph' b'(' b'4' b'@.@' b'8' b'in' b')' b'while' b'males' b'have' b'put' b'except' b'far' b'as' -b'alkaline' b'@-@' b'up' b'.' b'' b'Electrical' b'medical' b'rings' b'were' b'always' b'published' b'.' b'' b'Based' b'on' b'2' b'November' b',' b'Idaho' b'can' -b'be' b'estimated' b'cooking' b'and' b'' b',' b'while' b'no' b',' b'thin' b'drugs' b'was' b'poor' b'to' b'each' b'area' b'.' b'It' b'has' b'not' -b'campaigned' b'those' b'of' b'the' b'most' b'potent' b'population' b'of' b'leaves' b'in' b'all' b'condition' b',' b'because' b'they' b'were' b'forced' b'to' b'die' b'in' -b'bhandara' b'' b'that' b'culture' b'.' b'Almost' b'a' b'prose' b'plan' b',' b'there' b'have' b'been' b'only' b'clear' b',' b'it' b'occurs' b'.' b'' -b'The' b'kakapo' b'was' b'interpreted' b'on' b'1998' b'from' b'1955' b'and' b'played' b'in' b'' b',' b'Western' b'Asia' b'on' b'0' b'August' b'1966' b',' -b'with' b'an' b'additional' b'population' b'that' b'Samuel' b'solemnly' b',' b'Chapman' b'sponsored' b'after' b'a' b'few' b'years' b'.' b'In' b'1990' b',' b'prominent' b'areas' -b'believe' b'that' b'as' b'being' b'an' b'rural' b'planet' b',' b'they' b'is' b'neglected' b'as' b'to' b'be' b'changed' b'.' b'Congress' b'This' b'well' b'"' -b'was' b'run' b'by' b'' b',' b'Waldemar' b'Greenwood' b'.' b'170' b'have' b'just' b'in' b'place' b',' b'he' b'overruled' b'.' b'The' b'1966' b'race' -b'is' b'a' b'embodies' b'state' b'of' b'Viking' b'or' b'most' b'generation' b',' b'not' b'in' b'the' b'codes' b'of' b'all' b'other' b'alignment' b'musical' b'politicians' -b'.' b'No' b'system' b'have' b'participated' b'on' b'3' b'to' b'9' b'%' b'of' b'any' b'urine' b',' b'with' b'both' b'drawings' b'and' b'significantly' b'towards' -b'his' b'deteriorating' b'and' b'poverty' b'.' b'As' b'a' b'rust' b',' b'contains' b'other' b'compositions' b'that' b'must' b'be' b'beneficial' b'by' b'overnight' b'or' b'fluid' -b',' b'u' b'organizations' b'can' b'seek' b'mild' b'late' b'down' b'on' b'a' b'broadside' b'and' b'leads' b'to' b'its' b'cycle' b'.' b'For' b'example' b',' -b'1137' b',' b'snowmelt' b'and' b'' b'\xe2\x80\x94' b'a' b'variety' b'of' b'dealt' b';' b'Species' b'(' b'with' b'a' b'reduction' b'of' b'prohibitions' b')' b',' -b'' b'exploration' b',' b'' b'an' b'fuel' b'eye' b'of' b'purple' b'trees' b',' b'was' b'shown' b'west' b'.' b'chased' b'Jack' b'of' b'claws' b',' -b'his' b'vertex' b'states' b'that' b'they' b',' b'in' b'1922' b',' b'was' b'killed' b'.' b'' b'There' b'have' b'been' b'official' b'concerns' b'of' b'Boat' -b'Kerry' b'including' b'L\xc3\xaa' b'\xe3\x80\x89' b'and' b'' b'A' b'Forest' b',' b'"' b'' b',' b'because' b'' b',' b'and' b'sometimes' b'encounters' b'like' b'I' -b"'ve" b'been' b'' b'.' b'"' b'' b'Hunter' b'pathway' b'writes' b'it' b'entering' b'the' b'second' b'.' b'The' b'kakapo' b'is' b'gems' b'used' b'after' -b'died' b'from' b'two' b'games' b'in' b'six' b'' b',' b'her' b'feature' b'and' b'called' b'"' b'mercenaries' b'"' b',' b'which' b'supported' b'by' b'the' -b'Selective' b'Race' b'.' b'"' b'' b'Bono' b'Dutch' b'struggles' b'to' b'the' b'species' b'' b',' b'especially' b'crusaders' b'I' b'lives' b'process' b',' b'but' -b'Constantin' b'approximate' b'and' b'character' b'or' b'so' b'.' 
b'There' b'have' b'numerous' b'pale' b'dioceses' b'as' b'a' b'resistant' b';' b'the' b'Inn' b'Comic' b'@-@' -b'white' b'individuals' b',' b'its' b'flat' b',' b'' b'and' b'correct' b',' b'in' b'which' b'they' b'felt' b'.' b'In' b'the' b'arms' b',' b'the' -b'original' b'occasion' b'about' b'Spanish' b'sites' b'all' b'(' b'millionaire' b'lay' b';' b'or' b'160' b'@-@' b'mosquitoes' b')' b'v' b'' b'(' b'c' b')' -b'.' b'The' b'bird' b'is' b'extremely' b'paved' b',' b'and' b'they' b'are' b'claimed' b'to' b'wedding' b'the' b'' b'of' b'Excellence' b',' b'and' b'an' -b'extinct' b'composite' b',' b'cute' b'outside' b'' b'.' b'This' b'may' b'be' b'seen' b'by' b'the' b'Seer' b'that' b'Tempest' b'"' b'comes' b'"' b'over' -b'a' b'bright' b'judicial' b'guitar' b',' b'which' b'describes' b',' b'and' b'tend' b'to' b'be' b'seen' b'.' b'' b'' b'=' b'=' b'Conservation' b'for' -b'contraception' b'=' b'=' b'' b'' b'Grieco' b'Island' b'is' b'a' b'eventually' b'scale' b'word' b'to' b'a' b'tropical' b'storm' b',' b'based' b'in' b'a' -b'pre' b'\xe2\x80\x93' b'9' b'lead' b',' b'a' b'forces' b'after' b'a' b'additional' b',' b'grey' b'substance' b',' b'Metro' b',' b'background' b',' b'and' b'cooperate' -b'with' b'its' b'overly' b'overview' b',' b'so' b'the' b'heaviest' b'route' b',' b'and' b'\xc2\xb3' b'.' b'portion' b'may' b'occur' b'this' b'other' b'up' b'an' -b'' b'break' b',' b'then' b'or' b'deep' b'distinct' b'or' b'female' b'offspring' b',' b'but' b'even' b'understand' b'.' b'Following' b'God' b'(' b'no' b'nervous' -b'image' b'from' b'complaints' b')' b',' b'the' b'player' b'represents' b'three' b'or' b'over' b'9' b'\xc2\xb0' b'large' b'(' b'five' b'weeks' b'of' b'many' b'cats' -b')' b',' b'as' b'it' b'targets' b'for' b'the' b'second' b'female' b'together' b'.' b'159' b',' b'it' b'also' b'spend' b'bold' b'markets' b'and' b'its' -b'players' b'powers' b',' b'dubbed' b'those' b'of' b'lengths' b'.' b'Most' b'are' b'arrow' b'could' b'be' b'noticed' b'involving' b'they' b'fall' b'.' b'On' b'FAU' -b"'s" b'only' b'lifetime' b'she' b'treated' b'or' b'their' b'apparent' b'soaring' b'proposition' b'has' b'5th' b'of' b'those' b'eye' b',' b'but' b'knows' b'in' b'a' -b'' b'Network' b';' b'which' b'of' b'that' b'reality' b'or' b'artificial' b'when' b'struggling' b'Bungie' b'is' b'successful' b'.' b'The' b'' b'sound' b'of' b'frontier' -b'ahead' b'for' b'damage' b'came' b'on' b',' b'so' b'the' b'first' b'series' b'funded' b'by' b'its' b'bowls' b',' b'a' b'chant' b'.' b'They' b'may' -b'be' b'used' b'Pongsak' b'or' b'occasionally' b'protected' b'them' b'.' b'Fingal' b'cylindrical' b'conspired' b'on' b'a' b'variety' b'of' b'prey' b',' b'' b',' b'Zach' -b',' b'and' b'young' b'possessing' b'Westland' b'valleys' b'.' b'Otherwise' b',' b'I' b'do' b'at' b'them' b'in' b'first' b'@-@' b'season' b'woodland' b',' b'where' -b'they' b'weighed' b'them' b'to' b'correct' b'a' b'list' b'of' b'other' b'birds' b'.' b'Another' b'theme' b'where' b'or' b',' b'it' b'is' b'a' b'appropriate' -b'source' b',' b'this' b'competed' b'in' b'integral' b'Waiouru' b'alone' b',' b'the' b'pathways' b'under' b'Aravind' b',' b'and' b'others' b',' b'instead' b'of' b'westward' -b',' b'as' b'they' b'are' b'quarters' b'and' b'caused' b'in' b'males' b'.' b'Once' b'selective' b'centered' b',' b'they' b'threats' b'were' b'Zuniceratops' b'.' b'Although' -b'the' b'most' b'spots' b'replication' b'became' b'a' b'fragile' b'pointer' b'(' b'a' b'pair' b'of' b'' b')' b',' b'strongly' b'"' b'mammals' b'"' b',' -b'which' b'give' b'Powderfinger' b'to' b'persecution' b'.' 
b'Other' b'conifers' b'but' b'even' b'only' b'swallow' b'so' b'every' b'symbols' b'of' b'Manders' b',' b'in' b'massive' - -``` - -它不是 GPT-2,但看起来该模型已开始学习语言结构! - -我们几乎准备好演示动态量化。 我们只需要定义一些辅助函数: - -```py -bptt = 25 -criterion = nn.CrossEntropyLoss() -eval_batch_size = 1 - -# create test data set -def batchify(data, bsz): - # Work out how cleanly we can divide the dataset into bsz parts. - nbatch = data.size(0) // bsz - # Trim off any extra elements that wouldn't cleanly fit (remainders). - data = data.narrow(0, 0, nbatch * bsz) - # Evenly divide the data across the bsz batches. - return data.view(bsz, -1).t().contiguous() - -test_data = batchify(corpus.test, eval_batch_size) - -# Evaluation functions -def get_batch(source, i): - seq_len = min(bptt, len(source) - 1 - i) - data = source[i:i+seq_len] - target = source[i+1:i+1+seq_len].reshape(-1) - return data, target - -def repackage_hidden(h): - """Wraps hidden states in new Tensors, to detach them from their history.""" - - if isinstance(h, torch.Tensor): - return h.detach() - else: - return tuple(repackage_hidden(v) for v in h) - -def evaluate(model_, data_source): - # Turn on evaluation mode which disables dropout. - model_.eval() - total_loss = 0. - hidden = model_.init_hidden(eval_batch_size) - with torch.no_grad(): - for i in range(0, data_source.size(0) - 1, bptt): - data, targets = get_batch(data_source, i) - output, hidden = model_(data, hidden) - hidden = repackage_hidden(hidden) - output_flat = output.view(-1, ntokens) - total_loss += len(data) * criterion(output_flat, targets).item() - return total_loss / (len(data_source) - 1) - -``` - -## 4.测试动态量化 - -最后,我们可以在模型上调用`torch.quantization.quantize_dynamic`! 特别, - -* 我们指定我们希望对模型中的`nn.LSTM`和`nn.Linear`模块进行量化 -* 我们指定希望将权重转换为`int8`值 - -```py -import torch.quantization - -quantized_model = torch.quantization.quantize_dynamic( - model, {nn.LSTM, nn.Linear}, dtype=torch.qint8 -) -print(quantized_model) - -``` - -出: - -```py -LSTMModel( - (drop): Dropout(p=0.5, inplace=False) - (encoder): Embedding(33278, 512) - (rnn): DynamicQuantizedLSTM(512, 256, num_layers=5, dropout=0.5) - (decoder): DynamicQuantizedLinear(in_features=256, out_features=33278, dtype=torch.qint8, qscheme=torch.per_tensor_affine) -) - -``` - -该模型看起来相同; 这对我们有什么好处? 首先,我们看到模型尺寸显着减小: - -```py -def print_size_of_model(model): - torch.save(model.state_dict(), "temp.p") - print('Size (MB):', os.path.getsize("temp.p")/1e6) - os.remove('temp.p') - -print_size_of_model(model) -print_size_of_model(quantized_model) - -``` - -出: - -```py -Size (MB): 113.945726 -Size (MB): 79.739984 - -``` - -其次,我们看到了更快的推理时间,而评估损失没有差异: - -注意:由于量化模型运行单线程,因此用于单线程比较的线程数为 1。 - -```py -torch.set_num_threads(1) - -def time_model_evaluation(model, test_data): - s = time.time() - loss = evaluate(model, test_data) - elapsed = time.time() - s - print('''loss: {0:.3f}\nelapsed time (seconds): {1:.1f}'''.format(loss, elapsed)) - -time_model_evaluation(model, test_data) -time_model_evaluation(quantized_model, test_data) - -``` - -出: - -```py -loss: 5.167 -elapsed time (seconds): 251.3 -loss: 5.168 -elapsed time (seconds): 166.3 - -``` - -在没有量化的情况下在 MacBook Pro 上本地运行此操作,推理大约需要 200 秒,而量化则只需大约 100 秒。 - -## 总结 - -动态量化可能是减小模型大小的简单方法,而对精度的影响有限。 - -谢谢阅读! 
与往常一样，我们欢迎您提供反馈；如果有任何问题，[请在这里创建一个 ISSUE](https://github.com/pytorch/pytorch/issues)。

**脚本的总运行时间**:(7 分钟 3.126 秒)

[下载 Python 源码:`dynamic_quantization_tutorial.py`](../_downloads/3fa656e39c210acc81b96b164a3da032/dynamic_quantization_tutorial.py)

[下载 Jupyter 笔记本:`dynamic_quantization_tutorial.ipynb`](../_downloads/9387e74b1a614d9ed5642654e06b1728/dynamic_quantization_tutorial.ipynb)

[由 Sphinx 画廊](https://sphinx-gallery.readthedocs.io)生成的画廊 \ No newline at end of file
diff --git a/pytorch/官方教程/56.md b/pytorch/官方教程/56.md deleted file mode 100644 index 9d12bb92..00000000 --- a/pytorch/官方教程/56.md +++ /dev/null @@ -1,444 +0,0 @@
# BERT 上的动态量化(Beta)

> 原文:

提示

为了充分利用本教程，我们建议使用此 [Colab 版本](https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/dynamic_quantization_bert_tutorial.ipynb)。这样您就可以直接运行和试验下面的代码。

**作者**:[Jianyu Huang](https://github.com/jianyuh)

**审核**:[Raghuraman Krishnamoorthi](https://github.com/raghuramank100)

**编辑**:[Jessica Lin](https://github.com/jlin27)

## 简介

在本教程中，我们将动态量化应用在 BERT 模型上，紧跟 [HuggingFace 转换器示例](https://github.com/huggingface/transformers)中的 BERT 模型。通过这一循序渐进的过程，我们将演示如何把 BERT 这样著名的最新模型转换为动态量化模型。

* BERT，即来自转换器的双向编码器表示(Bidirectional Encoder Representations from Transformers)，是一种预训练语言表示的新方法，在许多常见的自然语言处理(NLP)任务(例如问答、文本分类等)上取得了最先进的结果。[原始论文可以在此处找到](https://arxiv.org/pdf/1810.04805.pdf)。
* PyTorch 中的动态量化支持将浮点模型转换为具有静态`int8`或`float16`数据类型权重和动态量化激活的量化模型。当权重量化为`int8`时，激活(每批)被动态量化为`int8`。在 PyTorch 中，我们有 [`torch.quantization.quantize_dynamic` API](https://pytorch.org/docs/stable/quantization.html#torch.quantization.quantize_dynamic)，该 API 用仅对权重做动态量化的版本替换指定的模块，并输出量化后的模型。
* 我们在[通用语言理解评估基准(GLUE)](https://gluebenchmark.com/)中的 [Microsoft Research Paraphrase 语料库(MRPC)任务](https://www.microsoft.com/en-us/download/details.aspx?id=52398)上演示准确率和推理性能结果。MRPC(Dolan 和 Brockett，2005 年)是从在线新闻源中自动提取的句子对语料库，带有人工标注，说明每对句子在语义上是否等效。由于类别不平衡(正例为 68%，负例为 32%)，我们遵循常规做法并报告 [F1 得分](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html)。MRPC 是用于句子对分类的常见 NLP 任务，如下所示。

![../_img/bert.png](img/b43b70d8a6eef9ea4f75867b5e83b483.png)

## 1.设置

### 1.1 安装 PyTorch 和 HuggingFace 转换器

要开始本教程，首先请遵循 [PyTorch](https://github.com/pytorch/pytorch/#installation) 和 [HuggingFace Github 仓库](https://github.com/huggingface/transformers#installation)中的安装说明。此外，我们还将安装 [scikit-learn](https://github.com/scikit-learn/scikit-learn) 包，因为我们将复用其内置的 F1 分数计算辅助函数。

```py
pip install scikit-learn
pip install transformers

```

由于我们将使用 PyTorch 的 Beta 版功能，因此建议安装最新版本的 `torch` 和 `torchvision`。[您可以在此处找到有关本地安装的最新说明](https://pytorch.org/get-started/locally/)。例如，要在 Mac 上安装:

```py
yes y | pip uninstall torch torchvision
yes y | pip install --pre torch -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html

```

### 1.2 导入必要的模块

在这一步中，我们将导入本教程所需的 Python 模块。

```py
from __future__ import absolute_import, division, print_function

import logging
import numpy as np
import os
import random
import sys
import time
import torch

from argparse import Namespace
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset)
from tqdm import tqdm
from transformers import (BertConfig, BertForSequenceClassification, BertTokenizer,)
from transformers import glue_compute_metrics as compute_metrics
from transformers import glue_output_modes as output_modes
from transformers import glue_processors as processors
from transformers import glue_convert_examples_to_features as convert_examples_to_features

# Setup logging
logger =
logging.getLogger(__name__) -logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.WARN) - -logging.getLogger("transformers.modeling_utils").setLevel( - logging.WARN) # Reduce logging - -print(torch.__version__) - -``` - -我们设置线程数以比较 FP32 和 INT8 性能之间的单线程性能。 在本教程的最后,用户可以通过使用右侧并行后端构建 PyTorch 来设置其他线程数量。 - -```py -torch.set_num_threads(1) -print(torch.__config__.parallel_info()) - -``` - -### 1.3 了解辅助函数 - -助手函数内置在转换器库中。 我们主要使用以下辅助函数:一个用于将文本示例转换为特征向量的函数; 另一个用于测量预测结果的 F1 分数。 - -[`gum_convert_examples_to_features`](https://github.com/huggingface/transformers/blob/master/transformers/data/processors/glue.py)函数将文本转换为输入特征: - -* 标记输入序列; -* 在开头插入`[CLS]`; -* 在第一句和第二句之间并在最后插入`[SEP]`; -* 生成标记类型 ID,以指示标记是属于第一序列还是第二序列。 - -[`gum_compute_metrics`](https://github.com/huggingface/transformers/blob/master/transformers/data/processors/glue.py)函数的计算指标为 [F1 得分](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html),可以将其解释为精度和召回率的加权平均值,其中 F1 得分最佳值为 1,最差值为 0。精度和召回率对 F1 得分的相对贡献相等。 - -* F1 分数的公式为: - -![](img/tex56-1.gif) - -### 1.4 下载数据集 - -在运行 MRPC 任务之前,我们通过运行[此脚本](https://gist.github.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e)并下载 [GLUE 数据](https://gluebenchmark.com/tasks)并将其解压缩到目录`glue_data`中。 - -```py -python download_glue_data.py --data_dir='glue_data' --tasks='MRPC' - -``` - -## 2.微调 BERT 模型 - -BERT 的精神是预训练语言表示形式,然后以最小的任务相关参数微调各种任务上的深层双向表示形式,并获得最新的结果。 在本教程中,我们将专注于对预训练的 BERT 模型进行微调,以对 MRPC 任务上的语义等效句子对进行分类。 - -要为 MRPC 任务微调预训练的 BERT 模型(HuggingFace 转换器中的`bert-base-uncased`模型),可以按照[示例](https://github.com/huggingface/transformers/tree/master/examples#mrpc)中的命令进行操作: - -```py -export GLUE_DIR=./glue_data -export TASK_NAME=MRPC -export OUT_DIR=./$TASK_NAME/ -python ./run_glue.py \ - --model_type bert \ - --model_name_or_path bert-base-uncased \ - --task_name $TASK_NAME \ - --do_train \ - --do_eval \ - --do_lower_case \ - --data_dir $GLUE_DIR/$TASK_NAME \ - --max_seq_length 128 \ - --per_gpu_eval_batch_size=8 \ - --per_gpu_train_batch_size=8 \ - --learning_rate 2e-5 \ - --num_train_epochs 3.0 \ - --save_steps 100000 \ - --output_dir $OUT_DIR - -``` - -[我们在此处为 MRPC 任务提供了经过微调的 BERT 模型](https://download.pytorch.org/tutorial/MRPC.zip)。 为了节省时间,您可以将模型文件(约 400 MB)直接下载到本地文件夹`$OUT_DIR`中。 - -### 2.1 设置全局配置 - -在这里,我们设置了用于在动态量化之前和之后评估微调 BERT 模型的全局配置。 - -```py -configs = Namespace() - -# The output directory for the fine-tuned model, $OUT_DIR. -configs.output_dir = "./MRPC/" - -# The data directory for the MRPC task in the GLUE benchmark, $GLUE_DIR/$TASK_NAME. -configs.data_dir = "./glue_data/MRPC" - -# The model name or path for the pre-trained model. -configs.model_name_or_path = "bert-base-uncased" -# The maximum length of an input sequence -configs.max_seq_length = 128 - -# Prepare GLUE task. -configs.task_name = "MRPC".lower() -configs.processor = processors[configs.task_name]() -configs.output_mode = output_modes[configs.task_name] -configs.label_list = configs.processor.get_labels() -configs.model_type = "bert".lower() -configs.do_lower_case = True - -# Set the device, batch size, topology, and caching flags. -configs.device = "cpu" -configs.per_gpu_eval_batch_size = 8 -configs.n_gpu = 0 -configs.local_rank = -1 -configs.overwrite_cache = False - -# Set random seed for reproducibility. 
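# Seeding Python's random module, NumPy and torch together (see set_seed
# below) keeps the FP32 and INT8 evaluation runs comparable across executions.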
-def set_seed(seed): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) -set_seed(42) - -``` - -### 2.2 加载经过微调的 BERT 模型 - -我们从`configs.output_dir`加载标记器和经过微调的 BERT 序列分类器模型(FP32)。 - -```py -tokenizer = BertTokenizer.from_pretrained( - configs.output_dir, do_lower_case=configs.do_lower_case) - -model = BertForSequenceClassification.from_pretrained(configs.output_dir) -model.to(configs.device) - -``` - -### 2.3 定义分词和评估函数 - -我们重用了 [Huggingface](https://github.com/huggingface/transformers/blob/master/examples/run_glue.py) 中的分词和评估函数。 - -```py -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -def evaluate(args, model, tokenizer, prefix=""): - # Loop to handle MNLI double evaluation (matched, mis-matched) - eval_task_names = ("mnli", "mnli-mm") if args.task_name == "mnli" else (args.task_name,) - eval_outputs_dirs = (args.output_dir, args.output_dir + '-MM') if args.task_name == "mnli" else (args.output_dir,) - - results = {} - for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs): - eval_dataset = load_and_cache_examples(args, eval_task, tokenizer, evaluate=True) - - if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]: - os.makedirs(eval_output_dir) - - args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu) - # Note that DistributedSampler samples randomly - eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler(eval_dataset) - eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) - - # multi-gpu eval - if args.n_gpu > 1: - model = torch.nn.DataParallel(model) - - # Eval! 
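        # The loop below runs each batch under torch.no_grad(), accumulates
        # logits and labels across batches, and finally hands them to
        # compute_metrics (the F1 score for MRPC) once the whole split is done.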
- logger.info("***** Running evaluation {} *****".format(prefix)) - logger.info(" Num examples = %d", len(eval_dataset)) - logger.info(" Batch size = %d", args.eval_batch_size) - eval_loss = 0.0 - nb_eval_steps = 0 - preds = None - out_label_ids = None - for batch in tqdm(eval_dataloader, desc="Evaluating"): - model.eval() - batch = tuple(t.to(args.device) for t in batch) - - with torch.no_grad(): - inputs = {'input_ids': batch[0], - 'attention_mask': batch[1], - 'labels': batch[3]} - if args.model_type != 'distilbert': - inputs['token_type_ids'] = batch[2] if args.model_type in ['bert', 'xlnet'] else None # XLM, DistilBERT and RoBERTa don't use segment_ids - outputs = model(**inputs) - tmp_eval_loss, logits = outputs[:2] - - eval_loss += tmp_eval_loss.mean().item() - nb_eval_steps += 1 - if preds is None: - preds = logits.detach().cpu().numpy() - out_label_ids = inputs['labels'].detach().cpu().numpy() - else: - preds = np.append(preds, logits.detach().cpu().numpy(), axis=0) - out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0) - - eval_loss = eval_loss / nb_eval_steps - if args.output_mode == "classification": - preds = np.argmax(preds, axis=1) - elif args.output_mode == "regression": - preds = np.squeeze(preds) - result = compute_metrics(eval_task, preds, out_label_ids) - results.update(result) - - output_eval_file = os.path.join(eval_output_dir, prefix, "eval_results.txt") - with open(output_eval_file, "w") as writer: - logger.info("***** Eval results {} *****".format(prefix)) - for key in sorted(result.keys()): - logger.info(" %s = %s", key, str(result[key])) - writer.write("%s = %s\n" % (key, str(result[key]))) - - return results - -def load_and_cache_examples(args, task, tokenizer, evaluate=False): - if args.local_rank not in [-1, 0] and not evaluate: - torch.distributed.barrier() # Make sure only the first process in distributed training process the dataset, and the others will use the cache - - processor = processors[task]() - output_mode = output_modes[task] - # Load data features from cache or dataset file - cached_features_file = os.path.join(args.data_dir, 'cached_{}_{}_{}_{}'.format( - 'dev' if evaluate else 'train', - list(filter(None, args.model_name_or_path.split('/'))).pop(), - str(args.max_seq_length), - str(task))) - if os.path.exists(cached_features_file) and not args.overwrite_cache: - logger.info("Loading features from cached file %s", cached_features_file) - features = torch.load(cached_features_file) - else: - logger.info("Creating features from dataset file at %s", args.data_dir) - label_list = processor.get_labels() - if task in ['mnli', 'mnli-mm'] and args.model_type in ['roberta']: - # HACK(label indices are swapped in RoBERTa pretrained model) - label_list[1], label_list[2] = label_list[2], label_list[1] - examples = processor.get_dev_examples(args.data_dir) if evaluate else processor.get_train_examples(args.data_dir) - features = convert_examples_to_features(examples, - tokenizer, - label_list=label_list, - max_length=args.max_seq_length, - output_mode=output_mode, - pad_on_left=bool(args.model_type in ['xlnet']), # pad on the left for xlnet - pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0], - pad_token_segment_id=4 if args.model_type in ['xlnet'] else 0, - ) - if args.local_rank in [-1, 0]: - logger.info("Saving features into cached file %s", cached_features_file) - torch.save(features, cached_features_file) - - if args.local_rank == 0 and not evaluate: - torch.distributed.barrier() # Make sure only 
the first process in distributed training process the dataset, and the others will use the cache - - # Convert to Tensors and build dataset - all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) - all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) - all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) - if output_mode == "classification": - all_labels = torch.tensor([f.label for f in features], dtype=torch.long) - elif output_mode == "regression": - all_labels = torch.tensor([f.label for f in features], dtype=torch.float) - - dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_labels) - return dataset - -``` - -## 3.应用动态量化 - -我们在模型上调用`torch.quantization.quantize_dynamic`,将动态量化应用于 HuggingFace BERT 模型。 特别, - -* 我们指定要对模型中的`torch.nn.Linear`模块进行量化; -* 我们指定希望将权重转换为量化的`int8`值。 - -```py -quantized_model = torch.quantization.quantize_dynamic( - model, {torch.nn.Linear}, dtype=torch.qint8 -) -print(quantized_model) - -``` - -### 3.1 检查模型大小 - -首先,检查模型尺寸。 我们可以观察到模型大小的显着减小(FP32 总大小:438 MB; INT8 总大小:181 MB): - -```py -def print_size_of_model(model): - torch.save(model.state_dict(), "temp.p") - print('Size (MB):', os.path.getsize("temp.p")/1e6) - os.remove('temp.p') - -print_size_of_model(model) -print_size_of_model(quantized_model) - -``` - -本教程中使用的 BERT 模型(`bert-base-uncased`)的词汇量`V`为 30522。在嵌入量为 768 的情况下,单词嵌入表的总大小为`~4 (Bytes/FP32) * 30522 * 768 = 90 MB` 。 因此,借助量化,非嵌入表部分的模型大小从 350 MB(FP32 模型)减少到 90 MB(INT8 模型)。 - -### 3.2 评估推理准确率和时间 - -接下来,我们比较一下动态量化后原始 FP32 模型和 INT8 模型之间的推断时间以及评估精度。 - -```py -def time_model_evaluation(model, configs, tokenizer): - eval_start_time = time.time() - result = evaluate(configs, model, tokenizer, prefix="") - eval_end_time = time.time() - eval_duration_time = eval_end_time - eval_start_time - print(result) - print("Evaluate total time (seconds): {0:.1f}".format(eval_duration_time)) - -# Evaluate the original FP32 BERT model -time_model_evaluation(model, configs, tokenizer) - -# Evaluate the INT8 BERT model after the dynamic quantization -time_model_evaluation(quantized_model, configs, tokenizer) - -``` - -在 MacBook Pro 上本地运行此程序,无需进行量化,推理(对于 MRPC 数据集中的所有 408 个示例)大约需要 160 秒,而进行量化则只需大约 90 秒。 我们总结了在 Macbook Pro 上运行量化 BERT 模型推断的结果,如下所示: - -```py -| Prec | F1 score | Model Size | 1 thread | 4 threads | -| FP32 | 0.9019 | 438 MB | 160 sec | 85 sec | -| INT8 | 0.902 | 181 MB | 90 sec | 46 sec | - -``` - -在 MRPC 任务的微调 BERT 模型上应用训练后动态量化后,我们的 F1 分数准确率为 0.6%。 作为比较,在[最新论文](https://arxiv.org/pdf/1910.06188.pdf)(表 1)中,通过应用训练后动态量化,可以达到 0.8788;通过应用量化感知训练,可以达到 0.8956。 主要区别在于我们在 PyTorch 中支持非对称量化,而该论文仅支持对称量化。 - -请注意,在本教程中,为了进行单线程比较,我们将线程数设置为 1。 我们还为这些量化的 INT8 运算符支持运算内并行化。 用户现在可以通过`torch.set_num_threads(N)`设置多线程(`N`是内部运算并行线程的数量)。 启用帧内并行支持的一项初步要求是使用正确的[后端](https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html#build-options)(例如 OpenMP,Native 或 TBB)构建 PyTorch。 您可以使用`torch.__config__.parallel_info()`检查并行化设置。 在使用 PyTorch 和本机后端进行并行化的同一台 MacBook Pro 上,我们可以花大约 46 秒的时间来处理 MRPC 数据集的评估。 - -### 3.3 序列化量化模型 - -跟踪模型后,我们可以使用`torch.jit.save`序列化并保存量化模型,以备将来使用。 - -```py -input_ids = ids_tensor([8, 128], 2) -token_type_ids = ids_tensor([8, 128], 2) -attention_mask = ids_tensor([8, 128], vocab_size=2) -dummy_input = (input_ids, attention_mask, token_type_ids) -traced_model = torch.jit.trace(quantized_model, dummy_input) -torch.jit.save(traced_model, "bert_traced_eager_quant.pt") - -``` - -要加载量化模型,我们可以使用`torch.jit.load` - -```py -loaded_quantized_model = 
torch.jit.load("bert_traced_eager_quant.pt")

```

## 总结

在本教程中，我们演示了如何将 BERT 等著名的最新 NLP 模型转换为动态量化模型。动态量化可以减小模型的大小，而对准确率的影响有限。

谢谢阅读! 与往常一样，我们欢迎您提供反馈，因此，如果有任何问题，[请在这里创建一个 ISSUE](https://github.com/pytorch/pytorch/issues)。

## 参考文献

[1] J. Devlin, M. Chang, K. Lee and K. Toutanova. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding (2018).

[2] HuggingFace Transformers.

[3] O. Zafrir, G. Boudoukh, P. Izsak, and M. Wasserblat. Q8BERT: Quantized 8bit BERT (2019). \ No newline at end of file
diff --git a/pytorch/官方教程/57.md b/pytorch/官方教程/57.md deleted file mode 100644 index 4f3cd2bb..00000000 --- a/pytorch/官方教程/57.md +++ /dev/null @@ -1,813 +0,0 @@
# PyTorch 中使用 Eager 模式的静态量化(beta)

> 原文:

**作者**: [Raghuraman Krishnamoorthi](https://github.com/raghuramank100)

**编辑**:[Seth Weidman](https://github.com/SethHWeidman/)

本教程说明了如何进行训练后静态量化，并介绍两种可以进一步提高模型准确率的更先进的技术:每通道量化和量化感知训练。请注意，目前仅支持 CPU 量化，因此在本教程中我们将不使用 GPU/CUDA。

在本教程结束时，您将看到 PyTorch 中的量化如何在显著减小模型大小的同时提高推理速度。此外，您还将看到如何轻松应用[此处展示的一些高级量化技术](https://arxiv.org/abs/1806.08342)，使量化对模型准确率的影响大大减小。

警告:我们使用了许多来自其他 PyTorch 仓库的样板代码，例如定义`MobileNetV2`模型架构、定义数据加载器等等。我们当然鼓励您阅读这些代码; 但是如果只想使用量化功能，请直接跳到“4. 训练后静态量化”部分。

我们将从必要的导入开始:

```py
import numpy as np
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision.transforms as transforms
import os
import time
import sys
import torch.quantization

# # Setup warnings
import warnings
warnings.filterwarnings(
    action='ignore',
    category=DeprecationWarning,
    module=r'.*'
)
warnings.filterwarnings(
    action='default',
    module=r'torch.quantization'
)

# Specify random seed for repeatable results
torch.manual_seed(191009)

```

## 1.模型架构

我们首先定义 MobileNetV2 模型架构，其中包含几处为实现量化而做的值得注意的修改:

* 用 `nn.quantized.FloatFunctional` 替换加法运算(`torch.add`)
* 在网络的开头和结尾处插入 `QuantStub` 和 `DeQuantStub`
* 用 ReLU 替换 ReLU6

注意:此代码取自[此处](https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenet.py)。

```py
from torch.quantization import QuantStub, DeQuantStub

def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
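    # Worked example: _make_divisible(10, 8) first rounds to
    # int(10 + 4) // 8 * 8 = 8; since 8 < 0.9 * 10, the branch below bumps
    # the result to 16, so the rounded value never ends up >10% below v.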
- if new_v < 0.9 * v: - new_v += divisor - return new_v - -class ConvBNReLU(nn.Sequential): - def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): - padding = (kernel_size - 1) // 2 - super(ConvBNReLU, self).__init__( - nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), - nn.BatchNorm2d(out_planes, momentum=0.1), - # Replace with ReLU - nn.ReLU(inplace=False) - ) - -class InvertedResidual(nn.Module): - def __init__(self, inp, oup, stride, expand_ratio): - super(InvertedResidual, self).__init__() - self.stride = stride - assert stride in [1, 2] - - hidden_dim = int(round(inp * expand_ratio)) - self.use_res_connect = self.stride == 1 and inp == oup - - layers = [] - if expand_ratio != 1: - # pw - layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) - layers.extend([ - # dw - ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), - # pw-linear - nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), - nn.BatchNorm2d(oup, momentum=0.1), - ]) - self.conv = nn.Sequential(*layers) - # Replace torch.add with floatfunctional - self.skip_add = nn.quantized.FloatFunctional() - - def forward(self, x): - if self.use_res_connect: - return self.skip_add.add(x, self.conv(x)) - else: - return self.conv(x) - -class MobileNetV2(nn.Module): - def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8): - """ - MobileNet V2 main class - - Args: - num_classes (int): Number of classes - width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount - inverted_residual_setting: Network structure - round_nearest (int): Round the number of channels in each layer to be a multiple of this number - Set to 1 to turn off rounding - """ - super(MobileNetV2, self).__init__() - block = InvertedResidual - input_channel = 32 - last_channel = 1280 - - if inverted_residual_setting is None: - inverted_residual_setting = [ - # t, c, n, s - [1, 16, 1, 1], - [6, 24, 2, 2], - [6, 32, 3, 2], - [6, 64, 4, 2], - [6, 96, 3, 1], - [6, 160, 3, 2], - [6, 320, 1, 1], - ] - - # only check the first element, assuming user knows t,c,n,s are required - if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: - raise ValueError("inverted_residual_setting should be non-empty " - "or a 4-element list, got {}".format(inverted_residual_setting)) - - # building first layer - input_channel = _make_divisible(input_channel * width_mult, round_nearest) - self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) - features = [ConvBNReLU(3, input_channel, stride=2)] - # building inverted residual blocks - for t, c, n, s in inverted_residual_setting: - output_channel = _make_divisible(c * width_mult, round_nearest) - for i in range(n): - stride = s if i == 0 else 1 - features.append(block(input_channel, output_channel, stride, expand_ratio=t)) - input_channel = output_channel - # building last several layers - features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) - # make it nn.Sequential - self.features = nn.Sequential(*features) - self.quant = QuantStub() - self.dequant = DeQuantStub() - # building classifier - self.classifier = nn.Sequential( - nn.Dropout(0.2), - nn.Linear(self.last_channel, num_classes), - ) - - # weight initialization - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out') - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, 
nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.normal_(m.weight, 0, 0.01) - nn.init.zeros_(m.bias) - - def forward(self, x): - - x = self.quant(x) - - x = self.features(x) - x = x.mean([2, 3]) - x = self.classifier(x) - x = self.dequant(x) - return x - - # Fuse Conv+BN and Conv+BN+Relu modules prior to quantization - # This operation does not change the numerics - def fuse_model(self): - for m in self.modules(): - if type(m) == ConvBNReLU: - torch.quantization.fuse_modules(m, ['0', '1', '2'], inplace=True) - if type(m) == InvertedResidual: - for idx in range(len(m.conv)): - if type(m.conv[idx]) == nn.Conv2d: - torch.quantization.fuse_modules(m.conv, [str(idx), str(idx + 1)], inplace=True) - -``` - -## 2.辅助函数 - -接下来,我们定义一些助手函数以帮助模型评估。 这些主要来自[这里](https://github.com/pytorch/examples/blob/master/imagenet/main.py)。 - -```py -class AverageMeter(object): - """Computes and stores the average and current value""" - def __init__(self, name, fmt=':f'): - self.name = name - self.fmt = fmt - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - -def evaluate(model, criterion, data_loader, neval_batches): - model.eval() - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - cnt = 0 - with torch.no_grad(): - for image, target in data_loader: - output = model(image) - loss = criterion(output, target) - cnt += 1 - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - print('.', end = '') - top1.update(acc1[0], image.size(0)) - top5.update(acc5[0], image.size(0)) - if cnt >= neval_batches: - return top1, top5 - - return top1, top5 - -def load_model(model_file): - model = MobileNetV2() - state_dict = torch.load(model_file) - model.load_state_dict(state_dict) - model.to('cpu') - return model - -def print_size_of_model(model): - torch.save(model.state_dict(), "temp.p") - print('Size (MB):', os.path.getsize("temp.p")/1e6) - os.remove('temp.p') - -``` - -## 3.定义数据集和数据加载器 - -作为最后的主要设置步骤,我们为训练和测试集定义了数据加载器。 - -### ImageNet 数据 - -我们为本教程创建的特定数据集仅包含来自 ImageNet 数据的 1000 张图像,每个类别都有一张(此数据集的大小刚好超过 250 MB,可以相对轻松地下载)。 此自定义数据集的 URL 为: - -```py -https://s3.amazonaws.com/pytorch-tutorial-assets/imagenet_1k.zip - -``` - -要使用 Python 在本地下载此数据,可以使用: - -```py -import requests - -url = 'https://s3.amazonaws.com/pytorch-tutorial-assets/imagenet_1k.zip` -filename = '~/Downloads/imagenet_1k_data.zip' - -r = requests.get(url) - -with open(filename, 'wb') as f: - f.write(r.content) - -``` - -为了运行本教程,我们下载了这些数据,并使用 [Makefile](https://github.com/pytorch/tutorials/blob/master/Makefile) 中的这些行将其移到正确的位置。 - -另一方面,要使用整个 ImageNet 数据集运行本教程中的代码,[可以在此之后使用`torchvision`下载数据](https://pytorch.org/docs/stable/torchvision/datasets.html#imagenet)。 例如,要下载训练集并对其进行一些标准转换,可以使用: - -```py -import torchvision 
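# Note: recent torchvision releases no longer support download=True for
# ImageNet; the dataset archives must already be present on disk.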
-import torchvision.transforms as transforms - -imagenet_dataset = torchvision.datasets.ImageNet( - '~/.data/imagenet', - split='train', - download=True, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]), - ]) - -``` - -下载完数据后,我们在下面显示了一些函数,这些函数定义了将用于读取此数据的数据加载器。 这些函数主要来自[此处](https://github.com/pytorch/vision/blob/master/references/detection/train.py)。 - -```py -def prepare_data_loaders(data_path): - - traindir = os.path.join(data_path, 'train') - valdir = os.path.join(data_path, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - dataset = torchvision.datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - dataset_test = torchvision.datasets.ImageFolder( - valdir, - transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])) - - train_sampler = torch.utils.data.RandomSampler(dataset) - test_sampler = torch.utils.data.SequentialSampler(dataset_test) - - data_loader = torch.utils.data.DataLoader( - dataset, batch_size=train_batch_size, - sampler=train_sampler) - - data_loader_test = torch.utils.data.DataLoader( - dataset_test, batch_size=eval_batch_size, - sampler=test_sampler) - - return data_loader, data_loader_test - -``` - -接下来,我们将加载经过预先​​训练的 MobileNetV2 模型。 [我们在这里提供用于从`torchvision`中下载数据的 URL](https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenet.py#L9)。 - -```py -data_path = 'data/imagenet_1k' -saved_model_dir = 'data/' -float_model_file = 'mobilenet_pretrained_float.pth' -scripted_float_model_file = 'mobilenet_quantization_scripted.pth' -scripted_quantized_model_file = 'mobilenet_quantization_scripted_quantized.pth' - -train_batch_size = 30 -eval_batch_size = 30 - -data_loader, data_loader_test = prepare_data_loaders(data_path) -criterion = nn.CrossEntropyLoss() -float_model = load_model(saved_model_dir + float_model_file).to('cpu') - -``` - -接下来,我们将“融合模块”; 通过节省内存访问量,这可以使模型更快,同时还可以提高数值精度。 尽管这可以用于任何模型,但在量化模型中尤为常见。 - -```py -print('\n Inverted Residual Block: Before fusion \n\n', float_model.features[1].conv) -float_model.eval() - -# Fuses modules -float_model.fuse_model() - -# Note fusion of Conv+BN+Relu and Conv+Relu -print('\n Inverted Residual Block: After fusion\n\n',float_model.features[1].conv) - -``` - -出: - -```py -Inverted Residual Block: Before fusion - - Sequential( - (0): ConvBNReLU( - (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False) - (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (2): ReLU() - ) - (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False) - (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) -) - - Inverted Residual Block: After fusion - - Sequential( - (0): ConvBNReLU( - (0): ConvReLU2d( - (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32) - (1): ReLU() - ) - (1): Identity() - (2): Identity() - ) - (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1)) - (2): Identity() -) - -``` - -最后,为了获得“基准”精度,让我们看看带有融合模块的未量化模型的精度 - -```py -num_eval_batches = 10 - -print("Size of baseline model") -print_size_of_model(float_model) - -top1, top5 = evaluate(float_model, criterion, data_loader_test, 
neval_batches=num_eval_batches)
print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))
torch.jit.save(torch.jit.script(float_model), saved_model_dir + scripted_float_model_file)

```

出:

```py
Size of baseline model
Size (MB): 13.999657
..........Evaluation accuracy on 300 images, 77.67

```

我们看到在 300 张图像上准确率达到约 78%，考虑到我们的模型只有 14.0 MB，这对 ImageNet 来说已经是一个坚实的基准。

这将是我们比较的基准。接下来，让我们尝试不同的量化方法。

## 4.训练后的静态量化

训练后静态量化不仅像动态量化那样将权重从`float`转换为`int`，还执行一个额外的步骤:首先通过网络馈送若干批数据，并计算不同激活的结果分布(具体来说，这是通过在记录此数据的不同位置插入观察者模块来完成的)。然后使用这些分布来确定在推理时如何具体量化各个激活(一种简单的技术是将整个激活范围简单地划分为 256 个级别，但我们也支持更复杂的方法)。重要的是，这一额外步骤使我们能够在运算之间直接传递量化值，而不必在每次运算之间把这些值先转换为浮点数再转换回整数，从而显著提高速度。

```py
num_calibration_batches = 10

myModel = load_model(saved_model_dir + float_model_file).to('cpu')
myModel.eval()

# Fuse Conv, bn and relu
myModel.fuse_model()

# Specify quantization configuration
# Start with simple min/max range estimation and per-tensor quantization of weights
myModel.qconfig = torch.quantization.default_qconfig
print(myModel.qconfig)
torch.quantization.prepare(myModel, inplace=True)

# Calibrate first
print('Post Training Quantization Prepare: Inserting Observers')
print('\n Inverted Residual Block:After observer insertion \n\n', myModel.features[1].conv)

# Calibrate with the training set
evaluate(myModel, criterion, data_loader, neval_batches=num_calibration_batches)
print('Post Training Quantization: Calibration done')

# Convert to quantized model
torch.quantization.convert(myModel, inplace=True)
print('Post Training Quantization: Convert done')
print('\n Inverted Residual Block: After fusion and quantization, note fused modules: \n\n',myModel.features[1].conv)

print("Size of model after quantization")
print_size_of_model(myModel)

top1, top5 = evaluate(myModel, criterion, data_loader_test, neval_batches=num_eval_batches)
print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg))

```

出:

```py
QConfig(activation=functools.partial(<class 'torch.quantization.observer.MinMaxObserver'>, reduce_range=True), weight=functools.partial(<class 'torch.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric))
Post Training Quantization Prepare: Inserting Observers

 Inverted Residual Block:After observer insertion

 Sequential(
  (0): ConvBNReLU(
    (0): ConvReLU2d(
      (0): Conv2d(
        32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32
        (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
      )
      (1): ReLU(
        (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
      )
    )
    (1): Identity()
    (2): Identity()
  )
  (1): Conv2d(
    32, 16, kernel_size=(1, 1), stride=(1, 1)
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (2): Identity()
)
..........Post Training Quantization: Calibration done
Post Training Quantization: Convert done

 Inverted Residual Block: After fusion and quantization, note fused modules:

 Sequential(
  (0): ConvBNReLU(
    (0): QuantizedConvReLU2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.1516050398349762, zero_point=0, padding=(1, 1), groups=32)
    (1): Identity()
    (2): Identity()
  )
  (1): QuantizedConv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), scale=0.17719413340091705, zero_point=63)
  (2): Identity()
)
Size of model after quantization
Size (MB): 3.631847
..........Evaluation accuracy on 300 images, 66.67

```

对于这个量化模型，我们发现在这 300 张相同的图像上，准确率降到了约 67%(与上面输出中的 66.67 一致)。不过，我们确实将模型的大小减小到了 3.6 MB 以下，几乎缩小为原来的四分之一。

此外，我们可以通过使用不同的量化配置来显著提高准确率。我们使用推荐的配置对
x86 架构进行量化,重复相同的练习。 此配置执行以下操作: - -* 量化每个通道的权重 -* 使用直方图观察器,该直方图观察器收集激活的直方图,然后以最佳方式选择量化参数。 - -```py -per_channel_quantized_model = load_model(saved_model_dir + float_model_file) -per_channel_quantized_model.eval() -per_channel_quantized_model.fuse_model() -per_channel_quantized_model.qconfig = torch.quantization.get_default_qconfig('fbgemm') -print(per_channel_quantized_model.qconfig) - -torch.quantization.prepare(per_channel_quantized_model, inplace=True) -evaluate(per_channel_quantized_model,criterion, data_loader, num_calibration_batches) -torch.quantization.convert(per_channel_quantized_model, inplace=True) -top1, top5 = evaluate(per_channel_quantized_model, criterion, data_loader_test, neval_batches=num_eval_batches) -print('Evaluation accuracy on %d images, %2.2f'%(num_eval_batches * eval_batch_size, top1.avg)) -torch.jit.save(torch.jit.script(per_channel_quantized_model), saved_model_dir + scripted_quantized_model_file) - -``` - -出: - -```py -QConfig(activation=functools.partial(, reduce_range=True), weight=functools.partial(, dtype=torch.qint8, qscheme=torch.per_channel_symmetric)) -....................Evaluation accuracy on 300 images, 74.67 - -``` - -仅更改这种量化配置方法,就可以将准确率提高到 76% 以上! 尽管如此,这仍比上述 78% 的基准差 1-2%。 因此,让我们尝试量化意识的训练。 - -## 5.量化感知的训练 - -量化感知的训练(QAT)是通常导致最高准确率的量化方法。 使用 QAT,在训练的正向和反向过程中,所有权重和激活都被“伪量化”:即,浮点值四舍五入以模拟`int8`值,但所有计算仍使用浮点数完成。 因此,在训练过程中进行所有权重调整,同时“意识到”该模型将最终被量化的事实。 因此,在量化之后,此方法通常会比动态量化或训练后静态量化产生更高的精度。 - -实际执行 QAT 的总体工作流程与之前非常相似: - -* 我们可以使用与以前相同的模型:量化感知的训练不需要额外的准备。 -* 我们需要使用`qconfig`来指定要在权重和激活之后插入哪种伪量化,而不是指定观察者 - -我们首先定义一个训练函数: - -```py -def train_one_epoch(model, criterion, optimizer, data_loader, device, ntrain_batches): - model.train() - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - avgloss = AverageMeter('Loss', '1.5f') - - cnt = 0 - for image, target in data_loader: - start_time = time.time() - print('.', end = '') - cnt += 1 - image, target = image.to(device), target.to(device) - output = model(image) - loss = criterion(output, target) - optimizer.zero_grad() - loss.backward() - optimizer.step() - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - top1.update(acc1[0], image.size(0)) - top5.update(acc5[0], image.size(0)) - avgloss.update(loss, image.size(0)) - if cnt >= ntrain_batches: - print('Loss', avgloss.avg) - - print('Training: * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}' - .format(top1=top1, top5=top5)) - return - - print('Full imagenet train set: * Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f}' - .format(top1=top1, top5=top5)) - return - -``` - -我们像以前一样融合模块 - -```py -qat_model = load_model(saved_model_dir + float_model_file) -qat_model.fuse_model() - -optimizer = torch.optim.SGD(qat_model.parameters(), lr = 0.0001) -qat_model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm') - -``` - -最后,`prepare_qat`执行“伪量化”,为量化感知训练准备模型 - -```py -torch.quantization.prepare_qat(qat_model, inplace=True) -print('Inverted Residual Block: After preparation for QAT, note fake-quantization modules \n',qat_model.features[1].conv) - -``` - -出: - -```py -Inverted Residual Block: After preparation for QAT, note fake-quantization modules - Sequential( - (0): ConvBNReLU( - (0): ConvBnReLU2d( - 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False - (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (weight_fake_quant): FakeQuantize( - fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), quant_min=-128, quant_max=127, 
dtype=torch.qint8, qscheme=torch.per_channel_symmetric, ch_axis=0, scale=tensor([1.]), zero_point=tensor([0]) - (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([])) - ) - (activation_post_process): FakeQuantize( - fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), quant_min=0, quant_max=255, dtype=torch.quint8, qscheme=torch.per_tensor_affine, ch_axis=-1, scale=tensor([1.]), zero_point=tensor([0]) - (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf) - ) - ) - (1): Identity() - (2): Identity() - ) - (1): ConvBn2d( - 32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False - (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) - (weight_fake_quant): FakeQuantize( - fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), quant_min=-128, quant_max=127, dtype=torch.qint8, qscheme=torch.per_channel_symmetric, ch_axis=0, scale=tensor([1.]), zero_point=tensor([0]) - (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([])) - ) - (activation_post_process): FakeQuantize( - fake_quant_enabled=tensor([1], dtype=torch.uint8), observer_enabled=tensor([1], dtype=torch.uint8), quant_min=0, quant_max=255, dtype=torch.quint8, qscheme=torch.per_tensor_affine, ch_axis=-1, scale=tensor([1.]), zero_point=tensor([0]) - (activation_post_process): MovingAverageMinMaxObserver(min_val=inf, max_val=-inf) - ) - ) - (2): Identity() -) - -``` - -高精度训练量化模型需要在推断时对数字进行精确建模。 因此,对于量化感知的训练,我们通过以下方式修改训练循环: - -* 在训练快要结束时切换批量规范以使用运行均值和方差,以更好地匹配推理数字。 -* 我们还冻结了量化器参数(比例和零点),并对权重进行了微调。 - -```py -num_train_batches = 20 - -# Train and check accuracy after each epoch -for nepoch in range(8): - train_one_epoch(qat_model, criterion, optimizer, data_loader, torch.device('cpu'), num_train_batches) - if nepoch > 3: - # Freeze quantizer parameters - qat_model.apply(torch.quantization.disable_observer) - if nepoch > 2: - # Freeze batch norm mean and variance estimates - qat_model.apply(torch.nn.intrinsic.qat.freeze_bn_stats) - - # Check the accuracy after each epoch - quantized_model = torch.quantization.convert(qat_model.eval(), inplace=False) - quantized_model.eval() - top1, top5 = evaluate(quantized_model,criterion, data_loader_test, neval_batches=num_eval_batches) - print('Epoch %d :Evaluation accuracy on %d images, %2.2f'%(nepoch, num_eval_batches * eval_batch_size, top1.avg)) - -``` - -出: - -```py -....................Loss tensor(2.0747, grad_fn=) -Training: * Acc@1 56.167 Acc@5 77.333 -..........Epoch 0 :Evaluation accuracy on 300 images, 77.67 -....................Loss tensor(2.0358, grad_fn=) -Training: * Acc@1 54.833 Acc@5 78.500 -..........Epoch 1 :Evaluation accuracy on 300 images, 77.00 -....................Loss tensor(2.0417, grad_fn=) -Training: * Acc@1 54.667 Acc@5 77.333 -..........Epoch 2 :Evaluation accuracy on 300 images, 74.67 -....................Loss tensor(1.9055, grad_fn=) -Training: * Acc@1 56.833 Acc@5 78.667 -..........Epoch 3 :Evaluation accuracy on 300 images, 76.33 -....................Loss tensor(1.9055, grad_fn=) -Training: * Acc@1 58.167 Acc@5 80.000 -..........Epoch 4 :Evaluation accuracy on 300 images, 77.00 -....................Loss tensor(1.7821, grad_fn=) -Training: * Acc@1 60.500 Acc@5 82.833 -..........Epoch 5 :Evaluation accuracy on 300 images, 76.33 -....................Loss tensor(1.8145, grad_fn=) -Training: * Acc@1 58.833 Acc@5 82.333 
-..........Epoch 6 :Evaluation accuracy on 300 images, 74.33 -....................Loss tensor(1.6930, grad_fn=) -Training: * Acc@1 63.000 Acc@5 81.333 -..........Epoch 7 :Evaluation accuracy on 300 images, 75.67 - -``` - -在这里,我们只对少数几个周期执行量化感知训练。 尽管如此,量化感知的训练在整个 imagenet 数据集上的准确率仍超过 71%,接近浮点精度 71.9%。 - -有关量化感知的训练的更多信息: - -* QAT 是训练后量化技术的超集,可以进行更多调试。 例如,我们可以分析模型的准确率是否受到权重或激活量化的限制。 -* 由于我们使用伪量化来对实际量化算术的数值建模,因此我们还可以在浮点中模拟量化模型的准确率。 -* 我们也可以轻松地模拟训练后量化。 - -### 来自量化的加速 - -最后,让我们确认一下我们上面提到的内容:量化模型实际上执行推理的速度更快吗? 让我们测试一下: - -```py -def run_benchmark(model_file, img_loader): - elapsed = 0 - model = torch.jit.load(model_file) - model.eval() - num_batches = 5 - # Run the scripted model on a few batches of images - for i, (images, target) in enumerate(img_loader): - if i < num_batches: - start = time.time() - output = model(images) - end = time.time() - elapsed = elapsed + (end-start) - else: - break - num_images = images.size()[0] * num_batches - - print('Elapsed time: %3.0f ms' % (elapsed/num_images*1000)) - return elapsed - -run_benchmark(saved_model_dir + scripted_float_model_file, data_loader_test) - -run_benchmark(saved_model_dir + scripted_quantized_model_file, data_loader_test) - -``` - -出: - -```py -Elapsed time: 7 ms -Elapsed time: 4 ms - -``` - -在 MacBook Pro 上本地运行此程序,常规模型的运行时间为 61 毫秒,而量化模型的运行时间仅为 20 毫秒,这说明了量化模型与浮点模型相比,典型的 2-4 倍加速。 - -## 总结 - -在本教程中,我们展示了两种量化方法-训练后静态量化和量化感知训练-描述它们在“幕后”进行的操作以及如何在 PyTorch 中使用它们。 - -谢谢阅读! 与往常一样,我们欢迎您提供反馈,因此,如果有任何问题,[请在这里创建一个 ISSUE](https://github.com/pytorch/pytorch/issues)。 - -**脚本的总运行时间**:(5 分钟 40.226 秒) - -[下载 Python 源码:`static_quantization_tutorial.py`](../_downloads/bd7ace4f5df8c6f747eadb10a7f737cd/static_quantization_tutorial.py) - -[下载 Jupyter 笔记本:`static_quantization_tutorial.ipynb`](../_downloads/03ac9a8e1176f5e39736885e8c439a82/static_quantization_tutorial.ipynb) - -[由 Sphinx 画廊](https://sphinx-gallery.readthedocs.io)生成的画廊 \ No newline at end of file diff --git a/pytorch/官方教程/58.md b/pytorch/官方教程/58.md deleted file mode 100644 index ca6788e6..00000000 --- a/pytorch/官方教程/58.md +++ /dev/null @@ -1,431 +0,0 @@ -# 计算机视觉的量化迁移学习教程(beta) - -> 原文: - -小费 - -为了充分利用本教程,我们建议使用此 [Colab 版本](https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/quantized_transfer_learning_tutorial.ipynb)。 这将使您可以尝试以下信息。 - -**作者**: [Zafar Takhirov](https://github.com/z-a-f) - -**由**审核: [Raghuraman Krishnamoorthi](https://github.com/raghuramank100) - -**编辑**:[Jessica Lin](https://github.com/jlin27) - -本教程以 [Sasank Chilamkurthy](https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html) 编写的原始 [PyTorch 迁移学习](https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html)教程为基础。 - -迁移学习是指利用预训练的模型应用于不同数据集的技术。 使用迁移学习的主要方法有两种: - -1. **作为固定特征提取器的 ConvNet**:在这里,您[“冻结”](https://arxiv.org/abs/1706.04983)网络中所有参数的权重,除了最后几层(又称“头部”,通常是全连接层)。 将这些最后一层替换为使用随机权重初始化的新层,并且仅训练这些层。 -2. 
**ConvNet 的微调**:不使用随机初始化，而是用一个预训练的网络来初始化模型，然后像平常一样在新的数据集上训练。通常，如果输出类别数不同，还会更换网络的头部(或头部的一部分)。这种方法通常将学习率设置为较小的值，因为网络已经训练过，只需较小的调整即可把它“微调”到新的数据集上。

您还可以结合以上两种方法:首先冻结特征提取器，只训练头部。之后，解冻特征提取器(或其一部分)，将学习率设置为较小的值，然后继续训练。

在本部分中，您将使用第一种方法，即使用量化模型提取特征。

## 第 0 部分，先决条件

在深入学习迁移学习之前，让我们先回顾一下“先决条件”，例如安装和数据加载/可视化。

```py
# Imports
import copy
import matplotlib.pyplot as plt
import numpy as np
import os
import time

plt.ion()

```

### 安装每夜构建

因为您将使用 PyTorch 的 Beta 部分，所以建议安装最新版本的`torch`和`torchvision`。[您可以在这里找到有关本地安装的最新说明](https://pytorch.org/get-started/locally/)。例如，要在没有 GPU 支持的情况下安装:

```py
pip install numpy
pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
# For CUDA support use https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html

```

### 加载数据

注意

本部分与原始的迁移学习教程相同。

我们将使用`torchvision`和`torch.utils.data`包加载数据。

您今天要解决的问题是从图像中对**蚂蚁**和**蜜蜂**进行分类。该数据集包含约 120 张蚂蚁和蜜蜂的训练图像，每个类别有 75 张验证图像。可以认为这是一个很小的数据集，但由于我们使用迁移学习，模型应该仍能获得不错的泛化效果。

*此数据集是 ImageNet 的一个很小的子集。*

注意

从[此处](https://download.pytorch.org/tutorial/hymenoptera_data.zip)下载数据，并将其解压缩到`data`目录。

```py
import torch
from torchvision import transforms, datasets

# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(224),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = 'data/hymenoptera_data'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=16,
                                              shuffle=True, num_workers=8)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

```

### 可视化一些图像

让我们可视化一些训练图像，以了解数据增强的效果。

```py
import torchvision

def imshow(inp, title=None, ax=None, figsize=(5, 5)):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    if ax is None:
        fig, ax = plt.subplots(1, figsize=figsize)
    ax.imshow(inp)
    ax.set_xticks([])
    ax.set_yticks([])
    if title is not None:
        ax.set_title(title)

# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))

# Make a grid from batch
out = torchvision.utils.make_grid(inputs, nrow=4)

fig, ax = plt.subplots(1, figsize=(10, 10))
imshow(out, title=[class_names[x] for x in classes], ax=ax)

```

### 模型训练的支持函数

以下是模型训练的通用函数。此函数还会:

* 调整学习率
* 保存最佳模型

```py
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, device='cpu'):
    """
    Support function for model training.

    Args:
      model: Model to be trained
      criterion: Optimization criterion (loss)
      optimizer: Optimizer to use for training
      scheduler: Instance of ``torch.optim.lr_scheduler``
      num_epochs: Number of epochs
      device: Device to run the training on.
Must be 'cpu' or 'cuda' - """ - since = time.time() - - best_model_wts = copy.deepcopy(model.state_dict()) - best_acc = 0.0 - - for epoch in range(num_epochs): - print('Epoch {}/{}'.format(epoch, num_epochs - 1)) - print('-' * 10) - - # Each epoch has a training and validation phase - for phase in ['train', 'val']: - if phase == 'train': - model.train() # Set model to training mode - else: - model.eval() # Set model to evaluate mode - - running_loss = 0.0 - running_corrects = 0 - - # Iterate over data. - for inputs, labels in dataloaders[phase]: - inputs = inputs.to(device) - labels = labels.to(device) - - # zero the parameter gradients - optimizer.zero_grad() - - # forward - # track history if only in train - with torch.set_grad_enabled(phase == 'train'): - outputs = model(inputs) - _, preds = torch.max(outputs, 1) - loss = criterion(outputs, labels) - - # backward + optimize only if in training phase - if phase == 'train': - loss.backward() - optimizer.step() - - # statistics - running_loss += loss.item() * inputs.size(0) - running_corrects += torch.sum(preds == labels.data) - if phase == 'train': - scheduler.step() - - epoch_loss = running_loss / dataset_sizes[phase] - epoch_acc = running_corrects.double() / dataset_sizes[phase] - - print('{} Loss: {:.4f} Acc: {:.4f}'.format( - phase, epoch_loss, epoch_acc)) - - # deep copy the model - if phase == 'val' and epoch_acc > best_acc: - best_acc = epoch_acc - best_model_wts = copy.deepcopy(model.state_dict()) - - print() - - time_elapsed = time.time() - since - print('Training complete in {:.0f}m {:.0f}s'.format( - time_elapsed // 60, time_elapsed % 60)) - print('Best val Acc: {:4f}'.format(best_acc)) - - # load best model weights - model.load_state_dict(best_model_wts) - return model - -``` - -### 可视化模型预测的支持函数 - -通用函数,显示一些图像的预测 - -```py -def visualize_model(model, rows=3, cols=3): - was_training = model.training - model.eval() - current_row = current_col = 0 - fig, ax = plt.subplots(rows, cols, figsize=(cols*2, rows*2)) - - with torch.no_grad(): - for idx, (imgs, lbls) in enumerate(dataloaders['val']): - imgs = imgs.cpu() - lbls = lbls.cpu() - - outputs = model(imgs) - _, preds = torch.max(outputs, 1) - - for jdx in range(imgs.size()[0]): - imshow(imgs.data[jdx], ax=ax[current_row, current_col]) - ax[current_row, current_col].axis('off') - ax[current_row, current_col].set_title('predicted: {}'.format(class_names[preds[jdx]])) - - current_col += 1 - if current_col >= cols: - current_row += 1 - current_col = 0 - if current_row >= rows: - model.train(mode=was_training) - return - model.train(mode=was_training) - -``` - -## 第 1 部分,基于量化特征提取器训练自定义分类器 - -在本节中,您将使用“冻结”量化特征提取器,并在其顶部训练自定义分类器头。 与浮点模型不同,您无需为量化模型设置`require_grad = False`,因为它没有可训练的参数。 请参阅[文档](https://pytorch.org/docs/stable/quantization.html)了解更多详细信息。 - -加载预训练的模型:在本练习中,您将使用 [ResNet-18](https://pytorch.org/hub/pytorch_vision_resnet/) 。 - -```py -import torchvision.models.quantization as models - -# You will need the number of filters in the `fc` for future use. -# Here the size of each output sample is set to 2. -# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)). 
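# With quantize=True the returned network is already int8: as noted above,
# the feature extractor holds no trainable parameters, which is why there is
# nothing to freeze with requires_grad = False.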
### Support function for visualizing the model predictions

A generic function to display the predictions for a few images:

```py
def visualize_model(model, rows=3, cols=3):
    was_training = model.training
    model.eval()
    current_row = current_col = 0
    fig, ax = plt.subplots(rows, cols, figsize=(cols*2, rows*2))

    with torch.no_grad():
        for idx, (imgs, lbls) in enumerate(dataloaders['val']):
            imgs = imgs.cpu()
            lbls = lbls.cpu()

            outputs = model(imgs)
            _, preds = torch.max(outputs, 1)

            for jdx in range(imgs.size()[0]):
                imshow(imgs.data[jdx], ax=ax[current_row, current_col])
                ax[current_row, current_col].axis('off')
                ax[current_row, current_col].set_title('predicted: {}'.format(class_names[preds[jdx]]))

                current_col += 1
                if current_col >= cols:
                    current_row += 1
                    current_col = 0
                if current_row >= rows:
                    model.train(mode=was_training)
                    return
    model.train(mode=was_training)
```

## Part 1. Training a custom classifier based on a quantized feature extractor

In this section you will use a "frozen" quantized feature extractor and train a custom classifier head on top of it. Unlike with a floating-point model, you don't need to set `requires_grad = False` for the quantized model, as it has no trainable parameters. See the [documentation](https://pytorch.org/docs/stable/quantization.html) for more details.

Load the pretrained model: for this exercise you will be using [ResNet-18](https://pytorch.org/hub/pytorch_vision_resnet/).

```py
import torchvision.models.quantization as models

# You will need the number of filters in the `fc` for future use.
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
model_fe = models.resnet18(pretrained=True, progress=True, quantize=True)
num_ftrs = model_fe.fc.in_features
```

At this point you need to modify the pretrained model. The model has quantize/dequantize blocks at the beginning and the end. However, because you will only use the feature extractor, the dequantization layer has to move right before the linear layer (the head). The easiest way to do that is to wrap the model in an `nn.Sequential` module.

The first step is to isolate the feature extractor in the ResNet model. Although in this example you are tasked with using all layers except `fc` as the feature extractor, in reality you can take as many parts as you need. This would be useful in case you would like to replace some of the convolutional layers as well.

Note

When separating the feature extractor from the rest of a quantized model, you have to manually place the quantizer/dequantizer at the beginning and the end of the parts you want to keep quantized.

The function below creates a model with a custom head.

```py
from torch import nn

def create_combined_model(model_fe):
    # Step 1. Isolate the feature extractor.
    model_fe_features = nn.Sequential(
        model_fe.quant,  # Quantize the input
        model_fe.conv1,
        model_fe.bn1,
        model_fe.relu,
        model_fe.maxpool,
        model_fe.layer1,
        model_fe.layer2,
        model_fe.layer3,
        model_fe.layer4,
        model_fe.avgpool,
        model_fe.dequant,  # Dequantize the output
    )

    # Step 2. Create a new "head"
    new_head = nn.Sequential(
        nn.Dropout(p=0.5),
        nn.Linear(num_ftrs, 2),
    )

    # Step 3. Combine, and don't forget the quant stubs.
    new_model = nn.Sequential(
        model_fe_features,
        nn.Flatten(1),
        new_head,
    )
    return new_model
```

Warning

Currently the quantized models can only be run on the CPU. However, it is possible to send the non-quantized parts of the model to a GPU.

```py
import torch.optim as optim
new_model = create_combined_model(model_fe)
new_model = new_model.to('cpu')

criterion = nn.CrossEntropyLoss()

# Note that we are only training the head.
optimizer_ft = optim.SGD(new_model.parameters(), lr=0.01, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
```

### Train and evaluate

This step takes around 15-25 minutes on CPU. Because the quantized model can only run on the CPU, you cannot run the training on a GPU.

```py
new_model = train_model(new_model, criterion, optimizer_ft, exp_lr_scheduler,
                        num_epochs=25, device='cpu')

visualize_model(new_model)
plt.tight_layout()
```

## Part 2. Fine-tuning the quantized model

In this part, we fine-tune the feature extractor used for transfer learning, and quantize the feature extractor. Note that in both Part 1 and Part 2 the feature extractor is quantized. The difference is that in Part 1 we used a pretrained quantized model. In this part, we create a quantized feature extractor after fine-tuning on the dataset of interest, so this is a way to get better accuracy with transfer learning while having the benefits of quantization. Note that in our specific example the training set is really small (120 images), so the benefits of fine-tuning the entire model are not obvious. However, the procedure shown here will improve accuracy for transfer learning with larger datasets.

The pretrained feature extractor must be quantizable. To make sure it is quantizable, perform the following steps:

> 1. Fuse `(Conv, BN, ReLU)`, `(Conv, BN)`, and `(Conv, ReLU)` using `torch.quantization.fuse_modules`.
> 2. Connect the feature extractor with a custom head. This requires dequantizing the output of the feature extractor.
> 3. Insert fake-quantization modules at appropriate locations in the feature extractor to mimic quantization during training.

For step (1) we use models from `torchvision/models/quantization`, which have a member method `fuse_model`. This function fuses all the `conv`, `bn`, and `relu` modules. For custom models, this would require manual fusion by calling the `torch.quantization.fuse_modules` API with the list of modules to fuse, as in the sketch below.
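To make step (1) concrete for the custom-model case, here is a minimal sketch of manual fusion — the `TinyNet` module and its layer names are hypothetical, used only to show the shape of the `fuse_modules` call:

```py
import torch
import torch.nn as nn

class TinyNet(nn.Module):
    """Hypothetical model used only to illustrate manual fusion."""
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 3)
        self.bn = nn.BatchNorm2d(8)
        self.relu = nn.ReLU()

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))

m = TinyNet().eval()
# Each inner list names adjacent submodules of `m` that get fused into one module.
m_fused = torch.quantization.fuse_modules(m, [["conv", "bn", "relu"]])
```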
Step (2) is performed by the `create_combined_model` function used in the previous section.

Step (3) is achieved by using `torch.quantization.prepare_qat`, which inserts the fake-quantization modules.

As step (4), you can start "fine-tuning" the model, and after that convert it into a fully quantized version (step 5).

To convert the fine-tuned model into a quantized model, you can call the `torch.quantization.convert` function (in our case only the feature extractor is quantized).

Note

Because of the random initialization, your results might differ from the results shown in this tutorial.

```py
# notice `quantize=False`
model = models.resnet18(pretrained=True, progress=True, quantize=False)
num_ftrs = model.fc.in_features

# Step 1
model.train()
model.fuse_model()
# Step 2
model_ft = create_combined_model(model)
model_ft[0].qconfig = torch.quantization.default_qat_qconfig  # Use default QAT configuration
# Step 3
model_ft = torch.quantization.prepare_qat(model_ft, inplace=True)
```

### Fine-tuning the model

In the current tutorial the whole model is fine-tuned. In general, this leads to higher accuracy. However, due to the small training set used here, we end up overfitting to the training set.

Step 4. Fine-tune the model

```py
for param in model_ft.parameters():
    param.requires_grad = True

model_ft.to(device)  # We can fine-tune on GPU if available

criterion = nn.CrossEntropyLoss()

# Note that we are training everything, so the learning rate is lower
# Notice the smaller learning rate
optimizer_ft = optim.SGD(model_ft.parameters(), lr=1e-3, momentum=0.9, weight_decay=0.1)

# Decay LR by a factor of 0.3 every several epochs
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.3)

model_ft_tuned = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                             num_epochs=25, device=device)
```

Step 5. Convert to a quantized model

```py
from torch.quantization import convert
model_ft_tuned.cpu()

model_quantized_and_trained = convert(model_ft_tuned, inplace=False)
```

Let's see how the quantized model performs on a few images:

```py
visualize_model(model_quantized_and_trained)

plt.ioff()
plt.tight_layout()
plt.show()
```

diff --git a/pytorch/官方教程/59.md b/pytorch/官方教程/59.md
deleted file mode 100644
index 4ad24ab9..00000000
--- a/pytorch/官方教程/59.md
+++ /dev/null
@@ -1 +0,0 @@
-# Parallel and distributed training

diff --git a/pytorch/官方教程/80 API-torch.md b/pytorch/官方教程/80 API-torch.md
new file mode 100644
index 00000000..565f0e67
--- /dev/null
+++ b/pytorch/官方教程/80 API-torch.md
@@ -0,0 +1,272 @@

# API torch

## 1 Tensors

> Tensor checks

## 2 Tensor creation

tensor
Constructs a tensor with data.

from_numpy
Creates a Tensor from a numpy.ndarray.

zeros
Returns a tensor filled with the scalar value 0, with the shape defined by the variable argument size.

zeros_like
Returns a tensor filled with the scalar value 0, with the same size as input.

ones
Returns a tensor filled with the scalar value 1, with the shape defined by the variable argument size.

ones_like
Returns a tensor filled with the scalar value 1, with the same size as input.

arange
Returns a 1-D tensor with values taken from the interval [start, end) with a common step.

range
Returns a 1-D tensor with values from start to end with a given step.

linspace
Creates a one-dimensional tensor of size steps whose values are evenly spaced from start to end, inclusive.

logspace
Creates a one-dimensional tensor of size steps whose values are evenly spaced on a logarithmic scale.

eye
Returns a 2-D tensor with ones on the diagonal and zeros elsewhere.

empty
Returns a tensor filled with uninitialized data.

empty_like
Returns an uninitialized tensor with the same size as input.

empty_strided
Creates a tensor with the specified size and stride, filled with uninitialized data.

full
Creates a tensor of size size filled with fill_value.

full_like
Returns a tensor with the same size as input, filled with fill_value.

## 3 Tensor joining

cat
Concatenates the given sequence of tensors in the given dimension.

stack
Concatenates a sequence of tensors along a new dimension.

row_stack
Alias of torch.vstack().

column_stack
Creates a new tensor by horizontally stacking the tensors in tensors.

dstack
Stacks tensors in sequence depthwise (along the third axis).

hstack
Stacks tensors in sequence horizontally (column-wise).

vstack
Stacks tensors in sequence vertically (row-wise).

reshape
Returns a tensor with the same data and number of elements as input, but with the specified shape.

tile
Constructs a tensor by repeating the elements of input.

transpose
Returns a tensor that is a transposed version of input.

unsqueeze
Returns a new tensor with a dimension of size one inserted at the specified position.
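A quick sketch of a few of the joining/reshaping operations above (expected shapes in the comments):

```py
import torch

a = torch.zeros(2, 3)
b = torch.ones(2, 3)

print(torch.cat([a, b], dim=0).shape)    # torch.Size([4, 3]) -- joins along an existing dim
print(torch.stack([a, b], dim=0).shape)  # torch.Size([2, 2, 3]) -- joins along a new dim
print(a.reshape(3, 2).shape)             # torch.Size([3, 2]) -- same data, new shape
print(a.transpose(0, 1).shape)           # torch.Size([3, 2]) -- swapped dims
print(a.unsqueeze(0).shape)              # torch.Size([1, 2, 3]) -- inserts a size-1 dim
```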
## 4 Random sampling

bernoulli
Draws binary random numbers (0 or 1) from a Bernoulli distribution.

multinomial
Returns a tensor where each row contains num_samples indices sampled from the multinomial probability distribution located in the corresponding row of the tensor input.

normal
Returns a tensor of random numbers drawn from separate normal distributions whose mean and standard deviation are given.

poisson
Returns a tensor of the same size as input, with each element sampled from a Poisson distribution with rate parameter given by the corresponding element of input.

rand
Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1).

rand_like
Returns a tensor with the same size as input, filled with random numbers from a uniform distribution on [0, 1).

randint
Returns a tensor filled with random integers generated uniformly between low (inclusive) and high (exclusive).

randint_like
Returns a tensor with the same shape as input, filled with random integers generated uniformly between low (inclusive) and high (exclusive).

randn
Returns a tensor filled with random numbers from a normal distribution with mean 0 and variance 1 (also called the standard normal distribution).

randn_like
Returns a tensor with the same size as input, filled with random numbers from a normal distribution with mean 0 and variance 1.

## 5 Serialization

save
Saves an object to a disk file.

load
Loads an object saved with torch.save() from a file.

## 6 Locally disabling gradient computation

The context managers torch.no_grad(), torch.enable_grad(), and torch.set_grad_enabled() are helpful for locally disabling and enabling gradient computation. See "Locally disabling gradient computation" for more details on their usage. These context managers are thread-local, so they won't work if you send work to another thread using the threading module, etc.

no_grad
Context manager that disables gradient calculation.

enable_grad
Context manager that enables gradient calculation.

set_grad_enabled
Context manager that sets gradient calculation on or off.
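The three context managers behave as in this minimal sketch:

```py
import torch

x = torch.ones(1, requires_grad=True)

with torch.no_grad():
    y = x * 2
print(y.requires_grad)        # False -- no history is recorded

with torch.no_grad():
    with torch.enable_grad():
        y = x * 2
print(y.requires_grad)        # True -- re-enabled inside no_grad

is_train = False
with torch.set_grad_enabled(is_train):
    y = x * 2
print(y.requires_grad)        # False -- toggled by a boolean
```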
## 7.1 Math operations — pointwise ops

Trigonometric functions

Exponential functions

Power functions

Logarithmic functions

## 7.2 Math operations — statistics

argmax
Returns the indices of the maximum value of all elements in the input tensor.

argmin
Returns the indices of the minimum value(s) of the flattened tensor or along a dimension.

amax
Returns the maximum value of each slice of the input tensor in the given dimension dim.

amin
Returns the minimum value of each slice of the input tensor in the given dimension dim.

all
Tests whether all elements in input evaluate to True.

any
Tests whether any element in input evaluates to True.

max
Returns the maximum value of all elements in the input tensor.

min
Returns the minimum value of all elements in the input tensor.

mean
Returns the mean value of all elements in the input tensor.

median
Returns the median of the values in input.

norm
Returns the matrix norm or vector norm of a given tensor.

prod
Returns the product of all elements in the input tensor.

std
Returns the standard deviation of all elements in the input tensor.

sum
Returns the sum of all elements in the input tensor.

var
Returns the variance of all elements in the input tensor.

var_mean
Returns the variance and mean of all elements in the input tensor.

## 7.3 Math operations — comparison ops

eq
Computes element-wise equality.

equal
True if two tensors have the same size and elements, False otherwise.

ge
Computes input >= other element-wise.

greater_equal
Alias of torch.ge().

gt
Computes input > other element-wise.

greater
Alias of torch.gt().

le
Computes input <= other element-wise.

less_equal
Alias of torch.le().

lt
Computes input < other element-wise.

less
Alias of torch.lt().

ne
Computes input != other element-wise.

not_equal
Alias of torch.ne().

sort
Sorts the elements of the input tensor along a given dimension in ascending order by value.

msort
Sorts the elements of the input tensor along its first dimension in ascending order by value.

## 7.4 Math operations — spectral ops

Fourier transforms

## 8 Other operations

broadcast_to
Broadcasts input to the shape shape.

broadcast_shapes
Similar to broadcast_tensors(), but for shapes.

clone
Returns a copy of input.

diff --git a/pytorch/官方教程/81 API-torch.nn.md b/pytorch/官方教程/81 API-torch.nn.md
new file mode 100644
index 00000000..12a1819b
--- /dev/null
+++ b/pytorch/官方教程/81 API-torch.nn.md
@@ -0,0 +1,103 @@

# torch.nn

## 1 Containers

## 2 Convolution Layers

## 3 Pooling layers

## 4 Padding Layers

## 5 Non-linear Activations (weighted sum, nonlinearity)

## 6 Non-linear Activations (other)

## 7 Normalization Layers

## 8 Recurrent Layers

## 9 Transformer Layers

## 10 Linear Layers

## 11 Dropout Layers

## 12 Sparse Layers

## 13 Distance Functions

## 14 Loss Functions

nn.L1Loss
Creates a criterion that measures the mean absolute error (MAE) between each element in the input x and target y.

nn.MSELoss
Creates a criterion that measures the mean squared error (squared L2 norm) between each element in the input x and target y.

nn.CrossEntropyLoss
This criterion combines LogSoftmax and NLLLoss in one single class.

nn.CTCLoss
The Connectionist Temporal Classification loss.

nn.NLLLoss
The negative log likelihood loss.

nn.PoissonNLLLoss
Negative log likelihood loss with Poisson distribution of target.

nn.GaussianNLLLoss
Gaussian negative log likelihood loss.

nn.KLDivLoss
The Kullback-Leibler divergence loss measure.

nn.BCELoss
Creates a criterion that measures the binary cross entropy between the target and the output.

nn.BCEWithLogitsLoss
This loss combines a Sigmoid layer and the BCELoss in one single class.

nn.MarginRankingLoss
Creates a criterion that measures the loss given inputs x1 and x2, two 1D mini-batch tensors, and a label 1D mini-batch tensor y (containing 1 or -1).

nn.HingeEmbeddingLoss
Measures the loss given an input tensor x and a labels tensor y (containing 1 or -1).

nn.MultiLabelMarginLoss
Creates a criterion that optimizes a multi-class multi-classification hinge loss (margin-based loss) between input x (a 2D mini-batch tensor) and output y (a 2D tensor of target class indices).

nn.SmoothL1Loss
Creates a criterion that uses a squared term if the absolute element-wise error falls below beta, and an L1 term otherwise.

nn.SoftMarginLoss
Creates a criterion that optimizes a two-class classification logistic loss between input tensor x and target tensor y (containing 1 or -1).

nn.MultiLabelSoftMarginLoss
Creates a criterion that optimizes a multi-label one-versus-all loss based on max-entropy, between input x and target y of size (N, C).

nn.CosineEmbeddingLoss
Creates a criterion that measures the loss given input tensors x1, x2 and a tensor label y with values 1 or -1.

nn.MultiMarginLoss
Creates a criterion that optimizes a multi-class classification hinge loss (margin-based loss) between input x (a 2D mini-batch tensor) and output y (a 1D tensor of target class indices, 0 <= y <= x.size(1) - 1).

nn.TripletMarginLoss
Creates a criterion that measures the triplet loss given input tensors x1, x2, x3 and a margin with a value greater than 0.

nn.TripletMarginWithDistanceLoss
Creates a criterion that measures the triplet loss given input tensors a, p, and n (representing anchor, positive, and negative examples, respectively), and a nonnegative, real-valued "distance function" used to compute the relationship between the anchor and positive example ("positive distance") and between the anchor and negative example ("negative distance").

## 15 Vision Layers

## 16 Shuffle Layers

## 17 DataParallel Layers (multi-GPU, distributed)

## 18 Utilities

## 19 Quantized Functions

## 20 Lazy Modules Initialization

diff --git a/pytorch/官方教程/82 API-torch.nn.functional.md b/pytorch/官方教程/82 API-torch.nn.functional.md
new file mode 100644
index 00000000..c70c07c3
--- /dev/null
+++ b/pytorch/官方教程/82 API-torch.nn.functional.md
@@ -0,0 +1,7 @@

> Provides functional versions of the nn layers.

torch.nn implements the operator layers, each layer packaging the corresponding operators.

torch.nn.functional implements the individual operators themselves; a layer may combine different operators, which allows finer-grained computation than torch.nn.

diff --git a/pytorch/官方教程/83 API-torch.Tensor.md b/pytorch/官方教程/83 API-torch.Tensor.md
new file mode 100644
index 00000000..f09f2af3
--- /dev/null
+++ b/pytorch/官方教程/83 API-torch.Tensor.md
@@ -0,0 +1,5 @@

> torch has many methods for processing tensors; they return torch.Tensor objects.

# torch.Tensor

> Contains the various methods and attributes for working with tensors.

diff --git a/pytorch/官方教程/84 API-torch attributes.md b/pytorch/官方教程/84 API-torch attributes.md
new file mode 100644
index 00000000..1c153e46
--- /dev/null
+++ b/pytorch/官方教程/84 API-torch attributes.md
@@ -0,0 +1,8 @@

> Every torch.Tensor has a torch.dtype, a torch.device, and a torch.layout.

## 1 torch.dtype
## 2 torch.device

## 3 torch.layout

diff --git a/pytorch/官方教程/85 API-tensor view.md b/pytorch/官方教程/85 API-tensor view.md
new file mode 100644
index 00000000..ce9ef694
--- /dev/null
+++ b/pytorch/官方教程/85 API-tensor view.md

PyTorch allows a tensor to be a View of an existing tensor. A view tensor shares the same underlying data with its base tensor. Supporting views avoids explicit data copies, allowing fast and memory-efficient reshaping, slicing, and element-wise operations.

diff --git a/pytorch/官方教程/86 API-torch.autograd.md b/pytorch/官方教程/86 API-torch.autograd.md
new file mode 100644
index 00000000..e69de29b
diff --git a/pytorch/官方教程/87 API-torch.linalg.md b/pytorch/官方教程/87 API-torch.linalg.md
new file mode 100644
index 00000000..a9781657
--- /dev/null
+++ b/pytorch/官方教程/87 API-torch.linalg.md
@@ -0,0 +1 @@

A library of linear algebra operations.

diff --git a/pytorch/官方教程/88 API-torch.optim.md b/pytorch/官方教程/88 API-torch.optim.md
new file mode 100644
index 00000000..64cded43
--- /dev/null
+++ b/pytorch/官方教程/88 API-torch.optim.md
@@ -0,0 +1,75 @@

# torch.optim

## 1 Using an optimizer

### Constructing an optimizer

To construct an Optimizer, you have to give it an iterable containing the parameters (all should be Variables) to optimize. Then you can specify optimizer-specific options such as the learning rate, weight decay, etc.

```py
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam([var1, var2], lr=0.0001)
```

### Using the optimizer

All optimizers implement a step() method that updates the parameters. It can be used in two ways. The first is optimizer.step(): a simplified form supported by most optimizers, called once the gradients have been computed with, e.g., backward(). (The second form, optimizer.step(closure), re-evaluates the loss inside the optimizer and is needed by algorithms such as L-BFGS.)

```py
for input, target in dataset:
    optimizer.zero_grad()
    output = model(input)
    loss = loss_fn(output, target)
    loss.backward()
    optimizer.step()
```

## 2 Optimizer methods

torch.optim.Optimizer(params, defaults) is the base class for all optimizers; its main methods are the following.

add_param_group(param_group)
Adds a parameter group to the Optimizer's param_groups.

load_state_dict(state_dict)
Loads the optimizer state.

state_dict()
Returns the state of the optimizer as a dict.

step(closure)
Performs a single optimization step (parameter update).

zero_grad(set_to_none=False)
Sets the gradients of all optimized torch.Tensors to zero.
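state_dict() / load_state_dict() are what make checkpointing and resuming possible — a minimal sketch (the model and the `checkpoint.pt` path are placeholders):

```py
import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)                                   # placeholder model
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Save model and optimizer state together, e.g. to resume training later.
torch.save({"model": model.state_dict(),
            "optimizer": optimizer.state_dict()}, "checkpoint.pt")

# ...later:
ckpt = torch.load("checkpoint.pt")
model.load_state_dict(ckpt["model"])
optimizer.load_state_dict(ckpt["optimizer"])
```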
## 3 Common optimizers

torch.optim.Adadelta(params, lr=1.0, rho=0.9, eps=1e-06, weight_decay=0)
Implements the Adadelta algorithm, proposed in ADADELTA: An Adaptive Learning Rate Method.

torch.optim.Adagrad(params, lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10)
Implements the Adagrad algorithm, proposed in Adaptive Subgradient Methods for Online Learning and Stochastic Optimization.

torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
Implements the Adam algorithm, proposed in Adam: A Method for Stochastic Optimization. The implementation of the L2 penalty follows the changes proposed in Decoupled Weight Decay Regularization.

torch.optim.AdamW(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False)
Implements the AdamW algorithm. The original Adam algorithm was proposed in Adam: A Method for Stochastic Optimization; the AdamW variant was proposed in Decoupled Weight Decay Regularization.

torch.optim.SparseAdam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08)
Implements a lazy version of the Adam algorithm suitable for sparse tensors. In this variant, only the moments that show up in the gradient get updated, and only those portions of the gradient get applied to the parameters.

torch.optim.ASGD(params, lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0)
Implements averaged stochastic gradient descent, proposed in Acceleration of Stochastic Approximation by Averaging.

torch.optim.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
Implements the RMSprop algorithm, proposed by G. Hinton in his course.

torch.optim.SGD(params, lr=<required>, momentum=0, dampening=0, weight_decay=0, nesterov=False)
Implements stochastic gradient descent (optionally with momentum). Nesterov momentum is based on the formula from On the importance of initialization and momentum in deep learning.
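All of the constructors above also accept per-parameter options: instead of one iterable of parameters, you pass a list of dicts, each defining a group (this is the same mechanism that add_param_group extends). A minimal sketch with a throwaway model:

```py
import torch.nn as nn
import torch.optim as optim

model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 2))  # throwaway model

# The first layer gets its own learning rate; the last layer falls back
# to the defaults given after the list.
optimizer = optim.SGD(
    [{"params": model[0].parameters(), "lr": 1e-3},
     {"params": model[2].parameters()}],
    lr=1e-2, momentum=0.9,
)
```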
diff --git a/pytorch/官方教程/89 API-Pipeline.md b/pytorch/官方教程/89 API-Pipeline.md
new file mode 100644
index 00000000..e69de29b
diff --git a/pytorch/官方教程/90 API-RPC.md b/pytorch/官方教程/90 API-RPC.md
new file mode 100644
index 00000000..48f5a47d
--- /dev/null
+++ b/pytorch/官方教程/90 API-RPC.md
@@ -0,0 +1,5 @@

A framework for remote communication and distributed training.

Worth studying as a way to implement the PySyft workflow.

diff --git a/工作日志/2021年5月1日-五月份计划.md b/工作日志/2021年5月1日-五月份计划.md
index 9ec57701..52791a7f 100644
--- a/工作日志/2021年5月1日-五月份计划.md
+++ b/工作日志/2021年5月1日-五月份计划.md
@@ -1,4 +1,4 @@
-## Tasks
+## Research tasks

> The April plan focused on learning and reviewing fundamentals, and the initial data processing was completed. The main task now is to read papers and finish the paper-review plan.
> Start executing the May plan, dovetailing with the April plan, and push forward on both the engineering and the academic side.

@@ -7,6 +7,8 @@
> (100-paper plan) Senior classmate Jiang has shared an estimated hundred papers over time. Now restart reading papers: read widely while reproducing articles, using the Citavi tool.

+- [ ] TBD

### **Reproducing federated learning papers** (four weeks)

> Read the latest federated learning papers. Reproduce the federated learning process using other people's code. Review and reproduce.

@@ -20,11 +22,68 @@
### **Reproducing federated learning + malware papers** (four weeks)

> Follow-up task: paper-writing plan (the June plan)

+- [ ] TBD
+
+## Engineering tasks
+> Need to decide how far the final implementation should go. If it stays a single-machine, multi-threaded simulation, no major rework is needed: simulate locally with multi-threaded socket communication. There is no need to consider in-browser training (the user downloads the model into Chrome and runs gradient descent in the browser? Hardly necessary). The first step should be a local browser driving a local backend for the simulation; then try a remote browser with a socket connection for a real-system simulation. Note that a remote system would also need the relevant Python environment, or gradient descent would have to run in the browser with JavaScript.
+
+> Single-machine simulation: the front end invokes backend training threads to realize federated learning.
+
+> Multi-machine real deployment: the front end downloads the model directly and runs gradient descent in the browser to realize the federated learning process.
+
+### **Initial stage** (May)
+> Mainly implement thread-level local simulation, used for the experiments and plans in the research.
+> These simulation experiments mainly serve to validate and publish papers.
+
+- [ ] PySyft thread-level simulation
+- [ ] PySyft paper simulation experiments
+  - [ ] simulation for federated learning
+  - [ ] simulation for malware
+
+- [ ] Try to master the latest federated learning framework (having chosen PySyft, stop considering TensorFlow. PySyft's ecosystem may be better, and it is simpler to work with. TensorFlow is just too large a framework: much of it is well standardized and convenient, but the training process is genuinely cumbersome — Keras is tolerable — and modifying the internals would take enormous effort. TensorFlow can train in a web page via JS; PyTorch can too, and PyTorch can also train on Android.)
+- [ ] Optimize and extend the PySyft framework.
+
+### **Intermediate stage** (June)
+> Mainly understand and modify the internals of the PySyft framework.
+> Build PySyft's remote simulation of federated learning: multi-endpoint cooperative simulation plus a visual interface.
+
+- [ ] Use Docker containers and socket programming for multi-endpoint cooperative training (transitioning from simulation toward real scenarios).
+- [ ] Use Docker containers to streamline deployment and run cross-platform experiments on Windows/Linux/Android (transitioning toward real scenarios).
+
+### **Final stage** (July–August)
+> Build the system, mainly in service of final graduation.
+
+- [ ] Use a web server for visualization, including visual control of the training process and visual display of the training process and results. (Roughly a month; essentially for graduation, ending up with a good-looking system.)
+
+- Front end:
+  - [ ] Training control module (configuration of the training run: choose training clients and control clients, set training parameters; a plus button for selecting on the local page.)
+  - [ ] Training process module (logs and state-change charts during training.)
+  - [ ] Model display module (shows training results and visualizes the trained model.)
+  - [ ] Malware detection module (modeled on the handwritten-digit ONNX.js demo page; simple and usable.)
+
+- Back end:
+  - [ ] Communication module (participants and inter-participant communication; join via sign-up and registration.)
+  - [ ] Training module (a federated learning engine that trains a malware model via federated learning.)
+  - [ ] Application module (a malware detection engine, including a large set of malware-processing modules.)
+
+> If I learn the Spring stack during the internship, write the backend with Spring; otherwise, write it with Python/Django, which also fits the algorithm side very well.
+> Cross-platform socket communication must be implemented. Once this is done, publishing a paper should not be a problem.
+
+Consider the following technologies for presenting the results:
+- HTML, CSS, JavaScript
+- Bootstrap, Vue.js, Chart.js
+- Python, Django, MySQL
+- PyTorch, PySyft, WebSocket

## Plan

diff --git a/工作日志/2021年5月6日-今日计划.md b/工作日志/2021年5月6日-今日计划.md
index fb0c3bbe..06b29eab 100644
--- a/工作日志/2021年5月6日-今日计划.md
+++ b/工作日志/2021年5月6日-今日计划.md
@@ -1,7 +1,15 @@
## Tasks

- [x] April plan — tensorflow federated
-- [ ] April plan — pysyft
+- [ ] April plan — pytorch — pysyft
+  - [x] official tutorials
+  - [ ] practice from related blog posts
+  - [ ] junior classmate's code
+  - [ ] own implementation
+- [ ] April plan — pytorch — basics tutorial (tutorial notes finished; API docs for layers, losses, and optimizers organized)
+- [x] ~~April plan — pytorch — distributed tutorial (tutorials plus API notes)~~
+- [x] ~~April plan — pytorch — Android tutorial (tutorials plus API notes)~~
+- [x] April plan — pytorch — API docs

@@ -9,4 +17,10 @@
1. I find the Python machine learning stack runs better on Linux; setting up the environment on Windows really is a hundred times more trouble. From today, move the main working environment to Linux: run and study the algorithms on Linux, and only do wrap-up work on Windows.
2. There are two main tasks at the moment: the unfinished April plan and the newly started May plan.
3. TensorFlow Federated is done; I can run a basic federated learning process. Since the collaboration with my junior classmate uses PyTorch, development should use PyTorch from now on; I won't go back to TensorFlow unless it has an absolute advantage.
+4. Learned a lot of new Python, and the basic usage and underlying principles of the PySyft framework. Two main goals: **use it, modify it**.
+   1. Python modules logging, asyncio, argparse, etc., plus the third-party websocket module;
+   2. PySyft internals: worker communication and its websocket implementation (send, receive, client, server), remote computation (Plan, Protocol), encryption (MPC, homomorphic encryption), and federated averaging (util.fed_avg(models));
+   3. Usage of PyTorch modules: torch.nn, torch.nn.functional, torch.jit (which implements code serialization).

diff --git a/工作日志/2021年5月8日-今日计划.md b/工作日志/2021年5月8日-今日计划.md
index 823ebea9..4831f463 100644
--- a/工作日志/2021年5月8日-今日计划.md
+++ b/工作日志/2021年5月8日-今日计划.md
@@ -3,4 +3,6 @@
- [ ] May plan — ida implementation
- [ ] May plan — prox implementation

## Gains

+* Learned how to use websockets and got an understanding of asynchronous communication programming in Python. Could consider using Django to quickly build a local website for displaying and controlling the federated learning process.

diff --git a/工作日志/2021年6月1日-六月份计划.md b/工作日志/2021年6月1日-六月份计划.md
new file mode 100644
index 00000000..4e774099
--- /dev/null
+++ b/工作日志/2021年6月1日-六月份计划.md
@@ -0,0 +1,19 @@

## Plan

> Financial economics seems quite interesting.
> Try taking a related certification exam.

### Financial economics
> Basic laws of finance and economics

### Law
> Basic law

### Medicine
> Basic medicine

### Computing — Office
> Learn how to use Word, PPT, and Excel
> Learn data analysis and processing with Python, numpy, pandas, and matplotlib