pysyft 博客看完

2026-02-03 02:23:31 +08:00 · 2021-05-09 16:21:02 +08:00
parent f55d48db3f
commit 4df1fa4d7c
13 changed files with 288 additions and 163 deletions
--- a/Python/web_socket_test/client.py
+++ b/Python/web_socket_test/client.py
@@ -0,0 +1,35 @@
+import asyncio
+import websockets
+
+async def auth_system(websocket):
+    """Prompt for credentials until the server accepts them.
+
+    Each line typed by the user is sent to the server unchanged. The loop
+    ends once a reply containing "congratulation" is received.
+
+    Args:
+        websocket: Open connection exposing awaitable ``send``/``recv``.
+
+    Returns:
+        True once the server has accepted the credentials.
+    """
+    loop = asyncio.get_event_loop()
+    while True:
+        # input() blocks the calling thread; run it in the default executor
+        # so the event loop stays free to service the connection while
+        # waiting for the user.
+        cred_text = await loop.run_in_executor(
+            None, input, "please enter your username and password: "
+        )
+        await websocket.send(cred_text)
+        response_str = await websocket.recv()
+        print("receive_message",response_str)
+        if "congratulation" in response_str:
+            return True
+
+async def send_msg(websocket):
+    """Send user input to the server and print each reply.
+
+    Runs after authentication. Typing ``exit`` closes the connection and
+    ends the loop; any other line is sent and the server's answer printed.
+
+    Args:
+        websocket: Open connection exposing awaitable ``send``/``recv``/``close``.
+
+    Returns:
+        False after the user typed "exit" and the connection was closed.
+    """
+    loop = asyncio.get_event_loop()
+    while True:
+        # Read from stdin in the default executor so the blocking input()
+        # call does not stall the event loop.
+        _text = await loop.run_in_executor(
+            None, input, "please enter your context: "
+        )
+        if _text == "exit":
+            print('you have enter "exit", goodbye')
+            await websocket.close(reason="user exit")
+            return False
+        await websocket.send(_text)
+        recv_text = await websocket.recv()
+        print(recv_text)
+
+async def main_logic(uri='ws://127.0.0.1:5678'):
+    """Client entry point: connect, authenticate, then run the message loop.
+
+    Args:
+        uri: WebSocket URI of the server. Defaults to the local test server
+            address used by this example.
+    """
+    async with websockets.connect(uri) as websocket:
+        # Credentials must be accepted before any other message is sent.
+        await auth_system(websocket)
+        await send_msg(websocket)
+
+# Run the client to completion. main_logic() returns once send_msg() has
+# closed the connection, so the loop is not kept alive with run_forever();
+# doing so would leave the process idling after the user typed "exit".
+asyncio.get_event_loop().run_until_complete(main_logic())
--- a/Python/web_socket_test/index.html
+++ b/Python/web_socket_test/index.html
@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>WebSocket Test</title>
+</head>
+<body>
+<script>
+    var wsObj = new WebSocket("ws://127.0.0.1:5678");   // open the connection
+    wsObj.onopen = function(){  // once connected, send the request (credentials)
+        alert("open");
+        wsObj.send("admin:123456");
+    };
+    wsObj.onmessage = function(ev){  // show each response received from the backend
+        alert(ev.data);
+    };
+    wsObj.onclose = function(ev){
+        alert("close");
+    };
+    wsObj.onerror = function(ev){
+        alert("error");
+    };
+</script>
+</body>
+</html>
--- a/Python/web_socket_test/server.py
+++ b/Python/web_socket_test/server.py
@@ -0,0 +1,42 @@
+import asyncio
+import websockets
+
+async def check_permit(websocket):
+    """Wait until the client submits valid credentials.
+
+    The client is expected to send ``"<username>:<password>"`` as a text
+    message. Any other message (no colon, wrong values, a non-text frame)
+    is answered with a retry prompt and the loop waits for the next attempt.
+
+    Args:
+        websocket: Connection exposing awaitable ``recv``/``send``.
+
+    Returns:
+        True once the pair ``admin`` / ``123456`` has been received.
+    """
+    while True:
+        recv_str = await websocket.recv()
+        # Only text messages can carry credentials; anything else is
+        # treated as a failed attempt instead of raising on split().
+        cred_dict = recv_str.split(":") if isinstance(recv_str, str) else []
+        # A message without ":" yields fewer than two fields; check the
+        # length so it is rejected rather than raising IndexError.
+        if len(cred_dict) >= 2 and cred_dict[0] == "admin" and cred_dict[1] == "123456":
+            response_str = "congratulation, you have connect with server\r\nnow, you can do something else"
+            await websocket.send(response_str)
+            return True
+        else:
+            response_str = "sorry, the username or password is wrong, please submit again"
+            await websocket.send(response_str)
+
+async def recv_msg(websocket):
+    """Echo every client message back until the client disconnects.
+
+    Each received message is printed on the server and returned to the
+    client prefixed with "your submit context: ".
+
+    Args:
+        websocket: Authenticated connection to the client.
+    """
+    # Iterating the connection (rather than looping on recv()) finishes
+    # cleanly when the peer closes the connection normally, instead of
+    # raising a connection-closed exception out of the handler.
+    async for recv_text in websocket:
+        print("recv_mesage:",recv_text)
+        response_text = f"your submit context: {recv_text}"
+        await websocket.send(response_text)
+
+async def main_logic(websocket, path=None):
+    """Connection handler: authenticate the client, then echo its messages.
+
+    The websockets server calls this for each connection and supplies the
+    arguments itself; they do not need to be passed manually.
+
+    Args:
+        websocket: The connection to the client.
+        path: Request path, when the library passes one. Optional so the
+            handler can be invoked either as ``(websocket, path)`` or as
+            ``(websocket)`` alone, depending on the websockets release.
+    """
+    await check_permit(websocket)
+    await recv_msg(websocket)
+
+# Replace the IP with the address of your own machine.
+start_server = websockets.serve(main_logic, '127.0.0.1', 5678)
+# To pass custom arguments to the main_logic callback, use the following form.
+# First, change how the callback is registered:
+# import functools
+# start_server = websockets.serve(functools.partial(main_logic, other_param="test_value"), '10.10.6.91', 5678)
+# Then change the callback definition to accept the extra parameter:
+# async def main_logic(websocket, path, other_param)
+
+# Start the server, then keep the event loop running to serve connections.
+asyncio.get_event_loop().run_until_complete(start_server)
+asyncio.get_event_loop().run_forever()
--- a/pytorch/Pysyft实例/websockets-example-MNIST-parallel/run_websocket_client.py
+++ b/pytorch/Pysyft实例/websockets-example-MNIST-parallel/run_websocket_client.py
@@ -109,8 +109,13 @@ async def fit_model_on_worker(
        optimizer="SGD",
        optimizer_args={"lr": lr},
    )
+    # 需要发送的数据。调用send，通过websocket发送给worker
    train_config.send(worker)
+
+    # 远程执行过程。远程客户端调用训练代码，执行操作。并取回训练的结果。
    loss = await worker.async_fit(dataset_key="mnist", return_ids=[0])
+    
+    # 将训练的结果取回
    model = train_config.model_ptr.get().obj
    return worker.id, model, loss

--- a/pytorch/官方教程/10
+++ b/pytorch/官方教程/10
@@ -35,7 +35,16 @@
  },
  {
   "source": [
-    "## 1 send tensor to bob's machine"
+    "## 1 张量通信\n",
+    "\n",
+    "VirtualWorker可以简单理解为一个远程机器。x和y是两个张量，x_ptr和y_ptr是x和y的指针。\n",
+    "\n",
+    "这里有一个方法：.send()。它的作用是把张量发送到远程机器，在发送之后，本机依然保留了它的操作权，就是通过它返回的指针进行操作。\n",
+    "\n",
+    "下面一句很关键：z_ptr = x_ptr + x_ptr。\n",
+    "在原教程中写的是z = x_ptr + x_ptr。我觉得命名有误，因为此时，z并不是一个实际的张量，而是一个指针。\n",
+    "\n",
+    "这里的x_ptr和y_ptr都不是实际数据，但却可以执行加法操作，事实上这里是发送了一个操作到远程机器，让远程机器在数据上执行加法，而其产生的结果也是一个指针，指向的是保留在远程机器上的结果，通过get()获取其真实数据，并且在获取后，远程的bob将失去这个数据，这就是将数据所有权归还给了本地——数据所有权是传递的。"
   ],
   "cell_type": "markdown",
   "metadata": {}
@@ -86,6 +95,7 @@
    }
   ],
   "source": [
+    "# 基本张量运算\n",
    "x = torch.tensor([1,2,3,4,5])\n",
    "y = x + x\n",
    "print(y)"
@@ -97,117 +107,27 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "bob = sy.VirtualWorker(hook, id=\"bob\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "# 张量指针的运算\n",
+    "bob = sy.VirtualWorker(hook, id=\"bob\")\n",
    "x = torch.tensor([1,2,3,4,5])\n",
-    "y = torch.tensor([1,1,1,1,1])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": [
-      "(Wrapper)>[PointerTensor | me:2749667760 -> bob:24084234239]\n(Wrapper)>[PointerTensor | me:90763379457 -> bob:66772298697]\n"
-     ]
-    },
-    {
-     "output_type": "execute_result",
-     "data": {
-      "text/plain": [
-       "<VirtualWorker id:me #objects:0>"
-      ]
-     },
-     "metadata": {},
-     "execution_count": 22
-    }
-   ],
-   "source": [
+    "y = torch.tensor([1,1,1,1,1])\n",
    "x_ptr = x.send(bob)\n",
    "y_ptr = y.send(bob)\n",
-    "print(x_ptr)\n",
-    "print(y_ptr)\n",
-    "x_ptr.location\n",
-    "x_ptr.id_at_location\n",
-    "x_ptr.owner"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "execute_result",
-     "data": {
-      "text/plain": [
-       "{24084234239: tensor([1, 2, 3, 4, 5]), 66772298697: tensor([1, 1, 1, 1, 1])}"
-      ]
-     },
-     "metadata": {},
-     "execution_count": 8
-    }
-   ],
-   "source": [
-    "bob._objects"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "stream",
-     "name": "stdout",
-     "text": [
-      "(Wrapper)>[PointerTensor | me:43302083549 -> bob:80050839170]\n"
-     ]
-    }
-   ],
-   "source": [
-    "z = x_ptr + x_ptr\n",
-    "print(z)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "execute_result",
-     "data": {
-      "text/plain": [
-       "{24084234239: tensor([1, 2, 3, 4, 5]),\n",
-       " 66772298697: tensor([1, 1, 1, 1, 1]),\n",
-       " 44465068457: tensor([ 2,  4,  6,  8, 10]),\n",
-       " 57491926079: tensor([1, 2, 3, 4, 5]),\n",
-       " 11932127750: tensor([1, 1, 1, 1, 1])}"
-      ]
-     },
-     "metadata": {},
-     "execution_count": 33
-    }
-   ],
-   "source": [
-    "bob._objects"
+    "z_ptr = x_ptr + x_ptr\n",
+    "z = z_ptr.get()"
   ]
  },
  {
   "source": [
-    "## 2 using tensor pointer"
+    "## 2 张量指针\n",
+    "\n",
+    "张量（tensor）是数据科学、深度学习中的一个基本概念，用过pytorch、tensorflow的会对它有更深的认识。\n",
+    "\n",
+    "这里不详细阐述张量的概念，可以去之前学tensorflow的文章中看看。这里只谈用PySyft是如何解决安全隐私问题的：\n",
+    "\n",
+    "张量通常包含数据，数据可能包含隐私信息，很多时候计算的任务不能独立完成，需要借助第三方，在这个过程中，必须保留数据持有者对数据的操作权，PySyft就是基于这个思想，提出了张量指针（PointerTensor）的概念。\n",
+    "\n",
+    "指针我们都知道，学过计算机语言的都能说出“地址”等概念。但这里的张量指针并不只是变量地址这么简单的东西。"
   ],
   "cell_type": "markdown",
   "metadata": {}
@@ -241,28 +161,6 @@
    "print(z)"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "execute_result",
-     "data": {
-      "text/plain": [
-       "{78395822529: tensor([1, 2, 3, 4, 5]),\n",
-       " 72335892048: tensor([1, 1, 1, 1, 1]),\n",
-       " 49833880055: tensor([2, 3, 4, 5, 6])}"
-      ]
-     },
-     "metadata": {},
-     "execution_count": 40
-    }
-   ],
-   "source": [
-    "bob._objects"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": 41,
@@ -283,26 +181,6 @@
    "z.get()"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [
-    {
-     "output_type": "execute_result",
-     "data": {
-      "text/plain": [
-       "{78395822529: tensor([1, 2, 3, 4, 5]), 72335892048: tensor([1, 1, 1, 1, 1])}"
-      ]
-     },
-     "metadata": {},
-     "execution_count": 42
-    }
-   ],
-   "source": [
-    "bob._objects"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": 43,
@@ -326,6 +204,45 @@
    "print(x.grad)"
   ]
  },
+  {
+   "source": [
+    "## 3 worker工作机\n",
+    "1. 工作机（Worker），它表示一台拥有计算资源和数据资源的实体。之前的VirtualWorker就是对这样一个实体的模拟，用于演示与远程机器的通信。\n",
+    "\n",
+    "2. 本地工作机的计算资源和数据资源就是原生的torch操作和张量。\n",
+    "\n",
+    "3. 它在调用hook的时候会自动创建。\n",
+    "\n",
+    "4. 工作机的一个基本原则是，它只能对自己的机器上的数据进行计算。\n",
+    "\n",
+    "5. 事实上，不仅数据只能归工作机所有，“计算”也是一样：上面进行加法操作的每一步，实际上都是把计算操作发送到远程工作机上执行。下面的“计划”会进一步说明。"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "alice = sy.VirtualWorker(hook, id=\"alice\")\n",
+    "bob = sy.VirtualWorker(hook, id=\"bob\")\n",
+    "# alice and bob are the remote workers\n",
+    "x = torch.tensor([1,2,3,4,5])\n",
+    "y = torch.tensor([1,1,1,1,1])\n",
+    "# x and y are both local data\n",
+    "z = x + y # z is local as well\n",
+    "# send x to alice and y to bob\n",
+    "x_ptr = x.send(alice)\n",
+    "y_ptr = y.send(bob)\n",
+    "# this statement cannot be executed: x_ptr refers to data on alice, y_ptr to data on bob\n",
+    "z = x_ptr + y_ptr\n",
+    "# this one can be executed: x_ptr and y_ptr are now both on bob\n",
+    "x_ptr = x.send(bob)\n",
+    "z = x_ptr+y_ptr"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
--- a/pytorch/官方教程/10
+++ b/pytorch/官方教程/10
@@ -1,13 +1,16 @@
 # pysyft
 > 对pytorch框架和TensorFlow框架的federated框架进行了研究。
+> * tensorflow federated 框架只提供了本地的仿真。
+> * pysyft 框架提供了websocket worker初步实现了基于websocket网络通信的多进程仿真，是当前最接近于实践的一种仿真方式，能够实现多个linux/python环境下的仿真与多个进程下的仿真。只在0.2.4中有，可以尝试在此基础进行改进和训练。

 > 参考文献
 > * [A generic framework for privacy preserving deep learning](https://zhuanlan.zhihu.com/p/114774133)
 > * [FedAvg 的 Pytorch 实现](https://zhuanlan.zhihu.com/p/259806876?utm_source=wechat_session)
+> * [安全深度学习框架PySyft](https://blog.csdn.net/u011602557/article/details/103661581/)
+

 ## 1 论文阅读

-
 ### pysyft的特点

 PySyft是用于安全和隐私深度学习的Python库，它在主流深度学习框架（例如PyTorch和TensorFlow）中使用联邦学习，差分隐私和加密计算（例如多方计算（MPC）和同态加密（HE））将隐私数据与模型训练分离。
@@ -53,5 +56,62 @@ PySyft是用于安全和隐私深度学习的Python库，它在主流深度学
 1. 由于主模型的参数和节点中所有局部模型的参数都是随机初始化的，所有这些参数将彼此不同。因此，在对节点中的本地模型进行训练之前，主模型会将模型参数发送给节点。
 2. 节点使用这些参数在其自身的数据上训练本地模型。
 3. 每个节点在训练自己的模型时都会更新其参数。训练过程完成后，每个节点会将其参数发送到主模型。
-主模型采用这些参数的平均值并将其设置为新的权重参数，并将其传递回节点以进行下一次迭代。
+4. 主模型采用这些参数的平均值并将其设置为新的权重参数，并将其传递回节点以进行下一次迭代。

+## 3 Pysyft简介
+
+### 环境简介
+
+* pysyft==0.2.4
+* pytorch==1.4.0
+
+### 安装
+
+```
+git clone https://github.com/OpenMined/PySyft.git
+cd PySyft
+pip install -r pip-dep/requirements.txt
+pip install -r pip-dep/requirements_udacity.txt
+python setup.py install
+python setup.py test
+
+pip install scipy
+pip install nbformat
+pip install pandas
+pip install pyOpenSSL
+pip install papermill
+pip install scikit-learn
+
+pip install jupyter_latex_envs --upgrade --user
+jupyter nbextension install --py latex_envs --user
+jupyter nbextension enable latex_envs --user --py
+```
+
+
+## 4 设计思路
+
+提供了不同级别的联邦学习技术
+
+1. 本地单线程仿真virtual_worker：move模型，依次训练。
+2. 本地单线程仿真virtual_worker：集中数据分离，训练模型聚合。
+3. 本地单线程仿真virtual_worker：分散数据，训练模型聚合。
+4. 本地多线程仿真websocket_worker：多线程通信，训练模型聚合
+5. 远程多线程仿真websocket_worker：多线程通信，训练模型聚合
+
+
+主要的设计思想
+
+1. 使用“张量指针”来记录对远程张量的操作。
+2. 使用“worker”的send和get方法封装不同的通信过程（虚拟通信和websocket远程通信）
+   1. 张量通信
+   2. plan&protocol通信
+
+
+主要包含以下五个模块
+
+
+1. 张量指针：tensor_ptr指针模块。
+2. 工作机器：worker通信原理和websocket实现（send、receive、client、server）。
+3. 远程计算：远程计算的实现（plan，protocol）。
+4. 加密计算：加密算法的实现（MPC多方计算、HE同态加密）。
+5. 联邦平均：联邦平均算法的实现（util.fed_avg(models)）。
--- a/pytorch/官方教程/11
+++ b/pytorch/官方教程/11
@@ -241,13 +241,6 @@
   "source": [
    "train()"
   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
  }
 ]
 }
--- a/pytorch/官方教程/12
+++ b/pytorch/官方教程/12
@@ -213,13 +213,6 @@
   "source": [
    "x"
   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
  }
 ]
 }
--- a/pytorch/官方教程/13
+++ b/pytorch/官方教程/13
@@ -26,6 +26,23 @@
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
+  {
+   "source": [
+    "# 联邦平均\n",
+    "\n",
+    "联邦学习（Federated Learning）是一种安全分布式深度学习技术，它允许各个数据持有者在不公开数据的情况下协同训练得到一个共享的模型，其目的是打破数据孤岛，在保护数据的隐私的前提下利用数据实现数据整合。\n",
+    "目前关于联邦学习的实现有许多说法，有梯度聚合、模型平均、选择上传等等。有的认为参数服务器持有模型，参与者不持有；有的认为是各个数据持有者持有模型，参数服务器不需要获取模型。众说纷纭。\n",
+    "但其核心是不变的：那就是数据分离，通信加密。\n",
+    "\n",
+    "联邦学习的各个参与者，会在本地训练模型，然后每一轮（或者固定间隔的轮次）将其模型参数，或者梯度（广义梯度，即前一轮次与当前轮次的模型参数的差）上传到参数服务器，由参数服务器将各个参与者的上传参数进行聚合，得到的结果再返还给各个参与者，参与者更新本地模型后，继续训练。\n",
+    "在这个过程中，有如下几个计划：\n",
+    "\n",
+    "1. 模型训练：模型是需要训练的，这个操作必须由各个参与者执行。\n",
+    "2. 安全聚合：在梯度传递到参数服务器并返回给各个参与者这个过程中，传递的参数是不安全的，需要进行加密保护；并且，参数服务器要对参数进行聚合。\n"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
  {
   "cell_type": "code",
   "execution_count": 2,
--- a/中心数据实例.ipynb
+++ b/中心数据实例.ipynb
--- a/pytorch/官方教程/17
+++ b/pytorch/官方教程/17
@@ -40,6 +40,17 @@
   "cell_type": "markdown",
   "metadata": {}
  },
+  {
+   "source": [
+    "1. 计划（Plan）指的是可存储的Torch操作序列，它可以被发送到远程机器执行，并且保留对其引用。它提出的目的是**减少通信量**。以之前的例子来说，如果我们要反复在远程机器上完成两个张量的求和、平均这两个操作，每次计算都需要与远程机器通信一次，这是不必要的开销。因此我们可以用计划包裹这一系列操作，发送给工作机，然后只需要发一次消息即可。\n",
+    "\n",
+    "2. 要将普通的函数转化为计划函数，只需要用**装饰器**即可实现。\n",
+    "\n",
+    "3. 创建计划函数后，需要保证计划已经**被构建**才能使用，通过calcu.is_built进行判断。\n"
+   ],
+   "cell_type": "markdown",
+   "metadata": {}
+  },
  {
   "cell_type": "code",
   "execution_count": 1,
--- a/pytorch/官方教程/18
+++ b/pytorch/官方教程/18
@@ -36,6 +36,19 @@
   "cell_type": "markdown",
   "metadata": {}
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "协议（Protocol）是对计划到工作机的分配。\n",
+    "在上面的计划中，构建计划后，需要将计划发送到目标工作机，如果计划较多，这会十分繁琐。使用协议，将会变得便捷。\n",
+    "\n",
+    "下面的例子是，首先由Alice完成加法，然后由Bob完成乘法，最后得到结果。\n",
+    "\n"
+   ]
+  },
  {
   "source": [
    "## 1 创建部署\n",
--- a/工作日志/2021年5月10日-深入研究pysyft.md
+++ b/工作日志/2021年5月10日-深入研究pysyft.md
@@ -0,0 +1,15 @@
+## 计划
+> 需要深入阅读并理解0.2.4版本的pysyft框架。
+> 包括其指针、计划、加密、worker（仿真与实现）。我发现这些代码的设计都非常优秀。日后方便自己完成整个系统。
+
+
+- [ ] 指针模块
+- [ ] 计划模块
+- [ ] 加密模块
+- [ ] 训练模块（util.fed_avg）
+- [ ] worker通信模块（仿真与通信的实现封装）
+
+
+
+
+## 收获