diff --git a/unit5/unit5.ipynb b/unit5/unit5.ipynb index 3840862..b23e2e5 100644 --- a/unit5/unit5.ipynb +++ b/unit5/unit5.ipynb @@ -673,8 +673,13 @@ " for t in range(n_steps)[::-1]:\n", " disc_return_t = (returns[0] if len(returns)>0 else 0)\n", " returns.appendleft( ) # complete here \n", - " \n", - "\n", + " \n", + " ## standardization of the returns is employed to make training more stable\n", + " eps = np.finfo(np.float32).eps.item()\n", + " ## eps is the float32 machine epsilon (smallest increment above 1.0), which is \n", + " # added to the standard deviation of the returns to avoid division by zero\n", + " returns = torch.tensor(returns)\n", + " returns = (returns - returns.mean()) / (returns.std() + eps)\n", " # Line 7:\n", " policy_loss = []\n", " for log_prob, disc_return in zip(saved_log_probs, returns):\n", @@ -766,6 +771,12 @@ " disc_return_t = (returns[0] if len(returns)>0 else 0)\n", " returns.appendleft( gamma*disc_return_t + rewards[t] ) \n", " \n", + " ## standardization of the returns is employed to make training more stable\n", + " eps = np.finfo(np.float32).eps.item()\n", + " ## eps is the float32 machine epsilon (smallest increment above 1.0), which is \n", + " # added to the standard deviation of the returns to avoid division by zero \n", + " returns = torch.tensor(returns)\n", + " returns = (returns - returns.mean()) / (returns.std() + eps)\n", " # Line 7:\n", " policy_loss = []\n", " for log_prob, disc_return in zip(saved_log_probs, returns):\n",