Update Notebook (epsilon greedy function bug pointed out by Sambit)

2026-06-15 06:27:24 +08:00 · 2022-05-29 15:22:26 +02:00
parent 591c8ccb75
commit 16a76eb540
1 changed files with 14 additions and 5 deletions
--- a/unit2/unit2.ipynb
+++ b/unit2/unit2.ipynb
@@ -562,10 +562,19 @@
        "id": "3kk8TU3w4Ali"
      }
    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Thanks to Sambit for finding a bug in the `epsilon_greedy_policy` function 🤗"
+      ],
+      "metadata": {
+        "id": "LjZSvhsD7_52"
+      }
+    },
    {
      "cell_type": "code",
      "source": [
-        "def epsilon_greedy_policy(Qtable, state):\n",
+        "def epsilon_greedy_policy(Qtable, state, epsilon):\n",
        "  # Randomly generate a number between 0 and 1\n",
        "  random_num = \n",
        "  # if random_num is greater than epsilon --> exploitation\n",
@@ -597,7 +606,7 @@
    {
      "cell_type": "code",
      "source": [
-        "def epsilon_greedy_policy(Qtable, state):\n",
+        "def epsilon_greedy_policy(Qtable, state, epsilon):\n",
        "  # Randomly generate a number between 0 and 1\n",
        "  random_int = random.uniform(0,1)\n",
        "  # if random_int is greater than epsilon --> exploitation\n",
@@ -713,7 +722,7 @@
        "epsilon = 1.0                 # Exploration rate\n",
        "max_epsilon = 1.0             # Exploration probability at start\n",
        "min_epsilon = 0.05            # Minimum exploration probability \n",
-        "decay_rate = 0.005            # Exponential decay rate for exploration prob"
+        "decay_rate = 0.0005            # Exponential decay rate for exploration prob"
      ],
      "metadata": {
        "id": "Y1tWn0tycWZ1"
@@ -792,7 +801,7 @@
        "    # repeat\n",
        "    for step in range(max_steps):\n",
        "      # Choose the action At using epsilon greedy policy\n",
-        "      action = epsilon_greedy_policy(Qtable, state)\n",
+        "      action = epsilon_greedy_policy(Qtable, state, epsilon)\n",
        "\n",
        "      # Take action At and observe Rt+1 and St+1\n",
        "      # Take the action (a) and observe the outcome state(s') and reward (r)\n",
@@ -1780,4 +1789,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
+}