From 16a76eb5401117189c2624d172d4e4cde86f8514 Mon Sep 17 00:00:00 2001 From: Thomas Simonini Date: Sun, 29 May 2022 15:22:26 +0200 Subject: [PATCH] Update Notebook (epsilon greedy function bug pointed by sambit) --- unit2/unit2.ipynb | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/unit2/unit2.ipynb b/unit2/unit2.ipynb index 766b190..20fbb1c 100644 --- a/unit2/unit2.ipynb +++ b/unit2/unit2.ipynb @@ -562,10 +562,19 @@ "id": "3kk8TU3w4Ali" } }, + { + "cell_type": "markdown", + "source": [ + "Thanks to Sambit for finding a bug on the epsilon function 🤗" + ], + "metadata": { + "id": "LjZSvhsD7_52" + } + }, { "cell_type": "code", "source": [ - "def epsilon_greedy_policy(Qtable, state):\n", + "def epsilon_greedy_policy(Qtable, state, epsilon):\n", " # Randomly generate a number between 0 and 1\n", " random_num = \n", " # if random_num > greater than epsilon --> exploitation\n", @@ -597,7 +606,7 @@ { "cell_type": "code", "source": [ - "def epsilon_greedy_policy(Qtable, state):\n", + "def epsilon_greedy_policy(Qtable, state, epsilon):\n", " # Randomly generate a number between 0 and 1\n", " random_int = random.uniform(0,1)\n", " # if random_int > greater than epsilon --> exploitation\n", @@ -713,7 +722,7 @@ "epsilon = 1.0 # Exploration rate\n", "max_epsilon = 1.0 # Exploration probability at start\n", "min_epsilon = 0.05 # Minimum exploration probability \n", - "decay_rate = 0.005 # Exponential decay rate for exploration prob" + "decay_rate = 0.0005 # Exponential decay rate for exploration prob" ], "metadata": { "id": "Y1tWn0tycWZ1" @@ -792,7 +801,7 @@ " # repeat\n", " for step in range(max_steps):\n", " # Choose the action At using epsilon greedy policy\n", - " action = epsilon_greedy_policy(Qtable, state)\n", + " action = epsilon_greedy_policy(Qtable, state, epsilon)\n", "\n", " # Take action At and observe Rt+1 and St+1\n", " # Take the action (a) and observe the outcome state(s') and reward (r)\n", @@ -1780,4 +1789,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file