Update Notebook (fix epsilon-greedy function bug pointed out by Sambit)

This commit is contained in:
Thomas Simonini
2022-05-29 15:22:26 +02:00
parent 591c8ccb75
commit 16a76eb540

View File

@@ -562,10 +562,19 @@
"id": "3kk8TU3w4Ali"
}
},
{
"cell_type": "markdown",
"source": [
"Thanks to Sambit for finding a bug in the epsilon-greedy policy function 🤗"
],
"metadata": {
"id": "LjZSvhsD7_52"
}
},
{
"cell_type": "code",
"source": [
"def epsilon_greedy_policy(Qtable, state):\n",
"def epsilon_greedy_policy(Qtable, state, epsilon):\n",
" # Randomly generate a number between 0 and 1\n",
" random_num = \n",
" # if random_num is greater than epsilon --> exploitation\n",
@@ -597,7 +606,7 @@
{
"cell_type": "code",
"source": [
"def epsilon_greedy_policy(Qtable, state):\n",
"def epsilon_greedy_policy(Qtable, state, epsilon):\n",
" # Randomly generate a number between 0 and 1\n",
" random_int = random.uniform(0,1)\n",
" # if random_int is greater than epsilon --> exploitation\n",
@@ -713,7 +722,7 @@
"epsilon = 1.0 # Exploration rate\n",
"max_epsilon = 1.0 # Exploration probability at start\n",
"min_epsilon = 0.05 # Minimum exploration probability \n",
"decay_rate = 0.005 # Exponential decay rate for exploration prob"
"decay_rate = 0.0005 # Exponential decay rate for exploration prob"
],
"metadata": {
"id": "Y1tWn0tycWZ1"
@@ -792,7 +801,7 @@
" # repeat\n",
" for step in range(max_steps):\n",
" # Choose the action At using epsilon greedy policy\n",
" action = epsilon_greedy_policy(Qtable, state)\n",
" action = epsilon_greedy_policy(Qtable, state, epsilon)\n",
"\n",
" # Take action At and observe Rt+1 and St+1\n",
" # Take the action (a) and observe the outcome state(s') and reward (r)\n",
@@ -1780,4 +1789,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
}
}