mirror of
https://github.com/huggingface/deep-rl-class.git
synced 2026-04-05 03:28:05 +08:00
Update Notebook (epsilon-greedy function bug pointed out by Sambit)
This commit is contained in:
@@ -562,10 +562,19 @@
|
||||
"id": "3kk8TU3w4Ali"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Thanks to Sambit for finding a bug on the epsilon function 🤗"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "LjZSvhsD7_52"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def epsilon_greedy_policy(Qtable, state):\n",
|
||||
"def epsilon_greedy_policy(Qtable, state, epsilon):\n",
|
||||
" # Randomly generate a number between 0 and 1\n",
|
||||
" random_num = \n",
|
||||
" # if random_num > greater than epsilon --> exploitation\n",
|
||||
@@ -597,7 +606,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"def epsilon_greedy_policy(Qtable, state):\n",
|
||||
"def epsilon_greedy_policy(Qtable, state, epsilon):\n",
|
||||
" # Randomly generate a number between 0 and 1\n",
|
||||
" random_int = random.uniform(0,1)\n",
|
||||
" # if random_int > greater than epsilon --> exploitation\n",
|
||||
@@ -713,7 +722,7 @@
|
||||
"epsilon = 1.0 # Exploration rate\n",
|
||||
"max_epsilon = 1.0 # Exploration probability at start\n",
|
||||
"min_epsilon = 0.05 # Minimum exploration probability \n",
|
||||
"decay_rate = 0.005 # Exponential decay rate for exploration prob"
|
||||
"decay_rate = 0.0005 # Exponential decay rate for exploration prob"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "Y1tWn0tycWZ1"
|
||||
@@ -792,7 +801,7 @@
|
||||
" # repeat\n",
|
||||
" for step in range(max_steps):\n",
|
||||
" # Choose the action At using epsilon greedy policy\n",
|
||||
" action = epsilon_greedy_policy(Qtable, state)\n",
|
||||
" action = epsilon_greedy_policy(Qtable, state, epsilon)\n",
|
||||
"\n",
|
||||
" # Take action At and observe Rt+1 and St+1\n",
|
||||
" # Take the action (a) and observe the outcome state(s') and reward (r)\n",
|
||||
@@ -1780,4 +1789,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user