mirror of
https://github.com/huggingface/deep-rl-class.git
synced 2026-04-05 03:28:05 +08:00
Fix unit2 notebook
- Add imageio and imageio_ffmpeg
- Add assignment for Qtable_frozenlake
- Fix variable name from `random_int` to `random_num`
- Fix variable name from `Q` to `Qtable`
- Use `repo_id` for Huggingface API
- Blank out repo_id
- Replace reference to Lunar environment (from Unit 1)
This commit is contained in:
@@ -207,7 +207,8 @@
|
||||
"\n",
|
||||
"!pip install huggingface_hub\n",
|
||||
"!pip install pickle5\n",
|
||||
"!pip install pyyaml==6.0 # avoid key error metadata"
|
||||
"!pip install pyyaml==6.0 # avoid key error metadata\n",
|
||||
"!pip install imageio imageio_ffmpeg"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "1Ac7wW_5ClJC"
|
||||
@@ -476,7 +477,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"initialize_q_table(state_space, action_space)"
|
||||
"Qtable_frozenlake = initialize_q_table(state_space, action_space)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "9YfvrqRt3jdR"
|
||||
@@ -566,9 +567,9 @@
|
||||
"source": [
|
||||
"def epsilon_greedy_policy(Qtable, state):\n",
|
||||
" # Randomly generate a number between 0 and 1\n",
|
||||
" random_int = \n",
|
||||
" # if random_int > greater than epsilon --> exploitation\n",
|
||||
" if random_int > epsilon:\n",
|
||||
" random_num = \n",
|
||||
" # if random_num is greater than epsilon --> exploitation\n",
|
||||
" if random_num > epsilon:\n",
|
||||
" # Take the action with the highest value given a state\n",
|
||||
" # np.argmax can be useful here\n",
|
||||
" action = \n",
|
||||
@@ -603,7 +604,7 @@
|
||||
" if random_int > epsilon:\n",
|
||||
" # Take the action with the highest value given a state\n",
|
||||
" # np.argmax can be useful here\n",
|
||||
" action = np.argmax(Q[state])\n",
|
||||
" action = np.argmax(Qtable[state])\n",
|
||||
" # else --> exploration\n",
|
||||
" else:\n",
|
||||
" action = env.action_space.sample()\n",
|
||||
@@ -669,7 +670,7 @@
|
||||
"source": [
|
||||
"def greedy_policy(Qtable, state):\n",
|
||||
" # Exploitation: take the action with the highest state, action value\n",
|
||||
" action = np.argmax(Q[state])\n",
|
||||
" action = np.argmax(Qtable[state])\n",
|
||||
" \n",
|
||||
" return action"
|
||||
],
|
||||
@@ -1030,9 +1031,8 @@
|
||||
" api = HfApi()\n",
|
||||
" \n",
|
||||
" repo_url = api.create_repo(\n",
|
||||
" name=repo_name,\n",
|
||||
" repo_id=repo_id,\n",
|
||||
" token=token,\n",
|
||||
" organization=organization,\n",
|
||||
" private=False,\n",
|
||||
" exist_ok=True,)\n",
|
||||
" \n",
|
||||
@@ -1284,10 +1284,10 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"push_to_hub(repo_id=\"ThomasSimonini/q-FrozenLake-v1-8x8-slippery\", \n",
|
||||
" model=model,\n",
|
||||
" env=env\n",
|
||||
" )"
|
||||
"repo_id = \n",
|
||||
"push_to_hub(repo_id=repo_id,\n",
|
||||
" model=model,\n",
|
||||
" env=env)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "RpOTtSt83kPZ"
|
||||
@@ -1526,10 +1526,10 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"push_to_hub(repo_id = \"ThomasSimonini/q-Taxi-v3\", \n",
|
||||
" model=model,\n",
|
||||
" env=env\n",
|
||||
" )"
|
||||
"repo_id = \n",
|
||||
"push_to_hub(repo_id=repo_id,\n",
|
||||
" model=model,\n",
|
||||
" env=env)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "dhQtiQozhOn1"
|
||||
@@ -1709,7 +1709,7 @@
|
||||
"* Try different hyperparameters by looking at what your classmates have done.\n",
|
||||
"* **Push your new trained model** on the Hub 🔥\n",
|
||||
"\n",
|
||||
"Is moon landing too boring to you? Try to **change the environment**, why not using FrozenLake-v1 non slippery version? Check how they works [using the gym documentation](https://www.gymlibrary.ml/) and have fun 🎉."
|
||||
"Are walking on ice and driving taxis too boring for you? Try to **change the environment**: why not use the slippery version of FrozenLake-v1? Check how they work [using the gym documentation](https://www.gymlibrary.ml/) and have fun 🎉."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1778,4 +1778,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user