From 9802c0b6b4e3c5f247df067a05eb3f6d5146a217 Mon Sep 17 00:00:00 2001 From: Roberto Schiavone Date: Thu, 1 Jun 2023 23:00:00 +0200 Subject: [PATCH 1/2] fix: wrap eval_env in Monitor :bug: --- notebooks/unit1/unit1.ipynb | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/notebooks/unit1/unit1.ipynb b/notebooks/unit1/unit1.ipynb index cc09268..8283dd3 100644 --- a/notebooks/unit1/unit1.ipynb +++ b/notebooks/unit1/unit1.ipynb @@ -338,8 +338,9 @@ "from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.\n", "\n", "from stable_baselines3 import PPO\n", + "from stable_baselines3.common.env_util import make_vec_env\n", "from stable_baselines3.common.evaluation import evaluate_policy\n", - "from stable_baselines3.common.env_util import make_vec_env" + "from stable_baselines3.common.monitor import Monitor" ] }, { @@ -738,6 +739,7 @@ }, "source": [ "## Evaluate the agent 📈\n", + "- Remember to wrap the environment in a [Monitor](https://stable-baselines3.readthedocs.io/en/master/common/monitor.html).\n", "- Now that our Lunar Lander agent is trained 🚀, we need to **check its performance**.\n", "- Stable-Baselines3 provides a method to do that: `evaluate_policy`.\n", "- To fill that part you need to [check the documentation](https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#basic-usage-training-saving-loading)\n", @@ -784,7 +786,7 @@ "outputs": [], "source": [ "#@title\n", - "eval_env = gym.make(\"LunarLander-v2\")\n", + "eval_env = Monitor(gym.make(\"LunarLander-v2\"))\n", "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n", "print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")" ] @@ -917,7 +919,7 @@ "env_id = \n", "\n", "# Create the evaluation env and set the render_mode=\"rgb_array\"\n", - "eval_env = DummyVecEnv([lambda: gym.make(env_id, render_mode=\"rgb_array\")])\n", + "eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode=\"rgb_array\"))])\n", "\n", "\n", "# TODO: Define the model architecture we used\n", @@ -1096,7 +1098,7 @@ "cell_type": "code", "source": [ "#@title\n", - "eval_env = gym.make(\"LunarLander-v2\")\n", + "eval_env = Monitor(gym.make(\"LunarLander-v2\"))\n", "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n", "print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")" ], From 7eab5ef2ed96be5096615b0b85331b63785aca34 Mon Sep 17 00:00:00 2001 From: Roberto Schiavone Date: Fri, 2 Jun 2023 15:30:00 +0200 Subject: [PATCH 2/2] docs: wrap eval_env in Monitor :memo: --- units/en/unit1/hands-on.mdx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/units/en/unit1/hands-on.mdx b/units/en/unit1/hands-on.mdx index 36a9c97..7661f8c 100644 --- a/units/en/unit1/hands-on.mdx +++ b/units/en/unit1/hands-on.mdx @@ -200,8 +200,9 @@ from huggingface_hub import ( ) # To log to our Hugging Face account to be able to upload models to the Hub. from stable_baselines3 import PPO -from stable_baselines3.common.evaluation import evaluate_policy from stable_baselines3.common.env_util import make_vec_env +from stable_baselines3.common.evaluation import evaluate_policy +from stable_baselines3.common.monitor import Monitor ``` ## Understand Gymnasium and how it works 🤖 @@ -448,6 +449,7 @@ model.save(model_name) ## Evaluate the agent 📈 +- Remember to wrap the environment in a [Monitor](https://stable-baselines3.readthedocs.io/en/master/common/monitor.html). - Now that our Lunar Lander agent is trained 🚀, we need to **check its performance**. - Stable-Baselines3 provides a method to do that: `evaluate_policy`. - To fill that part you need to [check the documentation](https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#basic-usage-training-saving-loading) @@ -471,7 +473,7 @@ mean_reward, std_reward = ```python # @title -eval_env = gym.make("LunarLander-v2") +eval_env = Monitor(gym.make("LunarLander-v2")) mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True) print(f"mean_reward={mean_reward:.2f} +/- {std_reward}") ``` @@ -589,7 +591,7 @@ repo_id = "ThomasSimonini/ppo-LunarLander-v2" # Change with your repo id, you c commit_message = "Upload PPO LunarLander-v2 trained agent" # Create the evaluation env and set the render_mode="rgb_array" -eval_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")]) +eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode="rgb_array"))]) # PLACE the package_to_hub function you've just filled here package_to_hub( @@ -660,7 +662,7 @@ Let's evaluate this agent: ```python # @title -eval_env = gym.make("LunarLander-v2") +eval_env = Monitor(gym.make("LunarLander-v2")) mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True) print(f"mean_reward={mean_reward:.2f} +/- {std_reward}") ```