From e4359372148725f8970443f250b69674855665d1 Mon Sep 17 00:00:00 2001 From: simoninithomas Date: Sat, 7 Jan 2023 18:13:55 +0100 Subject: [PATCH] Some minor updates --- notebooks/unit1/unit1.ipynb | 11 +---------- units/en/unit0/introduction.mdx | 8 ++++---- units/en/unit1/hands-on.mdx | 2 ++ units/en/unit2/glossary.mdx | 2 ++ units/en/unit2/hands-on.mdx | 2 ++ units/en/unit3/hands-on.mdx | 3 +++ units/en/unit4/hands-on.mdx | 21 ++++++++++++--------- 7 files changed, 26 insertions(+), 23 deletions(-) diff --git a/notebooks/unit1/unit1.ipynb b/notebooks/unit1/unit1.ipynb index fff439e..ec814b0 100644 --- a/notebooks/unit1/unit1.ipynb +++ b/notebooks/unit1/unit1.ipynb @@ -230,15 +230,6 @@ "execution_count": null, "outputs": [] }, - { - "cell_type": "markdown", - "source": [ - "TODO CHANGE LINK OF THE REQUIREMENTS" - ], - "metadata": { - "id": "32e3NPYgH5ET" - } - }, { "cell_type": "code", "execution_count": null, @@ -1155,4 +1146,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/units/en/unit0/introduction.mdx b/units/en/unit0/introduction.mdx index c0586c7..b3e2e00 100644 --- a/units/en/unit0/introduction.mdx +++ b/units/en/unit0/introduction.mdx @@ -52,8 +52,8 @@ The course is composed of: You can choose to follow this course either: -- *To get a certificate of completion*: you need to complete 80% of the assignments before the end of March 2023. -- *To get a certificate of honors*: you need to complete 100% of the assignments before the end of March 2023. +- *To get a certificate of completion*: you need to complete 80% of the assignments before the end of April 2023. +- *To get a certificate of honors*: you need to complete 100% of the assignments before the end of April 2023. - *As a simple audit*: you can participate in all challenges and do assignments if you want, but you have no deadlines. Both paths **are completely free**. @@ -65,8 +65,8 @@ You don't need to tell us which path you choose. At the end of March, when we wi The certification process is **completely free**: -- *To get a certificate of completion*: you need to complete 80% of the assignments before the end of March 2023. -- *To get a certificate of honors*: you need to complete 100% of the assignments before the end of March 2023. +- *To get a certificate of completion*: you need to complete 80% of the assignments before the end of April 2023. +- *To get a certificate of honors*: you need to complete 100% of the assignments before the end of April 2023. Course certification diff --git a/units/en/unit1/hands-on.mdx b/units/en/unit1/hands-on.mdx index 0d5732d..c2dc4cd 100644 --- a/units/en/unit1/hands-on.mdx +++ b/units/en/unit1/hands-on.mdx @@ -24,6 +24,8 @@ To find your result, go to the [leaderboard](https://huggingface.co/spaces/huggi For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process +And you can check your progress here 👉 https://huggingface.co/spaces/ThomasSimonini/Check-my-progress-Deep-RL-Course + So let's get started! 🚀 **To start the hands-on click on Open In Colab button** 👇 : diff --git a/units/en/unit2/glossary.mdx b/units/en/unit2/glossary.mdx index d8b13ef..a8365e9 100644 --- a/units/en/unit2/glossary.mdx +++ b/units/en/unit2/glossary.mdx @@ -14,12 +14,14 @@ This is a community-created glossary. Contributions are welcomed! - **The action-value function.** In contrast to the state-value function, the action-value calculates for each state and action pair the expected return if the agent starts in that state and takes an action. Then it follows the policy forever after. ### Epsilon-greedy strategy: + - Common exploration strategy used in reinforcement learning that involves balancing exploration and exploitation. - Chooses the action with the highest expected reward with a probability of 1-epsilon. - Chooses a random action with a probability of epsilon. - Epsilon is typically decreased over time to shift focus towards exploitation. ### Greedy strategy: + - Involves always choosing the action that is expected to lead to the highest reward, based on the current knowledge of the environment. (only exploitation) - Always chooses the action with the highest expected reward. - Does not include any exploration. diff --git a/units/en/unit2/hands-on.mdx b/units/en/unit2/hands-on.mdx index 71c0151..5e4c164 100644 --- a/units/en/unit2/hands-on.mdx +++ b/units/en/unit2/hands-on.mdx @@ -22,6 +22,8 @@ To find your result, go to the [leaderboard](https://huggingface.co/spaces/huggi For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process +And you can check your progress here 👉 https://huggingface.co/spaces/ThomasSimonini/Check-my-progress-Deep-RL-Course + **To start the hands-on click on Open In Colab button** 👇 : diff --git a/units/en/unit3/hands-on.mdx b/units/en/unit3/hands-on.mdx index 1d64f14..b1dd03c 100644 --- a/units/en/unit3/hands-on.mdx +++ b/units/en/unit3/hands-on.mdx @@ -24,6 +24,9 @@ To find your result, go to the leaderboard and find your model, **the result = m For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process +And you can check your progress here 👉 https://huggingface.co/spaces/ThomasSimonini/Check-my-progress-Deep-RL-Course + + **To start the hands-on click on Open In Colab button** 👇 : [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/deep-rl-class/blob/master/notebooks/unit3/unit3.ipynb) diff --git a/units/en/unit4/hands-on.mdx b/units/en/unit4/hands-on.mdx index b0c8956..e4deb34 100644 --- a/units/en/unit4/hands-on.mdx +++ b/units/en/unit4/hands-on.mdx @@ -28,6 +28,9 @@ To find your result, go to the leaderboard and find your model, **the result = m For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process +And you can check your progress here 👉 https://huggingface.co/spaces/ThomasSimonini/Check-my-progress-Deep-RL-Course + + **To start the hands-on click on Open In Colab button** 👇 : [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/deep-rl-class/blob/master/notebooks/unit4/unit4.ipynb) @@ -671,7 +674,7 @@ def record_video(env, policy, out_directory, fps=30): ``` ```python -def push_to_hub(repo_id, +def push_to_hub(repo_id, model, hyperparameters, eval_env, @@ -689,12 +692,12 @@ def push_to_hub(repo_id, :param model: the pytorch model we want to save :param hyperparameters: training hyperparameters :param eval_env: evaluation environment - :param video_fps: how many frame per seconds to record our video replay + :param video_fps: how many frame per seconds to record our video replay """ _, repo_name = repo_id.split("/") api = HfApi() - + # Step 1: Create the repo repo_url = api.create_repo( repo_id=repo_id, @@ -703,25 +706,25 @@ def push_to_hub(repo_id, with tempfile.TemporaryDirectory() as tmpdirname: local_directory = Path(tmpdirname) - + # Step 2: Save the model torch.save(model, local_directory / "model.pt") # Step 3: Save the hyperparameters to JSON with open(local_directory / "hyperparameters.json", "w") as outfile: json.dump(hyperparameters, outfile) - + # Step 4: Evaluate the model and build JSON - mean_reward, std_reward = evaluate_agent(eval_env, + mean_reward, std_reward = evaluate_agent(eval_env, hyperparameters["max_t"], - hyperparameters["n_evaluation_episodes"], + hyperparameters["n_evaluation_episodes"], model) # Get datetime eval_datetime = datetime.datetime.now() eval_form_datetime = eval_datetime.isoformat() evaluate_data = { - "env_id": hyperparameters["env_id"], + "env_id": hyperparameters["env_id"], "mean_reward": mean_reward, "n_evaluation_episodes": hyperparameters["n_evaluation_episodes"], "eval_datetime": eval_form_datetime, @@ -733,7 +736,7 @@ def push_to_hub(repo_id, # Step 5: Create the model card env_name = hyperparameters["env_id"] - + metadata = {} metadata["tags"] = [ env_name,