mirror of
https://github.com/huggingface/deep-rl-class.git
synced 2026-06-14 22:17:15 +08:00
Some minor updates
This commit is contained in:
@@ -230,15 +230,6 @@
|
||||
"execution_count": null,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"TODO CHANGE LINK OF THE REQUIREMENTS"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "32e3NPYgH5ET"
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -1155,4 +1146,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,8 +52,8 @@ The course is composed of:
|
||||
|
||||
You can choose to follow this course either:
|
||||
|
||||
- *To get a certificate of completion*: you need to complete 80% of the assignments before the end of March 2023.
|
||||
- *To get a certificate of honors*: you need to complete 100% of the assignments before the end of March 2023.
|
||||
- *To get a certificate of completion*: you need to complete 80% of the assignments before the end of April 2023.
|
||||
- *To get a certificate of honors*: you need to complete 100% of the assignments before the end of April 2023.
|
||||
- *As a simple audit*: you can participate in all challenges and do assignments if you want, but you have no deadlines.
|
||||
|
||||
Both paths **are completely free**.
|
||||
@@ -65,8 +65,8 @@ You don't need to tell us which path you choose. At the end of March, when we wi
|
||||
|
||||
The certification process is **completely free**:
|
||||
|
||||
- *To get a certificate of completion*: you need to complete 80% of the assignments before the end of March 2023.
|
||||
- *To get a certificate of honors*: you need to complete 100% of the assignments before the end of March 2023.
|
||||
- *To get a certificate of completion*: you need to complete 80% of the assignments before the end of April 2023.
|
||||
- *To get a certificate of honors*: you need to complete 100% of the assignments before the end of April 2023.
|
||||
|
||||
<img src="https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit0/certification.jpg" alt="Course certification" width="100%"/>
|
||||
|
||||
|
||||
@@ -24,6 +24,8 @@ To find your result, go to the [leaderboard](https://huggingface.co/spaces/huggi
|
||||
|
||||
For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process
|
||||
|
||||
And you can check your progress here 👉 https://huggingface.co/spaces/ThomasSimonini/Check-my-progress-Deep-RL-Course
|
||||
|
||||
So let's get started! 🚀
|
||||
|
||||
**To start the hands-on, click on the Open In Colab button** 👇:
|
||||
|
||||
@@ -14,12 +14,14 @@ This is a community-created glossary. Contributions are welcomed!
|
||||
- **The action-value function.** In contrast to the state-value function, the action-value function calculates, for each state-action pair, the expected return if the agent starts in that state, takes that action, and then follows the policy forever after.
|
||||
|
||||
### Epsilon-greedy strategy:
|
||||
|
||||
- Common exploration strategy used in reinforcement learning that involves balancing exploration and exploitation.
|
||||
- Chooses the action with the highest expected reward with a probability of 1-epsilon.
|
||||
- Chooses a random action with a probability of epsilon.
|
||||
- Epsilon is typically decreased over time to shift focus towards exploitation.
|
||||
|
||||
### Greedy strategy:
|
||||
|
||||
- Involves always choosing the action that is expected to lead to the highest reward, based on the current knowledge of the environment (exploitation only).
|
||||
- Always chooses the action with the highest expected reward.
|
||||
- Does not include any exploration.
|
||||
|
||||
@@ -22,6 +22,8 @@ To find your result, go to the [leaderboard](https://huggingface.co/spaces/huggi
|
||||
|
||||
For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process
|
||||
|
||||
And you can check your progress here 👉 https://huggingface.co/spaces/ThomasSimonini/Check-my-progress-Deep-RL-Course
|
||||
|
||||
|
||||
**To start the hands-on, click on the Open In Colab button** 👇:
|
||||
|
||||
|
||||
@@ -24,6 +24,9 @@ To find your result, go to the leaderboard and find your model, **the result = m
|
||||
|
||||
For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process
|
||||
|
||||
And you can check your progress here 👉 https://huggingface.co/spaces/ThomasSimonini/Check-my-progress-Deep-RL-Course
|
||||
|
||||
|
||||
**To start the hands-on, click on the Open In Colab button** 👇:
|
||||
|
||||
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/deep-rl-class/blob/master/notebooks/unit3/unit3.ipynb)
|
||||
|
||||
@@ -28,6 +28,9 @@ To find your result, go to the leaderboard and find your model, **the result = m
|
||||
|
||||
For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process
|
||||
|
||||
And you can check your progress here 👉 https://huggingface.co/spaces/ThomasSimonini/Check-my-progress-Deep-RL-Course
|
||||
|
||||
|
||||
**To start the hands-on, click on the Open In Colab button** 👇:
|
||||
|
||||
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/deep-rl-class/blob/master/notebooks/unit4/unit4.ipynb)
|
||||
@@ -671,7 +674,7 @@ def record_video(env, policy, out_directory, fps=30):
|
||||
```
|
||||
|
||||
```python
|
||||
def push_to_hub(repo_id,
|
||||
def push_to_hub(repo_id,
|
||||
model,
|
||||
hyperparameters,
|
||||
eval_env,
|
||||
@@ -689,12 +692,12 @@ def push_to_hub(repo_id,
|
||||
:param model: the pytorch model we want to save
|
||||
:param hyperparameters: training hyperparameters
|
||||
:param eval_env: evaluation environment
|
||||
:param video_fps: how many frames per second to record our video replay
|
||||
:param video_fps: how many frames per second to record our video replay
|
||||
"""
|
||||
|
||||
_, repo_name = repo_id.split("/")
|
||||
api = HfApi()
|
||||
|
||||
|
||||
# Step 1: Create the repo
|
||||
repo_url = api.create_repo(
|
||||
repo_id=repo_id,
|
||||
@@ -703,25 +706,25 @@ def push_to_hub(repo_id,
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
local_directory = Path(tmpdirname)
|
||||
|
||||
|
||||
# Step 2: Save the model
|
||||
torch.save(model, local_directory / "model.pt")
|
||||
|
||||
# Step 3: Save the hyperparameters to JSON
|
||||
with open(local_directory / "hyperparameters.json", "w") as outfile:
|
||||
json.dump(hyperparameters, outfile)
|
||||
|
||||
|
||||
# Step 4: Evaluate the model and build JSON
|
||||
mean_reward, std_reward = evaluate_agent(eval_env,
|
||||
mean_reward, std_reward = evaluate_agent(eval_env,
|
||||
hyperparameters["max_t"],
|
||||
hyperparameters["n_evaluation_episodes"],
|
||||
hyperparameters["n_evaluation_episodes"],
|
||||
model)
|
||||
# Get datetime
|
||||
eval_datetime = datetime.datetime.now()
|
||||
eval_form_datetime = eval_datetime.isoformat()
|
||||
|
||||
evaluate_data = {
|
||||
"env_id": hyperparameters["env_id"],
|
||||
"env_id": hyperparameters["env_id"],
|
||||
"mean_reward": mean_reward,
|
||||
"n_evaluation_episodes": hyperparameters["n_evaluation_episodes"],
|
||||
"eval_datetime": eval_form_datetime,
|
||||
@@ -733,7 +736,7 @@ def push_to_hub(repo_id,
|
||||
|
||||
# Step 5: Create the model card
|
||||
env_name = hyperparameters["env_id"]
|
||||
|
||||
|
||||
metadata = {}
|
||||
metadata["tags"] = [
|
||||
env_name,
|
||||
|
||||
Reference in New Issue
Block a user