From 4be6adb7f48f0834919df23713eff063e48588b3 Mon Sep 17 00:00:00 2001 From: Thomas Simonini Date: Tue, 28 Mar 2023 15:45:22 +0200 Subject: [PATCH] Update Course --- notebooks/unit3/unit3.ipynb | 1 + units/en/unit3/hands-on.mdx | 3 +++ units/en/unit7/self-play.mdx | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/notebooks/unit3/unit3.ipynb b/notebooks/unit3/unit3.ipynb index 1c4b17b..8587f14 100644 --- a/notebooks/unit3/unit3.ipynb +++ b/notebooks/unit3/unit3.ipynb @@ -289,6 +289,7 @@ }, "outputs": [], "source": [ + "!pip install setuptools==65.5.0\n", "!pip install -r requirements.txt\n", "# Since colab uses Python 3.9 we need to add this installation\n", "!pip install gym[atari,accept-rom-license]==0.21.0" diff --git a/units/en/unit3/hands-on.mdx b/units/en/unit3/hands-on.mdx index 409d410..f0a7d4e 100644 --- a/units/en/unit3/hands-on.mdx +++ b/units/en/unit3/hands-on.mdx @@ -127,7 +127,10 @@ cd /content/rl-baselines3-zoo/ ``` ```bash +pip install setuptools==65.5.0 pip install -r requirements.txt +# Since colab uses Python 3.9 we need to add this installation +pip install gym[atari,accept-rom-license]==0.21.0 ``` ## Train our Deep Q-Learning Agent to Play Space Invaders 👾 diff --git a/units/en/unit7/self-play.mdx b/units/en/unit7/self-play.mdx index 8dd44c7..347695d 100644 --- a/units/en/unit7/self-play.mdx +++ b/units/en/unit7/self-play.mdx @@ -31,7 +31,7 @@ We do the same with self-play: - We **start with a copy of our agent as an opponent** this way, this opponent is on a similar level. - We **learn from it**, and when we acquire some skills, we **update our opponent with a more recent copy of our training policy**. -The theory behind self-play is not something new. It was already used by Arthur Samuel’s checker player system in the fifties and by Gerald Tesauro’s TD-Gammon in 1955. 
If you want to learn more about the history of self-play [check this very good blogpost by Andrew Cohen](https://blog.unity.com/technology/training-intelligent-adversaries-using-self-play-with-ml-agents) +The theory behind self-play is not something new. It was already used by Arthur Samuel’s checker player system in the fifties and by Gerald Tesauro’s TD-Gammon in 1995. If you want to learn more about the history of self-play [check this very good blogpost by Andrew Cohen](https://blog.unity.com/technology/training-intelligent-adversaries-using-self-play-with-ml-agents) ## Self-Play in MLAgents