diff --git a/unit1/unit1_bonus/Wandb Logging.ipynb b/unit1/unit1_bonus/Wandb Logging.ipynb index b314b90..9d872cd 100644 --- a/unit1/unit1_bonus/Wandb Logging.ipynb +++ b/unit1/unit1_bonus/Wandb Logging.ipynb @@ -10,12 +10,12 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "id": "2e020e33", "metadata": { "ExecuteTime": { - "end_time": "2022-05-07T16:36:51.138888Z", - "start_time": "2022-05-07T16:36:49.643694Z" + "end_time": "2022-05-08T01:11:29.835785Z", + "start_time": "2022-05-08T01:11:29.831878Z" } }, "outputs": [], @@ -28,7 +28,7 @@ "from stable_baselines3.common.evaluation import evaluate_policy\n", "from stable_baselines3.common.env_util import make_vec_env\n", "\n", - "from stable_baselines3.common.vec_env import VecVideoRecorder\n", + "from stable_baselines3.common.vec_env import VecVideoRecorder , DummyVecEnv\n", "\n", "# Import wandb stuff\n", "import wandb\n", @@ -81,8 +81,8 @@ "id": "ae0fd212", "metadata": { "ExecuteTime": { - "end_time": "2022-05-07T16:36:59.329343Z", - "start_time": "2022-05-07T16:36:53.808583Z" + "end_time": "2022-05-08T00:55:11.285850Z", + "start_time": "2022-05-08T00:55:05.640088Z" } }, "outputs": [ @@ -108,7 +108,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /home/venom/Desktop/deep-rl-class/unit1/unit1_bonus/wandb/run-20220507_220654-8czogswz" + "Run data is saved locally in /home/venom/Desktop/deep-rl-class/unit1/unit1_bonus/wandb/run-20220508_062506-2ovixu73" ], "text/plain": [ "" @@ -120,7 +120,7 @@ { "data": { "text/html": [ - "Syncing run brisk-fire-2 to Weights & Biases (docs)
" + "Syncing run warm-gorge-3 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -138,6 +138,7 @@ " \"policy_type\": \"MlpPolicy\",\n", " \"total_timesteps\": 100000,\n", " \"env_name\": \"LunarLander-v2\",\n", + " \"learning_rate\" : 0.0002,\n", "}\n", "\n", "run = wandb.init(\n", @@ -166,8 +167,8 @@ "id": "0b8046ae", "metadata": { "ExecuteTime": { - "end_time": "2022-05-07T16:37:34.011591Z", - "start_time": "2022-05-07T16:37:29.500555Z" + "end_time": "2022-05-08T00:55:59.965683Z", + "start_time": "2022-05-08T00:55:55.396867Z" } }, "outputs": [ @@ -185,29 +186,126 @@ "# be pretty resource intensive. \n", "# env = VecVideoRecorder(env, f\"videos/{run.id}\", record_video_trigger=lambda x: x % 2000 == 0, video_length=200) # Set the video recorder, to record our agent during training\n", "\n", + "# I would suggest you to add all your hyperparameters in the config dictionary defined before the wandb init step. This would help you to visualize the effect those hyper parameters\n", + "# have on your model, via the wandb dashboard\n", "model = PPO(\n", " policy = config[\"policy_type\"],\n", " env = env,\n", + " learning_rate=config[\"learning_rate\"],\n", " tensorboard_log=\"logs\",\n", " verbose=1)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "f772cc3f", "metadata": { "ExecuteTime": { - "end_time": "2022-05-07T16:39:54.654747Z", - "start_time": "2022-05-07T16:37:35.052748Z" + "end_time": "2022-05-08T00:59:14.354142Z", + "start_time": "2022-05-08T00:56:32.259913Z" }, "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logging to logs/PPO_1\n", + "---------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 91.1 |\n", + "| ep_rew_mean | -170 |\n", + "| time/ | |\n", + "| fps | 5432 |\n", + "| iterations | 1 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 32768 |\n", + "---------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean 
| 98.9 |\n", + "| ep_rew_mean | -132 |\n", + "| time/ | |\n", + "| fps | 1480 |\n", + "| iterations | 2 |\n", + "| time_elapsed | 44 |\n", + "| total_timesteps | 65536 |\n", + "| train/ | |\n", + "| approx_kl | 0.009069282 |\n", + "| clip_fraction | 0.0836 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.38 |\n", + "| explained_variance | 0.00211 |\n", + "| learning_rate | 0.0002 |\n", + "| loss | 345 |\n", + "| n_updates | 10 |\n", + "| policy_gradient_loss | -0.00783 |\n", + "| value_loss | 810 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 103 |\n", + "| ep_rew_mean | -104 |\n", + "| time/ | |\n", + "| fps | 1135 |\n", + "| iterations | 3 |\n", + "| time_elapsed | 86 |\n", + "| total_timesteps | 98304 |\n", + "| train/ | |\n", + "| approx_kl | 0.012206479 |\n", + "| clip_fraction | 0.12 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.35 |\n", + "| explained_variance | 0.436 |\n", + "| learning_rate | 0.0002 |\n", + "| loss | 77.5 |\n", + "| n_updates | 20 |\n", + "| policy_gradient_loss | -0.0125 |\n", + "| value_loss | 335 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 113 |\n", + "| ep_rew_mean | -87.2 |\n", + "| time/ | |\n", + "| fps | 1031 |\n", + "| iterations | 4 |\n", + "| time_elapsed | 127 |\n", + "| total_timesteps | 131072 |\n", + "| train/ | |\n", + "| approx_kl | 0.012416394 |\n", + "| clip_fraction | 0.167 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.31 |\n", + "| explained_variance | 0.513 |\n", + "| learning_rate | 0.0002 |\n", + "| loss | 66.1 |\n", + "| n_updates | 30 |\n", + "| policy_gradient_loss | -0.015 |\n", + "| value_loss | 257 |\n", + "-----------------------------------------\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } 
+ ], "source": [ "# Now we do the magical stuff of logging to wandb. All you have to do is add the wandb callback to the model's callback like this\n", "\n", - "model.learn(total_timesteps=config[\"total_timesteps\"], callback=[WandbCallback()])" + "model.learn(total_timesteps=config[\"total_timesteps\"], \n", + " callback=[WandbCallback(\n", + " gradient_save_freq=100\n", + " )])" ] }, { @@ -216,8 +314,8 @@ "id": "d2a6341c", "metadata": { "ExecuteTime": { - "end_time": "2022-05-07T16:40:06.824067Z", - "start_time": "2022-05-07T16:39:54.696684Z" + "end_time": "2022-05-08T00:59:27.663456Z", + "start_time": "2022-05-08T00:59:14.382414Z" } }, "outputs": [ @@ -255,7 +353,7 @@ " .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n", " .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n", " \n", - "

Run history:


global_step▁▃▆█
rollout/ep_len_mean▁▃▄█
rollout/ep_rew_mean▁▄▆█
time/fps█▁▁▁
train/approx_kl▁▇█
train/clip_fraction▁▅█
train/clip_range▁▁▁
train/entropy_loss▁▄█
train/explained_variance▁▇█
train/learning_rate▁▁▁
train/loss█▅▁
train/policy_gradient_loss█▄▁
train/value_loss█▂▁

Run summary:


global_step131072
rollout/ep_len_mean107.59
rollout/ep_rew_mean-75.28564
time/fps1176.0
train/approx_kl0.01353
train/clip_fraction0.20614
train/clip_range0.2
train/entropy_loss-1.30999
train/explained_variance0.65612
train/learning_rate0.0003
train/loss44.55813
train/policy_gradient_loss-0.01972
train/value_loss212.05472

" + "

Run history:


global_step▁▃▆█
rollout/ep_len_mean▁▃▅█
rollout/ep_rew_mean▁▄▇█
time/fps█▂▁▁
train/approx_kl▁██
train/clip_fraction▁▄█
train/clip_range▁▁▁
train/entropy_loss▁▄█
train/explained_variance▁▇█
train/learning_rate▁▁▁
train/loss█▁▁
train/policy_gradient_loss█▃▁
train/value_loss█▂▁

Run summary:


global_step131072
rollout/ep_len_mean113.48
rollout/ep_rew_mean-87.24881
time/fps1031.0
train/approx_kl0.01242
train/clip_fraction0.16663
train/clip_range0.2
train/entropy_loss-1.30755
train/explained_variance0.51275
train/learning_rate0.0002
train/loss66.06168
train/policy_gradient_loss-0.01496
train/value_loss257.38614

" ], "text/plain": [ "" @@ -267,7 +365,7 @@ { "data": { "text/html": [ - "Synced brisk-fire-2: https://wandb.ai/supersecurehuman/LunarLander-v2/runs/8czogswz
Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)" + "Synced warm-gorge-3: https://wandb.ai/supersecurehuman/LunarLander-v2/runs/2ovixu73
Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 1 other file(s)" ], "text/plain": [ "" @@ -279,7 +377,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20220507_220654-8czogswz/logs" + "Find logs at: ./wandb/run-20220508_062506-2ovixu73/logs" ], "text/plain": [ "" @@ -298,7 +396,7 @@ }, { "cell_type": "markdown", - "id": "3ffa0dae", + "id": "e9f1e6a5", "metadata": {}, "source": [ "### Note\n", @@ -308,7 +406,288 @@ }, { "cell_type": "markdown", - "id": "a03b8746", + "id": "4f448887", + "metadata": {}, + "source": [ + "## Package to 🤗 hub" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f8d1b517", + "metadata": { + "ExecuteTime": { + "end_time": "2022-05-08T01:13:41.386029Z", + "start_time": "2022-05-08T01:13:41.335337Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# You have to disable wandb while packaging it to hub, because it seems to be interfering with package to hub function.\n", + "wandb.init(mode=\"disabled\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f93f140", + "metadata": {}, + "outputs": [], + "source": [ + "from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0461e29", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: You just need to run notebook_login() once in any machine you are trying to login. 
The token is saved on your machine, making future access to your account easier\n", + "notebook_login()\n", + "!git config --global credential.helper store" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7d7ea1b3", + "metadata": { + "ExecuteTime": { + "end_time": "2022-05-08T01:14:35.254796Z", + "start_time": "2022-05-08T01:13:43.366135Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;4mℹ This function will save, evaluate, generate a video of your agent,\n", + "create a model card and push everything to the hub. It might take up to 1min.\n", + "This is a work in progress: If you encounter a bug, please open an issue and use\n", + "push_to_hub instead.\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/venom/miniconda3/envs/RL/lib/python3.7/site-packages/huggingface_hub/hf_api.py:82: FutureWarning: `name` and `organization` input arguments are deprecated and will be removed in v0.7. Pass `repo_id` instead.\n", + " FutureWarning,\n", + "/home/venom/Desktop/deep-rl-class/unit1/unit1_bonus/hub/LunarLander_v2_PPO_wandb is already a clone of https://huggingface.co/SuperSecureHuman/LunarLander_v2_PPO_wandb. Make sure you pull the latest changes with `repo.git_pull()`.\n", + "/home/venom/miniconda3/envs/RL/lib/python3.7/site-packages/stable_baselines3/common/evaluation.py:69: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. 
Consider wrapping environment first with ``Monitor`` wrapper.\n", + " UserWarning,\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving video to /home/venom/Desktop/deep-rl-class/unit1/unit1_bonus/-step-0-to-step-1000.mp4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ffmpeg version 4.2.2 Copyright (c) 2000-2019 the FFmpeg developers\n", + " built with gcc 7.3.0 (crosstool-NG 1.23.0.449-a04d0)\n", + " configuration: --prefix=/home/venom/miniconda3/envs/RL --cc=/tmp/build/80754af9/ffmpeg_1587154242452/_build_env/bin/x86_64-conda_cos6-linux-gnu-cc --disable-doc --enable-avresample --enable-gmp --enable-hardcoded-tables --enable-libfreetype --enable-libvpx --enable-pthreads --enable-libopus --enable-postproc --enable-pic --enable-pthreads --enable-shared --enable-static --enable-version3 --enable-zlib --enable-libmp3lame --disable-nonfree --enable-gpl --enable-gnutls --disable-openssl --enable-libopenh264 --enable-libx264\n", + " libavutil 56. 31.100 / 56. 31.100\n", + " libavcodec 58. 54.100 / 58. 54.100\n", + " libavformat 58. 29.100 / 58. 29.100\n", + " libavdevice 58. 8.100 / 58. 8.100\n", + " libavfilter 7. 57.100 / 7. 57.100\n", + " libavresample 4. 0. 0 / 4. 0. 0\n", + " libswscale 5. 5.100 / 5. 5.100\n", + " libswresample 3. 5.100 / 3. 5.100\n", + " libpostproc 55. 5.100 / 55. 5.100\n", + "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from './test.mp4':\n", + " Metadata:\n", + " major_brand : isom\n", + " minor_version : 512\n", + " compatible_brands: isomiso2avc1mp41\n", + " encoder : Lavf58.29.100\n", + " Duration: 00:00:20.02, start: 0.000000, bitrate: 39 kb/s\n", + " Stream #0:0(und): Video: h264 (High) (avc1 / 0x31637661), yuv420p, 600x400, 34 kb/s, 50 fps, 50 tbr, 12800 tbn, 100 tbc (default)\n", + " Metadata:\n", + " handler_name : VideoHandler\n", + "Stream mapping:\n", + " Stream #0:0 -> #0:0 (h264 (native) -> h264 (libx264))\n", + "Press [q] to stop, [?] 
for help\n", + "[libx264 @ 0x560e05024ec0] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2 AVX512\n", + "[libx264 @ 0x560e05024ec0] profile High, level 3.1, 4:2:0, 8-bit\n", + "[libx264 @ 0x560e05024ec0] 264 - core 157 - H.264/MPEG-4 AVC codec - Copyleft 2003-2018 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=12 lookahead_threads=2 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00\n", + "Output #0, mp4, to 'replay.mp4':\n", + " Metadata:\n", + " major_brand : isom\n", + " minor_version : 512\n", + " compatible_brands: isomiso2avc1mp41\n", + " encoder : Lavf58.29.100\n", + " Stream #0:0(und): Video: h264 (libx264) (avc1 / 0x31637661), yuv420p, 600x400, q=-1--1, 50 fps, 12800 tbn, 50 tbc (default)\n", + " Metadata:\n", + " handler_name : VideoHandler\n", + " encoder : Lavc58.54.100 libx264\n", + " Side data:\n", + " cpb: bitrate max/min/avg: 0/0/0 buffer size: 0 vbv_delay: -1\n", + "frame= 1001 fps=0.0 q=-1.0 Lsize= 94kB time=00:00:19.96 bitrate= 38.7kbits/s speed=27.4x \n", + "video:82kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 15.259376%\n", + "[libx264 @ 0x560e05024ec0] frame I:5 Avg QP:10.02 size: 1676\n", + "[libx264 @ 0x560e05024ec0] frame P:259 Avg QP:17.41 size: 99\n", + "[libx264 @ 0x560e05024ec0] frame B:737 Avg QP:20.72 size: 67\n", + "[libx264 @ 0x560e05024ec0] consecutive B-frames: 1.0% 1.8% 2.1% 95.1%\n", + "[libx264 @ 0x560e05024ec0] mb I I16..4: 87.6% 7.3% 5.0%\n", + "[libx264 @ 0x560e05024ec0] mb P I16..4: 0.1% 0.3% 0.1% P16..4: 
0.9% 0.1% 0.0% 0.0% 0.0% skip:98.5%\n", + "[libx264 @ 0x560e05024ec0] mb B I16..4: 0.0% 0.0% 0.1% B16..8: 1.2% 0.1% 0.0% direct: 0.0% skip:98.6% L0:57.8% L1:41.6% BI: 0.5%\n", + "[libx264 @ 0x560e05024ec0] 8x8 transform intra:17.5% inter:12.1%\n", + "[libx264 @ 0x560e05024ec0] coded y,uvDC,uvAC intra: 6.9% 9.7% 9.3% inter: 0.1% 0.1% 0.0%\n", + "[libx264 @ 0x560e05024ec0] i16 v,h,dc,p: 89% 5% 6% 0%\n", + "[libx264 @ 0x560e05024ec0] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 26% 8% 65% 0% 0% 0% 0% 0% 0%\n", + "[libx264 @ 0x560e05024ec0] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 13% 14% 60% 2% 2% 2% 3% 2% 3%\n", + "[libx264 @ 0x560e05024ec0] i8c dc,h,v,p: 96% 3% 2% 0%\n", + "[libx264 @ 0x560e05024ec0] Weighted P-Frames: Y:0.0% UV:0.0%\n", + "[libx264 @ 0x560e05024ec0] ref P L0: 76.2% 0.8% 16.4% 6.6%\n", + "[libx264 @ 0x560e05024ec0] ref B L0: 62.0% 34.4% 3.7%\n", + "[libx264 @ 0x560e05024ec0] ref B L1: 94.1% 5.9%\n", + "[libx264 @ 0x560e05024ec0] kb/s:33.21\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;4mℹ Pushing repo LunarLander_v2_PPO_wandb to the Hugging Face Hub\u001b[0m\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ee4b1a1441da4bff8a0a30cbfaa79d54", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Upload file PPO-LunarLander-v2.zip: 23%|##2 | 32.0k/141k [00:00 main\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;4mℹ Your model is pushed to the hub. 
You can view your model here:\n", + "https://huggingface.co/SuperSecureHuman/LunarLander_v2_PPO_wandb\u001b[0m\n" + ] + } + ], + "source": [ + "from huggingface_sb3 import package_to_hub\n", + "\n", + "env_id = config[\"env_name\"]\n", + "\n", + "model_architecture = \"PPO\"\n", + "model_name = \"PPO-LunarLander-v2\"\n", + "\n", + "repo_id = \"SuperSecureHuman/LunarLander_v2_PPO_wandb\"\n", + "\n", + "commit_message = \"Initial Commit\"\n", + "\n", + "eval_env = DummyVecEnv([lambda: gym.make(env_id)])\n", + "\n", + "package_to_hub(model=model, # Our trained model\n", + " model_name=model_name, # The name of our trained model \n", + " model_architecture=model_architecture, # The model architecture we used: in our case PPO\n", + " env_id=env_id, # Name of the environment\n", + " eval_env=eval_env, # Evaluation Environment\n", + " repo_id=repo_id, # id of the model repository from the Hugging Face Hub\n", + " commit_message=commit_message)\n", + "eval_env.close()" + ] + }, + { + "cell_type": "markdown", + "id": "9368a9f9", "metadata": {}, "source": [ "## Congarts!\n",