diff --git a/references/reinforcement.bib b/references/reinforcement.bib
index c6f1528..7160d3f 100644
--- a/references/reinforcement.bib
+++ b/references/reinforcement.bib
@@ -171,4 +171,157 @@ series = {ADKDD'14}
   year={2017},
   howpublished = "Website",
   note = {\url{http://www.nvidia.com/object/volta-architecture-whitepaper.html}}
+}
+
+@inproceedings{mnih2016asynchronous,
+  author    = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
+  title     = {Asynchronous methods for deep reinforcement learning},
+  booktitle = {International Conference on Machine Learning (ICML)},
+  year      = {2016},
+  pages     = {1928--1937}
+}
+
+@article{espeholt2018impala,
+  title={{IMPALA}: Scalable distributed deep-{RL} with importance weighted actor-learner architectures},
+  author={Espeholt, Lasse and Soyer, Hubert and Munos, Remi and Simonyan, Karen and Mnih, Volodymyr and Ward, Tom and Doron, Yotam and Firoiu, Vlad and Harley, Tim and Dunning, Iain and others},
+  journal={arXiv preprint arXiv:1802.01561},
+  year={2018}
+}
+
+@article{espeholt2019seed,
+  title={{SEED} {RL}: Scalable and efficient deep-{RL} with accelerated central inference},
+  author={Espeholt, Lasse and Marinier, Rapha{\"e}l and Stanczyk, Piotr and Wang, Ke and Michalski, Marcin},
+  journal={arXiv preprint arXiv:1910.06591},
+  year={2019}
+}
+
+@misc{horgan2018distributed,
+  author        = {Horgan, Dan and Quan, John and Budden, David and Barth-Maron, Gabriel and Hessel, Matteo and van Hasselt, Hado and Silver, David},
+  title         = {Distributed Prioritized Experience Replay},
+  year          = {2018},
+  eprint        = {1803.00933},
+  archivePrefix = {arXiv},
+  primaryClass  = {cs.LG}
+}
+
+@inproceedings{moritz2018ray,
+  title={Ray: A distributed framework for emerging {AI} applications},
+  author={Moritz, Philipp and Nishihara, Robert and Wang, Stephanie and Tumanov, Alexey and Liaw, Richard and Liang, Eric and Elibol, Melih and Yang, Zongheng and Paul, William and Jordan, Michael I and others},
+  booktitle={13th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 18)},
+  pages={561--577},
+  year={2018}
+}
+
+
+@article{liang2017ray,
+  title={Ray {RLlib}: A composable and scalable reinforcement learning library},
+  author={Liang, Eric and Liaw, Richard and Nishihara, Robert and Moritz, Philipp
+          and Fox, Roy and Gonzalez, Joseph and Goldberg, Ken and Stoica, Ion},
+  journal={arXiv preprint arXiv:1712.09381},
+  year={2017}
+}
+
+@article{cassirer2021reverb,
+  author  = {Cassirer, Albin and Barth-Maron, Gabriel and Brevdo, Eugene and Ramos, Sabela and Boyd, Toby and Sottiaux, Thibault and Kroiss, Manuel},
+  title   = {Reverb: A Framework For Experience Replay},
+  year    = {2021},
+  journal = {arXiv preprint arXiv:2102.04736}
+}
+
+
+@article{hoffman2020acme,
+  author  = {Hoffman, Matt and Shahriari, Bobak and Aslanides, John and Barth-Maron, Gabriel and Behbahani, Feryal and Norman, Tamara and Abdolmaleki, Abbas and Cassirer, Albin and Yang, Fan and Baumli, Kate and others},
+  title   = {Acme: A research framework for distributed reinforcement learning},
+  year    = {2020},
+  journal = {arXiv preprint arXiv:2006.00979}
+}
+
+@article{vinyals2019grandmaster,
+  title={Grandmaster level in {StarCraft II} using multi-agent reinforcement learning},
+  author={Vinyals, Oriol and Babuschkin, Igor and Czarnecki, Wojciech M and Mathieu, Micha{\"e}l and Dudzik, Andrew and Chung, Junyoung and Choi, David H and Powell, Richard and Ewalds, Timo and Georgiev, Petko and others},
+  journal={Nature},
+  volume={575},
+  number={7782},
+  pages={350--354},
+  year={2019},
+  publisher={Nature Publishing Group}
+}
+
+@article{berner2019dota,
+  author  = {Berner, Christopher and Brockman, Greg and Chan, Brooke and Cheung, Vicki and D{\k{e}}biak, Przemys{\l}aw and Dennison, Christy and Farhi, David and Fischer, Quirin and Hashme, Shariq and Hesse, Chris and others},
+  title   = {Dota 2 with large scale deep reinforcement learning},
+  year    = {2019},
+  journal = {arXiv preprint arXiv:1912.06680}
+}
+
+@article{han2020tstarbot,
+  title={{TStarBot-X}: An open-sourced and comprehensive study for efficient league training in {StarCraft II} full game},
+  author={Han, Lei and Xiong, Jiechao and Sun, Peng and Sun, Xinghai and Fang, Meng and Guo, Qingwei and Chen, Qiaobo and Shi, Tengfei and Yu, Hongsheng and Wu, Xipeng and others},
+  journal={arXiv preprint arXiv:2011.13729},
+  year={2020}
+}
+
+@inproceedings{wang2021scc,
+  title={{SCC}: an efficient deep reinforcement learning agent mastering the game of {StarCraft II}},
+  author={Wang, Xiangjun and Song, Junxiao and Qi, Penghui and Peng, Peng and Tang, Zhenkun and Zhang, Wei and Li, Weimin and Pi, Xiongjun and He, Jujie and Gao, Chao and others},
+  booktitle={International Conference on Machine Learning},
+  pages={10905--10915},
+  year={2021},
+  publisher={PMLR}
+}
+
+@article{sunehag2017value,
+  author  = {Sunehag, Peter and Lever, Guy and Gruslys, Audrunas and Czarnecki, Wojciech Marian and Zambaldi, Vinicius and Jaderberg, Max and Lanctot, Marc and Sonnerat, Nicolas and Leibo, Joel Z and Tuyls, Karl and others},
+  title   = {Value-decomposition networks for cooperative multi-agent learning},
+  year    = {2017},
+  journal = {arXiv preprint arXiv:1706.05296}
+}
+
+
+@inproceedings{rashid2018qmix,
+  title={{QMIX}: Monotonic value function factorisation for deep multi-agent reinforcement learning},
+  author={Rashid, Tabish and Samvelyan, Mikayel and Schroeder, Christian and Farquhar, Gregory and Foerster, Jakob and Whiteson, Shimon},
+  booktitle={International Conference on Machine Learning},
+  pages={4295--4304},
+  year={2018},
+  publisher={PMLR}
+}
+
+@inproceedings{foerster2018counterfactual,
+  title={Counterfactual multi-agent policy gradients},
+  author={Foerster, Jakob and Farquhar, Gregory and Afouras, Triantafyllos and Nardelli, Nantas and Whiteson, Shimon},
+  booktitle={Proceedings of the {AAAI} Conference on Artificial Intelligence},
+  volume={32},
+  number={1},
+  year={2018}
+}
+
+
+@inproceedings{lanctot2017unified,
+  title={A unified game-theoretic approach to multiagent reinforcement learning},
+  author={Lanctot, Marc and Zambaldi, Vinicius and Gruslys, Audrunas and Lazaridou, Angeliki and Tuyls, Karl and P{\'e}rolat, Julien and Silver, David and Graepel, Thore},
+  booktitle={Advances in Neural Information Processing Systems},
+  volume={30},
+  year={2017}
+}
+
+@article{mnih2013playing,
+  title={Playing {Atari} with deep reinforcement learning},
+  author={Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
+  journal={arXiv preprint arXiv:1312.5602},
+  year={2013}
+}
+
+@article{ding2020efficient,
+  title={Efficient Reinforcement Learning Development with {RLzoo}},
+  author={Ding, Zihan and Yu, Tianyang and Huang, Yanhua and Zhang, Hongming and Li, Guo and Guo, Quancheng and Mai, Luo and Dong, Hao},
+  journal={arXiv preprint arXiv:2009.08644},
+  year={2020}
+}
+
+@article{makoviychuk2021isaac,
+  title={{Isaac Gym}: High Performance {GPU}-Based Physics Simulation For Robot Learning},
+  author={Makoviychuk, Viktor and Wawrzyniak, Lukasz and Guo, Yunrong and Lu, Michelle and Storey, Kier and Macklin, Miles and Hoeller, David and Rudin, Nikita and Allshire, Arthur and Handa, Ankur and others},
+  journal={arXiv preprint arXiv:2108.10470},
+  year={2021}
 }
\ No newline at end of file