diff --git a/references/reinforcement.bib b/references/reinforcement.bib
index c6f1528..7160d3f 100644
--- a/references/reinforcement.bib
+++ b/references/reinforcement.bib
@@ -171,4 +171,156 @@ series = {ADKDD'14}
   year={2017},
   howpublished = "Website",
   note = {\url{http://www.nvidia.com/object/volta-architecture-whitepaper.html}}
+}
+
+@inproceedings{mnih2016asynchronous,
+  title={Asynchronous methods for deep reinforcement learning},
+  author={Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
+  booktitle={International Conference on Machine Learning ({ICML})},
+  pages={1928--1937},
+  year={2016}
+}
+
+@article{espeholt2018impala,
+  title={{IMPALA}: Scalable distributed deep-{RL} with importance weighted actor-learner architectures},
+  author={Espeholt, Lasse and Soyer, Hubert and Munos, Remi and Simonyan, Karen and Mnih, Volodymyr and Ward, Tom and Doron, Yotam and Firoiu, Vlad and Harley, Tim and Dunning, Iain and others},
+  journal={arXiv preprint arXiv:1802.01561},
+  year={2018}
+}
+
+@article{espeholt2019seed,
+  title={{SEED RL}: Scalable and efficient deep-{RL} with accelerated central inference},
+  author={Espeholt, Lasse and Marinier, Rapha{\"e}l and Stanczyk, Piotr and Wang, Ke and Michalski, Marcin},
+  journal={arXiv preprint arXiv:1910.06591},
+  year={2019}
+}
+
+@misc{horgan2018distributed,
+  title={Distributed Prioritized Experience Replay},
+  author={Horgan, Dan and Quan, John and Budden, David and Barth-Maron, Gabriel and Hessel, Matteo and van Hasselt, Hado and Silver, David},
+  year={2018},
+  eprint={1803.00933},
+  archivePrefix={arXiv},
+  primaryClass={cs.LG}
+}
+
+@inproceedings{moritz2018ray,
+  title={Ray: A distributed framework for emerging {AI} applications},
+  author={Moritz, Philipp and Nishihara, Robert and Wang, Stephanie and Tumanov, Alexey and Liaw, Richard and Liang, Eric and Elibol, Melih and Yang, Zongheng and Paul, William and Jordan, Michael I and others},
+  booktitle={13th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 18)},
+  pages={561--577},
+  year={2018}
+}
+
+
+@article{liang2017ray,
+  title={Ray {RLlib}: A composable and scalable reinforcement learning library},
+  author={Liang, Eric and Liaw, Richard and Nishihara, Robert and Moritz, Philipp and Fox, Roy and Gonzalez, Joseph and Goldberg, Ken and Stoica, Ion},
+  journal={arXiv preprint arXiv:1712.09381},
+  year={2017}
+}
+
+@article{cassirer2021reverb,
+  title={Reverb: A Framework For Experience Replay},
+  author={Cassirer, Albin and Barth-Maron, Gabriel and Brevdo, Eugene and Ramos, Sabela and Boyd, Toby and Sottiaux, Thibault and Kroiss, Manuel},
+  journal={arXiv preprint arXiv:2102.04736},
+  year={2021}
+}
+
+
+@article{hoffman2020acme,
+  title={Acme: A research framework for distributed reinforcement learning},
+  author={Hoffman, Matt and Shahriari, Bobak and Aslanides, John and Barth-Maron, Gabriel and Behbahani, Feryal and Norman, Tamara and Abdolmaleki, Abbas and Cassirer, Albin and Yang, Fan and Baumli, Kate and others},
+  journal={arXiv preprint arXiv:2006.00979},
+  year={2020}
+}
+
+@article{vinyals2019grandmaster,
+  title={Grandmaster level in {StarCraft II} using multi-agent reinforcement learning},
+  author={Vinyals, Oriol and Babuschkin, Igor and Czarnecki, Wojciech M and Mathieu, Micha{\"e}l and Dudzik, Andrew and Chung, Junyoung and Choi, David H and Powell, Richard and Ewalds, Timo and Georgiev, Petko and others},
+  journal={Nature},
+  volume={575},
+  number={7782},
+  pages={350--354},
+  year={2019},
+  publisher={Nature Publishing Group}
+}
+
+@article{berner2019dota,
+  title={{Dota 2} with large scale deep reinforcement learning},
+  author={Berner, Christopher and Brockman, Greg and Chan, Brooke and Cheung, Vicki and D{\k{e}}biak, Przemys{\l}aw and Dennison, Christy and Farhi, David and Fischer, Quirin and Hashme, Shariq and Hesse, Chris and others},
+  journal={arXiv preprint arXiv:1912.06680},
+  year={2019}
+}
+
+@article{han2020tstarbot,
+  title={{TStarBot-X}: An open-sourced and comprehensive study for efficient league training in {StarCraft II} full game},
+  author={Han, Lei and Xiong, Jiechao and Sun, Peng and Sun, Xinghai and Fang, Meng and Guo, Qingwei and Chen, Qiaobo and Shi, Tengfei and Yu, Hongsheng and Wu, Xipeng and others},
+  journal={arXiv preprint arXiv:2011.13729},
+  year={2020}
+}
+
+@inproceedings{wang2021scc,
+  title={{SCC}: an efficient deep reinforcement learning agent mastering the game of {StarCraft II}},
+  author={Wang, Xiangjun and Song, Junxiao and Qi, Penghui and Peng, Peng and Tang, Zhenkun and Zhang, Wei and Li, Weimin and Pi, Xiongjun and He, Jujie and Gao, Chao and others},
+  booktitle={International Conference on Machine Learning ({ICML})},
+  pages={10905--10915},
+  year={2021},
+  publisher={PMLR}
+}
+
+@article{sunehag2017value,
+  title={Value-decomposition networks for cooperative multi-agent learning},
+  author={Sunehag, Peter and Lever, Guy and Gruslys, Audrunas and Czarnecki, Wojciech Marian and Zambaldi, Vinicius and Jaderberg, Max and Lanctot, Marc and Sonnerat, Nicolas and Leibo, Joel Z and Tuyls, Karl and others},
+  journal={arXiv preprint arXiv:1706.05296},
+  year={2017}
+}
+
+
+@inproceedings{rashid2018qmix,
+  title={{QMIX}: Monotonic value function factorisation for deep multi-agent reinforcement learning},
+  author={Rashid, Tabish and Samvelyan, Mikayel and {Schroeder de Witt}, Christian and Farquhar, Gregory and Foerster, Jakob and Whiteson, Shimon},
+  booktitle={International Conference on Machine Learning ({ICML})},
+  pages={4295--4304},
+  year={2018},
+  publisher={PMLR}
+}
+
+@inproceedings{foerster2018counterfactual,
+  title={Counterfactual multi-agent policy gradients},
+  author={Foerster, Jakob and Farquhar, Gregory and Afouras, Triantafyllos and Nardelli, Nantas and Whiteson, Shimon},
+  booktitle={Proceedings of the {AAAI} Conference on Artificial Intelligence},
+  volume={32},
+  number={1},
+  year={2018}
+}
+
+
+@inproceedings{lanctot2017unified,
+  title={A unified game-theoretic approach to multiagent reinforcement learning},
+  author={Lanctot, Marc and Zambaldi, Vinicius and Gruslys, Audrunas and Lazaridou, Angeliki and Tuyls, Karl and P{\'e}rolat, Julien and Silver, David and Graepel, Thore},
+  booktitle={Advances in Neural Information Processing Systems},
+  volume={30},
+  year={2017}
+}
+
+@article{mnih2013playing,
+  title={Playing {Atari} with deep reinforcement learning},
+  author={Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
+  journal={arXiv preprint arXiv:1312.5602},
+  year={2013}
+}
+
+@article{ding2020efficient,
+  title={Efficient Reinforcement Learning Development with {RLzoo}},
+  author={Ding, Zihan and Yu, Tianyang and Huang, Yanhua and Zhang, Hongming and Li, Guo and Guo, Quancheng and Mai, Luo and Dong, Hao},
+  journal={arXiv preprint arXiv:2009.08644},
+  year={2020}
+}
+
+@article{makoviychuk2021isaac,
+  title={{Isaac Gym}: High Performance {GPU}-Based Physics Simulation For Robot Learning},
+  author={Makoviychuk, Viktor and Wawrzyniak, Lukasz and Guo, Yunrong and Lu, Michelle and Storey, Kier and Macklin, Miles and Hoeller, David and Rudin, Nikita and Allshire, Arthur and Handa, Ankur and others},
+  journal={arXiv preprint arXiv:2108.10470},
+  year={2021}
 }
\ No newline at end of file