mirror of
https://github.com/openmlsys/openmlsys-zh.git
synced 2026-04-15 02:49:52 +08:00
fix ref
This commit is contained in:
@@ -171,4 +171,157 @@ series = {ADKDD'14}
|
||||
year={2017},
|
||||
howpublished = "Website",
|
||||
note = {\url{http://www.nvidia.com/object/volta-architecture-whitepaper.html}}
|
||||
}
|
||||
|
||||
% Conference paper (ICML 2016). Braces around the acronym keep it
% upper-case under styles that re-case venue or title text.
@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adri{\`a} Puigdom{\`e}nech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning ({ICML})},
  pages     = {1928--1937},
  year      = {2016}
}
|
||||
|
||||
% arXiv preprint 1802.01561. Acronyms are brace-protected so
% sentence-casing styles do not print "Impala" / "deep-rl".
@article{espeholt2018impala,
  author  = {Espeholt, Lasse and Soyer, Hubert and Munos, R{\'e}mi and Simonyan, Karen and Mnih, Volodymyr and Ward, Tom and Doron, Yotam and Firoiu, Vlad and Harley, Tim and Dunning, Iain and others},
  title   = {{IMPALA}: Scalable distributed deep-{RL} with importance weighted actor-learner architectures},
  journal = {arXiv preprint arXiv:1802.01561},
  year    = {2018}
}
|
||||
|
||||
% arXiv preprint 1910.06591. "SEED RL" is brace-protected as a unit so
% its capitalisation survives sentence-casing styles.
@article{espeholt2019seed,
  author  = {Espeholt, Lasse and Marinier, Rapha{\"e}l and Stanczyk, Piotr and Wang, Ke and Michalski, Marcin},
  title   = {{SEED RL}: Scalable and efficient deep-{RL} with accelerated central inference},
  journal = {arXiv preprint arXiv:1910.06591},
  year    = {2019}
}
|
||||
|
||||
% arXiv preprint 1803.00933 (cs.LG). Names use the unambiguous
% "von Last, First" form so the particle in "van Hasselt" is parsed
% without relying on letter case.
@misc{horgan2018distributed,
  author        = {Horgan, Dan and Quan, John and Budden, David and Barth-Maron, Gabriel and Hessel, Matteo and van Hasselt, Hado and Silver, David},
  title         = {Distributed Prioritized Experience Replay},
  year          = {2018},
  eprint        = {1803.00933},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}
|
||||
|
||||
% Conference paper (OSDI 2018). Plain brace groups protect the acronyms;
% the exported "$\{$...$\}$" form typesets literal curly braces.
@inproceedings{moritz2018ray,
  author    = {Moritz, Philipp and Nishihara, Robert and Wang, Stephanie and Tumanov, Alexey and Liaw, Richard and Liang, Eric and Elibol, Melih and Yang, Zongheng and Paul, William and Jordan, Michael I and others},
  title     = {Ray: A distributed framework for emerging {AI} applications},
  booktitle = {13th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 18)},
  pages     = {561--577},
  year      = {2018}
}
|
||||
|
||||
|
||||
% arXiv preprint 1712.09381. No page field: a preprint has no page
% range, and the exported value "85" was a citation-export artefact.
@article{liang2017ray,
  author  = {Liang, Eric and Liaw, Richard and Nishihara, Robert and Moritz, Philipp and Fox, Roy and Gonzalez, Joseph and Goldberg, Ken and Stoica, Ion},
  title   = {Ray {RLlib}: A composable and scalable reinforcement learning library},
  journal = {arXiv preprint arXiv:1712.09381},
  year    = {2017}
}
|
||||
|
||||
% arXiv preprint 2102.04736. The system name is brace-protected.
@article{cassirer2021reverb,
  author  = {Cassirer, Albin and Barth-Maron, Gabriel and Brevdo, Eugene and Ramos, Sabela and Boyd, Toby and Sottiaux, Thibault and Kroiss, Manuel},
  title   = {{Reverb}: A Framework For Experience Replay},
  journal = {arXiv preprint arXiv:2102.04736},
  year    = {2021}
}
|
||||
|
||||
|
||||
% arXiv preprint 2006.00979. The framework name is brace-protected.
@article{hoffman2020acme,
  author  = {Hoffman, Matt and Shahriari, Bobak and Aslanides, John and Barth-Maron, Gabriel and Behbahani, Feryal and Norman, Tamara and Abdolmaleki, Abbas and Cassirer, Albin and Yang, Fan and Baumli, Kate and others},
  title   = {{Acme}: A research framework for distributed reinforcement learning},
  journal = {arXiv preprint arXiv:2006.00979},
  year    = {2020}
}
|
||||
|
||||
% Journal article (Nature 575(7782), 2019). The game title is
% brace-protected so sentence-casing styles do not print "starcraft ii".
@article{vinyals2019grandmaster,
  author    = {Vinyals, Oriol and Babuschkin, Igor and Czarnecki, Wojciech M and Mathieu, Micha{\"e}l and Dudzik, Andrew and Chung, Junyoung and Choi, David H and Powell, Richard and Ewalds, Timo and Georgiev, Petko and others},
  title     = {Grandmaster level in {StarCraft II} using multi-agent reinforcement learning},
  journal   = {Nature},
  volume    = {575},
  number    = {7782},
  pages     = {350--354},
  year      = {2019},
  publisher = {Nature Publishing Group}
}
|
||||
|
||||
% arXiv preprint 1912.06680. The game title is brace-protected.
@article{berner2019dota,
  author  = {Berner, Christopher and Brockman, Greg and Chan, Brooke and Cheung, Vicki and D{\k{e}}biak, Przemys{\l}aw and Dennison, Christy and Farhi, David and Fischer, Quirin and Hashme, Shariq and Hesse, Chris and others},
  title   = {{Dota 2} with large scale deep reinforcement learning},
  journal = {arXiv preprint arXiv:1912.06680},
  year    = {2019}
}
|
||||
|
||||
% arXiv preprint 2011.13729. Agent and game names carry their proper
% capitalisation and are brace-protected against sentence-casing.
@article{han2020tstarbot,
  author  = {Han, Lei and Xiong, Jiechao and Sun, Peng and Sun, Xinghai and Fang, Meng and Guo, Qingwei and Chen, Qiaobo and Shi, Tengfei and Yu, Hongsheng and Wu, Xipeng and others},
  title   = {{TStarBot-X}: An open-sourced and comprehensive study for efficient league training in {StarCraft II} full game},
  journal = {arXiv preprint arXiv:2011.13729},
  year    = {2020}
}
|
||||
|
||||
% Conference paper (ICML 2021, PMLR). The acronym and game title are
% brace-protected against sentence-casing.
@inproceedings{wang2021scc,
  author       = {Wang, Xiangjun and Song, Junxiao and Qi, Penghui and Peng, Peng and Tang, Zhenkun and Zhang, Wei and Li, Weimin and Pi, Xiongjun and He, Jujie and Gao, Chao and others},
  title        = {{SCC}: an efficient deep reinforcement learning agent mastering the game of {StarCraft II}},
  booktitle    = {International Conference on Machine Learning},
  pages        = {10905--10915},
  year         = {2021},
  organization = {PMLR}
}
|
||||
|
||||
% arXiv preprint 1706.05296.
@article{sunehag2017value,
  author  = {Sunehag, Peter and Lever, Guy and Gruslys, Audrunas and Czarnecki, Wojciech Marian and Zambaldi, Vinicius and Jaderberg, Max and Lanctot, Marc and Sonnerat, Nicolas and Leibo, Joel Z and Tuyls, Karl and others},
  title   = {Value-decomposition networks for cooperative multi-agent learning},
  journal = {arXiv preprint arXiv:1706.05296},
  year    = {2017}
}
|
||||
|
||||
|
||||
% Conference paper (ICML 2018, PMLR). The compound surname is wrapped in
% braces so BibTeX does not split "Schroeder de" off as a von-part.
@inproceedings{rashid2018qmix,
  author       = {Rashid, Tabish and Samvelyan, Mikayel and {Schroeder de Witt}, Christian and Farquhar, Gregory and Foerster, Jakob and Whiteson, Shimon},
  title        = {{QMIX}: Monotonic value function factorisation for deep multi-agent reinforcement learning},
  booktitle    = {International Conference on Machine Learning},
  pages        = {4295--4304},
  year         = {2018},
  organization = {PMLR}
}
|
||||
|
||||
% Conference paper (AAAI 2018, vol. 32 no. 1). The acronym in the venue
% name is brace-protected.
@inproceedings{foerster2018counterfactual,
  author    = {Foerster, Jakob and Farquhar, Gregory and Afouras, Triantafyllos and Nardelli, Nantas and Whiteson, Shimon},
  title     = {Counterfactual multi-agent policy gradients},
  booktitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  volume    = {32},
  number    = {1},
  year      = {2018}
}
|
||||
|
||||
|
||||
% Conference paper (NeurIPS 2017, vol. 30). Typed as a proceedings
% contribution, with the proceedings title in booktitle, not journal.
@inproceedings{lanctot2017unified,
  author    = {Lanctot, Marc and Zambaldi, Vinicius and Gruslys, Audrunas and Lazaridou, Angeliki and Tuyls, Karl and P{\'e}rolat, Julien and Silver, David and Graepel, Thore},
  title     = {A unified game-theoretic approach to multiagent reinforcement learning},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {30},
  year      = {2017}
}
|
||||
|
||||
% arXiv preprint 1312.5602. "Atari" is a proper noun, capitalised and
% brace-protected against sentence-casing.
@article{mnih2013playing,
  author  = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
  title   = {Playing {Atari} with deep reinforcement learning},
  journal = {arXiv preprint arXiv:1312.5602},
  year    = {2013}
}
|
||||
|
||||
% arXiv preprint 2009.08644. The library name is brace-protected so
% sentence-casing styles do not print "rlzoo".
@article{ding2020efficient,
  author  = {Ding, Zihan and Yu, Tianyang and Huang, Yanhua and Zhang, Hongming and Li, Guo and Guo, Quancheng and Mai, Luo and Dong, Hao},
  title   = {Efficient Reinforcement Learning Development with {RLzoo}},
  journal = {arXiv preprint arXiv:2009.08644},
  year    = {2020}
}
|
||||
|
||||
% arXiv preprint 2108.10470. The product name and the acronym are
% brace-protected against sentence-casing.
@article{makoviychuk2021isaac,
  author  = {Makoviychuk, Viktor and Wawrzyniak, Lukasz and Guo, Yunrong and Lu, Michelle and Storey, Kier and Macklin, Miles and Hoeller, David and Rudin, Nikita and Allshire, Arthur and Handa, Ankur and others},
  title   = {{Isaac Gym}: High Performance {GPU}-Based Physics Simulation For Robot Learning},
  journal = {arXiv preprint arXiv:2108.10470},
  year    = {2021}
}
|
||||
Reference in New Issue
Block a user