Files
openmlsys-zh/references/reinforcement.bib
Zihan Ding 719de7d582 Update RL chapter (#349)
* fix chap12 render

* add distributed rl chapter

* fix bug

* fix issue #212

* fix typo

* update imgs

* fix chinese

* fix svg img

* update contents in rl chapter

* update marl sys

* fix a fig

* fix ref

* fix error

Co-authored-by: Dalong <39682259+eedalong@users.noreply.github.com>
2022-05-23 21:04:08 +08:00

314 lines
17 KiB
BibTeX
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
% SCC: StarCraft II agent paper, International Conference on Machine Learning 2021.
% PMLR is recorded as the publisher of the proceedings.
@inproceedings{wang2021scc,
  author    = {Wang, Xiangjun and Song, Junxiao and Qi, Penghui and Peng, Peng and Tang, Zhenkun and Zhang, Wei and Li, Weimin and Pi, Xiongjun and He, Jujie and Gao, Chao and others},
  title     = {{SCC}: An Efficient Deep Reinforcement Learning Agent Mastering the Game of {StarCraft II}},
  booktitle = {International Conference on Machine Learning},
  pages     = {10905--10915},
  year      = {2021},
  publisher = {PMLR}
}
% TT-Rec: tensor-train compression for recommendation models, Proceedings of Machine Learning and Systems vol. 3.
@inproceedings{MLSYS2021_979d472a,
  author    = {Yin, Chunxing and Acun, Bilge and Wu, Carole-Jean and Liu, Xing},
  title     = {{TT-Rec}: Tensor Train Compression for Deep Learning Recommendation Models},
  booktitle = {Proceedings of Machine Learning and Systems},
  editor    = {A. Smola and A. Dimakis and I. Stoica},
  volume    = {3},
  pages     = {448--462},
  year      = {2021},
  url       = {https://proceedings.mlsys.org/paper/2021/file/979d472a84804b9f647bc185a877a8b5-Paper.pdf}
}
% Hierarchical GPU parameter server for ads systems, Proceedings of Machine Learning and Systems vol. 2.
@inproceedings{MLSYS2020_f7e6c855,
  author    = {Zhao, Weijie and Xie, Deping and Jia, Ronglai and Qian, Yulei and Ding, Ruiquan and Sun, Mingming and Li, Ping},
  title     = {Distributed Hierarchical {GPU} Parameter Server for Massive Scale Deep Learning Ads Systems},
  booktitle = {Proceedings of Machine Learning and Systems},
  editor    = {I. Dhillon and D. Papailiopoulos and V. Sze},
  volume    = {2},
  pages     = {412--428},
  year      = {2020},
  url       = {https://proceedings.mlsys.org/paper/2020/file/f7e6c85504ce6e82442c770f7c8606f0-Paper.pdf}
}
% Software-hardware co-design for training recommendation models (arXiv preprint 2104.05158).
@article{zionex,
  author  = {Mudigere, Dheevatsa and Hao, Yuchen and Huang, Jianyu and Jia, Zhihao and Tulloch, Andrew and Sridharan, Srinivas and Liu, Xing and Ozdal, Mustafa and Nie, Jade and Park, Jongsoo and others},
  title   = {Software-Hardware Co-design for Fast and Scalable Training of Deep Learning Recommendation Models},
  journal = {arXiv preprint arXiv:2104.05158},
  year    = {2021}
}
% EdgeRec: on-device recommender system in Mobile Taobao, 29th ACM CIKM (2020).
@inproceedings{gong2020edgerec,
  author    = {Gong, Yu and Jiang, Ziwen and Feng, Yufei and Hu, Binbin and Zhao, Kaiqi and Liu, Qingwen and Ou, Wenwu},
  title     = {{EdgeRec}: Recommender System on Edge in {Mobile Taobao}},
  booktitle = {Proceedings of the 29th ACM International Conference on Information \& Knowledge Management},
  pages     = {2477--2484},
  year      = {2020}
}
% Group Knowledge Transfer: federated learning of large CNNs at the edge, NeurIPS vol. 33 (2020).
@inproceedings{NEURIPS2020_a1d4c20b,
  author    = {He, Chaoyang and Annavaram, Murali and Avestimehr, Salman},
  title     = {Group Knowledge Transfer: Federated Learning of Large {CNNs} at the Edge},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {H. Larochelle and M. Ranzato and R. Hadsell and M. F. Balcan and H. Lin},
  volume    = {33},
  pages     = {14068--14080},
  year      = {2020},
  publisher = {Curran Associates, Inc.},
  url       = {https://proceedings.neurips.cc/paper/2020/file/a1d4c20b182ad7137ab3606f0e3fc8a4-Paper.pdf}
}
% Kraken: memory-efficient continual learning for real-time recommendations, SC20.
% The empty volume/number fields from the export are omitted because empty fields trigger warnings.
@inproceedings{9355295,
  author    = {Xie, Minhui and Ren, Kai and Lu, Youyou and Yang, Guangxu and Xu, Qingxing and Wu, Bihai and Lin, Jiazhen and Ao, Hongbo and Xu, Wanhong and Shu, Jiwu},
  title     = {Kraken: Memory-Efficient Continual Learning for Large-Scale Real-Time Recommendations},
  booktitle = {SC20: International Conference for High Performance Computing, Networking, Storage and Analysis},
  pages     = {1--17},
  year      = {2020},
  doi       = {10.1109/SC41405.2020.00025}
}
% MicroRec: recommendation inference via hardware and data-structure solutions, Proceedings of Machine Learning and Systems vol. 3.
@inproceedings{MLSYS2021_ec895663,
  author    = {Jiang, Wenqi and He, Zhenhao and Zhang, Shuai and Preu{\ss}er, Thomas B. and Zeng, Kai and Feng, Liang and Zhang, Jiansong and Liu, Tongxuan and Li, Yong and Zhou, Jingren and Zhang, Ce and Alonso, Gustavo},
  title     = {{MicroRec}: Efficient Recommendation Inference by Hardware and Data Structure Solutions},
  booktitle = {Proceedings of Machine Learning and Systems},
  editor    = {A. Smola and A. Dimakis and I. Stoica},
  volume    = {3},
  pages     = {845--859},
  year      = {2021},
  url       = {https://proceedings.mlsys.org/paper/2021/file/ec8956637a99787bd197eacd77acce5e-Paper.pdf}
}
% Compositional embeddings with complementary partitions, KDD '20.
% Page range 165--175 is consistent with numpages = 11.
@inproceedings{10.1145/3394486.3403059,
  author    = {Shi, Hao-Jun Michael and Mudigere, Dheevatsa and Naumov, Maxim and Yang, Jiyan},
  title     = {Compositional Embeddings Using Complementary Partitions for Memory-Efficient Recommendation Systems},
  booktitle = {Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
  series    = {KDD '20},
  pages     = {165--175},
  numpages  = {11},
  year      = {2020},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Virtual Event, CA, USA},
  isbn      = {9781450379984},
  doi       = {10.1145/3394486.3403059},
  url       = {https://doi.org/10.1145/3394486.3403059},
  keywords  = {model compression, recommendation systems, embeddings}
}
% Mixed-dimension embeddings for memory-efficient recommendation (arXiv 1909.11810, cs.LG).
@misc{ginart2021mixed,
  author        = {Antonio Ginart and Maxim Naumov and Dheevatsa Mudigere and Jiyan Yang and James Zou},
  title         = {Mixed Dimension Embeddings with Application to Memory-Efficient Recommendation Systems},
  year          = {2021},
  eprint        = {1909.11810},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}
% Unbiased online active learning in data streams, KDD '11.
% Page range 195--203 is consistent with numpages = 9.
@inproceedings{10.1145/2020408.2020444,
  author    = {Chu, Wei and Zinkevich, Martin and Li, Lihong and Thomas, Achint and Tseng, Belle},
  title     = {Unbiased Online Active Learning in Data Streams},
  booktitle = {Proceedings of the 17th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  series    = {KDD '11},
  pages     = {195--203},
  numpages  = {9},
  year      = {2011},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {San Diego, California, USA},
  isbn      = {9781450308137},
  doi       = {10.1145/2020408.2020444},
  url       = {https://doi.org/10.1145/2020408.2020444},
  keywords  = {unbiasedness, bayesian online learning, active learning, data streaming, adaptive importance sampling},
  abstract  = {Unlabeled samples can be intelligently selected for labeling to minimize classification error. In many real-world applications, a large number of unlabeled samples arrive in a streaming manner, making it impossible to maintain all the data in a candidate pool. In this work, we focus on binary classification problems and study selective labeling in data streams where a decision is required on each sample sequentially. We consider the unbiasedness property in the sampling process, and design optimal instrumental distributions to minimize the variance in the stochastic process. Meanwhile, Bayesian linear classifiers with weighted maximum likelihood are optimized online to estimate parameters. In empirical evaluation, we collect a data stream of user-generated comments on a commercial news portal in 30 consecutive days, and carry out offline evaluation to compare various sampling strategies, including unbiased active learning, biased variants, and random sampling. Experimental results verify the usefulness of online active learning, especially in the non-stationary situation with concept drift.}
}
% Continuum: platform for cost-aware, low-latency continual learning, SoCC '18.
% Page range 26--40 is consistent with numpages = 15.
@inproceedings{10.1145/3267809.3267817,
  author    = {Tian, Huangshi and Yu, Minchen and Wang, Wei},
  title     = {Continuum: A Platform for Cost-Aware, Low-Latency Continual Learning},
  booktitle = {Proceedings of the ACM Symposium on Cloud Computing},
  series    = {SoCC '18},
  pages     = {26--40},
  numpages  = {15},
  year      = {2018},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Carlsbad, CA, USA},
  isbn      = {9781450360111},
  doi       = {10.1145/3267809.3267817},
  url       = {https://doi.org/10.1145/3267809.3267817},
  keywords  = {Competitive Analysis, Continual Learning System, Online Algorithm},
  abstract  = {Many machine learning applications operate in dynamic environments that change over time, in which models must be continually updated to capture the recent trend in data. However, most of today's learning frameworks perform training offline, without a system support for continual model updating. In this paper, we design and implement Continuum, a general-purpose platform that streamlines the implementation and deployment of continual model updating across existing learning frameworks. In pursuit of fast data incorporation, we further propose two update policies, cost-aware and best-effort, that judiciously determine when to perform model updating, with and without accounting for the training cost (machine-time), respectively. Theoretical analysis shows that cost-aware policy is 2-competitive. We implement both polices in Continuum, and evaluate their performance through EC2 deployment and trace-driven simulations. The evaluation shows that Continuum results in reduced data incorporation latency, lower training cost, and improved model quality in a number of popular online learning applications that span multiple application domains, programming languages, and frameworks.}
}
% Practical lessons from click prediction on Facebook ads, ADKDD'14 workshop.
% Page range 1--9 is consistent with numpages = 9; the last author's compound surname is kept together.
@inproceedings{10.1145/2648584.2648589,
  author    = {He, Xinran and Pan, Junfeng and Jin, Ou and Xu, Tianbing and Liu, Bo and Xu, Tao and Shi, Yanxin and Atallah, Antoine and Herbrich, Ralf and Bowers, Stuart and Qui{\~n}onero Candela, Joaquin},
  title     = {Practical Lessons from Predicting Clicks on Ads at {Facebook}},
  booktitle = {Proceedings of the Eighth International Workshop on Data Mining for Online Advertising},
  series    = {ADKDD'14},
  pages     = {1--9},
  numpages  = {9},
  year      = {2014},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {New York, NY, USA},
  isbn      = {9781450329996},
  doi       = {10.1145/2648584.2648589},
  url       = {https://doi.org/10.1145/2648584.2648589},
  abstract  = {Online advertising allows advertisers to only bid and pay for measurable user responses, such as clicks on ads. As a consequence, click prediction systems are central to most online advertising systems. With over 750 million daily active users and over 1 million active advertisers, predicting clicks on Facebook ads is a challenging machine learning task. In this paper we introduce a model which combines decision trees with logistic regression, outperforming either of these methods on its own by over 3\%, an improvement with significant impact to the overall system performance. We then explore how a number of fundamental parameters impact the final prediction performance of our system. Not surprisingly, the most important thing is to have the right features: those capturing historical information about the user or ad dominate other types of features. Once we have the right features and the right model (decisions trees plus logistic regression), other factors play small roles (though even small improvements are important at scale). Picking the optimal handling for data freshness, learning rate schema and data sampling improve the model slightly, though much less than adding a high-value feature, or picking the right model to begin with.}
}
% NVIDIA whitepaper on the Tesla V100 GPU architecture (web resource).
% The URL stays inside note, wrapped in \url, so styles without url-field support still print it.
@misc{2017NVIDIA,
  author       = {{NVIDIA}},
  title        = {{NVIDIA} {Tesla} {V100} {GPU} Architecture: The World's Most Advanced Datacenter {GPU}},
  howpublished = {Website},
  year         = {2017},
  note         = {\url{http://www.nvidia.com/object/volta-architecture-whitepaper.html}}
}
% Asynchronous methods for deep reinforcement learning, ICML 2016.
@inproceedings{mnih2016asynchronous,
  author    = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning (ICML)},
  pages     = {1928--1937},
  year      = {2016}
}
% IMPALA: importance weighted actor-learner architectures (arXiv preprint 1802.01561).
@article{espeholt2018impala,
  author  = {Espeholt, Lasse and Soyer, Hubert and Munos, Remi and Simonyan, Karen and Mnih, Volodymir and Ward, Tom and Doron, Yotam and Firoiu, Vlad and Harley, Tim and Dunning, Iain and others},
  title   = {{IMPALA}: Scalable Distributed {Deep-RL} with Importance Weighted Actor-Learner Architectures},
  journal = {arXiv preprint arXiv:1802.01561},
  year    = {2018}
}
% SEED RL: deep RL with accelerated central inference (arXiv preprint 1910.06591).
@article{espeholt2019seed,
  author  = {Espeholt, Lasse and Marinier, Rapha{\"e}l and Stanczyk, Piotr and Wang, Ke and Michalski, Marcin},
  title   = {{SEED RL}: Scalable and Efficient {Deep-RL} with Accelerated Central Inference},
  journal = {arXiv preprint arXiv:1910.06591},
  year    = {2019}
}
% Distributed prioritized experience replay (arXiv 1803.00933, cs.LG).
@misc{horgan2018distributed,
  author        = {Dan Horgan and John Quan and David Budden and Gabriel Barth-Maron and Matteo Hessel and Hado van Hasselt and David Silver},
  title         = {Distributed Prioritized Experience Replay},
  year          = {2018},
  eprint        = {1803.00933},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG}
}
% Ray: distributed framework for AI applications, 13th USENIX OSDI (2018).
% Acronyms use plain brace protection; math-mode escaped braces would print literal curly braces.
@inproceedings{moritz2018ray,
  author    = {Moritz, Philipp and Nishihara, Robert and Wang, Stephanie and Tumanov, Alexey and Liaw, Richard and Liang, Eric and Elibol, Melih and Yang, Zongheng and Paul, William and Jordan, Michael I and others},
  title     = {Ray: A Distributed Framework for Emerging {AI} Applications},
  booktitle = {13th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 18)},
  pages     = {561--577},
  year      = {2018}
}
% Ray RLlib: composable and scalable reinforcement learning library (arXiv preprint 1712.09381).
% No pages field: the preprint has no journal pagination.
@article{liang2017ray,
  author  = {Liang, Eric and Liaw, Richard and Nishihara, Robert and Moritz, Philipp and Fox, Roy and Gonzalez, Joseph and Goldberg, Ken and Stoica, Ion},
  title   = {Ray {RLlib}: A Composable and Scalable Reinforcement Learning Library},
  journal = {arXiv preprint arXiv:1712.09381},
  year    = {2017}
}
% Reverb: framework for experience replay (arXiv preprint 2102.04736).
@article{cassirer2021reverb,
  author  = {Cassirer, Albin and Barth-Maron, Gabriel and Brevdo, Eugene and Ramos, Sabela and Boyd, Toby and Sottiaux, Thibault and Kroiss, Manuel},
  title   = {Reverb: A Framework For Experience Replay},
  journal = {arXiv preprint arXiv:2102.04736},
  year    = {2021}
}
% Acme: research framework for distributed reinforcement learning (arXiv preprint 2006.00979).
@article{hoffman2020acme,
  author  = {Hoffman, Matt and Shahriari, Bobak and Aslanides, John and Barth-Maron, Gabriel and Behbahani, Feryal and Norman, Tamara and Abdolmaleki, Abbas and Cassirer, Albin and Yang, Fan and Baumli, Kate and others},
  title   = {Acme: A research framework for distributed reinforcement learning},
  journal = {arXiv preprint arXiv:2006.00979},
  year    = {2020}
}
% Grandmaster-level StarCraft II play via multi-agent reinforcement learning, Nature 575(7782), 2019.
@article{vinyals2019grandmaster,
  author    = {Vinyals, Oriol and Babuschkin, Igor and Czarnecki, Wojciech M and Mathieu, Micha{\"e}l and Dudzik, Andrew and Chung, Junyoung and Choi, David H and Powell, Richard and Ewalds, Timo and Georgiev, Petko and others},
  title     = {Grandmaster Level in {StarCraft II} Using Multi-Agent Reinforcement Learning},
  journal   = {Nature},
  volume    = {575},
  number    = {7782},
  pages     = {350--354},
  year      = {2019},
  publisher = {Nature Publishing Group}
}
% Dota 2 with large-scale deep reinforcement learning (arXiv preprint 1912.06680).
@article{berner2019dota,
  author  = {Berner, Christopher and Brockman, Greg and Chan, Brooke and Cheung, Vicki and D{\k{e}}biak, Przemys{\l}aw and Dennison, Christy and Farhi, David and Fischer, Quirin and Hashme, Shariq and Hesse, Chris and others},
  title   = {Dota 2 with large scale deep reinforcement learning},
  journal = {arXiv preprint arXiv:1912.06680},
  year    = {2019}
}
% TStarBot-X: league training study for the StarCraft II full game (arXiv preprint 2011.13729).
@article{han2020tstarbot,
  author  = {Han, Lei and Xiong, Jiechao and Sun, Peng and Sun, Xinghai and Fang, Meng and Guo, Qingwei and Chen, Qiaobo and Shi, Tengfei and Yu, Hongsheng and Wu, Xipeng and others},
  title   = {{TStarBot-X}: An Open-Sourced and Comprehensive Study for Efficient League Training in {StarCraft II} Full Game},
  journal = {arXiv preprint arXiv:2011.13729},
  year    = {2020}
}
% Value-decomposition networks for cooperative multi-agent learning (arXiv preprint 1706.05296).
@article{sunehag2017value,
  author  = {Sunehag, Peter and Lever, Guy and Gruslys, Audrunas and Czarnecki, Wojciech Marian and Zambaldi, Vinicius and Jaderberg, Max and Lanctot, Marc and Sonnerat, Nicolas and Leibo, Joel Z and Tuyls, Karl and others},
  title   = {Value-decomposition networks for cooperative multi-agent learning},
  journal = {arXiv preprint arXiv:1706.05296},
  year    = {2017}
}
% QMIX: monotonic value function factorisation for multi-agent RL, International Conference on Machine Learning 2018.
% PMLR is recorded as the publisher of the proceedings.
@inproceedings{rashid2018qmix,
  author    = {Rashid, Tabish and Samvelyan, Mikayel and Schroeder, Christian and Farquhar, Gregory and Foerster, Jakob and Whiteson, Shimon},
  title     = {{QMIX}: Monotonic Value Function Factorisation for Deep Multi-Agent Reinforcement Learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {4295--4304},
  year      = {2018},
  publisher = {PMLR}
}
% Counterfactual multi-agent policy gradients, AAAI proceedings vol. 32 no. 1 (2018).
@inproceedings{foerster2018counterfactual,
  author    = {Foerster, Jakob and Farquhar, Gregory and Afouras, Triantafyllos and Nardelli, Nantas and Whiteson, Shimon},
  title     = {Counterfactual multi-agent policy gradients},
  booktitle = {Proceedings of the AAAI conference on artificial intelligence},
  volume    = {32},
  number    = {1},
  year      = {2018}
}
% Unified game-theoretic approach to multi-agent RL, NeurIPS vol. 30 (2017).
% Stored as a proceedings paper with booktitle, matching the other NeurIPS entry in this file.
@inproceedings{lanctot2017unified,
  author    = {Lanctot, Marc and Zambaldi, Vinicius and Gruslys, Audrunas and Lazaridou, Angeliki and Tuyls, Karl and P{\'e}rolat, Julien and Silver, David and Graepel, Thore},
  title     = {A Unified Game-Theoretic Approach to Multiagent Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {30},
  year      = {2017}
}
% Playing Atari with deep reinforcement learning (arXiv preprint 1312.5602).
@article{mnih2013playing,
  author  = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
  title   = {Playing {Atari} with Deep Reinforcement Learning},
  journal = {arXiv preprint arXiv:1312.5602},
  year    = {2013}
}
% RLzoo: efficient reinforcement learning development (arXiv preprint 2009.08644).
@article{ding2020efficient,
  author  = {Ding, Zihan and Yu, Tianyang and Huang, Yanhua and Zhang, Hongming and Li, Guo and Guo, Quancheng and Mai, Luo and Dong, Hao},
  title   = {Efficient Reinforcement Learning Development with {RLzoo}},
  journal = {arXiv preprint arXiv:2009.08644},
  year    = {2020}
}
% Isaac Gym: GPU-based physics simulation for robot learning (arXiv preprint 2108.10470).
@article{makoviychuk2021isaac,
  author  = {Makoviychuk, Viktor and Wawrzyniak, Lukasz and Guo, Yunrong and Lu, Michelle and Storey, Kier and Macklin, Miles and Hoeller, David and Rudin, Nikita and Allshire, Arthur and Handa, Ankur and others},
  title   = {{Isaac Gym}: High Performance {GPU-Based} Physics Simulation For Robot Learning},
  journal = {arXiv preprint arXiv:2108.10470},
  year    = {2021}
}