% Bibliography of foundational neural-network and deep-learning references.
%
% Conventions used in this file:
%   * One field per line; fields ordered author, title, venue, volume/number,
%     pages, month/year, publisher, identifiers.
%   * Titles are stored in Title Case; only words that must keep their case
%     under sentence-casing styles (acronyms, proper nouns) are braced.
%   * Page ranges are written in full with a double hyphen.
%   * Venues cited more than once are defined once as string macros below so
%     their spelling cannot drift between entries.
%   * Citation keys are part of the interface to the documents that cite them
%     and must not be renamed, even where their casing or embedded year looks
%     inconsistent with the rest of the file.

@string{neurips    = {Advances in Neural Information Processing Systems}}
@string{neuralcomp = {Neural Computation}}
@string{nature     = {Nature}}

@article{rosenblatt1958perceptron,
  author    = {Rosenblatt, Frank},
  title     = {The Perceptron: A Probabilistic Model for Information Storage and Organization in the Brain},
  journal   = {Psychological Review},
  volume    = {65},
  number    = {6},
  pages     = {386--408},
  year      = {1958},
  publisher = {American Psychological Association},
}

@article{lecun1989backpropagation,
  author    = {LeCun, Yann and Boser, Bernhard and Denker, John S. and Henderson, Donnie and Howard, Richard E. and Hubbard, Wayne and Jackel, Lawrence D.},
  title     = {Backpropagation Applied to Handwritten Zip Code Recognition},
  journal   = neuralcomp,
  volume    = {1},
  number    = {4},
  pages     = {541--551},
  year      = {1989},
  publisher = {MIT Press},
}

@inproceedings{krizhevsky2012imagenet,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
  title     = {{ImageNet} Classification with Deep Convolutional Neural Networks},
  booktitle = neurips,
  pages     = {1097--1105},
  year      = {2012},
}

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep Residual Learning for Image Recognition},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  pages     = {770--778},
  year      = {2016},
}

@article{rumelhart1986learning,
  author    = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
  title     = {Learning Representations by Back-Propagating Errors},
  journal   = nature,
  volume    = {323},
  number    = {6088},
  pages     = {533--536},
  year      = {1986},
  publisher = {Nature Publishing Group},
}

% The key keeps its original capitalisation so existing citations still resolve.
@article{Hochreiter1997lstm,
  author  = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title   = {Long Short-Term Memory},
  journal = neuralcomp,
  volume  = {9},
  number  = {8},
  pages   = {1735--1780},
  year    = {1997},
  issn    = {0899-7667},
  pmid    = {9377276},
}

@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention Is All You Need},
  booktitle = neurips,
  pages     = {5998--6008},
  year      = {2017},
}

@article{lecun2015deep,
  author    = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
  title     = {Deep Learning},
  journal   = nature,
  volume    = {521},
  number    = {7553},
  pages     = {436--444},
  year      = {2015},
  publisher = {Nature Publishing Group},
}

% Published at ICLR 2015; the 2014 in the key is the year of the arXiv
% preprint and is kept only so existing citations still resolve.
@inproceedings{KingmaAdam2014,
  author    = {Kingma, Diederik P. and Ba, Jimmy},
  title     = {{Adam}: A Method for Stochastic Optimization},
  booktitle = {Proceedings of the International Conference on Learning Representations ({ICLR})},
  year      = {2015},
}

% Coursera lecture slides, not a report issued by an institution, so this is
% a misc entry with the course named in howpublished.
@misc{tieleman2012rmsprop,
  author       = {Tieleman, Tijmen and Hinton, Geoffrey E.},
  title        = {Lecture 6.5---{RMSProp}: Divide the Gradient by a Running Average of Its Recent Magnitude},
  howpublished = {{COURSERA}: Neural Networks for Machine Learning},
  year         = {2012},
}

% JMLR volumes have no issue numbers; the publication month goes in the
% month field (unquoted macro) rather than in number.
@article{duchi2011adagrad,
  author  = {Duchi, John and Hazan, Elad and Singer, Yoram},
  title   = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2121--2159},
  month   = jul,
  year    = {2011},
}