@inproceedings{c9dcfd7115164fa08ae813ad40988099,
title = "Variance reduction for optimization in speech recognition",
abstract = "A deep neural network (DNN) is trained through mini-batch optimization based on the stochastic gradient descent algorithm. Such stochastic learning suffers from instability in parameter updating and may easily be trapped in a local optimum. This study addresses the stability of stochastic learning by reducing the variance of gradients in the optimization procedure. We upgrade the optimization from stochastic dual coordinate ascent (SDCA) to accelerated SDCA without duality (dual-free ASDCA). This optimization incorporates the momentum method to accelerate the update rule so that the variance of gradients is reduced. Using dual-free ASDCA, the optimization of the dual function of SDCA, formulated with a convex loss, is carried out by directly optimizing the primal function with respect to pseudo-dual parameters. The non-convex optimization in DNN training can thereby be handled and accelerated. Experimental results illustrate the reduction of training loss, gradient variance, and word error rate achieved by the proposed optimization for DNN speech recognition.",
keywords = "deep neural network, optimization algorithm, speech recognition, variance reduction",
author = "Chien, {Jen-Tzung} and Huang, {Pei Wen}",
year = "2016",
month = nov,
day = "8",
doi = "10.1109/MLSP.2016.7738864",
language = "English",
series = "IEEE International Workshop on Machine Learning for Signal Processing, MLSP",
publisher = "IEEE Computer Society",
editor = "Diamantaras, {Kostas} and Uncini, {Aurelio} and Palmieri, {Francesco A. N.} and Larsen, {Jan}",
booktitle = "2016 IEEE International Workshop on Machine Learning for Signal Processing, MLSP 2016 - Proceedings",
address = "United States",
note = "Conference date: 13-09-2016 through 16-09-2016",
}
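
The abstract's key mechanism, dual-free SDCA, optimizes the primal objective through pseudo-dual parameters so that the variance of the stochastic update direction shrinks. The sketch below is not the paper's implementation: it is a minimal illustration of the plain dual-free SDCA update on a toy ridge-regression problem, without the momentum acceleration or DNN training the paper describes, and all names (dual_free_sdca, X, y, lam, eta) are illustrative assumptions.

    # Minimal sketch of the dual-free SDCA update (not the authors' code),
    # applied to ridge regression: (1/n) sum_i 0.5*(x_i.w - y_i)^2 + (lam/2)*||w||^2.
    import numpy as np

    def dual_free_sdca(X, y, lam=0.1, eta=0.01, epochs=20, seed=0):
        """Keep one pseudo-dual vector alpha_i per sample so that
        w = (1 / (lam * n)) * sum_i alpha_i; both are updated with the
        same correction term, which reduces the variance of each step."""
        n, d = X.shape
        rng = np.random.default_rng(seed)
        alpha = np.zeros((n, d))            # pseudo-dual parameters
        w = alpha.sum(axis=0) / (lam * n)   # primal weights kept in sync
        for _ in range(epochs):
            for i in rng.permutation(n):
                grad_i = X[i] * (X[i] @ w - y[i])   # per-sample gradient
                v = grad_i + alpha[i]               # variance-reduced direction
                alpha[i] -= eta * lam * n * v       # pseudo-dual update
                w -= eta * v                        # primal update
        return w

    # Tiny usage example on synthetic data.
    if __name__ == "__main__":
        rng = np.random.default_rng(0)
        X = rng.normal(size=(200, 5))
        w_true = rng.normal(size=5)
        y = X @ w_true + 0.01 * rng.normal(size=200)
        print("recovered weights:", np.round(dual_free_sdca(X, y), 2))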