@inproceedings{a3aeef1169b3498686a20de4d3f72852,
title = "Bayesian learning for neural network compression",
abstract = "Quantization of weight parameters in neural network training plays a key role in model compression for mobile devices. This paper presents a general M-ary adaptive quantization for the construction of Bayesian neural networks. The trade-off between model capacity and memory cost is adjustable, and the stochastic nature of the weight parameters is faithfully reflected. A compact model is trained to achieve robustness to the model uncertainty caused by heterogeneous data collection. To minimize the performance loss, the representation levels in the quantized neural network are estimated by maximizing the variational lower bound of the log likelihood conditioned on M-ary quantization. Bayesian learning is formulated by using a multi-spike-and-slab prior over the quantization levels. An adaptive quantization is derived to implement a flexible parameter space for representation learning, which is applied to object recognition. Experiments on image recognition show the merit of this Bayesian model compression for M-ary quantized neural networks.",
keywords = "Adaptive quantization, Bayesian neural network, Model compression, Quantized neural network",
author = "Chien, {Jen-Tzung} and Chang, {Su-Ting}",
year = "2020",
month = jul,
doi = "10.1109/ICMEW46912.2020.9105998",
language = "English",
series = "2020 IEEE International Conference on Multimedia and Expo Workshops, ICMEW 2020",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
booktitle = "2020 IEEE International Conference on Multimedia and Expo Workshops, ICMEW 2020",
address = "United States",
note = "Conference date: 06-07-2020 through 10-07-2020",
}
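
The following is a minimal illustrative sketch, outside the entry (BibTeX ignores text between entries), of the kind of M-ary weight quantization with learnable representation levels and a variational-style prior term that the abstract describes. The layer, number of levels, spike width, and loss weighting are assumptions for illustration only, not the authors' implementation.

    # Illustrative sketch only (not the authors' code): soft M-ary quantization of
    # a weight tensor with learnable representation levels, trained with a task
    # loss plus a prior penalty that pulls weights toward the levels, loosely
    # mimicking a multi-spike-and-slab regularizer. All names and hyperparameters
    # are assumptions.
    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class MaryQuantizer(nn.Module):
        def __init__(self, num_levels=4, sigma=0.1):
            super().__init__()
            # M learnable representation levels (the "spikes")
            self.levels = nn.Parameter(torch.linspace(-1.0, 1.0, num_levels))
            self.sigma = sigma  # width of each spike component

        def forward(self, w):
            # Soft assignment of each weight to the levels, so gradients reach
            # both the weights and the levels.
            d2 = (w.unsqueeze(-1) - self.levels) ** 2           # squared distances
            resp = F.softmax(-d2 / (2 * self.sigma ** 2), -1)   # responsibilities
            w_q = (resp * self.levels).sum(-1)                   # soft-quantized weights
            # Negative log of an (unnormalized) mixture centered on the levels,
            # standing in for the spike-and-slab prior term of the lower bound.
            prior_nll = -torch.logsumexp(-d2 / (2 * self.sigma ** 2), dim=-1).mean()
            return w_q, prior_nll

    # Toy usage: quantize the weights of a linear layer in the forward pass.
    layer = nn.Linear(8, 2)
    quant = MaryQuantizer(num_levels=4)
    x, y = torch.randn(32, 8), torch.randint(0, 2, (32,))
    w_q, prior_nll = quant(layer.weight)
    logits = F.linear(x, w_q, layer.bias)
    loss = F.cross_entropy(logits, y) + 1e-3 * prior_nll  # task loss + prior term
    loss.backward()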