@inproceedings{10eab0d0b9f443e18eaf3e5739619181,
  author    = {Haidar, {Md Akmal} and Kurimo, Mikko},
  title     = {{LDA}-based context dependent recurrent neural network language model using document-based topic distribution of words},
  abstract  = {Adding context information into recurrent neural network language models (RNNLMs) have been investigated recently to improve the effectiveness of learning RNNLM. Conventionally, a fast approximate topic representation for a block of words was proposed by using corpus-based topic distribution of word incorporating latent Dirichlet allocation (LDA) model. It is then updated for each subsequent word using an exponential decay. However, words could represent different topics in different documents. In this paper, we form document-based distribution over topics for each word using LDA model and apply it in the computation of fast approximate exponentially decaying features. We have shown experimental results on a well known Penn Treebank corpus and found that our approach outperforms the conventional LDA-based context RNNLM approach. Moreover, we carried out speech recognition experiments on Wall Street Journal corpus and achieved word error rate (WER) improvements over the other approach.},
  keywords  = {language modeling, latent Dirichlet allocation, Recurrent neural network, speech recognition},
  year      = {2017},
  month     = jun,
  day       = {16},
  doi       = {10.1109/ICASSP.2017.7953254},
  language  = {English},
  series    = {Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing},
  publisher = {IEEE},
  pages     = {5730--5734},
  booktitle = {2017 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2017 - Proceedings},
  address   = {United States},
  internal-note = {review: address holds a country, not the publisher's city as BibTeX expects -- confirm against publisher data if styles render it oddly},
  note      = {IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP ; Conference date: 05-03-2017 Through 09-03-2017},
}