@inproceedings{147595f715ec481fa8d5809c613af0d4,
  author    = {Morrison, Max and Pawar, Pranav and Pruyne, Nathan and Cole, Jennifer Sandra and Pardo, Bryan},
  title     = {Crowdsourced and Automatic Speech Prominence Estimation},
  booktitle = {2024 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2024 - Proceedings},
  series    = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {12281--12285},
  year      = {2024},
  doi       = {10.1109/ICASSP48485.2024.10447107},
  language  = {English (US)},
  keywords  = {emphasis, paralinguistics, prominence, prosody},
  abstract  = {The prominence of a spoken word is the degree to which an average native listener perceives the word as salient or emphasized relative to its context. Speech prominence estimation is the process of assigning a numeric value to the prominence of each word in an utterance. These prominence labels are useful for linguistic analysis, as well as training automated systems to perform emphasis-controlled text-to-speech or emotion recognition. Manually annotating prominence is time-consuming and expensive, which motivates the development of automated methods for speech prominence estimation. However, developing such an automated system using machine-learning methods requires human-annotated training data. Using our system for acquiring such human annotations, we collect and open-source crowdsourced annotations of a portion of the LibriTTS dataset. We use these annotations as ground truth to train a neural speech prominence estimator that generalizes to unseen speakers, datasets, and speaking styles. We investigate design decisions for neural prominence estimation as well as how neural prominence estimation improves as a function of two key factors of annotation cost: dataset size and the number of annotations per utterance.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 49th IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2024 ; Conference date: 14-04-2024 Through 19-04-2024},
}