Conference paper (IJCNN 2022). The citation key below looks like an
exporter-generated identifier; it is kept unchanged so that existing
\cite commands continue to resolve. The note field carries the funding
statement once, followed by publisher copyright and conference details.

@inproceedings{19485cbe1fa541b8b064d8214aabd27e,
  author    = {Iyer, Shreyah and Glackin, Cornelius and Cannings, Nigel and Veneziano, Vito and Sun, Yi},
  title     = {A Comparison Between Convolutional and Transformer Architectures for Speech Emotion Recognition},
  booktitle = {2022 International Joint Conference on Neural Networks, {IJCNN} 2022 - Proceedings},
  series    = {Proceedings of the International Joint Conference on Neural Networks},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2022},
  doi       = {10.1109/IJCNN55064.2022.9891882},
  language  = {English},
  keywords  = {alexnet, convolutional neural networks, mel spectrograms, speech emotion recognition, transfer learning, transformers, wav2vec2},
  abstract  = {Creating speech emotion recognition models comparable to the capability of how humans recognise emotions is a long-standing challenge in the field of speech technology with many potential commercial applications. As transformer-based architectures have recently become the state-of-the-art for many natural language processing related applications, this paper investigates their suitability for acoustic emotion recognition and compares them to the well-known AlexNet convolutional approach. This comparison is made using several publicly available speech emotion corpora. Experimental results demonstrate the efficacy of the different architectural approaches for particular emotions. The results show that the transformer-based models outperform their convolutional counterparts yielding F1-scores in the range [70.33\%, 75.76\%]. This paper further provides insights via dimensionality reduction analysis of output layer activations in both architectures and reveals significantly improved clustering in transformer-based models whilst highlighting the nuances with regard to the separability of different emotion classes.},
  note      = {Funding Information: This project has received funding from the European Union{\textquoteright}s Horizon 2020 Research and Innovation programme under Grant Agreement 823907 (MENHIR project: https://menhir-project.eu) and by an Innovate UK grant via the Knowledge Transfer Partnership programme, KTP No.11039. Publisher Copyright: {\textcopyright} 2022 IEEE.; 2022 International Joint Conference on Neural Networks, IJCNN 2022 ; Conference date: 18-07-2022 Through 23-07-2022},
}