% Conference paper introducing SIGRNN (ICPRAM 2021 proceedings, Volume 1, pp. 349--356).
% Acronyms are brace-protected so sentence-casing styles keep their capitals;
% the publisher carries an extra brace pair so its literal " and " is not read as a list separator.
@inproceedings{6db0813c5c074c74bdee16d49393b601,
  author    = {Al-Bahrani, Reda and Jha, Dipendra and Kang, Qiao and Lee, Sunwoo and Yang, Zijiang and Liao, {Wei Keng} and Agrawal, Ankit and Choudhary, Alok},
  title     = {{SIGRNN}: Synthetic Minority Instances Generation in Imbalanced Datasets using a Recurrent Neural Network},
  booktitle = {{ICPRAM} 2021 -- Proceedings of the 10th International Conference on Pattern Recognition Applications and Methods, Volume 1},
  editor    = {{De Marsico}, Maria and {Sanniti di Baja}, Gabriella and Fred, Ana L. N.},
  series    = {International Conference on Pattern Recognition Applications and Methods},
  publisher = {{Science and Technology Publications, Lda}},
  year      = {2021},
  pages     = {349--356},
  doi       = {10.5220/0010348103490356},
  isbn      = {9789897584862},
  language  = {English (US)},
  keywords  = {Balancing, Classification, Imbalanced Dataset, Oversampling, Synthetic Data},
  note      = {Publisher Copyright: {\textcopyright} 2021 by SCITEPRESS-Science and Technology Publications, Lda. All rights reserved.; 10th International Conference on Pattern Recognition Applications and Methods, ICPRAM 2021 ; Conference date: 04-02-2021 Through 06-02-2021},
  abstract  = {Machine learning models trained on imbalanced datasets tend to produce sub-optimal results. This happens because the learning of the minority classes is dominated by the learning of the majority class. Recommendations to overcome this obstacle include oversampling the minority class by synthesizing new instances and using different performance measures. We propose a novel approach to handle the imbalance in datasets by using a sequence-to-sequence recurrent neural network to synthesize minority class instances. The generative neural network is trained on the minority class instances to learn its data distribution; the generative neural network is then used to synthesize minority class instances; these instances are used to augment the original dataset and balance the minority class. We evaluate our proposed approach against several imbalanced datasets. We train Decision Tree models on the original and augmented datasets and compare their results against the Synthetic Minority Over-sampling TEchnique (SMOTE), Adaptive Synthetic sampling (ADASYN) and Synthetic Minority Over-sampling TEchnique-Nominal Continuous (SMOTE-NC). All results are an average of multiple runs and the results are compared across four different performance metrics. SIGRNN performs well compared to SMOTE and ADASYN, specifically in lower percentage increments to the minority class. Also, SIGRNN outperforms SMOTE-NC on datasets having nominal features.},
}