% Conference paper on labeled sample size estimation for semi-supervised
% (transductive) linear discriminant analysis, published in the joint
% IC-AI'04 / MLMTA'04 proceedings.
%
% The citation key is the exporting system's opaque identifier; it is kept
% unchanged so that existing \cite commands continue to resolve.
% Names are stored in "Last, First" form so the surname split is unambiguous.
% No series field is given: the proceedings title belongs in booktitle only,
% and repeating it in series or note makes styles print it more than once.
% NOTE(review): the second editor was exported as "M. Youngsong", which looks
% like a swapped given/family name; it is recorded here as Youngsong Mun.
% Confirm against the printed volume's title page.
@inproceedings{3c8937b2cbc24278b2fdd76006746ff4,
  author    = {Liu, Han and Yang, Xiaolin and Wu, Di and Yuan, Xiaobin and Zhang, Ji and Kustra, Rafal},
  title     = {Statistical issues with labeled sample size analysis for semi-supervised linear discriminant analysis},
  booktitle = {Proceedings of the International Conference on Artificial Intelligence, {IC-AI}'04 and Proceedings of the International Conference on Machine Learning; Models, Technologies and Applications, {MLMTA}'04},
  editor    = {Arabnia, H. R. and Mun, Youngsong},
  pages     = {1007--1012},
  year      = {2004},
  isbn      = {1932415335},
  language  = {english},
  note      = {Conference held 21--24 June 2004},
  keywords  = {Bayes risk, Sample size estimation, Semi-supervised classification, Transductive LDA, Unlabeled data},
  abstract  = {Recently, semi-supervised classification has drawn more attention and many practical semi-supervised learning methods have been proposed. However, current literature ignores an important fact---How to estimate the exact labeled sample size given a lot of unlabeled samples, which is important because of the rareness and expensiveness of labeled examples and is also crucial for us to explore the relative value of labeled and unlabeled samples given a specific model. Based on the assumption of a latent gaussian-distribution to the domain, we describe a reasonable labeled sample size estimation method for semi-supervised linear discriminant analysis (Transductive LDA). A detailed mathematical derivation and a computationally tractable approach are given out. Our technique extends naturally to handle two difficult problems: learning from gaussian distributions with different covariances, and learning for multiple classes.},
}