@inproceedings{16829837f69246f2a60f5cc402c2101c,

title = "Streaming algorithms for k-center clustering with outliers and with anonymity",

abstract = "Clustering is a common problem in the analysis of large data sets. Streaming algorithms, which make a single pass over the data set using small working memory and produce a clustering comparable in cost to the optimal offline solution, are especially useful. We develop the first streaming algorithms achieving a constant-factor approximation to the cluster radius for two variations of the k-center clustering problem. We give a streaming (4 + ε)-approximation algorithm using O(ε-1 kz) memory for the problem with outliers, in which the clustering is allowed to drop up to z of the input points; previous work used a random sampling approach which yields only a bicriteria approximation. We also give a streaming (6 + ε)-approximation algorithm using O(ε-1 ln (ε-1) k + k 2) memory for a variation motivated by anonymity considerations in which each cluster must contain at least a certain number of input points.",

keywords = "Anonymity, Clustering, Outliers, Streaming, k-center",

author = "McCutchen, {Richard Matthew} and Samir Khuller",

year = "2008",

doi = "10.1007/978-3-540-85363-3_14",

language = "English (US)",

isbn = "3540853626",

series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",

pages = "165--178",

booktitle = "Approximation, Randomization and Combinatorial Optimization",

note = "11th International Workshop on Approximation Algorithms for Combinatorial Optimization Problems, APPROX 2008 and 12th International Workshop on Randomization and Computation, RANDOM 2008 ; Conference date: 25-08-2008 Through 27-08-2008",

}