% Xie, Palsetia, Trajcevski, Agrawal, Choudhary: SILVERBACK, conference paper
% in the ICDE 2014 proceedings, pp. 1072--1083.
% Acronyms are brace-protected so sentence-casing styles keep their capitals.
% NOTE(review): the address field holds a country, not the publisher's city;
% confirm the intended publisher location and correct it.
@inproceedings{6dcf18ccc383468894fef634938c12b5,
  author    = {Xie, Yusheng and Palsetia, Diana and Trajcevski, Goce and Agrawal, Ankit and Choudhary, Alok},
  title     = {{SILVERBACK}: Scalable association mining for temporal data in columnar probabilistic databases},
  booktitle = {2014 {IEEE} 30th International Conference on Data Engineering, {ICDE} 2014},
  series    = {Proceedings - International Conference on Data Engineering},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2014},
  pages     = {1072--1083},
  doi       = {10.1109/ICDE.2014.6816724},
  isbn      = {9781479925544},
  language  = {English (US)},
  note      = {30th IEEE International Conference on Data Engineering, ICDE 2014 ; Conference date: 31-03-2014 Through 04-04-2014},
  abstract  = {We address the problem of large scale probabilistic association rule mining and consider the trade-offs between accuracy of the mining results and quest of scalability on modest hardware infrastructure. We demonstrate how extensions and adaptations of research findings can be integrated in an industrial application, and we present the commercially deployed Silverback framework, developed at Voxsup Inc. Silverback tackles the storage efficiency problem by proposing a probabilistic columnar infrastructure and using Bloom filters and reservoir sampling techniques. In addition, a probabilistic pruning technique has been introduced based on Apriori for mining frequent item-sets. The proposed target-driven technique yields a significant reduction on the size of the frequent item-set candidates. We present extensive experimental evaluations which demonstrate the benefits of a context-aware incorporation of infrastructure limitations into corresponding research techniques. The experiments indicate that, when compared to the traditional Hadoop-based approach for improving scalability by adding more hosts, Silverback - which has been commercially deployed and developed at Voxsup Inc. since May 2011 - has much better run-time performance with negligible accuracy sacrifices.},
}