% Conference paper: "Supporting Data Compression in PnetCDF" (IEEE Big Data 2021).
% - The opaque hexadecimal citation key is kept unchanged so existing \cite commands keep working.
% - {PnetCDF} and {IEEE} are brace-protected so sentence-casing styles keep their capitalisation.
% - Author/editor names use the "Last, First" form; braced given names ({Wei Keng}, {Xiaohua Tony})
%   stay a single unit when a style abbreviates them.
% - `series` is intentionally omitted: it previously just repeated `booktitle`, which made styles
%   that print both fields show the proceedings title twice.
% - `note` carries the funding statement, copyright line and conference dates verbatim; styles that
%   print `note` will include all of it in the bibliography.
% - NOTE(review): `address` holds a country rather than a publisher city -- confirm the intended location.
@inproceedings{88aafaf28a614a558169ca8b4f5075ef,
  author    = {Hou, Kaiyuan and Kang, Qiao and Lee, Sunwoo and Agrawal, Ankit and Choudhary, Alok and Liao, {Wei Keng}},
  title     = {Supporting Data Compression in {PnetCDF}},
  booktitle = {Proceedings - 2021 {IEEE} International Conference on Big Data, Big Data 2021},
  editor    = {Chen, Yixin and Ludwig, Heiko and Tu, Yicheng and Fayyad, Usama and Zhu, Xingquan and Hu, {Xiaohua Tony} and Byna, Suren and Liu, Xiong and Zhang, Jianping and Pan, Shirui and Papalexakis, Vagelis and Wang, Jianwu and Cuzzocrea, Alfredo and Ordonez, Carlos},
  pages     = {86--97},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2021},
  doi       = {10.1109/BigData52589.2021.9671998},
  language  = {English (US)},
  keywords  = {Chunked Storage Layout, Compression, I/O Aggregation, NetCDF},
  abstract  = {Recently, the dramatic increase of the data amounts drives up the demand for data compression among HPC applications. Although many file systems and I/O middlewares have incorporated compression features, few high-level parallel I/O libraries support data compression due to the challenges of achieving scalable performance on HPC systems. This paper presents the design and implementation of the variable compression feature in the Parallel NetCDF library. Our design employs the same concept of chunking used by the HDF5 library, but we focus on enabling I/O aggregation across multiple requests to address the challenges on performance and scalability. We evaluate our solution using the I/O kernel of real-world scientific applications and analyze the impacts of data compression on parallel I/O performance. Our result suggests that handling multiple requests at once can significantly improve the parallel I/O performance on chunked and compressed data.},
  note      = {Funding Information: This material is based upon work supported by the U.S. Department of Energy, Office of Science, Office of Advanced Scientific Computing Research, Scientific Discovery through Advanced Computing (SciDAC) program under Award Numbers DE-SC0021399 and DE-SC0019358. This work is partially supported by the National Institute of Standards and Technology award number 70NANB19H005. This research used resources of the National Energy Research Scientific Computing Center (NERSC), a U.S. Department of Energy Office of Science User Facility located at Lawrence Berkeley National Laboratory, operated under Contract No. DE-AC02-05CH11231. Publisher Copyright: {\textcopyright} 2021 IEEE.; 2021 IEEE International Conference on Big Data, Big Data 2021 ; Conference date: 15-12-2021 Through 18-12-2021},
}