% Journal article: American Journal of Human Genetics, vol. 105, no. 5 (2019).
% - "Mendelian" is brace-protected in the title so that sentence-casing styles
%   keep its capital letter.
% - Given names are left unbraced so that abbreviating styles emit every
%   initial; the consortium name and the two-word surname stay braced so each
%   is parsed as one indivisible surname.
% - The funding/acknowledgement/copyright text lives in `annote` (ignored by
%   the standard styles) instead of `note` (printed verbatim in the reference
%   list).
% NOTE(review): the funding text credits "J.R.L.", which matches no author
%   listed here -- the author list may be truncated; verify against the DOI.
@article{7addc8368f77407bb406648ec09ede3d,
  author    = {{Task Force for Neonatal Genomics}
               and Hansen, Adam W.
               and Murugan, Mullai
               and Li, He
               and Khayat, Michael M.
               and Wang, Liwen
               and Rosenfeld, Jill
               and Andrews, B. Kim
               and Jhangiani, Shalini N.
               and {Coban Akdemir}, Zeynep H.
               and Sedlazeck, Fritz J.
               and Ashley-Koch, Allison E.
               and Liu, Pengfei
               and Muzny, Donna M.
               and Allori, Alexander
               and Angrist, Misha
               and Ashley, Patricia
               and Bidegain, Margarita
               and Boyd, Brita
               and Chambers, Eileen
               and Cope, Heidi
               and Cotten, C. Michael
               and Curington, Theresa
               and Davis, Erica E.
               and Ellestad, Sarah
               and Fisher, Kimberley
               and French, Amanda
               and Gallentine, William
               and Goldberg, Ronald
               and Hill, Kevin
               and Kansagra, Sujay
               and Katsanis, Nicholas
               and Katsanis, Sara
               and Kurtzberg, Joanne
               and Marcus, Jeffrey
               and McDonald, Marie
               and Mikati, Mohammed
               and Miller, Stephen
               and Murtha, Amy
               and Perilla, Yezmin
               and Pizoli, Carolyn
               and Purves, Todd
               and Ross, Sherry
               and Sadeghpour, Azita
               and Smith, Edward
               and Wiener, John
               and Sabo, Aniko
               and Posey, Jennifer E.
               and Yang, Yaping
               and Wangler, Michael F.
               and Eng, Christine M.},
  title     = {A Genocentric Approach to Discovery of {Mendelian} Disorders},
  journal   = {American Journal of Human Genetics},
  year      = {2019},
  month     = nov,
  day       = {7},
  volume    = {105},
  number    = {5},
  pages     = {974--986},
  doi       = {10.1016/j.ajhg.2019.09.027},
  issn      = {0002-9297},
  publisher = {Cell Press},
  language  = {English (US)},
  keywords  = {HARLEE, Hadoop, Mendelian disease, big data, clan genomics, data lake, developmental disorder, genotype-first, ultra-rare, whole-exome sequencing},
  abstract  = {The advent of inexpensive, clinical exome sequencing (ES) has led to the accumulation of genetic data from thousands of samples from individuals affected with a wide range of diseases, but for whom the underlying genetic and molecular etiology of their clinical phenotype remains unknown. In many cases, detailed phenotypes are unavailable or poorly recorded and there is little family history to guide study. To accelerate discovery, we integrated ES data from 18,696 individuals referred for suspected Mendelian disease, together with relatives, in an Apache Hadoop data lake (Hadoop Architecture Lake of Exomes [HARLEE]) and implemented a genocentric analysis that rapidly identified 154 genes harboring variants suspected to cause Mendelian disorders. The approach did not rely on case-specific phenotypic classifications but was driven by optimization of gene- and variant-level filter parameters utilizing historical Mendelian disease-gene association discovery data. Variants in 19 of the 154 candidate genes were subsequently reported as causative of a Mendelian trait and additional data support the association of all other candidate genes with disease endpoints.},
  annote    = {Funding Information: This work was supported in part by grants UM1 HG008898 from the National Human Genome Research Institute (NHGRI) to the Baylor College of Medicine Center for Common Disease Genetics; UM1 HG006542 from the NHGRI/National Heart, Lung, and Blood Institute (NHLBI) to the Baylor Hopkins Center for Mendelian Genomics; R01 NS058529 and R35 NS105078 (J.R.L.) from the National Institute of Neurological Disorders and Stroke (NINDS); and P50 DK096415 (N.K.) from the National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK). This work was also supported in part by the Baylor College of Medicine President{\textquoteright}s Circle Precision Medicine/Population Health Initiative. A.W.H. was supported in part by NIH T32 GM08307-26 and The Cullen Foundation. J.E.P. was supported by NHGRI K08 HG008986. We thank Huda Y. Zoghbi and Joshua M. Shulman for their insight and feedback as related to genocentric and phenocentric studies of human disease. We thank Jeremy Easton-Marks, Simon White, Joshua Traynelis, Piyushkumar Panchel, and Brian Palazzo for assistance with data architecture, data wrangling, and systems administration. We thank Stephen Wilson for sharing archived OMIM database downloads. Publisher Copyright: {\textcopyright} 2019 American Society of Human Genetics},
}