% Journal article record. Funding and copyright text is stored in the non-printing
% annote field so that it does not render inside the formatted reference.
@article{69a20968f7214afba8b1fafbd9951706,
  author    = {Singer, Jedediah M. and Novotney, Scott and Strickland, Devin and Haddox, Hugh K. and Leiby, Nicholas and Rocklin, Gabriel J. and Chow, Cameron M. and Roy, Anindya and Bera, Asim K. and Motta, Francis C. and Cao, Longxing and Strauch, Eva Maria and Chidyausiku, Tamuka M. and Ford, Alex and Ho, Ethan and Zaitzeff, Alexander and Mackenzie, Craig O. and Eramian, Hamed and Di Maio, Frank and Grigoryan, Gevorg and Vaughn, Matthew and Stewart, Lance J. and Baker, David and Klavins, Eric},
  title     = {Large-scale design and refinement of stable proteins using sequence-only models},
  journal   = {PLOS ONE},
  volume    = {17},
  number    = {3},
  pages     = {e0265020},
  year      = {2022},
  month     = mar,
  publisher = {Public Library of Science},
  issn      = {1932-6203},
  doi       = {10.1371/journal.pone.0265020},
  language  = {English (US)},
  abstract  = {Engineered proteins generally must possess a stable structure in order to achieve their designed function. Stable designs, however, are astronomically rare within the space of all possible amino acid sequences. As a consequence, many designs must be tested computationally and experimentally in order to find stable ones, which is expensive in terms of time and resources. Here we use a high-throughput, low-fidelity assay to experimentally evaluate the stability of approximately 200,000 novel proteins. These include a wide range of sequence perturbations, providing a baseline for future work in the field. We build a neural network model that predicts protein stability given only sequences of amino acids, and compare its performance to the assayed values. We also report another network model that is able to generate the amino acid sequences of novel stable proteins given requested secondary sequences. Finally, we show that the predictive model---despite weaknesses including a noisy data set---can be used to substantially increase the stability of both expert-designed and model-generated proteins.},
  annote    = {Funding Information: This material is based upon work supported by the Defense Advanced Research Projects Agency (DARPA) and the Air Force Research Laboratory under Contract No. FA8750-17-C-0231 (and related contracts by SD2 Publication Consortium Members). The specified contract number applies to JMS, SN, NL, AZ, and HE, while related contracts under the same program pertain to other authors. We thank the staff at Northeastern Collaborative Access Team (NECAT) at Advanced Photon Source for the beamtime. Representatives of DARPA---the funders---asked interested scientific questions that may have provided ideas for study design and data analysis, but played no other role in study design or data analysis. They had no role in data collection or preparation of the manuscript. They encouraged publication after the decision to publish was made by the authors. Publisher Copyright: {\textcopyright} 2022 Singer et al.},
}