Search Google Scholar | Search PubMed
Park, Helen; Joachimiak, Marcin P.; Jungbluth, Sean P.; Yang, Ziming; Riehl, William J.; Canon, R. Shane; Arkin, Adam P.; Dehal, Paramvir S.
A bacterial sensor taxonomy across earth ecosystems for machine learning applications Journal Article
In: mSystems, 2023, ISSN: 2379-5077.
Abstract | Links | BibTeX | Tags: Behavior and Systematics, Biochemistry, Computer Science Applications, Ecology, Evolution, Genetics, kbase, Microbiology, Modeling and Simulation, Molecular Biology, Physiology
@article{Park2023b,
  author     = {Park, Helen and Joachimiak, Marcin P. and Jungbluth, Sean P. and Yang, Ziming and Riehl, William J. and Canon, R. Shane and Arkin, Adam P. and Dehal, Paramvir S.},
  title      = {A bacterial sensor taxonomy across earth ecosystems for machine learning applications},
  editor     = {Momeni, Babak},
  journal    = {mSystems},
  publisher  = {American Society for Microbiology},
  year       = {2023},
  date       = {2023-12-11},
  urldate    = {2023-12-11},
  issn       = {2379-5077},
  doi        = {10.1128/msystems.00026-23},
  abstract   = {Microbial communities have evolved to colonize all ecosystems of the planet, from the deep sea to the human gut. Microbes survive by sensing, responding, and adapting to immediate environmental cues. This process is driven by signal transduction proteins such as histidine kinases, which use their sensing domains to bind or otherwise detect environmental cues and “transduce” signals to adjust internal processes. We hypothesized that an ecosystem’s unique stimuli leave a sensor “fingerprint,” able to identify and shed insight on ecosystem conditions. To test this, we collected 20,712 publicly available metagenomes from Host-associated, Environmental, and Engineered ecosystems across the globe. We extracted and clustered the collection’s nearly 18M unique sensory domains into 113,712 similar groupings with MMseqs2. We built gradient-boosted decision tree machine learning models and found we could classify the ecosystem type (accuracy: 87%) and predict the levels of different physical parameters (R2 score: 83%) using the sensor cluster abundance as features. Feature importance enables identification of the most predictive sensors to differentiate between ecosystems which can lead to mechanistic interpretations if the sensor domains are well annotated. To demonstrate this, a machine learning model was trained to predict patient’s disease state and used to identify domains related to oxygen sensing present in a healthy gut but missing in patients with abnormal conditions. Moreover, since 98.7% of identified sensor domains are uncharacterized, importance ranking can be used to prioritize sensors to determine what ecosystem function they may be sensing. Furthermore, these new predictive sensors can function as targets for novel sensor engineering with applications in biotechnology, ecosystem maintenance, and medicine.

    IMPORTANCE: Microbes infect, colonize, and proliferate due to their ability to sense and respond quickly to their surroundings. In this research, we extract the sensory proteins from a diverse range of environmental, engineered, and host-associated metagenomes. We trained machine learning classifiers using sensors as features such that it is possible to predict the ecosystem for a metagenome from its sensor profile. We use the optimized model’s feature importance to identify the most impactful and predictive sensors in different environments. We next use the sensor profile from human gut metagenomes to classify their disease states and explore which sensors can explain differences between diseases. The sensors most predictive of environmental labels here, most of which correspond to uncharacterized proteins, are a useful starting point for the discovery of important environment signals and the development of possible diagnostic interventions.},
  keywords   = {Behavior and Systematics, Biochemistry, Computer Science Applications, Ecology, Evolution, Genetics, kbase, Microbiology, Modeling and Simulation, Molecular Biology, Physiology},
  pubstate   = {published},
  tppubtype  = {article}
}
Microbial communities have evolved to colonize all ecosystems of the planet, from the deep sea to the human gut. Microbes survive by sensing, responding, and adapting to immediate environmental cues. This process is driven by signal transduction proteins such as histidine kinases, which use their sensing domains to bind or otherwise detect environmental cues and “transduce” signals to adjust internal processes. We hypothesized that an ecosystem’s unique stimuli leave a sensor “fingerprint,” able to identify and shed insight on ecosystem conditions. To test this, we collected 20,712 publicly available metagenomes from Host-associated, Environmental, and Engineered ecosystems across the globe. We extracted and clustered the collection’s nearly 18M unique sensory domains into 113,712 similar groupings with MMseqs2. We built gradient-boosted decision tree machine learning models and found we could classify the ecosystem type (accuracy: 87%) and predict the levels of different physical parameters (R2 score: 83%) using the sensor cluster abundance as features. Feature importance enables identification of the most predictive sensors to differentiate between ecosystems which can lead to mechanistic interpretations if the sensor domains are well annotated. To demonstrate this, a machine learning model was trained to predict patient’s disease state and used to identify domains related to oxygen sensing present in a healthy gut but missing in patients with abnormal conditions. Moreover, since 98.7% of identified sensor domains are uncharacterized, importance ranking can be used to prioritize sensors to determine what ecosystem function they may be sensing. Furthermore, these new predictive sensors can function as targets for novel sensor engineering with applications in biotechnology, ecosystem maintenance, and medicine.
IMPORTANCE
Microbes infect, colonize, and proliferate due to their ability to sense and respond quickly to their surroundings. In this research, we extract the sensory proteins from a diverse range of environmental, engineered, and host-associated metagenomes. We trained machine learning classifiers using sensors as features such that it is possible to predict the ecosystem for a metagenome from its sensor profile. We use the optimized model’s feature importance to identify the most impactful and predictive sensors in different environments. We next use the sensor profile from human gut metagenomes to classify their disease states and explore which sensors can explain differences between diseases. The sensors most predictive of environmental labels here, most of which correspond to uncharacterized proteins, are a useful starting point for the discovery of important environment signals and the development of possible diagnostic interventions.
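Seaver, Samuel M. D.; Liu, Filipe; Zhang, Qizhi; Jeffryes, James; Faria, José P.; Edirisinghe, Janaka N.; Mundy, Michael; Chia, Nicholas; Noor, Elad; Beber, Moritz E.; Best, Aaron A.; DeJongh, Matthew; Kimbrel, Jeffrey A.; D'haeseleer, Patrik; Pearson, Erik; Canon, Shane; Wood-Charlson, Elisha M.; Cottingham, Robert W.; Arkin, Adam P.; Henry, Christopher S.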
The ModelSEED Database for the integration of metabolic annotations and the reconstruction, comparison, and analysis of metabolic models for plants, fungi, and microbes Journal Article
In: 2020.
@article{seaver2020modelseed,
  author     = {Seaver, Samuel M. D. and Liu, Filipe and Zhang, Qizhi and Jeffryes, James and Faria, José P. and Edirisinghe, Janaka N. and Mundy, Michael and Chia, Nicholas and Noor, Elad and Beber, Moritz E. and Best, Aaron A. and DeJongh, Matthew and Kimbrel, Jeffrey A. and D'haeseleer, Patrik and Pearson, Erik and Canon, Shane and Wood-Charlson, Elisha M. and Cottingham, Robert W. and Arkin, Adam P. and Henry, Christopher S.},
  title      = {The {ModelSEED} Database for the integration of metabolic annotations and the reconstruction, comparison, and analysis of metabolic models for plants, fungi, and microbes},
  year       = {2020},
  date       = {2020-05-12},
  keywords   = {kbase},
  pubstate   = {published},
  tppubtype  = {article}
}
KBase: The United States Department of Energy Systems Biology Knowledgebase Journal Article
In: Nat. Biotechnol., vol. 36, no. 7, pp. 566–569, 2018.
@article{pmid29979655,
  title      = {{KBase}: The {United States Department of Energy Systems Biology Knowledgebase}},
  journal    = {Nat. Biotechnol.},
  year       = {2018},
  date       = {2018-01-01},
  volume     = {36},
  number     = {7},
  pages      = {566--569},
  keywords   = {kbase},
  pubstate   = {published},
  tppubtype  = {article}
}