Source code for reconstruction.ecoli.dataclasses.process.metabolism

"""
SimulationData for metabolism process

TODO:

- improved estimate of ILE/LEU abundance or some external data point
- implement L1-norm minimization for AA concentrations
- find concentration for PI[c]
- add (d)NTP byproduct concentrations
"""
# mypy: disable-error-code=attr-defined

from copy import copy
import itertools
import re
from typing import (
    Any,
    Callable,
    cast,
    Iterable,
    Optional,
    Set,
    Tuple,
    Union,
    TYPE_CHECKING,
)
from unum import Unum

from numba import njit
import numpy as np
import numpy.typing as npt
import sympy as sp
from sympy.parsing.sympy_parser import parse_expr

from ecoli.library.schema import bulk_name_to_idx, counts
from reconstruction.ecoli.dataclasses.getter_functions import (
    UNDEFINED_COMPARTMENT_IDS_TO_ABBREVS,
)
from reconstruction.ecoli.knowledge_base_raw import KnowledgeBaseEcoli

if TYPE_CHECKING:
    from reconstruction.ecoli.simulation_data import SimulationDataEcoli
from reconstruction.ecoli.dataclasses.constants import Constants
from wholecell.utils import units

KINETIC_CONSTRAINT_CONC_UNITS = units.umol / units.L
K_CAT_UNITS = 1 / units.s
"""Units for k :sub:`cat` values"""
METABOLITE_CONCENTRATION_UNITS = units.mol / units.L
"""Units for metabolite concentrations"""
DRY_MASS_UNITS = units.fg
"""Units for dry mass"""

USE_ALL_CONSTRAINTS = False  # False will remove defined constraints from objective

REVERSE_TAG = " (reverse)"
REVERSE_REACTION_ID = "{{}}{}".format(REVERSE_TAG)
ENZYME_REACTION_ID = "{}__{}"

VERBOSE = False


[docs] class InvalidReactionDirectionError(Exception): pass
[docs] class Metabolism(object): """Metabolism Args: raw_data: Raw data object sim_data: Simulation data object Attributes: solver (str): solver ID, should match a value in modular_fba.py, set by :py:meth:`~._set_solver_values` kinetic_objective_weight (float): weighting for the kinetic objective, 1-weighting for the homeostatic objective, set by :py:meth:`~._set_solver_values` kinetic_objective_weight_in_range (float): weighting for deviations from the kinetic target within min and max ranges, set by :py:meth:`~._set_solver_values` secretion_penalty_coeff (float): penalty on secretion fluxes, set by :py:meth:`~._set_solver_values` metabolite_charge (dict[str, int]): mapping of metabolite IDs to charge, set by :py:meth:`~._add_metabolite_charge` concentration_updates conc_dict (dict): nutrients_to_internal_conc (dict[str, dict[str, Unum]]): kinetic_constraint_reactions: kinetic_constraint_enzymes: kinetic_constraint_substrates: _kcats: _saturations: _enzymes: constraint_is_kcat_only: _compiled_enzymes: _compiled_saturation: reaction_stoich: maintenance_reaction: reaction_catalysts: catalyst_ids: reactions_with_catalyst: catalysis_matrix_I: catalysis_matrix_J: catalysis_matrix_V: use_all_constraints: constraints_to_disable: base_reaction_ids: reaction_id_to_base_reaction_id: amino_acid_export_kms: transport_reactions (list[str]): transport reaction IDs in the metabolic network (includes reverse reactions and reactions with kinetic constraints), set by :py:meth:`~._build_transport_reactions` aa_synthesis_pathways (dict[str, dict]): data for allosteric inhibition of amino acid pathways indexed by amino acid ID with location tag and nested dictionary with the following keys:: {'enzymes' (str): limiting/regulated enzyme ID in synthesis pathway with location tag, 'kcat_data' (units.Unum): kcat associated with enzyme reaction with units of 1/time, 'ki' (Tuple[units.Unum, units.Unum]]): lower and upper limits of KI associated with enzyme reaction with units of mol/volume} Set by :py:meth:`~._build_amino_acid_pathways` KI_aa_synthesis (numpy.ndarray[float]): KI for each AA for synthesis portion of supply (in units of :py:data:`~.METABOLITE_CONCENTRATION_UNITS`), set by :py:meth:`~.set_phenomological_supply_constants` KM_aa_export (numpy.ndarray[float]): KM for each AA for export portion of supply (in units of :py:data:`~.METABOLITE_CONCENTRATION_UNITS`), set by :py:meth:`~.set_phenomological_supply_constants` fraction_supply_rate (float): fraction of AA supply that comes from a base synthesis rate, set by :py:meth:`~.set_phenomological_supply_constants` fraction_import_rate (numpy.ndarray[float]): fraction of AA supply that comes from AA import if nutrients are present, set by :py:meth:`~.set_phenomological_supply_constants` ppgpp_synthesis_reaction (str): reaction ID for ppGpp synthesis (catalyzed by RelA and SpoT), set by :py:meth:`~._build_ppgpp_reactions` ppgpp_degradation_reaction (str): reaction ID for ppGpp degradation (catalyzed by SpoT), set by :py:meth:`~._build_ppgpp_reactions` ppgpp_reaction_names (list[str]): names of reaction involved in ppGpp, set by :py:meth:`~._build_ppgpp_reactions` ppgpp_reaction_metabolites (list[str]): names of metabolites in ppGpp reactions, set by :py:meth:`~._build_ppgpp_reactions` ppgpp_reaction_stoich (numpy.ndarray[int]): 2D array with metabolites on rows and reactions on columns containing the stoichiometric coefficient, set by :py:meth:`~._build_ppgpp_reactions` aa_to_exporters (dict[str, list]): dictonary that maps aa to transporters involved in export reactions. Set by :py:meth:`~.set_mechanistic_export_constants`. aa_to_exporters_matrix (numpy.ndarray[int]): correlation matrix. Columns correspond to exporting enzymes and rows to amino acids. Set by :py:meth:`~.set_mechanistic_export_constants`. aa_exporter_names (numpy.ndarray[str]): names of all exporters. Set by :py:meth:`~.set_mechanistic_export_constants`. aa_export_kms (numpy.ndarray[float]): kms corresponding to generic transport/enzyme reactions for each AA in concentration units. Set by :py:meth:`~.set_mechanistic_export_constants`. export_kcats_per_aa (numpy.ndarray[float]): kcats corresponding to generic export reactions for each AA. Units in counts/second. Set by :py:meth:`~.set_mechanistic_export_constants` and :py:meth:`~.set_mechanistic_export_constants`. aa_to_importers (dict[str, list]): dictonary that maps aa to transporters involved in import reactions. Set by :py:meth:`~.set_mechanistic_uptake_constants`. aa_to_importers_matrix (numpy.ndarray[int]): correlation matrix. Columns correspond to importing enzymes and rows to amino acids. Set by :py:meth:`~.set_mechanistic_export_constants`. aa_importer_names (numpy.ndarray[str]): names of all importers. Set by :py:meth:`~.set_mechanistic_export_constants`. import_kcats_per_aa (numpy.ndarray[float]): kcats corresponding to generic import reactions for each AA. Units in counts/second. Set by :py:meth:`~.set_mechanistic_export_constants`. aa_enzymes (numpy.ndarray[str]): enzyme ID with location tag for each enzyme that can catalyze an amino acid pathway with :py:attr:`~.enzyme_to_amino_acid` mapping these to each amino acid. Set by :py:meth:`~.set_mechanistic_supply_constants`. aa_kcats_fwd (numpy.ndarray[float]): forward kcat value for each synthesis pathway in units of :py:data:`~.K_CAT_UNITS`, ordered by amino acid molecule group. Set by :py:meth:`~.set_mechanistic_supply_constants`. aa_kcats_rev (numpy.ndarray[float]): reverse kcat value for each synthesis pathway in units of :py:data:`~.K_CAT_UNITS`, ordered by amino acid molecule group. Set by :py:meth:`~.set_mechanistic_supply_constants`. aa_kis (numpy.ndarray[float]): KI value for each synthesis pathway in units of :py:data:`~.METABOLITE_CONCENTRATION_UNITS`, ordered by amino acid molecule group. Will be inf if there is no inhibitory control. Set by :py:meth:`~.set_mechanistic_supply_constants`. aa_upstream_kms (list[list[float]]): KM value associated with the amino acid that feeds into each synthesis pathway in units of :py:data:`~.METABOLITE_CONCENTRATION_UNITS`, ordered by amino acid molecule group. Will be 0 if there is no upstream amino acid considered. Set by :py:meth:`~.set_mechanistic_supply_constants`. aa_reverse_kms (numpy.ndarray[float]): KM value associated with the amino acid in each synthesis pathway in units of :py:data:`~.METABOLITE_CONCENTRATION_UNITS`, ordered by amino acid molecule group. Will be inf if the synthesis pathway is not reversible. Set by :py:meth:`~.set_mechanistic_supply_constants`. aa_upstream_mapping (numpy.ndarray[int]): index of the upstream amino acid that feeds into each synthesis pathway, ordered by amino acid molecule group. Set by :py:meth:`~.set_mechanistic_supply_constants`. enzyme_to_amino_acid (numpy.ndarray[float]): relationship mapping from aa_enzymes to amino acids (n enzymes, m amino acids). Will contain a 1 if the enzyme associated with the row can catalyze the pathway for the amino acid associated with the column. Set by :py:meth:`~.set_mechanistic_supply_constants`. aa_forward_stoich (numpy.ndarray[float]): relationship mapping from upstream amino acids to downstream amino acids (n upstream, m downstream). Will contain a -1 if the amino acid associated with the row is required for synthesis of the amino acid associated with the column. Set by :py:meth:`~.set_mechanistic_supply_constants`. aa_reverse_stoich (numpy.ndarray[float]): relationship mapping from upstream amino acids to downstream amino acids (n downstream, m upstream). Will contain a -1 if the amino acid associated with the row is produced through a reverse reaction from the amino acid associated with the column. Set by :py:meth:`~.set_mechanistic_supply_constants`. aa_import_kis (numpy.ndarray[float]): inhibition constants for amino acid import based on the internal amino acid concentration specific_import_rates (numpy.ndarray[float]): import rates expected in rich media conditions for each amino acid normalized by dry cell mass in units of :py:data:`~.K_CAT_UNITS` / :py:data:`~.DRY_MASS_UNITS`, ordered by amino acid molecule group. Set by :py:meth:`~.set_mechanistic_supply_constants`. max_specific_import_rates (numpy.ndarray[float]): max import rates for each amino acid without including internal concentration inhibition normalized by dry cell mass in units of :py:data:`~.K_CAT_UNITS` / :py:data:`~.DRY_MASS_UNITS`, ordered by amino acid molecule group. Set by :py:meth:`~.set_mechanistic_supply_constants`. """ def __init__(self, raw_data: KnowledgeBaseEcoli, sim_data: "SimulationDataEcoli"): self._set_solver_values(sim_data.constants) self._build_biomass(raw_data, sim_data) self._build_metabolism(raw_data, sim_data) self._build_ppgpp_reactions(raw_data, sim_data) self._build_transport_reactions(raw_data, sim_data) self._build_amino_acid_pathways(raw_data, sim_data) self._add_metabolite_charge(raw_data)
[docs] def _add_metabolite_charge(self, raw_data: KnowledgeBaseEcoli): """ Compiles all metabolite charges. Args: raw_data: Raw data object Attributes set: - :py:attr:`~.metabolite_charge` """ self.metabolite_charge = {} for met in raw_data.metabolites: self.metabolite_charge[met["id"]] = met["molecular_charge"]
[docs] def _set_solver_values(self, constants: Constants): """ Sets values to be used in the FBA solver. Attributes set: - :py:attr:`~.solver` - :py:attr:`~.kinetic_objective_weight` - :py:attr:`~.secretion_penalty_coeff` """ self.solver = "glpk-linear" if "linear" in self.solver: self.kinetic_objective_weight = ( constants.metabolism_kinetic_objective_weight_linear ) else: self.kinetic_objective_weight = ( constants.metabolism_kinetic_objective_weight_quadratic ) self.kinetic_objective_weight_in_range = ( constants.metabolism_kinetic_objective_weight_in_range ) self.secretion_penalty_coeff = constants.secretion_penalty_coeff
[docs] def _build_biomass( self, raw_data: KnowledgeBaseEcoli, sim_data: "SimulationDataEcoli" ): """ Calculates metabolite concentration targets. Args: raw_data: Raw data object sim_data: Simulation data object Attributes set: - :py:attr:`~.concentration_updates` - :py:attr:`~.conc_dict` - :py:attr:`~.nutrients_to_internal_conc` """ wildtypeIDs = set(entry["molecule id"] for entry in raw_data.biomass) # Create vector of metabolite target concentrations # Since the data only covers certain metabolites, we need to rationally # expand the dataset to include the other molecules in the biomass # function. # First, load in metabolites that do have concentrations, then assign # compartments according to those given in the biomass objective. Or, # if there is no compartment, assign it to the cytoplasm. concentration_sources = [ "Park Concentration", "Lempp Concentration", "Kochanowski Concentration", "Sander Concentration", ] excluded = { "Park Concentration": { "GLT", # Steady state concentration reached with tRNA charging is much lower than Park "THR", # Attenuation needs concentration to be lower to match validation data "VAL", # Synthesis pathway kcat needs concentration to be lower and closer to KI }, "Lempp Concentration": { "ATP", # TF binding does not solve with average concentration "VAL", # Synthesis pathway kcat needs concentration to be lower and closer to KI }, "Kochanowski Concentration": { "ATP", # TF binding does not solve with average concentration }, "Sander Concentration": { "GLT", # Steady state concentration reached with tRNA charging is much lower than Sander }, } metaboliteIDs = [] metaboliteConcentrations = [] wildtypeIDtoCompartment = { wildtypeID[:-3]: wildtypeID[-3:] for wildtypeID in wildtypeIDs } # this assumes biomass reaction components only exist in a single compartment for row in raw_data.metabolite_concentrations: metabolite_id = row["Metabolite"] if not sim_data.getter.is_valid_molecule(metabolite_id): if VERBOSE: print( "Metabolite concentration for unknown molecule: {}".format( metabolite_id ) ) continue # Use average of both sources # TODO (Travis): geometric mean? conc = np.nanmean( [ row[source].asNumber(METABOLITE_CONCENTRATION_UNITS) for source in concentration_sources if metabolite_id not in excluded.get(source, set()) ] ) # Check that a value was in the datasets being used if not np.isfinite(conc): if VERBOSE: print( "No concentration in active datasets for {}".format( metabolite_id ) ) continue if metabolite_id in wildtypeIDtoCompartment: metaboliteIDs.append( metabolite_id + wildtypeIDtoCompartment[metabolite_id] ) else: metaboliteIDs.append(metabolite_id + "[c]") metaboliteConcentrations.append(conc) # CYS/SEL: concentration based on other amino acids aaConcentrations = [] for aaIndex, aaID in enumerate(sim_data.amino_acid_code_to_id_ordered.values()): if aaID in metaboliteIDs: metIndex = metaboliteIDs.index(aaID) aaConcentrations.append(metaboliteConcentrations[metIndex]) aaSmallestConc = min(aaConcentrations) metaboliteIDs.append("CYS[c]") metaboliteConcentrations.append(aaSmallestConc) metaboliteIDs.append("L-SELENOCYSTEINE[c]") metaboliteConcentrations.append(aaSmallestConc) # DGTP: set to smallest of all other DNTP concentrations dntpConcentrations = [] for dntpIndex, dntpID in enumerate(sim_data.molecule_groups.dntps): if dntpID in metaboliteIDs: metIndex = metaboliteIDs.index(dntpID) dntpConcentrations.append(metaboliteConcentrations[metIndex]) dntpSmallestConc = min(dntpConcentrations) metaboliteIDs.append("DGTP[c]") metaboliteConcentrations.append(dntpSmallestConc) # H: from reported pH hydrogenConcentration = 10 ** (-sim_data.constants.pH) metaboliteIDs.append(sim_data.molecule_ids.proton) metaboliteConcentrations.append(hydrogenConcentration) # PPI ppi_conc = sim_data.constants.ppi_concentration.asNumber( METABOLITE_CONCENTRATION_UNITS ) metaboliteIDs.append(sim_data.molecule_ids.ppi) metaboliteConcentrations.append(ppi_conc) metaboliteIDs.append("Pi[c]") metaboliteConcentrations.append(ppi_conc) # include metabolites that are part of biomass d = sim_data.mass.getBiomassAsConcentrations(sim_data.doubling_time) for key, value in d.items(): metaboliteIDs.append(key) metaboliteConcentrations.append( value.asNumber(METABOLITE_CONCENTRATION_UNITS) ) # Load relative metabolite changes relative_changes: dict[str, dict[str, float]] = {} for row in raw_data.relative_metabolite_concentrations: met = row["Metabolite"] met_id = met + wildtypeIDtoCompartment.get(met, "[c]") # AA concentrations are determined through charging if met_id in sim_data.molecule_groups.amino_acids: continue # Get relative metabolite change in each media condition for col, value in row.items(): # Skip the ID column and minimal column (only has values of 1) # or skip invalid values if col == "Metabolite" or col == "minimal" or not np.isfinite(value): continue if col not in relative_changes: relative_changes[col] = {} relative_changes[col][met_id] = value ## Add manually curated values for other media for ( media, data, ) in sim_data.adjustments.relative_metabolite_concentrations_changes.items(): if media not in relative_changes: relative_changes[media] = {} for met, change in data.items(): if met not in relative_changes[media]: relative_changes[media][met] = change # save concentrations as class variables unique_ids, counts = np.unique(metaboliteIDs, return_counts=True) if np.any(counts > 1): raise ValueError( "Multiple concentrations for metabolite(s): {}".format( ", ".join(unique_ids[counts > 1]) ) ) # TODO (Travis): only pass raw_data and sim_data and create functions to load absolute and relative concentrations conc_dict = dict( zip( metaboliteIDs, METABOLITE_CONCENTRATION_UNITS * np.array(metaboliteConcentrations), ) ) all_metabolite_ids = {met["id"] for met in raw_data.metabolites} linked_metabolites = self._build_linked_metabolites(raw_data, conc_dict) self.concentration_updates = ConcentrationUpdates( conc_dict, relative_changes, raw_data.equilibrium_reactions, sim_data.external_state.exchange_dict, all_metabolite_ids, linked_metabolites, ) self.conc_dict = self.concentration_updates.concentrations_based_on_nutrients( "minimal" ) self.nutrients_to_internal_conc = {} self.nutrients_to_internal_conc["minimal"] = self.conc_dict.copy()
[docs] def _build_linked_metabolites( self, raw_data: KnowledgeBaseEcoli, conc_dict: dict[str, Unum] ) -> dict[str, dict[str, Any]]: """ Calculates ratio between linked metabolites to keep it constant throughout a simulation. Args: raw_data: Raw data object conc_dict: Mapping of metabolite IDs to homeostatic concentrations calculated by :py:meth:`~_build_biomass` Returns: Mapping from a linked metabolite to its lead metabolite and concentration ratio to be maintained:: {'lead' (str): metabolite to link the concentration to, 'ratio' (float): ratio to multiply the lead concentration by} """ linked_metabolites = {} for row in raw_data.linked_metabolites: lead = row["Lead metabolite"] linked = row["Linked metabolite"] ratio = units.strip_empty_units(conc_dict[linked] / conc_dict[lead]) linked_metabolites[linked] = {"lead": lead, "ratio": ratio} return linked_metabolites
[docs] def _build_metabolism( self, raw_data: KnowledgeBaseEcoli, sim_data: "SimulationDataEcoli" ): """ Build the matrices/vectors for metabolism (FBA) Reads in and stores reaction and kinetic constraint information Args: raw_data: Raw data object sim_data: Simulation data object Attributes set: - :py:attr:`~.kinetic_constraint_reactions` - :py:attr:`~.kinetic_constraint_enzymes` - :py:attr:`~.kinetic_constraint_substrates` - :py:attr:`~._kcats` - :py:attr:`~._saturations` - :py:attr:`~._enzymes` - :py:attr:`~.constraint_is_kcat_only` - :py:attr:`~._compiled_enzymes` - :py:attr:`~._compiled_saturation` - :py:attr:`~.reaction_stoich` - :py:attr:`~.maintenance_reaction` - :py:attr:`~.reaction_catalysts` - :py:attr:`~.catalyst_ids` - :py:attr:`~.reactions_with_catalyst` - :py:attr:`~.catalysis_matrix_I` - :py:attr:`~.catalysis_matrix_J` - :py:attr:`~.catalysis_matrix_V` - :py:attr:`~.use_all_constraints` - :py:attr:`~.constraints_to_disable` - :py:attr:`~.base_reaction_ids` - :py:attr:`~.reaction_id_to_base_reaction_id` - :py:attr:`~.amino_acid_export_kms` """ ( base_rxn_ids, reaction_stoich, reversible_reactions, catalysts, rxn_id_to_base_rxn_id, ) = self.extract_reactions(raw_data, sim_data) # Load kinetic reaction constraints from raw_data known_metabolites = set(self.conc_dict) raw_constraints = self.extract_kinetic_constraints( raw_data, sim_data, stoich=reaction_stoich, catalysts=catalysts, known_metabolites=known_metabolites, ) # Make modifications from kinetics data ( constraints, reaction_stoich, catalysts, reversible_reactions, rxn_id_to_base_rxn_id, ) = self._replace_enzyme_reactions( raw_constraints, reaction_stoich, catalysts, reversible_reactions, rxn_id_to_base_rxn_id, ) # Create symbolic kinetic equations ( self.kinetic_constraint_reactions, self.kinetic_constraint_enzymes, self.kinetic_constraint_substrates, self._kcats, self._saturations, self._enzymes, self.constraint_is_kcat_only, ) = self._lambdify_constraints(constraints) self._compiled_enzymes: Optional[ Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]] ] = None self._compiled_saturation: Optional[ Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]] ] = None # TODO: move this to a sim_data analysis script if VERBOSE: print("\nSummary of included metabolism kinetics:") print( "Reactions with kinetics: {}".format( len(self.kinetic_constraint_reactions) ) ) print( "Enzymes with kinetics: {}".format(len(self.kinetic_constraint_enzymes)) ) print( "Metabolites in kinetics: {}".format( len(self.kinetic_constraint_substrates) ) ) print( "Number of kcat values: {}".format( len([k for c in constraints.values() for k in c["kcat"]]) ) ) print( "Number of saturation terms: {}".format( len([s for c in constraints.values() for s in c["saturation"]]) ) ) # Verify no substrates with unknown concentrations have been added unknown = { m for m in self.kinetic_constraint_substrates if m not in known_metabolites } if unknown: raise ValueError( "Unknown concentration for {}. Need to remove" " kinetics saturation term.".format(", ".join(unknown)) ) # Extract data reactions_with_catalyst = sorted(catalysts) catalyst_ids = sorted({c for all_cat in catalysts.values() for c in all_cat}) # Create catalysis matrix (to be used in the simulation) catalysisMatrixI = [] catalysisMatrixJ = [] catalysisMatrixV = [] for row, reaction in enumerate(reactions_with_catalyst): for catalyst in catalysts[reaction]: col = catalyst_ids.index(catalyst) catalysisMatrixI.append(row) catalysisMatrixJ.append(col) catalysisMatrixV.append(1) catalysisMatrixI = np.array(catalysisMatrixI) catalysisMatrixJ = np.array(catalysisMatrixJ) catalysisMatrixV = np.array(catalysisMatrixV) # Properties for FBA reconstruction self.reaction_stoich = reaction_stoich # TODO (ggsun): add this as a raw .tsv file self.maintenance_reaction = { "ATP[c]": -1, "WATER[c]": -1, "ADP[c]": +1, "Pi[c]": +1, "PROTON[c]": +1, } # Properties for catalysis matrix (to set hard bounds) self.reaction_catalysts = catalysts self.catalyst_ids = catalyst_ids self.reactions_with_catalyst = reactions_with_catalyst self.catalysis_matrix_I = catalysisMatrixI self.catalysis_matrix_J = catalysisMatrixJ self.catalysis_matrix_V = catalysisMatrixV # Properties for setting flux targets self.use_all_constraints = USE_ALL_CONSTRAINTS self.constraints_to_disable = [ rxn["disabled reaction"] for rxn in raw_data.disabled_kinetic_reactions ] # Properties for conversion of fluxes to those for base reaction IDs self.base_reaction_ids = base_rxn_ids self.reaction_id_to_base_reaction_id = rxn_id_to_base_rxn_id self.amino_acid_export_kms = raw_data.amino_acid_export_kms
[docs] def _build_ppgpp_reactions( self, raw_data: KnowledgeBaseEcoli, sim_data: "SimulationDataEcoli" ): """ Creates structures for ppGpp reactions for use in polypeptide_elongation. Args: raw_data: Raw data object sim_data: Simulation data object Attributes set: - :py:attr:`~.ppgpp_synthesis_reaction` - :py:attr:`~.ppgpp_degradation_reaction` - :py:attr:`~.ppgpp_reaction_names` - :py:attr:`~.ppgpp_reaction_metabolites` - :py:attr:`~.ppgpp_reaction_stoich` """ self.ppgpp_synthesis_reaction = "GDPPYPHOSKIN-RXN" self.ppgpp_degradation_reaction = "PPGPPSYN-RXN" self.ppgpp_reaction_names = [ self.ppgpp_synthesis_reaction, self.ppgpp_degradation_reaction, ] self.ppgpp_reaction_metabolites = [] # Indices (i: metabolite, j: reaction) and values (v: stoichiometry) # for sparse reaction matrix metabolite_indices: dict[str, int] = {} new_index = 0 rxn_i = [] rxn_j = [] rxn_v = [] # Record sparse indices in the matrix for j, rxn in enumerate(self.ppgpp_reaction_names): for met, stoich in self.reaction_stoich[rxn].items(): idx = metabolite_indices.get(met, new_index) if idx == new_index: metabolite_indices[met] = new_index self.ppgpp_reaction_metabolites.append(met) new_index += 1 rxn_i.append(idx) rxn_j.append(j) rxn_v.append(stoich) # Assemble matrix based on indices # new_index is number of metabolites, j+1 is number of reactions self.ppgpp_reaction_stoich = np.zeros((new_index, j + 1), dtype=np.int32) self.ppgpp_reaction_stoich[rxn_i, rxn_j] = rxn_v
[docs] def _build_transport_reactions( self, raw_data: KnowledgeBaseEcoli, sim_data: "SimulationDataEcoli" ): """ Creates list of transport reactions that are included in the reaction network. Args: raw_data: Raw data object sim_data: Simulation data object Attributes set: - :py:attr:`~.transport_reactions` """ transport_reactions = [ rxn_id for rxn_id, stoich in self.reaction_stoich.items() if self._is_transport_rxn(stoich) ] self.transport_reactions = transport_reactions
[docs] def _build_amino_acid_pathways( self, raw_data: KnowledgeBaseEcoli, sim_data: "SimulationDataEcoli" ): """ Creates mapping between enzymes and amino acid pathways with allosteric inhibition feedback from the amino acid. Args: raw_data: Raw data object sim_data: Simulation data object Attributes set: - :py:attr:`~.aa_synthesis_pathways` """ self.aa_synthesis_pathways = {} cytoplasm_tag = "[c]" for row in raw_data.amino_acid_pathways: data: dict[str, Any] = {} data["enzymes"] = [ e + sim_data.getter.get_compartment_tag(e) for e in row["Enzymes"] ] data["reverse enzymes"] = [ e + sim_data.getter.get_compartment_tag(e) for e in row["Reverse enzymes"] ] data["kcat_data"] = 0 / units.s if units.isnan(row["kcat"]) else row["kcat"] if units.isnan(row["KI, lower bound"]) or units.isnan( row["KI, lower bound"] ): data["ki"] = None else: data["ki"] = (row["KI, lower bound"], row["KI, upper bound"]) data["upstream"] = { k + cytoplasm_tag: v for k, v in row["Upstream amino acids"].items() } data["reverse"] = { k + cytoplasm_tag: v for k, v in row["Reverse amino acids"].items() } data["km, upstream"] = { k + cytoplasm_tag: v for k, v in row["KM, upstream"].items() } data["km, reverse"] = row["KM, reverse"] data["km, degradation"] = ( np.inf * units.mol / units.L if units.isnan(row["KM, degradation"]) else row["KM, degradation"] ) data["downstream"] = { k + cytoplasm_tag: v for k, v in row["Downstream amino acids"].items() } self.aa_synthesis_pathways[row["Amino acid"] + cytoplasm_tag] = data self.aa_synthesis_pathway_adjustments: dict[str, dict[str, float]] = {} for row in raw_data.adjustments.amino_acid_pathways: # Read data from row aa = row["Amino acid"] + cytoplasm_tag parameter = row["Parameter"] factor = row["Factor"] # Store adjustments to be used later adjustments = self.aa_synthesis_pathway_adjustments.get(aa, {}) adjustments[parameter] = factor self.aa_synthesis_pathway_adjustments[aa] = adjustments self.amino_acid_uptake_rates = {} for row in raw_data.amino_acid_uptake_rates: rates = {} rates["uptake"] = row["Uptake"] rates["LB"] = row["Uptake, LB"] rates["UB"] = row["Uptake, UB"] self.amino_acid_uptake_rates[row["Amino acid"]] = rates
[docs] def get_kinetic_constraints(self, enzymes: Unum, substrates: Unum) -> Unum: """ Allows for dynamic code generation for kinetic constraint calculation for use in Metabolism process. Inputs should be unitless but the order of magnitude should match the kinetics parameters (umol/L/s). If trying to pickle sim_data object after function has been called, _compiled_enzymes and _compiled_saturation might not be able to be pickled. See __getstate__(), __setstate__() comments on PR 111 to address. Returns np.array of floats of the kinetic constraint target for each reaction with kinetic parameters Args: enzymes: concentrations of enzymes associated with kinetic constraints (mol / volume units) substrates: concentrations of substrates associated with kinetic constraints (mol / volume units) Returns: Array of dimensions (n reactions, 3) where each row contains the min, mean and max kinetic constraints for each reaction with kinetic constraints (mol / volume / time units) """ if self._compiled_enzymes is None: self._compiled_enzymes = eval("lambda e: {}".format(self._enzymes)) if self._compiled_saturation is None: self._compiled_saturation = eval("lambda s: {}".format(self._saturations)) # Strip units from args enzs = enzymes.asNumber(KINETIC_CONSTRAINT_CONC_UNITS) subs = substrates.asNumber(KINETIC_CONSTRAINT_CONC_UNITS) capacity = np.array(self._compiled_enzymes(enzs))[:, None] * self._kcats saturation = np.array( [[min(v), sum(v) / len(v), max(v)] for v in self._compiled_saturation(subs)] ) return KINETIC_CONSTRAINT_CONC_UNITS * K_CAT_UNITS * capacity * saturation
[docs] def exchange_constraints( self, exchangeIDs, coefficient, targetUnits, media_id, unconstrained, constrained, concModificationsBasedOnCondition=None, ): """ Called during Metabolism process Returns the homeostatic objective concentrations based on the current nutrients Returns levels for external molecules available to exchange based on the current nutrients """ newObjective = self.concentration_updates.concentrations_based_on_nutrients( imports=unconstrained.union(constrained), media_id=media_id, conversion_units=targetUnits, ) if concModificationsBasedOnCondition is not None: newObjective.update(concModificationsBasedOnCondition) externalMoleculeLevels = np.zeros(len(exchangeIDs), np.float64) for index, moleculeID in enumerate(exchangeIDs): if moleculeID in unconstrained: externalMoleculeLevels[index] = np.inf elif moleculeID in constrained: externalMoleculeLevels[index] = ( constrained[moleculeID] * coefficient ).asNumber(targetUnits) else: externalMoleculeLevels[index] = 0.0 return externalMoleculeLevels, newObjective
[docs] def set_phenomological_supply_constants(self, sim_data: "SimulationDataEcoli"): """ Sets constants to determine amino acid supply during translation. Used with aa_supply_scaling() during simulations but supply can alternatively be determined mechanistically. This approach may require manually adjusting constants (fraction_supply_inhibited and fraction_supply_exported) but has less variability related to gene expression and regulation. Args: sim_data: simulation data Attributes set: - :py:attr:`~.KI_aa_synthesis` - :py:attr:`~.KM_aa_export` - :py:attr:`~.fraction_supply_rate` - :py:attr:`~.fraction_import_rate` Assumptions: - Each internal amino acid concentration in 'minimal_plus_amino_acids' media is not lower than in 'minimal' media TODO (Travis): Better handling of concentration assumption """ aa_ids = sim_data.molecule_groups.amino_acids conc = self.concentration_updates.concentrations_based_on_nutrients aa_conc_basal = np.array( [ conc(media_id="minimal")[aa].asNumber(METABOLITE_CONCENTRATION_UNITS) for aa in aa_ids ] ) aa_conc_aa_media = np.array( [ conc(media_id="minimal_plus_amino_acids")[aa].asNumber( METABOLITE_CONCENTRATION_UNITS ) for aa in aa_ids ] ) # Lower concentrations might produce strange rates (excess supply or # negative import when present externally) and constants so raise # to double check the implementation if not np.all(aa_conc_basal <= aa_conc_aa_media): aas = np.array(aa_ids)[np.where(aa_conc_basal > aa_conc_aa_media)] raise ValueError( "Check that amino acid concentrations should be lower in amino acid media for {}".format( aas ) ) f_inhibited = sim_data.constants.fraction_supply_inhibited f_exported = sim_data.constants.fraction_supply_exported # Assumed units of METABOLITE_CONCENTRATION_UNITS for KI and KM self.KI_aa_synthesis = f_inhibited * aa_conc_basal / (1 - f_inhibited) self.KM_aa_export = (1 / f_exported - 1) * aa_conc_aa_media self.fraction_supply_rate = ( 1 - f_inhibited + aa_conc_basal / (self.KM_aa_export + aa_conc_basal) ) self.fraction_import_rate = 1 - ( self.fraction_supply_rate + 1 / (1 + aa_conc_aa_media / self.KI_aa_synthesis) - f_exported )
[docs] def aa_supply_scaling( self, aa_conc: Unum, aa_present: Unum ) -> npt.NDArray[np.float64]: """ Called during polypeptide_elongation process Determine amino acid supply rate scaling based on current amino acid concentrations. Args: aa_conc: internal concentration for each amino acid (ndarray[float]) aa_present: whether each amino acid is in the external environment or not (ndarray[bool]) Returns: Scaling for the supply of each amino acid with higher supply rate if >1, lower supply rate if <1 """ aa_conc = aa_conc.asNumber(METABOLITE_CONCENTRATION_UNITS) aa_supply = self.fraction_supply_rate aa_import = aa_present * self.fraction_import_rate aa_synthesis = 1 / (1 + aa_conc / self.KI_aa_synthesis) aa_export = aa_conc / (self.KM_aa_export + aa_conc) supply_scaling = aa_supply + aa_import + aa_synthesis - aa_export return supply_scaling
[docs] def get_aa_to_transporters_mapping_data( self, sim_data: "SimulationDataEcoli", export: bool = False ) -> tuple[dict[str, list], npt.NDArray[np.float64], npt.NDArray[np.str_]]: """ Creates a dictionary that maps amino acids with their transporters. Based on this dictionary, it creates a correlation matrix with rows as AA and columns as transporters. Args: sim_data: simulation data export: if True, the parameters calculated are for mechanistic export instead of uptake Returns: 3-element tuple containing - aa_to_transporters: dictonary that maps aa to transporters involved in transport reactions - aa_to_transporters_matrix: correlation matrix. Columns correspond to transporter enzymes and rows to amino acids - aa_transporters_names: names of all transporters """ def matches_direction(direction): if export: return direction < 0 else: return direction > 0 # Mapping aminoacids to their transporters # CYS does not have any uptake reaction, so we initialize the dict with it to ensure # the presence of the 21 AAs # TODO (Santiago): Reversible reactions? aa_to_transporters: dict[str, list[str]] = {"CYS[c]": []} for reaction in self.transport_reactions: for aa in sim_data.molecule_groups.amino_acids: if aa in self.reaction_stoich[reaction] and matches_direction( self.reaction_stoich[reaction][aa] ): if aa not in aa_to_transporters: aa_to_transporters[aa] = [] aa_to_transporters[aa] += self.reaction_catalysts.get(reaction, []) aa_to_transporters = { aa: aa_to_transporters[aa] for aa in sim_data.molecule_groups.amino_acids } c = 0 transporters_to_idx = {} for aa, transporters in aa_to_transporters.items(): for transporter in transporters: if transporter not in transporters_to_idx: transporters_to_idx[transporter] = c c += 1 aa_to_transporters_matrix = [[0]] * len(aa_to_transporters) for i, trnspts in enumerate(aa_to_transporters.values()): temp = [0] * len(transporters_to_idx) for tr in trnspts: temp[transporters_to_idx[tr]] = 1 aa_to_transporters_matrix[i] = temp aa_transporters_names = list(transporters_to_idx.keys()) return ( aa_to_transporters, np.array(aa_to_transporters_matrix), np.array(aa_transporters_names), )
[docs] def set_mechanistic_export_constants( self, sim_data: "SimulationDataEcoli", cell_specs: dict[str, dict], basal_container: np.ndarray, ): """ Calls get_aa_to_transporters_mapping_data() for AA export, which calculates the total amount of export transporter counts per AA. Kcats are calculated using the same exchange rates as for uptake and transporter counts. Missing KMs are calculated based on present KMs. This is done by calculating the average factor for KMs compared to estimated concentration (av_factor = sum(KM / concentration) / n_aa_with_kms). ** KM = av_factor * concentration Args: sim_data: simulation data cell_specs: mapping from condition to calculated cell properties basal_container: average initial bulk molecule counts in the basal condition (structured Numpy array, see :ref:`bulk`) Attributes set: - :py:attr:`~.aa_to_exporters` - :py:attr:`~.aa_to_exporters_matrix` - :py:attr:`~.aa_exporter_names` - :py:attr:`~.aa_export_kms` - :py:attr:`~.export_kcats_per_aa` """ self.aa_to_exporters, self.aa_to_exporters_matrix, self.aa_exporter_names = ( self.get_aa_to_transporters_mapping_data(sim_data, export=True) ) aa_names = sim_data.molecule_groups.amino_acids aa_idx = bulk_name_to_idx(aa_names, basal_container["id"]) counts_to_molar = ( sim_data.constants.cell_density / cell_specs["basal"]["avgCellDryMassInit"] ) / sim_data.constants.n_avogadro aa_conc = { aa: counts * counts_to_molar for aa, counts in zip(aa_names, counts(basal_container, aa_idx)) } aa_with_km = {} # Calculate average factor to estimate missing KMs coeff_estimate_kms = 0.0 for export_kms in self.amino_acid_export_kms: aa_with_km[export_kms["Amino Acid"]] = export_kms["KM"] coef_per_aa = 0 for km in export_kms["KM"].values(): coef_per_aa += ( km.asUnit(METABOLITE_CONCENTRATION_UNITS) / aa_conc[export_kms["Amino Acid"]] ) coeff_estimate_kms += coef_per_aa / len(export_kms["KM"]) coeff_estimate_kms = coeff_estimate_kms / len(self.amino_acid_export_kms) # Calculate estimated KMs for each AA single_kms = {} for aa in aa_names: if aa in aa_with_km: single_kms[aa] = np.mean(list(aa_with_km[aa].values())) else: single_kms[aa] = coeff_estimate_kms * aa_conc[aa] self.aa_export_kms = np.array( [single_kms[aa].asNumber(METABOLITE_CONCENTRATION_UNITS) for aa in aa_names] )
[docs] def set_mechanistic_uptake_constants( self, sim_data: "SimulationDataEcoli", cell_specs: dict[str, dict], with_aa_container: np.ndarray, ): """ Based on the matrix calculated in get_aa_to_transporters_mapping_data(), we calculate the total amount of transporter counts per AA. Args: sim_data: simulation data cell_specs: mapping from condition to calculated cell properties with_aa_container: average initial bulk molecule counts in the ``with_aa`` condition (structured Numpy array, see :ref:`bulk`) Attributes set: - :py:attr:`~.aa_to_importers` - :py:attr:`~.aa_to_importers_matrix` - :py:attr:`~.aa_importer_names` - :py:attr:`~.import_kcats_per_aa` - :py:attr:`~.export_kcats_per_aa` TODO: - Include external amino acid concentrations and KM values """ aa_names = sim_data.molecule_groups.amino_acids aa_idx = bulk_name_to_idx(aa_names, with_aa_container["id"]) counts_to_molar = ( sim_data.constants.cell_density / cell_specs["with_aa"]["avgCellDryMassInit"] ) / sim_data.constants.n_avogadro aa_conc = counts(with_aa_container, aa_idx) * counts_to_molar.asNumber( METABOLITE_CONCENTRATION_UNITS ) exchange_rates = self.specific_import_rates * cell_specs["with_aa"][ "avgCellDryMassInit" ].asNumber(units.fg) self.aa_to_importers, self.aa_to_importers_matrix, self.aa_importer_names = ( self.get_aa_to_transporters_mapping_data(sim_data) ) aa_importer_idx = bulk_name_to_idx( self.aa_importer_names, with_aa_container["id"] ) importer_counts = counts(with_aa_container, aa_importer_idx) aa_exporter_idx = bulk_name_to_idx( self.aa_exporter_names, with_aa_container["id"] ) exporter_counts = counts(with_aa_container, aa_exporter_idx) counts_per_aa_import = self.aa_to_importers_matrix.dot(importer_counts) counts_per_aa_export = self.aa_to_exporters_matrix.dot(exporter_counts) # Solve for the two unknown kcats with the calculated net exchange rate # in rich media conditions and the assumption that import and export # rates are equal at the export KM based on how the export KM values # were curated. # Import will decrease and export will increase with higher amino acids # for stable amino acid concentrations. import_saturation_in_rich = 1 / (1 + aa_conc / self.aa_import_kis) export_saturation_in_rich = 1 / (1 + self.aa_export_kms / aa_conc) import_saturation_at_km = 1 / (1 + self.aa_export_kms / self.aa_import_kis) export_saturation_at_km = 0.5 import_capacity_at_km = counts_per_aa_import * import_saturation_at_km export_capacity_at_km = counts_per_aa_export * export_saturation_at_km with np.errstate(divide="ignore", invalid="ignore"): import_vs_export_kcat = export_capacity_at_km / import_capacity_at_km kcat_export = exchange_rates / ( import_vs_export_kcat * counts_per_aa_import * import_saturation_in_rich - counts_per_aa_export * export_saturation_in_rich ) kcat_export[~np.isfinite(kcat_export)] = 0 kcat_import = import_vs_export_kcat * kcat_export kcat_import[~np.isfinite(kcat_import)] = 0 if np.any(kcat_export < 0) or np.any(kcat_import < 0): raise ValueError( "Could not solve for positive transport kcat." " Check assumptions or amino acid concentrations compared to KMs." ) self.export_kcats_per_aa = kcat_export self.import_kcats_per_aa = kcat_import
[docs] def set_mechanistic_supply_constants( self, sim_data: "SimulationDataEcoli", cell_specs: dict[str, dict], basal_container: np.ndarray, with_aa_container: np.ndarray, ): """ Sets constants to determine amino acid supply during translation. Used with amino_acid_synthesis() and amino_acid_import() during simulations but supply can alternatively be determined phenomologically. This approach is more detailed and should better respond to environmental changes and perturbations but has more variability related to gene expression and regulation. Args: sim_data: simulation data cell_specs: mapping from condition to calculated cell properties basal_container: average initial bulk molecule counts in the basal condition (structured Numpy array, see :ref:`bulk`) with_aa_container: average initial bulk molecule counts in the ``with_aa`` condition Sets class attributes: - :py:attr:`~.aa_enzymes` - :py:attr:`~.aa_kcats_fwd` - :py:attr:`~.aa_kcats_rev` - :py:attr:`~.aa_kis` - :py:attr:`~.aa_upstream_kms` - :py:attr:`~.aa_reverse_kms` - :py:attr:`~.aa_upstream_mapping` - :py:attr:`~.enzyme_to_amino_acid` - :py:attr:`~.aa_forward_stoich` - :py:attr:`~.aa_reverse_stoich` - :py:attr:`~.aa_import_kis` - :py:attr:`~.specific_import_rates` - :py:attr:`~.max_specific_import_rates` Assumptions: - Only one reaction is limiting in an amino acid pathway (typically the first and one with KI) and the kcat for forward or reverse directions will apply to all enzymes that can catalyze that step - kcat for reverse and degradation reactions is the same (each amino acid only has reverse or degradation at this point but that could change with modifications to the amino_acid_pathways flat file) TODO: - Search for new kcat/KM values in literature or use metabolism_kinetics.tsv - Consider multiple reaction steps - Include mulitple amino acid inhibition on importers (currently amino acids only inhibit their own import but some transporters import multiple amino acids and will be inhibited by all of the amino acids for the import of each amino acid) """ aa_ids = sim_data.molecule_groups.amino_acids n_aas = len(aa_ids) self.aa_to_index = {aa: i for i, aa in enumerate(aa_ids)} conc = self.concentration_updates.concentrations_based_on_nutrients # Measured data used as targets for calculations measured_uptake_rates = {} fwd_kcat_targets = {} for aa in aa_ids: aa_no_tag = aa[:-3] measured_uptake_rates[aa_no_tag] = ( self.amino_acid_uptake_rates[aa_no_tag]["uptake"].asNumber( units.mmol / units.g / units.h ) if aa_no_tag in self.amino_acid_uptake_rates else 0 ) if ( kcat := self.aa_synthesis_pathways[aa]["kcat_data"].asNumber( K_CAT_UNITS ) ) > 0: fwd_kcat_targets[aa_no_tag] = kcat default_fwd_target = np.mean(list(fwd_kcat_targets.values())) # Allosteric inhibition constants to match required supply rate basal_rates = ( sim_data.translation_supply_rate["minimal"] * cell_specs["basal"]["avgCellDryMassInit"] * sim_data.constants.n_avogadro ).asNumber(K_CAT_UNITS) with_aa_rates = ( sim_data.translation_supply_rate["minimal_plus_amino_acids"] * cell_specs["with_aa"]["avgCellDryMassInit"] * sim_data.constants.n_avogadro ).asNumber(K_CAT_UNITS) basal_supply_mapping = dict(zip(aa_ids, basal_rates)) with_aa_supply_mapping = dict(zip(aa_ids, with_aa_rates)) aa_enzymes = [] enzyme_to_aa_fwd = [] enzyme_to_aa_rev = [] aa_kcats_fwd = {} aa_kcats_rev = {} aa_kis = {} upstream_aas_for_km = {} aa_upstream_kms = {} aa_reverse_kms = {} aa_degradation_kms = {} fwd_rates: dict[str, float] = {} rev_rates: dict[str, float] = {} deg_rates: dict[str, float] = {} calculated_uptake_rates = {} minimal_conc = conc("minimal") with_aa_conc = conc("minimal_plus_amino_acids") # Get order of amino acids to calculate parameters for to ensure that # parameters that are dependent on other amino acids are run after # those calculations have completed self.aa_forward_stoich = np.eye(n_aas) self.aa_reverse_stoich = np.eye(n_aas) dependencies: dict[str, set[str]] = {} for aa in aa_ids: for downstream_aa in self.aa_synthesis_pathways[aa]["downstream"]: if units.isfinite( self.aa_synthesis_pathways[downstream_aa]["km, degradation"] ): dependencies.setdefault(aa, set()).add(downstream_aa) # Convert individual supply calculations to overall supply based on dependencies # via dot product (self.aa_forward_stoich @ supply) for upstream_aa, stoich in self.aa_synthesis_pathways[aa][ "upstream" ].items(): self.aa_forward_stoich[ self.aa_to_index[upstream_aa], self.aa_to_index[aa] ] = -stoich dependencies.setdefault(upstream_aa, set()).add(aa) for reverse_aa, stoich in self.aa_synthesis_pathways[aa]["reverse"].items(): self.aa_reverse_stoich[ self.aa_to_index[reverse_aa], self.aa_to_index[aa] ] = -stoich dependencies.setdefault(reverse_aa, set()).add(aa) ordered_aa_ids: list[str] = [] for _ in aa_ids: # limit number of iterations number of amino acids in case there are cyclic links for aa in sorted(set(aa_ids) - set(ordered_aa_ids)): for downstream_aa in dependencies.get(aa, set()): if downstream_aa not in ordered_aa_ids: break else: ordered_aa_ids.append(aa) if len(ordered_aa_ids) != n_aas: raise RuntimeError( "Could not determine amino acid order to calculate dependencies first." " Make sure there are no cyclical pathways for amino acids that can degrade." ) for amino_acid in ordered_aa_ids: data = self.aa_synthesis_pathways[amino_acid] fwd_enzymes = data["enzymes"] fwd_enzymes_basal_idx = bulk_name_to_idx(fwd_enzymes, basal_container["id"]) fwd_enzymes_basal = counts(basal_container, fwd_enzymes_basal_idx).sum() fwd_enzymes_with_aa_idx = bulk_name_to_idx( fwd_enzymes, with_aa_container["id"] ) fwd_enzymes_with_aa = counts( with_aa_container, fwd_enzymes_with_aa_idx ).sum() rev_enzymes = data["reverse enzymes"] rev_enzymes_basal_idx = bulk_name_to_idx(rev_enzymes, basal_container["id"]) rev_enzymes_basal = counts(basal_container, rev_enzymes_basal_idx).sum() rev_enzymes_with_aa_idx = bulk_name_to_idx( rev_enzymes, with_aa_container["id"] ) rev_enzymes_with_aa = counts( with_aa_container, rev_enzymes_with_aa_idx ).sum() aa_conc_basal = minimal_conc[amino_acid] aa_conc_with_aa = with_aa_conc[amino_acid] if data["ki"] is None: ki = np.inf * units.mol / units.L else: # Get largest dynamic range possible given the range of measured KIs lower_limit, upper_limit = data["ki"] if aa_conc_basal < lower_limit: ki = lower_limit elif aa_conc_basal > upper_limit: ki = upper_limit else: ki = aa_conc_basal upstream_aa = [aa for aa in data["upstream"]] km_conc_basal = METABOLITE_CONCENTRATION_UNITS * np.array( [ minimal_conc[aa].asNumber(METABOLITE_CONCENTRATION_UNITS) for aa in upstream_aa ] ) km_conc_with_aa = METABOLITE_CONCENTRATION_UNITS * np.array( [ with_aa_conc[aa].asNumber(METABOLITE_CONCENTRATION_UNITS) for aa in upstream_aa ] ) kms_upstream = data["km, upstream"] kms = METABOLITE_CONCENTRATION_UNITS * np.array( [ kms_upstream.get(aa, minimal_conc[aa]).asNumber( METABOLITE_CONCENTRATION_UNITS ) for aa in upstream_aa ] ) # TODO: better way to fill in this missing data if data["reverse"]: if units.isnan(data["km, reverse"]): km_reverse = ( minimal_conc[amino_acid] * 10 ) # TODO: better way to fill in this missing data else: km_reverse = data["km, reverse"] # TODO: remove this if separating reverse and deg enzymes elif ( data["reverse enzymes"] and not units.isfinite(data["km, degradation"]) and amino_acid != "L-SELENOCYSTEINE[c]" ): km_reverse = ( minimal_conc[amino_acid] * 10 ) # TODO: better way to fill in this missing data else: km_reverse = np.inf * units.mol / units.L km_degradation = data["km, degradation"] # Make required adjustments in order to get positive kcats and import rates for parameter, factor in self.aa_synthesis_pathway_adjustments.get( amino_acid, {} ).items(): if parameter == "ki": ki *= factor elif parameter == "km_degradation": km_degradation *= factor elif parameter == "km_reverse": km_reverse *= factor elif parameter == "kms": kms *= factor else: raise ValueError( f"Unexpected parameter adjustment ({parameter}) for {amino_acid}." ) if units.isfinite(km_reverse) and units.isfinite(km_degradation): raise ValueError( "Currently cannot have a reverse and degradation KM for amino acid" f" synthesis pathways ({amino_acid}). Consider a method to solve for separate" " kcats and implement matching saturation calculation in amino_acid_synthesis()" " and calc_kcats()" ) def calc_kcats( aa_conc_basal, km_conc_basal, aa_conc_with_aa, km_conc_with_aa, kms, km_reverse, km_degradation, ki, uptake_rate, ): # Calculate kcat value to ensure sufficient supply to double fwd_fraction_basal = units.strip_empty_units( 1 / (1 + aa_conc_basal / ki) * np.prod(1 / (1 + kms / km_conc_basal)) ) rev_fraction_basal = units.strip_empty_units( 1 / ( 1 + km_reverse / aa_conc_basal * (1 + aa_conc_basal / km_degradation) ) ) deg_fraction_basal = units.strip_empty_units( 1 / ( 1 + km_degradation / aa_conc_basal * (1 + aa_conc_basal / km_reverse) ) ) loss_fraction_basal = rev_fraction_basal + deg_fraction_basal fwd_capacity_basal = fwd_enzymes_basal * fwd_fraction_basal rev_capacity_basal = rev_enzymes_basal * loss_fraction_basal fwd_fraction_with_aa = units.strip_empty_units( 1 / (1 + aa_conc_with_aa / ki) * np.prod(1 / (1 + kms / km_conc_with_aa)) ) rev_fraction_with_aa = units.strip_empty_units( 1 / ( 1 + km_reverse / aa_conc_with_aa * (1 + aa_conc_with_aa / km_degradation) ) ) deg_fraction_with_aa = units.strip_empty_units( 1 / ( 1 + km_degradation / aa_conc_with_aa * (1 + aa_conc_with_aa / km_reverse) ) ) loss_fraction_with_aa = rev_fraction_with_aa + deg_fraction_with_aa fwd_capacity_with_aa = fwd_enzymes_with_aa * fwd_fraction_with_aa rev_capacity_with_aa = rev_enzymes_with_aa * loss_fraction_with_aa supply_basal = basal_supply_mapping[amino_acid] supply_with_aa = with_aa_supply_mapping[amino_acid] downstream_basal = 0 downstream_with_aa = 0 for i, stoich in enumerate( self.aa_forward_stoich[self.aa_to_index[amino_acid], :] ): if stoich < 0: downstream_aa = aa_ids[i] downstream_basal += -stoich * fwd_rates[downstream_aa][0] downstream_with_aa += -stoich * fwd_rates[downstream_aa][1] for i, stoich in enumerate( self.aa_reverse_stoich[self.aa_to_index[amino_acid], :] ): if stoich < 0: downstream_aa = aa_ids[i] downstream_basal += stoich * rev_rates[downstream_aa][0] downstream_with_aa += stoich * rev_rates[downstream_aa][1] uptake = ( units.mmol / units.g / units.h * uptake_rate * cell_specs["with_aa"]["avgCellDryMassInit"] ).asNumber(units.count * K_CAT_UNITS) balance_basal = supply_basal + downstream_basal balance_with_aa = supply_with_aa + downstream_with_aa - uptake A = np.array( [ [fwd_capacity_basal, -rev_capacity_basal], [fwd_capacity_with_aa, -rev_capacity_with_aa], ] ) b = np.array([balance_basal, balance_with_aa]) try: kcat_fwd, kcat_rev = np.linalg.solve(A, b) except np.linalg.LinAlgError: if VERBOSE: print( f"Warning: could not solve directly for {amino_acid} kcats - switching to least squares" ) kcat_fwd, kcat_rev = np.linalg.lstsq(A, b, rcond=None)[0] fwd_rate = ( kcat_fwd * fwd_enzymes_basal * fwd_fraction_basal, kcat_fwd * fwd_enzymes_with_aa * fwd_fraction_with_aa, ) rev_rate = ( kcat_rev * rev_enzymes_basal * rev_fraction_basal, kcat_rev * rev_enzymes_with_aa * rev_fraction_with_aa, ) deg_rate = ( kcat_rev * rev_enzymes_basal * deg_fraction_basal, kcat_rev * rev_enzymes_with_aa * deg_fraction_with_aa, ) return kcat_fwd, kcat_rev, fwd_rate, rev_rate, deg_rate, uptake # Fit forward and reverse kcats by adjusting the uptake rate def objective(aa, uptake, kcat_fwd, kcat_rev): aa = aa[:-3] diffs = np.array( [ measured_uptake_rates[aa] - uptake, fwd_kcat_targets.get(aa, default_fwd_target) - kcat_fwd, 0 if units.isfinite(km_reverse) else kcat_rev, # no penalty if reverse, minimize if degradation ] ) weights = np.array([1000, 1, 1]) return np.linalg.norm(weights * diffs) # TODO (travis): use a more rigorous method to fit the kcats (eg. gradient descent) # It would be better to include all amino acids in an objective and solve for all # kcats iteratively instead of solving for kcats for a single amino acid and then the next best_objective = None kcat_fwd = None n_factors = 500 if VERBOSE: print("uptake:") for factor in np.logspace(-1, 1, n_factors): results = calc_kcats( aa_conc_basal, km_conc_basal, aa_conc_with_aa, km_conc_with_aa, kms, km_reverse, km_degradation, ki, factor * measured_uptake_rates[amino_acid[:-3]], ) new_kcat_fwd, new_kcat_rev, *_ = results if VERBOSE: print(f"\t{factor:.2f}:\t{new_kcat_fwd:5.1f}\t{new_kcat_rev:5.1f}") if new_kcat_fwd >= 0 and new_kcat_rev >= 0: new_objective = objective( amino_acid, factor * measured_uptake_rates[amino_acid[:-3]], new_kcat_fwd, new_kcat_rev, ) if best_objective is None or new_objective < best_objective: kcat_fwd, kcat_rev, fwd_rate, rev_rate, deg_rate, uptake = ( results ) data["kcat"] = kcat_fwd * K_CAT_UNITS best_objective = new_objective # Vary input parameters for kcat calculations for debugging purposes if VERBOSE: print("KMs:") for factor in np.logspace(-1, 1, n_factors): new_kcat_fwd, new_kcat_rev, *_ = calc_kcats( aa_conc_basal, km_conc_basal, aa_conc_with_aa, km_conc_with_aa, factor * kms, km_reverse, km_degradation, ki, measured_uptake_rates[amino_acid[:-3]], ) print(f"\t{factor:.2f}:\t{new_kcat_fwd:5.1f}\t{new_kcat_rev:5.1f}") print("km_reverse:") for factor in np.logspace(-1, 1, n_factors): new_kcat_fwd, new_kcat_rev, *_ = calc_kcats( aa_conc_basal, km_conc_basal, aa_conc_with_aa, km_conc_with_aa, kms, factor * km_reverse, km_degradation, ki, measured_uptake_rates[amino_acid[:-3]], ) print(f"\t{factor:.2f}:\t{new_kcat_fwd:5.1f}\t{new_kcat_rev:5.1f}") print("km_degradation:") for factor in np.logspace(-1, 1, n_factors): new_kcat_fwd, new_kcat_rev, *_ = calc_kcats( aa_conc_basal, km_conc_basal, aa_conc_with_aa, km_conc_with_aa, kms, km_reverse, factor * km_degradation, ki, measured_uptake_rates[amino_acid[:-3]], ) print(f"\t{factor:.2f}:\t{new_kcat_fwd:5.1f}\t{new_kcat_rev:5.1f}") print("ki:") for factor in np.logspace(-1, 1, n_factors): new_kcat_fwd, new_kcat_rev, *_ = calc_kcats( aa_conc_basal, km_conc_basal, aa_conc_with_aa, km_conc_with_aa, kms, km_reverse, km_degradation, factor * ki, measured_uptake_rates[amino_acid[:-3]], ) print(f"\t{factor:.2f}:\t{new_kcat_fwd:5.1f}\t{new_kcat_rev:5.1f}") print(f"*** {amino_acid}: {kcat_fwd:5.1f} {kcat_rev:5.1f} ***") if kcat_fwd is None: raise ValueError( "Could not find positive forward and reverse" f" kcat for {amino_acid}. Run with VERBOSE to check input" " parameters like KM and KI or check concentrations." ) aa_enzymes += fwd_enzymes + rev_enzymes enzyme_to_aa_fwd += [amino_acid] * len(fwd_enzymes) + [None] * len( rev_enzymes ) enzyme_to_aa_rev += [None] * len(fwd_enzymes) + [amino_acid] * len( rev_enzymes ) aa_kcats_fwd[amino_acid] = kcat_fwd aa_kcats_rev[amino_acid] = kcat_rev aa_kis[amino_acid] = ki.asNumber(METABOLITE_CONCENTRATION_UNITS) upstream_aas_for_km[amino_acid] = upstream_aa aa_upstream_kms[amino_acid] = kms.asNumber(METABOLITE_CONCENTRATION_UNITS) aa_reverse_kms[amino_acid] = km_reverse.asNumber( METABOLITE_CONCENTRATION_UNITS ) aa_degradation_kms[amino_acid] = km_degradation.asNumber( METABOLITE_CONCENTRATION_UNITS ) fwd_rates[amino_acid] = fwd_rate rev_rates[amino_acid] = rev_rate deg_rates[amino_acid] = deg_rate calculated_uptake_rates[amino_acid] = uptake self.aa_enzymes = np.unique(aa_enzymes) self.aa_kcats_fwd = np.array([aa_kcats_fwd[aa] for aa in aa_ids]) self.aa_kcats_rev = np.array([aa_kcats_rev[aa] for aa in aa_ids]) self.aa_kis = np.array([aa_kis[aa] for aa in aa_ids]) self.aa_reverse_kms = np.array([aa_reverse_kms[aa] for aa in aa_ids]) self.aa_degradation_kms = np.array([aa_degradation_kms[aa] for aa in aa_ids]) # Import inhibition of transporters rich_conc = np.array( [with_aa_conc[aa].asNumber(METABOLITE_CONCENTRATION_UNITS) for aa in aa_ids] ) self.aa_import_kis: np.ndarray = ( rich_conc.copy() ) # Assume this conc is the inhibition constant: TODO: find KIs saturation = 1 / (1 + rich_conc / self.aa_import_kis) self.specific_import_rates: np.ndarray = np.array( [calculated_uptake_rates[aa] for aa in aa_ids] ) / cell_specs["with_aa"]["avgCellDryMassInit"].asNumber(DRY_MASS_UNITS) self.max_specific_import_rates = self.specific_import_rates / saturation # KMs for upstream amino acids upstream_kms = [aa_upstream_kms[aa] for aa in aa_ids] upstream_aas = [upstream_aas_for_km[aa] for aa in aa_ids] self.aa_upstream_kms = np.zeros((n_aas, n_aas)) for i, (kms, aas) in enumerate(zip(upstream_kms, upstream_aas)): for km, aa in zip(kms, aas): self.aa_upstream_kms[i, self.aa_to_index[aa]] = km # Convert enzyme counts to an amino acid basis via dot product (counts @ self.enzyme_to_amino_acid) self.enzyme_to_amino_acid_fwd = np.zeros((len(self.aa_enzymes), n_aas)) self.enzyme_to_amino_acid_rev = np.zeros((len(self.aa_enzymes), n_aas)) enzyme_mapping = {e: i for i, e in enumerate(self.aa_enzymes)} aa_mapping = {a: i for i, a in enumerate(aa_ids)} for enzyme, fwd, rev in zip(aa_enzymes, enzyme_to_aa_fwd, enzyme_to_aa_rev): if fwd is not None: self.enzyme_to_amino_acid_fwd[ enzyme_mapping[enzyme], aa_mapping[fwd] ] = 1 if rev is not None: self.enzyme_to_amino_acid_rev[ enzyme_mapping[enzyme], aa_mapping[rev] ] = 1 # Concentrations for reference in analysis plot conversion = ( sim_data.constants.cell_density / sim_data.constants.n_avogadro * sim_data.mass.cell_dry_mass_fraction ) aa_enzymes_basal_idx = bulk_name_to_idx(self.aa_enzymes, basal_container["id"]) basal_counts = counts(basal_container, aa_enzymes_basal_idx) aa_enzymes_with_aa_idx = bulk_name_to_idx( self.aa_enzymes, with_aa_container["id"] ) with_aa_counts = counts(with_aa_container, aa_enzymes_with_aa_idx) self.aa_supply_enzyme_conc_with_aa = ( conversion * with_aa_counts / cell_specs["with_aa"]["avgCellDryMassInit"] ) self.aa_supply_enzyme_conc_basal = ( conversion * basal_counts / cell_specs["basal"]["avgCellDryMassInit"] ) # Check calculations that could end up negative neg_idx = np.where(self.max_specific_import_rates < 0)[0] if len(neg_idx): aas = ", ".join([aa_ids[idx] for idx in neg_idx]) print(f"{self.max_specific_import_rates = }") raise ValueError( f"Import rate was determined to be negative for {aas}." " Check input parameters like supply and synthesis or enzyme expression." )
[docs] def get_pathway_enzyme_counts_per_aa( self, enzyme_counts: np.ndarray ) -> Tuple[np.ndarray, np.ndarray]: """ Get the counts of enzymes for forward and reverse reactions in the amino acid synthesis network based on all of the enzymes used in the network. Useful to get the counts to pass to amino_acid_synthesis() from counts based on self.aa_enzymes. Args: enzyme_counts: counts of all enzymes in the amino acid network Returns: 2-element tuple containing - counts_per_aa_fwd: counts of enzymes for the forward reaction for each amino acid - counts_per_aa_rev: counts of enzymes for the reverse reaction for each amino acid """ counts_per_aa_fwd = enzyme_counts @ self.enzyme_to_amino_acid_fwd counts_per_aa_rev = enzyme_counts @ self.enzyme_to_amino_acid_rev return counts_per_aa_fwd, counts_per_aa_rev
[docs] def amino_acid_synthesis( self, counts_per_aa_fwd: npt.NDArray[np.int64], counts_per_aa_rev: npt.NDArray[np.int64], aa_conc: Union[units.Unum, npt.NDArray[np.float64]], ) -> tuple[ npt.NDArray[np.float64], npt.NDArray[np.float64], npt.NDArray[np.float64] ]: """ Calculate the net rate of synthesis for amino acid pathways (can be negative with reverse reactions). Args: counts_per_aa_fwd: counts for enzymes in forward reactions for each amino acid counts_per_aa_rev: counts for enzymes in loss reactions for each amino acid aa_conc: concentrations of each amino acid with mol/volume units Returns: 3-element tuple containing - synthesis: net rate of synthesis for each amino acid pathway. array is unitless but represents counts of amino acid per second - forward_fraction: saturated fraction for forward reactions - loss_fraction: saturated fraction for loss reactions .. note:: Currently does not match saturation terms used in calc_kcats since it assumes only a reverse or degradation KM exists for simpler calculations """ # Convert to appropriate arrays if units.hasUnit(aa_conc): aa_conc = aa_conc.asNumber(METABOLITE_CONCENTRATION_UNITS) return amino_acid_synthesis_jit( counts_per_aa_fwd, counts_per_aa_rev, aa_conc, self.aa_upstream_kms, self.aa_kis, self.aa_reverse_kms, self.aa_degradation_kms, self.aa_forward_stoich, self.aa_kcats_fwd, self.aa_reverse_stoich, self.aa_kcats_rev, )
[docs] def amino_acid_export( self, aa_transporters_counts: npt.NDArray[np.int64], aa_conc: Union[units.Unum, npt.NDArray[np.float64]], mechanistic_uptake: bool, ) -> npt.NDArray[np.float64]: """ Calculate the rate of amino acid export. Args: aa_transporters_counts: counts of each transporter aa_conc: concentrations of each amino acid with mol/volume units mechanistic_uptake: if true, the uptake is calculated based on transporters Returns: Rate of export for each amino acid (unitless but represents counts of amino acid per second) """ if units.hasUnit(aa_conc): aa_conc = aa_conc.asNumber(METABOLITE_CONCENTRATION_UNITS) return amino_acid_export_jit( aa_transporters_counts, aa_conc, mechanistic_uptake, self.aa_to_exporters_matrix, self.export_kcats_per_aa, self.aa_export_kms, )
[docs] def amino_acid_import( self, aa_in_media: npt.NDArray[np.bool_], dry_mass: units.Unum, internal_aa_conc: Union[units.Unum, npt.NDArray[np.float64]], aa_transporters_counts: npt.NDArray[np.int64], mechanisitic_uptake: bool, ) -> npt.NDArray[np.float64]: """ Calculate the rate of amino acid uptake. Args: aa_in_media: bool for each amino acid being present in current media dry_mass: current dry mass of the cell, with mass units internal_aa_conc: internal concentrations of amino acids aa_transporters_counts: counts of each transporter mechanisitic_uptake: if true, the uptake is calculated based on transporters Returns: Rate of uptake for each amino acid (unitless but represents counts of amino acid per second) """ if units.hasUnit(internal_aa_conc): internal_aa_conc = internal_aa_conc.asNumber(METABOLITE_CONCENTRATION_UNITS) dry_mass = dry_mass.asNumber(DRY_MASS_UNITS) return amino_acid_import_jit( aa_in_media, dry_mass, internal_aa_conc, aa_transporters_counts, mechanisitic_uptake, self.aa_import_kis, self.aa_to_importers_matrix, self.import_kcats_per_aa, self.max_specific_import_rates, )
[docs] def get_amino_acid_conc_conversion(self, conc_units): return units.strip_empty_units(conc_units / METABOLITE_CONCENTRATION_UNITS)
[docs] @staticmethod def extract_reactions( raw_data: KnowledgeBaseEcoli, sim_data: "SimulationDataEcoli" ) -> tuple[ list[str], dict[str, dict[str, int]], list[str], dict[str, list[str]], dict[str, str], ]: """ Extracts reaction data from raw_data to build metabolism reaction network with stoichiometry, reversibility and enzyme catalysts. Args: raw_data: knowledge base data sim_data: simulation data Returns: 5-element tuple containing - base_rxn_ids: list of base reaction IDs from which reaction IDs were derived from - reaction_stoich: stoichiometry of metabolites for each reaction:: {reaction ID: {metabolite ID with location tag: stoichiometry}} - reversible_reactions: reaction IDs for reactions that have a reverse complement, does not have reverse tag - reaction_catalysts: enzyme catalysts for each reaction with known catalysts, likely a subset of reactions in stoich:: {reaction ID: enzyme IDs with location tag} - rxn_id_to_base_rxn_id: mapping from reaction IDs to the IDs of the base reactions they were derived from:: {reaction ID: base ID} """ compartment_ids_to_abbreviations = { comp["id"]: comp["abbrev"] for comp in raw_data.compartments } compartment_ids_to_abbreviations.update(UNDEFINED_COMPARTMENT_IDS_TO_ABBREVS) valid_directions = {"L2R", "R2L", "BOTH"} forward_directions = {"L2R", "BOTH"} reverse_directions = {"R2L", "BOTH"} metabolite_ids = {met["id"] for met in cast(Any, raw_data).metabolites} # Build mapping from each complexation subunit to all downstream # complexes containing the subunit, including itself # Start by building mappings from subunits to complexes that are # directly formed from the subunit through a single reaction subunit_id_to_parent_complexes: dict[str, list[str]] = {} for comp_reaction in itertools.chain( cast(Any, raw_data).complexation_reactions, cast(Any, raw_data).equilibrium_reactions, ): complex_id = None # Find ID of complex for mol_id, coeff in comp_reaction["stoichiometry"].items(): if coeff > 0: complex_id = mol_id break assert complex_id is not None # Map each subunit to found complex for mol_id, coeff in comp_reaction["stoichiometry"].items(): if mol_id == complex_id or mol_id in metabolite_ids: continue elif mol_id in subunit_id_to_parent_complexes: subunit_id_to_parent_complexes[mol_id].append(complex_id) else: subunit_id_to_parent_complexes[mol_id] = [complex_id] # Recursive function that returns a list of all downstream complexes # containing the given subunit, including itself def get_all_complexes(subunit_id): all_downstream_complex_ids = [subunit_id] if subunit_id not in subunit_id_to_parent_complexes: return all_downstream_complex_ids # Get downstream complexes of all parent complexes for parent_complex_id in subunit_id_to_parent_complexes[subunit_id]: all_downstream_complex_ids.extend(get_all_complexes(parent_complex_id)) # Remove duplicates return sorted(set(all_downstream_complex_ids)) subunit_id_to_all_downstream_complexes = { subunit_id: get_all_complexes(subunit_id) for subunit_id in subunit_id_to_parent_complexes.keys() } # Initialize variables to store reaction information all_base_rxns = set() reaction_stoich = {} reversible_reactions = [] reaction_catalysts = {} rxn_id_to_base_rxn_id = {} # Load and parse reaction information from raw_data for reaction in cast(Any, raw_data).metabolic_reactions: reaction_id = reaction["id"] stoich = reaction["stoichiometry"] direction = reaction["direction"] if len(stoich) <= 1: raise Exception( "Invalid biochemical reaction: {}, {}".format(reaction_id, stoich) ) if direction not in valid_directions: raise InvalidReactionDirectionError( f"The direction {direction} given for reaction {reaction_id} is invalid." ) forward = direction in forward_directions reverse = direction in reverse_directions def convert_compartment_tags(met_id): new_met_id = met_id for comp_id, comp_abbrev in compartment_ids_to_abbreviations.items(): new_met_id = new_met_id.replace(f"[{comp_id}]", f"[{comp_abbrev}]") return new_met_id # All protein complexes that contain an enzyme subunit are assumed # to retain the enzyme's catalytic activity catalysts_for_this_rxn = [] all_potential_catalysts = [] for catalyst in reaction["catalyzed_by"]: all_potential_catalysts.extend( subunit_id_to_all_downstream_complexes.get(catalyst, [catalyst]) ) for catalyst in sorted(set(all_potential_catalysts)): if sim_data.getter.is_valid_molecule(catalyst): catalysts_with_loc = catalyst + sim_data.getter.get_compartment_tag( catalyst ) catalysts_for_this_rxn.append(catalysts_with_loc) # If we don't have the catalyst in our reconstruction, drop it else: if VERBOSE: print( "Skipping catalyst {} for {} since it is not in the model".format( catalyst, reaction_id ) ) # Get base reaction ID of this reaction # If reaction ID does not end with a dot, the given reaction ID is # already a base reaction ID if reaction_id[-1] != ".": base_reaction_id = reaction_id # If reaction ID ends with a dot, find the base reaction ID based # on the following rules (provided by EcoCyc): # The parsing instructions for obtaining the base rxn-ID are, effectively: # 1: Check whether the full rxn-ID ends with a dot. # 2: If there is no dot at the end, it is already a base rxn-ID. # Otherwise, find the position of a second dot, to the left of the dot at the end. # 3: Extract the string between the last dot and the second to last dot. # If this intervening string consists of only digits, then convert this string to an integer. # In this case, this rxn-ID stands for a generic rxn, and has extra suffixes # that need to be trimmed off, to retrieve the base rxn-ID. # The extracted integer indicates the length of the suffixes to be trimmed off. # 4: To find the end position of the base rxn-ID, subtract the integer (obtained by 3: ) # from the position of the second dot (obtained by 2: ) . # 5: Retrieve the base rxn-ID, which is the substring from the very left (position 0) # to the end position (obtained by 4: ) . else: reaction_id_split = reaction_id[:-1].split(".") suffix_length = int(reaction_id_split[-1]) base_reaction_id = ".".join(reaction_id_split[:-1])[:-suffix_length] if forward: reaction_stoich[reaction_id] = { convert_compartment_tags(moleculeID): stoichCoeff for moleculeID, stoichCoeff in stoich.items() } if len(catalysts_for_this_rxn) > 0: reaction_catalysts[reaction_id] = catalysts_for_this_rxn rxn_id_to_base_rxn_id[reaction_id] = base_reaction_id if reverse: reverse_reaction_id = REVERSE_REACTION_ID.format(reaction_id) reaction_stoich[reverse_reaction_id] = { convert_compartment_tags(moleculeID): -stoichCoeff for moleculeID, stoichCoeff in stoich.items() } if len(catalysts_for_this_rxn) > 0: reaction_catalysts[reverse_reaction_id] = list( catalysts_for_this_rxn ) rxn_id_to_base_rxn_id[reverse_reaction_id] = base_reaction_id if forward and reverse: reversible_reactions.append(reaction_id) if base_reaction_id not in all_base_rxns: all_base_rxns.add(base_reaction_id) base_rxn_ids = sorted(list(all_base_rxns)) return ( base_rxn_ids, reaction_stoich, reversible_reactions, reaction_catalysts, rxn_id_to_base_rxn_id, )
[docs] @staticmethod def match_reaction( stoich: dict[str, dict[str, int]], catalysts: dict[str, list[str]], rxn_to_match: str, enz: str, mets: list[str], direction: Optional[str] = None, ) -> list[str]: """ Matches a given reaction (rxn_to_match) to reactions that exist in stoich given that enz is known to catalyze the reaction and mets are reactants in the reaction. Can perform a fuzzy reaction match since rxn_to_match just needs to be part of the actual reaction name to match specific instances of a reaction. (eg. rxn_to_match="ALCOHOL-DEHYDROG-GENERIC-RXN" can match "ALCOHOL-DEHYDROG-GENERIC-RXN-ETOH/NAD//ACETALD/NADH/PROTON.30."). Args: stoich: stoichiometry of metabolites for each reaction:: {reaction ID: {metabolite ID with location tag: stoichiometry}} catalysts: enzyme catalysts for each reaction with known catalysts, likely a subset of reactions in stoich:: {reaction ID: enzyme IDs with location tag} rxn_to_match: reaction ID from kinetics to match to existing reactions enz: enzyme ID with location tag mets: metabolite IDs with no location tag from kinetics direction: reaction directionality, ``'forward'`` or ``'reverse'`` or ``None`` Returns: Matched reaction IDs in stoich """ # Mapping to handle instances of metabolite classes in kinetics # Keys: specific molecules in kinetics file # Values: class of molecules in reactions file that contain the key class_mets = { "RED-THIOREDOXIN-MONOMER": "Red-Thioredoxin", "RED-THIOREDOXIN2-MONOMER": "Red-Thioredoxin", "RED-GLUTAREDOXIN": "Red-Glutaredoxins", "GRXB-MONOMER": "Red-Glutaredoxins", "GRXC-MONOMER": "Red-Glutaredoxins", "OX-FLAVODOXIN1": "Oxidized-flavodoxins", "OX-FLAVODOXIN2": "Oxidized-flavodoxins", } # Match full reaction name from partial reaction in kinetics. Must # also match metabolites since there can be multiple reaction instances. match = False match_candidates = [] if rxn_to_match in stoich: match_candidates.append(rxn_to_match) else: for long_rxn, long_mets in stoich.items(): if rxn_to_match in long_rxn and not long_rxn.endswith(REVERSE_TAG): match = True stripped_enzs = {e[:-3] for e in catalysts.get(long_rxn, [])} stripped_mets = {m[:-3] for m in long_mets} if ( np.all([class_mets.get(m, m) in stripped_mets for m in mets]) and enz in stripped_enzs ): match_candidates.append(long_rxn) if len(match_candidates) == 0: if VERBOSE: if match: print( "Partial reaction match: {} {} {} {} {}".format( rxn_to_match, enz, stripped_enzs, mets, stripped_mets ) ) else: print("No reaction match: {}".format(rxn_to_match)) # Determine direction of kinetic reaction from annotation or # metabolite stoichiometry. rxn_matches = [] for rxn in match_candidates: reverse_rxn = REVERSE_REACTION_ID.format(rxn) reverse_rxn_exists = reverse_rxn in stoich if direction: reverse = direction == "reverse" else: s = {k[:-3]: v for k, v in stoich.get(rxn, {}).items()} direction_ = np.unique( np.sign([s.get(class_mets.get(m, m), 0) for m in mets]) ) if len(direction_) == 0 and not reverse_rxn_exists: reverse = False elif len(direction_) != 1 or direction_[0] == 0: if VERBOSE: print( "Conflicting directionality: {} {} {}".format( rxn, mets, direction_ ) ) continue else: reverse = direction_[0] > 0 # Verify a reverse reaction exists in the model if reverse: if reverse_rxn_exists: rxn_matches.append(reverse_rxn) continue else: if VERBOSE: print("No reverse reaction: {} {}".format(rxn, mets)) continue rxn_matches.append(rxn) return sorted(rxn_matches)
[docs] @staticmethod def temperature_adjusted_kcat( kcat: Unum, temp: Union[float, str] = "" ) -> npt.NDArray[np.float64]: """ Args: kcat: enzyme turnover number(s) (1 / time) temp: temperature of measurement, defaults to 25 if '' Returns: Temperature adjusted kcat values, in units of 1/s """ if isinstance(temp, str): temp = 25 return 2 ** ((37.0 - temp) / 10.0) * kcat.asNumber(K_CAT_UNITS)
[docs] @staticmethod def _construct_default_saturation_equation( mets: list[str], kms: list[float], kis: list[float], known_mets: Iterable[str] ) -> str: """ Args: mets: metabolite IDs with location tag for KM and KI parameters ordered to match order of kms then kis kms: KM parameters associated with mets kis: KI parameters associated with mets known_mets: metabolite IDs with location tag with known concentrations Returns: Saturation equation with metabolites to replace delimited by double quote (e.g. "metabolite") """ # Check input dimensions n_params = len(kms) + len(kis) if n_params == 0: return "1" if n_params != len(mets): if VERBOSE: print("Saturation parameter mismatch: {} {} {}".format(mets, kms, kis)) return "1" terms = [] # Add KM terms for m, k in zip(mets, kms): if m in known_mets: terms.append('1+{}/"{}"'.format(k, m)) elif VERBOSE: print("Do not have concentration for {} with KM={}".format(m, k)) # Add KI terms for m, k in zip(mets[len(kms) :], kis): if m in known_mets: terms.append('1+"{}"/{}'.format(m, k)) elif VERBOSE: print("Do not have concentration for {} with KI={}".format(m, k)) # Enclose groupings if being multiplied together if len(terms) > 1: terms[0] = "(" + terms[0] terms[-1] += ")" elif len(terms) == 0: return "1" return "1/({})".format(")*(".join(terms))
[docs] @staticmethod def _extract_custom_constraint( constraint: dict[str, Any], reactant_tags: dict[str, str], product_tags: dict[str, str], known_mets: set[str], ) -> tuple[Optional[npt.NDArray[np.float64]], list[str]]: """ Args: constraint: values defining a kinetic constraint:: {'customRateEquation' (str): mathematical representation of rate (must contain 'kcat*E'), 'customParameterVariables' (dict[str, str]): mapping of variable names in the rate equation to metabolite IDs without location tags (must contain 'E'), 'customParameterConstants' (list[str]): constant strings in the rate equation that correspond to values (must contain 'kcat'), 'customParameterConstantValues' (list[float]): values for each of the constant strings, 'Temp' (float or ''): temperature of measurement} reactant_tags: mapping of molecule IDs without a location tag to molecule IDs with a location tag for all reactants product_tags: mapping of molecule IDs without a location tag to molecule IDs with a location tag for all products known_mets: molecule IDs with a location tag for molecules with known concentrations Returns: 2-element tuple containing - kcats: temperature adjusted kcat value, in units of 1/s - saturation: saturation equation with metabolites to replace delimited by double quote (eg. "metabolite") """ equation = constraint["customRateEquation"] variables = constraint["customParameterVariables"] constant_keys = constraint["customParameterConstants"] constant_values = constraint["customParameterConstantValues"] temp = constraint["Temp"] # Need to have these in the constraint kcat_str = "kcat" enzyme_str = "E" capacity_str = "{}*{}".format(kcat_str, enzyme_str) # Need to replace these symbols in equations symbol_sub = { "^": "**", } # Make sure kcat exists if kcat_str not in constant_keys: if VERBOSE: print( "Missing {} in custom constants: {}".format(kcat_str, constant_keys) ) return None, [] custom_kcat = ( 1 / units.s * np.array([constant_values[constant_keys.index(kcat_str)]]) ) kcats = Metabolism.temperature_adjusted_kcat(custom_kcat, temp) # Make sure equation can be parsed, otherwise just return kcat if enzyme_str not in variables: if VERBOSE: print( "Missing enzyme key ({}) in custom variables: {}".format( enzyme_str, variables ) ) return kcats, [] if capacity_str not in equation: if VERBOSE: print( "Expected to find {} in custom equation: {}".format( capacity_str, equation ) ) return kcats, [] if len(constant_keys) != len(constant_values): if VERBOSE: print( "Mismatch between constants: {} {}".format( constant_keys, constant_values ) ) return kcats, [] variables_with_tags = { k: reactant_tags.get(v, product_tags.get(v, None)) for k, v in variables.items() if k != enzyme_str and (v in reactant_tags or v in product_tags) } # Substitute values into custom equations ## Replace terms with known constant values or sim molecule IDs with concentrations custom_subs = {k: str(v) for k, v in zip(constant_keys, constant_values)} custom_subs.update( { k: '"{}"'.format(v) for k, v in variables_with_tags.items() if v in known_mets } ) ## Remove capacity to get only saturation term new_equation = equation.replace(capacity_str, "1") ## Tokenize equation to terms and symbols parsed_variables = re.findall(r"\w*", new_equation)[ :-1 ] # Remove trailing empty match ## Ensure valid input of known variables or a float term for v in parsed_variables: if not (v == "" or v in custom_subs): try: float(v) except ValueError: if VERBOSE: print( "Unknown value encountered in custom equation {}: {}".format( equation, v ) ) return kcats, [] parsed_symbols = re.findall(r"\W", new_equation) tokenized_equation = np.array(parsed_variables) symbol_idx_mask = tokenized_equation == "" ## Verify tokenized equation matches original before replacements tokenized_equation[symbol_idx_mask] = parsed_symbols if "".join(tokenized_equation) != new_equation: if VERBOSE: print("Error parsing custom equation: {}".format(equation)) return kcats, [] ## Perform replacement of symbols tokenized_equation[symbol_idx_mask] = [ symbol_sub.get(s, s) for s in parsed_symbols ] # Reconstruct saturation equation with replacements saturation = [ "".join([custom_subs.get(token, token) for token in tokenized_equation]) ] return kcats, saturation
[docs] @staticmethod def extract_kinetic_constraints( raw_data: KnowledgeBaseEcoli, sim_data: "SimulationDataEcoli", stoich: Optional[dict[str, dict[str, int]]] = None, catalysts: Optional[dict[str, list[str]]] = None, known_metabolites: Optional[Set[str]] = None, ) -> dict[tuple[str, str], dict[str, list[Any]]]: """ Load and parse kinetic constraint information from raw_data Args: raw_data: knowledge base data sim_data: simulation data stoich: stoichiometry of metabolites for each reaction (if ``None``, data is loaded from ``raw_data`` and ``sim_data``):: {reaction ID: {metabolite ID with location tag: stoichiometry}} catalysts: enzyme catalysts for each reaction with known catalysts, likely a subset of reactions in ``stoich`` (if ``None``, data is loaded from ``raw_data`` and ``sim_data``:: {reaction ID: enzyme IDs with location tag} known_metabolites: metabolites with known concentrations Returns: Valid kinetic constraints for each reaction/enzyme pair:: {(reaction ID, enzyme with location tag): { 'kcat': kcat values (list[float]), 'saturation': saturation equations (list[str]) }} """ # Load data for optional args if needed if stoich is None or catalysts is None: _, loaded_stoich, _, loaded_catalysts, _ = Metabolism.extract_reactions( raw_data, sim_data ) if stoich is None: stoich = loaded_stoich if catalysts is None: catalysts = loaded_catalysts known_metabolites_ = set() if known_metabolites is None else known_metabolites constraints: dict[tuple[str, str], dict[str, list]] = {} for constraint in cast(Any, raw_data).metabolism_kinetics: rxn = constraint["reactionID"] enzyme = constraint["enzymeID"] metabolites = constraint["substrateIDs"] direction = constraint["direction"] kms = list(constraint["kM"].asNumber(KINETIC_CONSTRAINT_CONC_UNITS)) kis = list(constraint["kI"].asNumber(KINETIC_CONSTRAINT_CONC_UNITS)) n_reactants = len(metabolites) - len(kis) matched_rxns = Metabolism.match_reaction( stoich, catalysts, rxn, enzyme, metabolites[:n_reactants], direction ) for matched_rxn in matched_rxns: # Ensure enzyme catalyzes reaction in model enzymes_tag_conversion = { e[:-3]: e for e in catalysts.get(matched_rxn, []) } if enzyme not in enzymes_tag_conversion: if VERBOSE: print("{} does not catalyze {}".format(enzyme, matched_rxn)) continue # Update metabolites with a location tag from the reaction # First look in reactants but some products can inhibit reactant_tags = { k[:-3]: k for k, v in stoich[matched_rxn].items() if v < 0 } product_tags = { k[:-3]: k for k, v in stoich[matched_rxn].items() if v > 0 } mets_with_tag = [ reactant_tags.get(met, product_tags.get(met, "")) for met in metabolites if met in reactant_tags or met in product_tags ] if len(mets_with_tag) != len(metabolites): # Warn if verbose but no continue since we can still use kcat if VERBOSE: print( "Could not match all metabolites: {} {}".format( metabolites, mets_with_tag ) ) # Extract kcat and saturation parameters if constraint["rateEquationType"] == "custom": kcats, saturation = Metabolism._extract_custom_constraint( constraint, reactant_tags, product_tags, known_metabolites_ ) if kcats is None: continue else: kcats = Metabolism.temperature_adjusted_kcat( constraint["kcat"], constraint["Temp"] ) if len(kcats) > 1: if len(kcats) != len(kms) or len(kms) != len(mets_with_tag): if VERBOSE: print( "Could not align kcats and kms: {} {} {} {}".format( rxn, kcats, kms, mets_with_tag ) ) continue saturation = [ Metabolism._construct_default_saturation_equation( [m], [km], [], known_metabolites_ ) for m, km in zip(mets_with_tag, kms) ] else: saturation = [ Metabolism._construct_default_saturation_equation( mets_with_tag, kms, kis, known_metabolites_ ) ] saturation = [s for s in saturation if s != "1"] # Add new kcats and saturation terms for the enzymatic reaction key = (matched_rxn, enzymes_tag_conversion[enzyme]) entries = constraints.get(key, {}) entries["kcat"] = entries.get("kcat", []) + list(kcats) entries["saturation"] = entries.get("saturation", []) + saturation constraints[key] = entries return constraints
[docs] @staticmethod def _replace_enzyme_reactions( constraints: dict[tuple[str, str], dict[str, list[Any]]], stoich: dict[str, dict[str, int]], rxn_catalysts: dict[str, list[str]], reversible_rxns: list[str], rxn_id_to_compiled_id: dict[str, str], ) -> tuple[ dict[str, Any], dict[str, dict[str, int]], dict[str, list[str]], list[str], dict[str, str], ]: """ Modifies reaction IDs in data structures to duplicate reactions with kinetic constraints and multiple enzymes. Args: constraints: valid kinetic constraints for each reaction/enzyme pair:: {(reaction ID, enzyme with location tag): { 'kcat': kcat values (list[float]), 'saturation': saturation equations (list[str]) }} stoich: stoichiometry of metabolites for each reaction (if None, data is loaded from raw_data and sim_data):: {reaction ID: {metabolite ID with location tag: stoichiometry}} rxn_catalysts: enzyme catalysts for each reaction with known catalysts, likely a subset of reactions in stoich (if None, data is loaded from raw_data and sim_data):: {reaction ID: enzyme IDs with location tag} reversible_rxns: reaction IDs for reactions that have a reverse complement, does not have reverse tag rxn_id_to_compiled_id: mapping from reaction IDs to the IDs of the original reactions they were derived from Returns: 5-element tuple containing - new_constraints: valid kinetic constraints for each reaction:: {reaction ID: { 'enzyme': enzyme catalyst (str), 'kcat': kcat values (list[float]), 'saturation': saturation equations (list[str]) }} - stoich: stoichiometry of metabolites for each reaction with updated reactions for enzyme catalyzed kinetic reactions:: {reaction ID: {metabolite ID with location tag: stoichiometry}} - rxn_catalysts: enzyme catalysts for each reaction with known catalysts, likely a subset of reactions in stoich with updated reactions for enzyme catalyzed kinetic reactions:: {reaction ID: enzyme IDs with location tag} - reversible_rxns: reaction IDs for reactions that have a reverse complement with updated reactions for enzyme catalyzed kinetic reactions, does not have reverse tag - rxn_id_to_compiled_id: mapping from reaction IDs to the IDs of the original reactions they were derived from, with updated reactions for enzyme catalyzed kinetic reactions """ new_constraints = {} n_catalysts = {rxn: len(catalysts) for rxn, catalysts in rxn_catalysts.items()} # Split out reactions that are kinetically constrained and that have # more than one enzyme that catalyzes the reaction for (rxn, enzyme), constraint in constraints.items(): if n_catalysts[rxn] > 1: # Create new reaction name with enzyme appended to the end if rxn.endswith(REVERSE_TAG): new_rxn = REVERSE_REACTION_ID.format( ENZYME_REACTION_ID.format(rxn[: -len(REVERSE_TAG)], enzyme[:-3]) ) else: new_rxn = ENZYME_REACTION_ID.format(rxn, enzyme[:-3]) # Add the new reaction to appropriate lists and dicts stoich[new_rxn] = copy(stoich[rxn]) rxn_catalysts[new_rxn] = [enzyme] if rxn in reversible_rxns: reversible_rxns.append(new_rxn) # Remove enzyme from old reaction and remove old reaction if no # more enzyme catalysts rxn_catalysts[rxn].pop(rxn_catalysts[rxn].index(enzyme)) if len(rxn_catalysts[rxn]) == 0: stoich.pop(rxn) rxn_catalysts.pop(rxn) if rxn in reversible_rxns: reversible_rxns.pop(reversible_rxns.index(rxn)) else: new_rxn = rxn rxn_id_to_compiled_id[new_rxn] = rxn_id_to_compiled_id[rxn] # noinspection PyTypeChecker new_constraints[new_rxn] = dict(constraints[(rxn, enzyme)], enzyme=enzyme) return ( new_constraints, stoich, rxn_catalysts, reversible_rxns, rxn_id_to_compiled_id, )
[docs] @staticmethod def _lambdify_constraints( constraints: dict[str, Any], ) -> tuple[ list[str], list[str], list[str], npt.NDArray[np.float64], str, str, npt.NDArray[np.bool_], ]: """ Creates str representations of kinetic terms to be used to create kinetic constraints that are returned with getKineticConstraints(). Args: constraints: valid kinetic constraints for each reaction:: {reaction ID: { 'enzyme': enzyme catalyst (str), 'kcat': kcat values (list[float]), 'saturation': saturation equations (list[str]) }} Returns: 7-element tuple containing - rxns: sorted reaction IDs for reactions with a kinetic constraint - enzymes: sorted enzyme IDs for enzymes that catalyze a kinetic reaction - substrates: sorted substrate IDs for substrates that are needed for kinetic saturation terms - all_kcats: (n rxns, 3) min, mean and max kcat value for each reaction - all_saturations: sympy str representation of a list of saturation terms (eg. '[s[0] / (1 + s[0]), 2 / (2 + s[1])]') - all_enzymes: sympy str representation of enzymes for each reaction (e.g. '[e[0], e[2], e[1]]') - constraint_is_kcat_only: True if reaction only has kcat values and no saturation terms """ # Ordered lists of constraint related IDs rxns = sorted(constraints) enzymes = sorted({c["enzyme"] for c in constraints.values()}) substrates = sorted( { match.strip('"') for c in constraints.values() for s in c["saturation"] for match in re.findall('".+?"', s) } ) # Mapping to replace molecule IDs with generic list strings enzyme_sub = {e: "e[{}]".format(i) for i, e in enumerate(enzymes)} substrate_sub = { '"{}"'.format(s): "s[{}]".format(i) for i, s in enumerate(substrates) } # Mapping to replace generic list strings with sympy variables. # Need separate mapping from above because sympy handles '[]' as indexing # so location tags are not parsed properly. enzyme_symbols = { "e": [sp.symbols("e[{}]".format(i)) for i in range(len(enzymes))] } substrate_symbols = { "s": [sp.symbols("s[{}]".format(i)) for i in range(len(substrates))] } # Values to return all_kcats = np.zeros((len(rxns), 3)) all_saturations = [] all_enzymes = [] constraint_is_kcat_only = [] # Extract out data from each constraint for i, rxn in enumerate(rxns): kcats = constraints[rxn]["kcat"] saturation = constraints[rxn]["saturation"] enzyme = constraints[rxn]["enzyme"] # Parse saturation equations into sympy format # If no saturation data is known, assume open range from 0 to 1 saturations = [] for sat in saturation: if sat == "1": continue for token, replace in substrate_sub.items(): sat = sat.replace(token, replace) saturations.append(parse_expr(sat, local_dict=substrate_symbols)) if len(saturations) == 0: saturations = [0, 1] constraint_is_kcat_only.append(True) else: constraint_is_kcat_only.append(False) # Save values for this constraint all_kcats[i, :] = [np.min(kcats), np.mean(kcats), np.max(kcats)] all_saturations.append(saturations) all_enzymes.append( parse_expr(enzyme_sub[enzyme], local_dict=enzyme_symbols) ) # Convert to str to save as class attr to be executed all_saturations = str(all_saturations) all_enzymes = str(all_enzymes) constraint_is_kcat_only = np.array(constraint_is_kcat_only) return ( rxns, enzymes, substrates, all_kcats, all_saturations, all_enzymes, constraint_is_kcat_only, )
[docs] def _is_transport_rxn(self, stoich: dict[str, int]) -> bool: """ Determines if the metabolic reaction with a given stoichiometry is a transport reactions that transports metabolites between different compartments. A metabolic reaction is considered to be a transport reaction if the substrate set and the product share the same metabolite tagged into different compartments. Args: stoich: Stoichiometry of the metabolic reaction:: { metabolite ID (str): stoichiometric coefficient (int) } Returns: True if given stoichiometry is a transport reaction """ is_transport_rxn = False # Get IDs of all substrates and products substrates, products = [], [] for mol_id, coeff in stoich.items(): if coeff < 0: substrates.append(mol_id) else: products.append(mol_id) # Get mapping from IDs to IDs without compartments substrates_tagged_to_no_tag = { mol_id: re.sub(r"\[.*]", "", mol_id) for mol_id in substrates } products_tagged_to_no_tag = { mol_id: re.sub(r"\[.*]", "", mol_id) for mol_id in products } overlap_no_tag = set(substrates_tagged_to_no_tag.values()) & set( products_tagged_to_no_tag.values() ) for mol_id_no_tag in list(overlap_no_tag): substrates_tagged = [ mol_tagged for mol_tagged in substrates if substrates_tagged_to_no_tag[mol_tagged] == mol_id_no_tag ] products_tagged = [ mol_tagged for mol_tagged in products if products_tagged_to_no_tag[mol_tagged] == mol_id_no_tag ] overlap_tagged = set(substrates_tagged) & set(products_tagged) # Tag reaction as a transport reaction if there is no overlap # between those substrates and products with locations included if len(overlap_tagged) == 0: is_transport_rxn = True break return is_transport_rxn
@njit def np_apply_along_axis(func1d, axis, arr): if arr.ndim != 2: raise RuntimeError("Array must have 2 dimensions.") if axis not in [0, 1]: raise RuntimeError("Axis must be 0 or 1.") if axis == 0: result = np.empty(arr.shape[1]) for i in range(len(result)): result[i] = func1d(arr[:, i]) else: result = np.empty(arr.shape[0]) for i in range(len(result)): result[i] = func1d(arr[i, :]) return result @njit def np_prod(array, axis): return np_apply_along_axis(np.prod, axis, array) @njit def amino_acid_synthesis_jit( counts_per_aa_fwd, counts_per_aa_rev, aa_conc, aa_upstream_kms, aa_kis, aa_reverse_kms, aa_degradation_kms, aa_forward_stoich, aa_kcats_fwd, aa_reverse_stoich, aa_kcats_rev, ): km_saturation = np_prod(1 / (1 + aa_upstream_kms / aa_conc), axis=1) # Determine saturation fraction for reactions forward_fraction = 1 / (1 + aa_conc / aa_kis) * km_saturation reverse_fraction = 1 / (1 + aa_reverse_kms / aa_conc) deg_fraction = 1 / (1 + aa_degradation_kms / aa_conc) loss_fraction = reverse_fraction + deg_fraction # Calculate synthesis rate synthesis = ( aa_forward_stoich.astype(np.float64) @ (aa_kcats_fwd * counts_per_aa_fwd * forward_fraction).astype(np.float64) - aa_reverse_stoich.astype(np.float64) @ (aa_kcats_rev * counts_per_aa_rev * reverse_fraction).astype(np.float64) - aa_kcats_rev * counts_per_aa_rev * deg_fraction ) return synthesis, forward_fraction, loss_fraction @njit def amino_acid_export_jit( aa_transporters_counts, aa_conc, mechanistic_uptake, aa_to_exporters_matrix, export_kcats_per_aa, aa_export_kms, ): if mechanistic_uptake: # Export based on mechanistic model trans_counts_per_aa = aa_to_exporters_matrix.astype( np.float64 ) @ aa_transporters_counts.astype(np.float64) export_rates = ( export_kcats_per_aa * trans_counts_per_aa / (1 + aa_export_kms / aa_conc) ) else: # Export is lumped with specific uptake rates in amino_acid_import # and not dependent on internal amino acid concentrations or # explicitly considered here export_rates = np.zeros(len(aa_conc)) return export_rates @njit def amino_acid_import_jit( aa_in_media, dry_mass, internal_aa_conc, aa_transporters_counts, mechanisitic_uptake, aa_import_kis, aa_to_importers_matrix, import_kcats_per_aa, max_specific_import_rates, ): saturation = 1 / (1 + internal_aa_conc / aa_import_kis) if mechanisitic_uptake: # Uptake based on mechanistic model counts_per_aa = aa_to_importers_matrix.astype( np.float64 ) @ aa_transporters_counts.astype(np.float64) import_rates = import_kcats_per_aa * counts_per_aa else: import_rates = max_specific_import_rates * dry_mass return import_rates * saturation * aa_in_media # Class used to update metabolite concentrations based on the current nutrient conditions
[docs] class ConcentrationUpdates(object): def __init__( self, concDict, relative_changes, equilibriumReactions, exchange_data_dict, all_metabolite_ids, linked_metabolites, ): self.units = units.getUnit(list(concDict.values())[0]) self.default_concentrations_dict = dict( (key, concDict[key].asNumber(self.units)) for key in concDict ) self.exchange_fluxes = self._exchange_flux_present(exchange_data_dict) self.relative_changes = relative_changes self._all_metabolite_ids = all_metabolite_ids self.linked_metabolites = linked_metabolites # factor of internal amino acid increase if amino acids present in nutrients self.molecule_scale_factors = { "L-ALPHA-ALANINE[c]": 2.0, "ARG[c]": 2.0, "ASN[c]": 2.0, "L-ASPARTATE[c]": 2.0, "CYS[c]": 2.0, "GLT[c]": 2.0, "GLN[c]": 2.0, "GLY[c]": 2.0, "HIS[c]": 2.0, "ILE[c]": 2.0, "LEU[c]": 2.0, "LYS[c]": 2.0, "MET[c]": 2.0, "PHE[c]": 2.0, "PRO[c]": 2.0, "SER[c]": 2.0, "THR[c]": 2.0, "TRP[c]": 2.0, "TYR[c]": 2.0, "L-SELENOCYSTEINE[c]": 2.0, "VAL[c]": 2.0, } self.molecule_set_amounts = self._add_molecule_amounts( equilibriumReactions, self.default_concentrations_dict ) # return adjustments to concDict based on nutrient conditions
[docs] def concentrations_based_on_nutrients( self, media_id: Optional[str] = None, imports: Optional[str] = None, conversion_units: Optional[units.Unum] = None, ) -> dict[str, Any]: if conversion_units: conversion = self.units.asNumber(conversion_units) else: conversion = self.units if imports is None and media_id is not None: imports = self.exchange_fluxes[media_id] concentrationsDict = self.default_concentrations_dict.copy() metaboliteTargetIds = sorted(concentrationsDict.keys()) concentrations = conversion * np.array( [concentrationsDict[k] for k in metaboliteTargetIds] ) concDict = dict(zip(metaboliteTargetIds, concentrations)) if imports is not None: # For faster conversions than .asNumber(conversion_units) for each setAmount if conversion_units: conversion_to_no_units = conversion_units.asUnit(self.units) # Adjust for measured concentration changes in different media if media_id in self.relative_changes: for mol_id, conc_change in self.relative_changes[media_id].items(): if mol_id in concDict: concDict[mol_id] *= conc_change for moleculeName, setAmount in self.molecule_set_amounts.items(): if ( moleculeName in imports and ( moleculeName[:-3] + "[c]" not in self.molecule_scale_factors or moleculeName == "L-SELENOCYSTEINE[c]" ) ) or ( moleculeName in self.molecule_scale_factors and moleculeName[:-3] + "[p]" in imports ): if conversion_units: setAmount = (setAmount / conversion_to_no_units).asNumber() concDict[moleculeName] = setAmount for met, linked in self.linked_metabolites.items(): concDict[met] = concDict[linked["lead"]] * linked["ratio"] return concDict
[docs] def _exchange_flux_present( self, exchange_data: dict[str, Any] ) -> dict[str, set[str]]: """ Caches the presence of exchanges in each media condition based on exchange_data to set concentrations in concentrations_based_on_nutrients(). Args: exchange_data: dictionary of exchange data for all media conditions with keys:: {importUnconstrainedExchangeMolecules (dict[str, set[str]]): exchange molecules (with location tag) for each media key that do not have an upper bound on their flux, importConstrainedExchangeMolecules (dict[str, dict[str, float with mol/mass/time units]]): constrained molecules (with location tag) for each media key with upper bound flux constraints} Returns: Sets of molecules IDs (with location tags) that can be imported for each media ID """ exchange_fluxes = {} for media, env in exchange_data.items(): exchange_fluxes[media] = {mol for mol, conc in env.items() if conc > 0} return exchange_fluxes
[docs] def _add_molecule_amounts(self, equilibriumReactions, concDict): moleculeSetAmounts = {} for reaction in equilibriumReactions: # We only want to do this for species with standard Michaelis-Menten kinetics initially if len(reaction["stoichiometry"]) != 3: continue moleculeName = [ mol_id for mol_id in reaction["stoichiometry"].keys() if mol_id in self._all_metabolite_ids ][0] amountToSet = 1e-4 moleculeSetAmounts[moleculeName + "[p]"] = amountToSet * self.units moleculeSetAmounts[moleculeName + "[c]"] = amountToSet * self.units for moleculeName, scaleFactor in self.molecule_scale_factors.items(): moleculeSetAmounts[moleculeName] = ( scaleFactor * concDict[moleculeName] * self.units ) return moleculeSetAmounts