Source code for ecoli.processes.metabolism_redux

"""
MetabolismRedux
"""

import numpy as np
import numpy.typing as npt
import time
from typing import Callable, cast, Optional
from unum import Unum
import warnings
from scipy.sparse import csr_matrix

from vivarium.core.process import Step
from vivarium.library.units import units as vivunits

from ecoli.library.schema import numpy_schema, bulk_name_to_idx, listener_schema, counts

from wholecell.utils import units

from ecoli.processes.registries import topology_registry
import cvxpy as cp
from typing import Iterable, Mapping
from dataclasses import dataclass

from reconstruction.ecoli.dataclasses.process.metabolism import REVERSE_TAG

COUNTS_UNITS = units.mmol
VOLUME_UNITS = units.L
MASS_UNITS = units.g
TIME_UNITS = units.s
CONC_UNITS = COUNTS_UNITS / VOLUME_UNITS
CONVERSION_UNITS = MASS_UNITS * TIME_UNITS / VOLUME_UNITS
GDCW_BASIS = units.mmol / units.g / units.h


NAME = "ecoli-metabolism-redux"
TOPOLOGY = topology_registry.access("ecoli-metabolism")
# TODO (Cyrus) - Re-add when kinetics are added.
# TOPOLOGY['kinetic_flux_targets'] = ('rates', 'fluxes')
topology_registry.register(NAME, TOPOLOGY)

# TODO (Cyrus) - Remove when have a better way to handle these rxns.
# ParCa mistakes in carbon gen, efflux/influx proton gen, mass gen
BAD_RXNS = [
    "RXN-12440",
    "TRANS-RXN-121",
    "TRANS-RXN-300",
    "TRANS-RXN-8",
    "R15-RXN-MET/CPD-479//CPD-479/MET.25.",
]


[docs] class MetabolismRedux(Step): name = NAME topology = TOPOLOGY defaults = { "stoich_dict": {}, "reaction_catalysts": {}, # TODO (Cyrus) -- get these passed in, subset of the stoichimetry "kinetic_rates": [], "media_id": "minimal", "imports": {}, "concentration_updates": None, "maintenance_reaction": {}, "nutrient_to_doubling_time": {}, "use_trna_charging": False, "include_ppgpp": False, "mechanistic_aa_transport": False, "aa_targets_not_updated": set(), "import_constraint_threshold": 0, "exchange_molecules": [], "non_growth_associated_maintenance": 8.39 * units.mmol / (units.g * units.h), "avogadro": 6.02214076e23 / units.mol, "cell_density": 1100 * units.g / units.L, "dark_atp": 33.565052868380675 * units.mmol / units.g, "cell_dry_mass_fraction": 0.3, "get_biomass_as_concentrations": lambda doubling_time: {}, "ppgpp_id": "ppgpp", "get_ppGpp_conc": lambda media: 0.0, "exchange_data_from_media": lambda media: [], "get_masses": lambda exchanges: [], "doubling_time": 44.0 * units.min, "amino_acid_ids": {}, "linked_metabolites": None, "aa_exchange_names": [], "removed_aa_uptake": [], "seed": 0, "base_reaction_ids": [], "fba_reaction_ids_to_base_reaction_ids": [], "constraints_to_disable": [], "kinetic_objective_weight": 1e-7, "kinetic_objective_weight_in_range": 1e-10, "secretion_penalty_coeff": 1e-3, "time_step": 1, } def __init__(self, parameters): super().__init__(parameters) # Use information from the environment and sim self.nutrient_to_doubling_time = self.parameters["nutrient_to_doubling_time"] self.use_trna_charging = self.parameters["use_trna_charging"] self.include_ppgpp = self.parameters["include_ppgpp"] self.mechanistic_aa_transport = self.parameters["mechanistic_aa_transport"] self.current_timeline = self.parameters["current_timeline"] self.media_id = self.parameters["media_id"] self.exchange_molecules = self.parameters["exchange_molecules"] self.aa_names = self.parameters["aa_names"] self.aa_targets_not_updated = self.parameters["aa_targets_not_updated"] stoich_dict = dict(sorted(self.parameters["stoich_dict"].items())) for rxn in BAD_RXNS: stoich_dict.pop(rxn) # Add maintenance reaction stoich_dict["maintenance_reaction"] = self.parameters["maintenance_reaction"] # Get all metabolite names self.metabolite_names = set() for reaction, stoich in stoich_dict.items(): self.metabolite_names.update(stoich.keys()) self.metabolite_names.update(set(self.parameters["exchange_molecules"])) self.metabolite_names = sorted(list(self.metabolite_names)) self.reaction_names = list(stoich_dict.keys()) metabolites_idx = { species: i for i, species in enumerate(self.metabolite_names) } # Get indices of catalysts for each reaction reaction_catalysts = self.parameters["reaction_catalysts"] self.catalyst_ids = self.parameters["catalyst_ids"] catalyst_idx = {catalyst: i for i, catalyst in enumerate(self.catalyst_ids)} # Create stoichiometry matrix (dot reaction fluxes to get # molecule deltas) and catalysis matrix (dot catalyst counts # to get catalyst counts per enzyme-catalyzed reaction) coeffs = [] met_ind = [] rxn_ind = [] catalyst_ind = [] catalyzed_rxn_ind = [] catalyzed_rxn_idx = 0 for rxn_idx, (reaction, stoich) in enumerate(stoich_dict.items()): for species, coefficient in stoich.items(): met_ind.append(metabolites_idx[species]) rxn_ind.append(rxn_idx) coeffs.append(coefficient) enzyme_idx = [ catalyst_idx[catalyst] for catalyst in reaction_catalysts.get(reaction, []) ] if len(enzyme_idx) > 1: catalyst_ind.extend(enzyme_idx) catalyzed_rxn_ind.extend([catalyzed_rxn_idx] * len(enzyme_idx)) catalyzed_rxn_idx += 1 self.stoichiometry = csr_matrix((coeffs, (met_ind, rxn_ind)), dtype=int) self.catalysis_matrix = csr_matrix( ([1] * len(catalyst_ind), (catalyzed_rxn_ind, catalyst_ind)), dtype=int ) self.media_id = self.parameters["media_id"] self.cell_density = self.parameters["cell_density"] self.n_avogadro = self.parameters["avogadro"] self.ngam = self.parameters["non_growth_associated_maintenance"] self.gam = ( self.parameters["dark_atp"] * self.parameters["cell_dry_mass_fraction"] ) # new variables for the model self.cell_mass = None self.previous_mass = None self.reaction_fluxes = None # methods from config self._biomass_concentrations = {} # type: dict self._getBiomassAsConcentrations = self.parameters[ "get_biomass_as_concentrations" ] self.concentration_updates = self.parameters["concentration_updates"] self.exchange_constraints = self.parameters["exchange_constraints"] self.get_kinetic_constraints = self.parameters["get_kinetic_constraints"] self.kinetic_constraint_reactions = self.parameters[ "kinetic_constraint_reactions" ] # Include ppGpp concentration target in objective if not handled # kinetically in other processes self.ppgpp_id = self.parameters["ppgpp_id"] self.getppGppConc = self.parameters["get_ppGpp_conc"] # go through all media in the timeline and add to metaboliteNames homeostatic_metabolites = set() conc_from_nutrients = ( self.concentration_updates.concentrations_based_on_nutrients ) if self.include_ppgpp: homeostatic_metabolites.add(self.ppgpp_id) for _, media_id in self.current_timeline: exchanges = self.parameters["exchange_data_from_media"](media_id) homeostatic_metabolites.update( conc_from_nutrients(imports=exchanges["importExchangeMolecules"]) ) self.homeostatic_metabolites = list(sorted(homeostatic_metabolites)) # Setup homeostatic objective concentration targets # Determine concentrations based on starting environment conc_dict = conc_from_nutrients( media_id=self.current_timeline[0][1], imports=parameters["imports"] ) doubling_time = self.parameters["doubling_time"] conc_dict.update(self.getBiomassAsConcentrations(doubling_time)) if self.include_ppgpp: conc_dict[self.ppgpp_id] = self.getppGppConc(doubling_time) self.homeostatic_objective = dict( (key, conc_dict[key].asNumber(CONC_UNITS)) for key in conc_dict ) # Include all concentrations that will be present in a sim for constant # length listeners for met in self.homeostatic_metabolites: if met not in self.homeostatic_objective: self.homeostatic_objective[met] = 0.0 # Molecules with concentration updates for listener self.linked_metabolites = self.parameters["linked_metabolites"] doubling_time = self.nutrient_to_doubling_time.get( self.media_id, self.nutrient_to_doubling_time["minimal"] ) update_molecules = list(self.getBiomassAsConcentrations(doubling_time).keys()) if self.use_trna_charging: update_molecules += [ aa for aa in self.aa_names if aa not in self.aa_targets_not_updated ] update_molecules += list(self.linked_metabolites.keys()) if self.include_ppgpp: update_molecules += [self.parameters["ppgpp_id"]] self.conc_update_molecules = sorted(update_molecules) self.aa_exchange_names = self.parameters["aa_exchange_names"] self.removed_aa_uptake = self.parameters["removed_aa_uptake"] self.aa_environment_names = [aa[:-3] for aa in self.aa_exchange_names] # Remove disabled reactions so they don't get included in the FBA # problem setup constraints_to_disable = self.parameters["constraints_to_disable"] self.active_constraints_mask = np.array( [ (rxn not in constraints_to_disable) for rxn in self.kinetic_constraint_reactions ] ) # Network flow initialization self.network_flow_model = NetworkFlowModel( self.stoichiometry, self.metabolite_names, self.reaction_names, self.homeostatic_metabolites, self.kinetic_constraint_reactions, self.parameters["get_mass"], self.gam.asNumber(), self.active_constraints_mask, ) # important bulk molecule names self.catalyst_ids = self.parameters["catalyst_ids"] self.aa_names = self.parameters["aa_names"] self.kinetic_constraint_enzymes = self.parameters["kinetic_constraint_enzymes"] self.kinetic_constraint_substrates = self.parameters[ "kinetic_constraint_substrates" ] # objective weights self.kinetic_objective_weight = self.parameters["kinetic_objective_weight"] self.secretion_penalty_coeff = self.parameters["secretion_penalty_coeff"] self.kinetic_objective_weight_in_range = self.parameters[ "kinetic_objective_weight_in_range" ] # Helper indices for Numpy indexing self.homeostatic_metabolite_idx = None # Cache uptake parameters from previous timestep self.allowed_exchange_uptake = None # Track updated AA concentration targets with tRNA charging self.aa_targets = {} self.aa_targets_not_updated = self.parameters["aa_targets_not_updated"] self.aa_names = self.parameters["aa_names"] self.import_constraint_threshold = self.parameters[ "import_constraint_threshold" ] # Get conversion matrix to compile individual fluxes in the FBA # solution to the fluxes of base reactions self.base_reaction_ids = self.parameters["base_reaction_ids"] self.base_reaction_ids.append("maintenance_reaction") fba_reaction_ids_to_base_reaction_ids = self.parameters[ "fba_reaction_ids_to_base_reaction_ids" ] fba_reaction_ids_to_base_reaction_ids["maintenance_reaction"] = ( "maintenance_reaction" ) fba_reaction_id_to_index = { rxn_id: i for (i, rxn_id) in enumerate(self.reaction_names) } base_reaction_id_to_index = { rxn_id: i for (i, rxn_id) in enumerate(self.base_reaction_ids) } base_rxn_indexes = [] fba_rxn_indexes = [] v = [] for fba_rxn_id in self.reaction_names: base_rxn_id = fba_reaction_ids_to_base_reaction_ids[fba_rxn_id] base_rxn_indexes.append(base_reaction_id_to_index[base_rxn_id]) fba_rxn_indexes.append(fba_reaction_id_to_index[fba_rxn_id]) if fba_rxn_id.endswith(REVERSE_TAG): v.append(-1) else: v.append(1) base_rxn_indexes = np.array(base_rxn_indexes) fba_rxn_indexes = np.array(fba_rxn_indexes) v = np.array(v) shape = (len(self.base_reaction_ids), len(self.reaction_names)) self.reaction_mapping_matrix = csr_matrix( (v, (base_rxn_indexes, fba_rxn_indexes)), shape=shape )
[docs] def ports_schema(self): return { "bulk": numpy_schema("bulk"), "bulk_total": numpy_schema("bulk"), # 'kinetic_flux_targets': {reaction_id: {} for reaction_id # in self.parameters['kinetic_rates']}, "environment": { "media_id": {"_default": "", "_updater": "set"}, "exchange": { str(element): {"_default": 0} for element in self.exchange_molecules }, "exchange_data": { "unconstrained": {"_default": {}}, "constrained": {"_default": set()}, }, }, "boundary": {"external": {"*": {"_default": 0 * vivunits.mM}}}, "polypeptide_elongation": { "aa_count_diff": { "_default": {}, "_emit": True, "_divider": "empty_dict", }, "gtp_to_hydrolyze": {"_default": 0, "_emit": True, "_divider": "zero"}, "aa_exchange_rates": { "_default": 0, "_emit": True, "_updater": "set", "_divider": "zero", }, }, "listeners": { "mass": listener_schema( { "cell_mass": 0.0, "dry_mass": 0.0, "rna_mass": 0.0, "protein_mass": 0.0, } ), "fba_results": listener_schema( { "media_id": "", "conc_updates": ([], self.conc_update_molecules), "base_reaction_fluxes": ([], self.base_reaction_ids), "solution_fluxes": ([], self.network_flow_model.rxns), "solution_dmdt": ([], self.network_flow_model.mets), "time_per_step": 0.0, "estimated_fluxes": ([], self.network_flow_model.rxns), "estimated_homeostatic_dmdt": ( [], np.array(self.network_flow_model.mets)[ self.network_flow_model.homeostatic_idx ], ), "target_homeostatic_dmdt": ( [], np.array(self.network_flow_model.mets)[ self.network_flow_model.homeostatic_idx ], ), "estimated_exchange_dmdt": {}, "estimated_intermediate_dmdt": [], "target_kinetic_fluxes": [], "target_kinetic_bounds": [], "reaction_catalyst_counts": [], "maintenance_target": 0, } ), }, "global_time": {"_default": 0.0}, "timestep": {"_default": self.parameters["time_step"]}, "next_update_time": { "_default": self.parameters["time_step"], "_updater": "set", "_divider": "set", }, }
[docs] def update_condition(self, timestep, states): """ See :py:meth:`~ecoli.processes.partition.Requester.update_condition`. """ if states["next_update_time"] <= states["global_time"]: if states["next_update_time"] < states["global_time"]: warnings.warn( f"{self.name} updated at t=" f"{states['global_time']} instead of t=" f"{states['next_update_time']}. Decrease the " "timestep for the global clock process for more " "accurate timekeeping." ) return True return False
[docs] def next_update(self, timestep, states): # Initialize indices if self.homeostatic_metabolite_idx is None: bulk_ids = states["bulk"]["id"] self.homeostatic_metabolite_idx = bulk_name_to_idx( self.homeostatic_metabolites, bulk_ids ) self.catalyst_idx = bulk_name_to_idx(self.catalyst_ids, bulk_ids) self.kinetics_enzymes_idx = bulk_name_to_idx( self.kinetic_constraint_enzymes, bulk_ids ) self.kinetics_substrates_idx = bulk_name_to_idx( self.kinetic_constraint_substrates, bulk_ids ) self.aa_idx = bulk_name_to_idx(self.aa_names, bulk_ids) unconstrained = states["environment"]["exchange_data"]["unconstrained"] constrained = states["environment"]["exchange_data"]["constrained"] new_allowed_exchange_uptake = set(unconstrained).union(constrained.keys()) new_exchange_molecules = set(self.exchange_molecules).union( set(new_allowed_exchange_uptake) ) # set up network flow model exchanges and uptakes if (new_exchange_molecules != self.exchange_molecules) or ( new_allowed_exchange_uptake != self.allowed_exchange_uptake ): self.network_flow_model.set_up_exchanges( new_exchange_molecules, new_allowed_exchange_uptake ) self.exchange_molecules = new_exchange_molecules self.allowed_exchange_uptake = new_allowed_exchange_uptake # extract the states from the ports homeostatic_metabolite_counts = counts( states["bulk"], self.homeostatic_metabolite_idx ) self.timestep = states["timestep"] # TODO (Cyrus) - Implement kinetic model # kinetic_flux_targets = states['kinetic_flux_targets'] # needed for kinetics current_catalyst_counts = counts(states["bulk"], self.catalyst_idx) translation_gtp = states["polypeptide_elongation"]["gtp_to_hydrolyze"] kinetic_enzyme_counts = counts( states["bulk"], self.kinetics_enzymes_idx ) # kinetics related kinetic_substrate_counts = counts(states["bulk"], self.kinetics_substrates_idx) # cell mass difference for calculating GAM if self.cell_mass is not None: self.previous_mass = self.cell_mass self.cell_mass = states["listeners"]["mass"]["cell_mass"] * units.fg dry_mass = states["listeners"]["mass"]["dry_mass"] * units.fg cell_volume = self.cell_mass / self.cell_density # Coefficient to convert between flux (mol/g DCW/hr) basis # and concentration (M) basis conversion_coeff = ( dry_mass / self.cell_mass * self.cell_density * self.timestep * units.s ) self.counts_to_molar = (1 / (self.n_avogadro * cell_volume)).asUnit(CONC_UNITS) # maintenance target flux_ngam = self.ngam * conversion_coeff flux_gtp = self.counts_to_molar * translation_gtp total_ngam = flux_ngam + flux_gtp ngam_target = total_ngam.asNumber() # binary kinetic targets - sum up enzyme counts for each reaction reaction_catalyst_counts = self.catalysis_matrix.dot(current_catalyst_counts) # Get reaction indices whose fluxes should be set to zero # because there are no enzymes to catalyze the rxn binary_kinetic_idx = np.where(reaction_catalyst_counts == 0)[0] # TODO: Figure out how to handle changing media ID ## Determine updates to concentrations depending on the current state doubling_time = self.nutrient_to_doubling_time.get( states["environment"]["media_id"], self.nutrient_to_doubling_time["minimal"] ) if self.include_ppgpp: # Sim does not include ppGpp regulation so do not update biomass # by RNA/protein ratio and include ppGpp concentration as a # homeostatic target conc_updates = self.getBiomassAsConcentrations(doubling_time) conc_updates[self.ppgpp_id] = self.getppGppConc(doubling_time).asUnit( CONC_UNITS ) else: # Sim includes ppGpp regulation so update biomass based on # RNA/protein ratio which is controlled by ppGpp and other growth # regulation rp_ratio = ( states["listeners"]["mass"]["rna_mass"] / states["listeners"]["mass"]["protein_mass"] ) conc_updates = self.getBiomassAsConcentrations( doubling_time, rp_ratio=rp_ratio ) if self.use_trna_charging: conc_updates.update( self.update_amino_acid_targets( self.counts_to_molar, states["polypeptide_elongation"]["aa_count_diff"], dict(zip(self.aa_names, counts(states["bulk_total"], self.aa_idx))), ) ) # Converted from units to make reproduction from listener data # accurate to model results (otherwise can have floating point diffs) conc_updates = { met: conc.asNumber(CONC_UNITS) for met, conc in conc_updates.items() } self.homeostatic_objective = {**self.homeostatic_objective, **conc_updates} homeostatic_concs = np.zeros(len(self.homeostatic_metabolite_idx)) for i, met in enumerate(self.homeostatic_metabolites): homeostatic_concs[i] = self.homeostatic_objective[met] homeostatic_metabolite_concentrations = ( homeostatic_metabolite_counts * self.counts_to_molar.asNumber() ) target_homeostatic_dmdt = ( homeostatic_concs - homeostatic_metabolite_concentrations ) / self.timestep aa_uptake_package = None if self.mechanistic_aa_transport: aa_in_media = np.array( [ states["boundary"]["external"][aa_name].to("mM").magnitude > self.import_constraint_threshold for aa_name in self.aa_environment_names ] ) aa_in_media[self.removed_aa_uptake] = False exchange_rates = ( states["polypeptide_elongation"]["aa_exchange_rates"] * timestep ).asNumber(CONC_UNITS / TIME_UNITS) aa_uptake_package = ( exchange_rates[aa_in_media], self.aa_exchange_names[aa_in_media], True, ) # kinetic constraints # TODO (Cyrus) eventually collect isozymes in single reactions, map # enzymes to reacts via stoich instead of kinetic_constraint_reactions kinetic_enzyme_conc = self.counts_to_molar * kinetic_enzyme_counts kinetic_substrate_conc = self.counts_to_molar * kinetic_substrate_counts kinetic_constraints = self.get_kinetic_constraints( kinetic_enzyme_conc, kinetic_substrate_conc ) # kinetic enzyme_kinetic_boundaries = ( ((self.timestep * units.s) * kinetic_constraints) .asNumber(CONC_UNITS) .astype(float) ) target_kinetic_values = enzyme_kinetic_boundaries[:, 1] target_kinetic_bounds = enzyme_kinetic_boundaries[:, [0, 2]] objective_weights = { "secretion": self.secretion_penalty_coeff, "efficiency": 0.0001, "kinetics": self.kinetic_objective_weight, "kinetics_in_range": self.kinetic_objective_weight_in_range, } solution: FlowResult = self.network_flow_model.solve( homeostatic_concs=homeostatic_metabolite_concentrations, homeostatic_dm_targets=target_homeostatic_dmdt, ngam_target=ngam_target, kinetic_targets=enzyme_kinetic_boundaries, binary_kinetic_idx=binary_kinetic_idx, objective_weights=objective_weights, aa_uptake_package=aa_uptake_package, ) self.reaction_fluxes = solution.velocities self.metabolite_dmdt = solution.dm_dt self.metabolite_exchange = solution.exchanges self.maintenance_flux = solution.maintenance_flux # recalculate flux concentrations to counts estimated_reaction_fluxes = self.concentrationToCounts(self.reaction_fluxes) metabolite_dmdt_counts = self.concentrationToCounts(self.metabolite_dmdt) target_kinetic_flux = self.concentrationToCounts(target_kinetic_values) maintenance_flux = self.concentrationToCounts(self.maintenance_flux) target_homeostatic_dmdt = self.concentrationToCounts(target_homeostatic_dmdt) estimated_exchange_array = self.concentrationToCounts(self.metabolite_exchange) target_kinetic_bounds = self.concentrationToCounts(target_kinetic_bounds) estimated_homeostatic_dmdt = metabolite_dmdt_counts[ self.network_flow_model.homeostatic_idx ] # Ensure counts do not go negative final_metabolite_counts = np.fmax( homeostatic_metabolite_counts + estimated_homeostatic_dmdt, 0 ) estimated_homeostatic_dmdt = ( final_metabolite_counts - homeostatic_metabolite_counts ) estimated_intermediate_dmdt = metabolite_dmdt_counts[ self.network_flow_model.intermediates_idx ] estimated_exchange_dmdt = { str(metabolite[:-3]): -exchange for metabolite, exchange in zip( self.network_flow_model.mets, estimated_exchange_array ) if metabolite in new_exchange_molecules } return { "bulk": [(self.homeostatic_metabolite_idx, estimated_homeostatic_dmdt)], "environment": { "exchange": estimated_exchange_dmdt # changes to external metabolites }, "listeners": { "fba_results": { "media_id": states["environment"]["media_id"], "conc_updates": [ conc_updates.get(m, 0) for m in self.conc_update_molecules ], "estimated_fluxes": estimated_reaction_fluxes, "estimated_homeostatic_dmdt": estimated_homeostatic_dmdt, "target_homeostatic_dmdt": target_homeostatic_dmdt, "target_kinetic_fluxes": target_kinetic_flux, "target_kinetic_bounds": target_kinetic_bounds, "estimated_exchange_dmdt": estimated_exchange_dmdt, "estimated_intermediate_dmdt": estimated_intermediate_dmdt, "maintenance_target": maintenance_flux, "solution_fluxes": solution.velocities, "solution_dmdt": solution.dm_dt, "reaction_catalyst_counts": reaction_catalyst_counts, "time_per_step": time.time(), "base_reaction_fluxes": self.reaction_mapping_matrix.dot( estimated_reaction_fluxes ), } }, "next_update_time": states["global_time"] + states["timestep"], }
[docs] def concentrationToCounts(self, concs): return np.rint( np.dot( concs, (CONC_UNITS / self.counts_to_molar * self.timestep).asNumber() ) ).astype(int)
[docs] def getBiomassAsConcentrations( self, doubling_time: Unum, rp_ratio: Optional[float] = None ) -> dict[str, Unum]: """ Caches the result of the sim_data function to improve performance since function requires computation but won't change for a given doubling_time. Args: doubling_time: doubling time of the cell to get the metabolite concentrations for Returns: Mapping from metabolite IDs to concentration targets """ # TODO (Cyrus) Repeats code found in processes/metabolism.py Should think of a way to share. minutes = doubling_time.asNumber(units.min) # hashable if (minutes, rp_ratio) not in self._biomass_concentrations: self._biomass_concentrations[(minutes, rp_ratio)] = ( self._getBiomassAsConcentrations(doubling_time, rp_ratio) ) return self._biomass_concentrations[(minutes, rp_ratio)]
[docs] def update_amino_acid_targets( self, counts_to_molar: Unum, count_diff: dict[str, float], amino_acid_counts: dict[str, int], ) -> dict[str, Unum]: """ Finds new amino acid concentration targets based on difference in supply and number of amino acids used in polypeptide_elongation. Skips updates to molecules defined in self.aa_targets_not_updated: - L-SELENOCYSTEINE: rare AA that led to high variability when updated Args: counts_to_molar: conversion from counts to molar Returns: ``{AA name (str): new target AA conc (float with mol/volume units)}`` """ if len(self.aa_targets): for aa, diff in count_diff.items(): if aa in self.aa_targets_not_updated: continue self.aa_targets[aa] += diff # TODO (Santiago): Improve targets update if self.aa_targets[aa] < 0: print( "Warning: updated amino acid target for " f"{aa} was negative - adjusted to be positive." ) self.aa_targets[aa] = 1 # First time step of a simulation so set target to current counts to # prevent concentration jumps between generations else: for aa, counts in amino_acid_counts.items(): if aa in self.aa_targets_not_updated: continue self.aa_targets[aa] = counts conc_updates = { aa: counts * counts_to_molar for aa, counts in self.aa_targets.items() } # Update linked metabolites that will follow an amino acid for met, link in self.linked_metabolites.items(): conc_updates[met] = ( conc_updates.get(link["lead"], 0 * counts_to_molar) * link["ratio"] ) return conc_updates
[docs] @dataclass class FlowResult: """Reaction velocities and dm/dt for an FBA solution, with metrics.""" velocities: Iterable[float] dm_dt: Iterable[float] exchanges: Iterable[float] maintenance_flux: float objective: float
[docs] class NetworkFlowModel: # TODO Documentation """A network flow model for estimating fluxes in the metabolic network based on network structure. Flow is mainly driven by precursor demand (homeostatic objective) and availability of nutrients.""" def __init__( self, stoich_arr: npt.NDArray[np.int64], metabolites: Iterable[str], reactions: Iterable[str], homeostatic_metabolites: Iterable[str], kinetic_reactions: Iterable[str], get_mass: Callable[[str], Unum], gam: float = 0, active_constraints_mask: Optional[npt.NDArray[np.bool_]] = None, ): self.S_orig = csr_matrix(stoich_arr.astype(np.int64)) self.S_exch = np.zeros(0) self.n_mets, self.n_orig_rxns = self.S_orig.shape self.mets = metabolites self.met_map = {metabolite: i for i, metabolite in enumerate(metabolites)} self.rxns = reactions self.rxn_map = {reaction: i for i, reaction in enumerate(reactions)} self.kinetic_rxn_idx = ( np.array([self.rxn_map[rxn] for rxn in kinetic_reactions]) if kinetic_reactions else None ) self.get_mass = get_mass self.gam = gam # steady state indices, secretion indices self.intermediates = list(set(self.mets) - set(homeostatic_metabolites)) self.intermediates_idx = np.array( [self.met_map[met] for met in self.intermediates] ) self.homeostatic_idx = np.array( [self.met_map[met] for met in homeostatic_metabolites] ) # TODO (Cyrus) - use name provided self.maintenance_idx = ( self.rxn_map["maintenance_reaction"] if "maintenance_reaction" in self.rxn_map else None ) self.active_constraints_mask = active_constraints_mask
[docs] def set_up_exchanges(self, exchanges: set[str], uptakes: set[str]): """Set up exchange reactions for the network flow model. Exchanges allow certain metabolites to have flow out of the system. Uptakes allow certain metabolites to also have flow into the system.""" all_exchanges = exchanges.copy() all_exchanges.update(uptakes) # All exchanges can secrete but only uptakes go in both directions self.S_exch = np.zeros((self.n_mets, len(exchanges) + len(uptakes))) self.exchanges = [] secretion_idx = [] exchange_masses = [] exch_idx = 0 for met in all_exchanges: exch_name = met + " exchange" met_idx = self.met_map[met] exch_mass = self.get_mass(met).asNumber(MASS_UNITS / COUNTS_UNITS) if met in uptakes: self.S_exch[met_idx, exch_idx] = 1 self.exchanges.append(exch_name) exchange_masses.append(exch_mass) exch_idx += 1 self.exchanges.append(exch_name + " rev") secretion_idx.append(exch_idx) exchange_masses.append(-exch_mass) self.S_exch[met_idx, exch_idx] = -1 exch_idx += 1 self.S_exch = cast(csr_matrix, csr_matrix(self.S_exch)) _, self.n_exch_rxns = self.S_exch.shape self.secretion_idx = np.array(secretion_idx, dtype=int) self.exchange_masses = np.array(exchange_masses)
[docs] def solve( self, homeostatic_concs: Iterable[float], homeostatic_dm_targets: Iterable[float], ngam_target: float = 0, kinetic_targets: Optional[npt.NDArray[np.float64]] = None, binary_kinetic_idx: Optional[list[int]] = None, objective_weights: Optional[Mapping[str, float]] = None, aa_uptake_package: Optional[Mapping[str, float]] = None, upper_flux_bound: float = 100, # ortools > 9.5 required for Python 3.11 but will only # get support in the 9/2023 release of cvxpy solver=cp.GLOP, ) -> FlowResult: """Solve the network flow model for fluxes and dm/dt values.""" # Mypy fixes objective_weights = cast(Mapping[str, float], objective_weights) # Convert to array homeostatic_concs = np.array(homeostatic_concs) homeostatic_dm_targets = np.array(homeostatic_dm_targets) target_fluxes = np.zeros(self.n_orig_rxns) if kinetic_targets is not None: target_fluxes[self.kinetic_rxn_idx] += kinetic_targets[:, 1] # set up variables v_diff_in_range = cp.Variable(self.n_orig_rxns) v_diff_outside_range = cp.Variable(self.n_orig_rxns) v = target_fluxes + v_diff_in_range + v_diff_outside_range e = cp.Variable(self.n_exch_rxns) dm = self.S_orig @ v + self.S_exch @ e exch = self.S_exch @ e total_maintenance = ngam_target + self.gam * e @ self.exchange_masses constr = [] constr.append(dm[self.intermediates_idx] == 0) if self.maintenance_idx is not None: constr.append(v[self.maintenance_idx] == total_maintenance) constr.append(v[self.maintenance_idx] >= ngam_target) # If enzymes not present, constrain rxn flux to 0 if binary_kinetic_idx is not None: if len(binary_kinetic_idx) > 0: constr.append(v[binary_kinetic_idx] == 0) constr.extend([v >= 0, v <= upper_flux_bound, e >= 0, e <= upper_flux_bound]) if aa_uptake_package: levels, molecules, force = aa_uptake_package for level, mol in zip(levels, molecules): exch_idx = self.exchanges.index(mol + " exchange") constr.append(e[exch_idx] == level) # Calculate target concs (current + delta) for denominator of objective # (target conc - actual conc) / (target conc) is the same as # (target delta - actual delta) / (target conc) homeostatic_target_concs = homeostatic_concs + homeostatic_dm_targets # Fix divide by zero homeostatic_target_concs[homeostatic_target_concs == 0] = 1 loss = 0 loss += cp.norm1( (dm[self.homeostatic_idx] - homeostatic_dm_targets) / homeostatic_target_concs ) if "secretion" in objective_weights: loss += objective_weights["secretion"] * cp.sum( e[self.secretion_idx] @ -self.exchange_masses[self.secretion_idx] ) if "kinetics" in objective_weights: # Mypy fixes kinetic_targets = cast(npt.NDArray[np.float64], kinetic_targets) # Fix divide by zero nonzero_kinetic_targets = kinetic_targets[:, 1].copy() nonzero_kinetic_targets[nonzero_kinetic_targets == 0] = 1 # Calculate lower and upper limit for flux diff lower_flux_diff = kinetic_targets[:, 0] - kinetic_targets[:, 1] upper_flux_diff = kinetic_targets[:, 2] - kinetic_targets[:, 1] constr.extend( [ v_diff_in_range[self.kinetic_rxn_idx] >= lower_flux_diff, v_diff_in_range[self.kinetic_rxn_idx] <= upper_flux_diff, ] ) # Heavily weight fluxes outside limits loss += objective_weights["kinetics"] * cp.norm1( (v_diff_outside_range[self.kinetic_rxn_idx] / nonzero_kinetic_targets)[ self.active_constraints_mask ] ) # Lightly weight fluxes in expected range loss += ( objective_weights["kinetics"] * objective_weights["kinetics_in_range"] * cp.norm1( (v_diff_in_range[self.kinetic_rxn_idx] / nonzero_kinetic_targets)[ self.active_constraints_mask ] ) ) p = cp.Problem(cp.Minimize(loss), constr) p.solve(solver=solver, verbose=False) if p.status != "optimal": raise ValueError( "Network flow model of metabolism did not " "converge to an optimal solution." ) velocities = np.array(v.value) dm_dt = np.array(dm.value) exchanges = np.array(exch.value) maintenance_flux = total_maintenance.value objective = p.value return FlowResult( velocities=velocities, dm_dt=dm_dt, exchanges=exchanges, maintenance_flux=maintenance_flux, objective=objective, )
def test_network_flow_model(): """Test the network flow model on a simple example, using only the homeostatic objective along with secretion and efficiency penalties.""" S_matrix = np.array([[-1, 1, 0], [0, -1, 1], [1, 0, -1]]).T metabolites = ["A", "B", "C"] reactions = ["r1", "r2", "r3"] homeostatic_metabolites = {"C": 1} exchanges = {"A"} uptakes = {"A"} model = NetworkFlowModel( stoich_arr=S_matrix, reactions=reactions, metabolites=metabolites, homeostatic_metabolites=list(homeostatic_metabolites.keys()), kinetic_reactions=None, get_mass=lambda _: 1 * units.g / units.mol, ) model.set_up_exchanges(exchanges=exchanges, uptakes=uptakes) solution: FlowResult = model.solve( homeostatic_concs=list(homeostatic_metabolites.values()), homeostatic_dm_targets=list(homeostatic_metabolites.values()), objective_weights={"secretion": 0.01, "efficiency": 0.0001}, upper_flux_bound=100, ) assert np.isclose( solution.velocities, np.array([1, 1, 0]) ).all(), "Network flow toy model did not converge to correct solution." # TODO (Cyrus) Add test for entire process if __name__ == "__main__": test_network_flow_model()