Source code for reconstruction.ecoli.dataclasses.state.internal_state

"""
SimulationData state associated data

"""

from wholecell.utils import units

from reconstruction.ecoli.dataclasses.state.bulkMolecules import BulkMolecules
from reconstruction.ecoli.dataclasses.state.uniqueMolecules import UniqueMolecules

import numpy as np


[docs] class InternalState(object): """Internal State""" def __init__(self, raw_data, sim_data): self.bulk_molecules = BulkMolecules(raw_data, sim_data) self.unique_molecule = UniqueMolecules(raw_data, sim_data) self._build_bulk_molecules(sim_data) self._build_unique_molecules(sim_data) self._build_compartments(raw_data, sim_data)
[docs] def _build_bulk_molecules(self, sim_data): """ Add data (IDs and mass) for all classes of bulk molecules. """ all_bulk_molecule_ids = sim_data.getter.get_all_valid_molecules() all_bulk_molecule_ids_with_compartments, all_bulk_molecule_masses = ( self._build_bulk_molecule_specs(sim_data, all_bulk_molecule_ids) ) self.bulk_molecules.add_to_bulk_state( all_bulk_molecule_ids_with_compartments, all_bulk_molecule_masses ) sim_data.molecule_groups.bulk_molecules_binomial_division.extend( all_bulk_molecule_ids_with_compartments )
[docs] def _build_bulk_molecule_specs(self, sim_data, molecule_ids): """ Builds a list of molecule IDs with compartment tags and a corresponding array of molecular masses to add to the bulk state. Args: molecule_ids (List[str]): List of molecule IDs w/o compartment tags Returns: molecule_ids_with_compartments (List[str]): List of molecule IDs with compartment tags masses (np.ndarray): Array of molecular masses divided into submasses """ molecule_ids_with_compartments = [] masses = [] # Loop through each molecule species and associated compartments for molecule_id in molecule_ids: mw = sim_data.getter.get_submass_array(molecule_id).asNumber( units.g / units.mol ) for loc in sim_data.getter.get_compartment(molecule_id): molecule_ids_with_compartments.append("{}[{}]".format(molecule_id, loc)) masses.append(mw) masses = (units.g / units.mol) * np.array(masses) return molecule_ids_with_compartments, masses
[docs] def _build_unique_molecules(self, sim_data): """ Add data (name, mass, and attribute data structure) for all classes of unique molecules. """ # Set up dictionary for quick indexing of bulk molecule masses bulk_molecule_id_to_mass = { molecule_id: mass for (molecule_id, mass) in zip( self.bulk_molecules.bulk_data["id"], self.bulk_molecules.bulk_data["mass"], ) } # Add active RNA polymerase # The attributes of active RNA polymerases are given as: # - domain_index (32-bit int): Domain index of the chromosome domain # that the RNAP is bound to. This value is used to split the RNAPs at # cell division. # - coordinates (64-bit int): Location of the RNAP on the chromosome, # in base pairs from origin. # - is_forward (bool): True if RNAP is moving in the positive direction # of the genomic coordinates, False if RNAP is moving in the negative # direction. This is determined by the orientation of the TU that the # RNAP is transcribing. RNAP_mass = bulk_molecule_id_to_mass[sim_data.molecule_ids.full_RNAP] RNAP_attributes = { "domain_index": "i4", "coordinates": "i8", "is_forward": "?", } self.unique_molecule.add_to_unique_state( "active_RNAP", RNAP_attributes, RNAP_mass ) # RNAPs are divided based on the index of the chromosome domain they # are bound to sim_data.molecule_groups.unique_molecules_domain_index_division.append( "active_RNAP" ) # Add RNAs # This molecule represents all RNAs that should be represented as # unique molecules. These RNAs include all partially transcribed RNAs # and fully transcribed mRNAs. The attributes of RNAs are given as: # - TU_index (64-bit int): Index of the transcription unit that the # RNA molecule is representing. Determines the sequence and the length # of the fully elongated RNA. # - transcript_length (64-bit int): Current length of the RNA. # - is_mRNA (bool): True if RNA represents an mRNA molecule. # - is_full_transcript (bool): True if RNA is fully transcribed and # released from the RNA polymerase, False if RNA is being actively # transcribed. This cannot be True if 'is_mRNA' is False, since all # non-mRNAs are represented as bulk molecules when they are fully # transcribed. # - can_translate (bool): True if the 5' end of the mRNA molecule is # available for translation. This flag is used only for mRNA molecules. # - RNAP_index (64-bit int): Unique index of the RNA polymerase that is # synthesizing the RNA or have synthesized the mRNA. For fully # transcribed mRNAs that are added at initialization, this attribute # is set to -1. RNA_mass = (units.g / units.mol) * np.zeros_like(RNAP_mass) RNA_attributes = { "TU_index": "i8", "transcript_length": "i8", "is_mRNA": "?", "is_full_transcript": "?", "can_translate": "?", "RNAP_index": "i8", } self.unique_molecule.add_to_unique_state("RNA", RNA_attributes, RNA_mass) # Fully transcribed mRNAs are divided binomially, partial transcripts # are divided based on which chromosome domains their associated RNAPs # are bound to sim_data.molecule_groups.unique_molecules_RNA_division.append("RNA") # Add active ribosomes # The attributes of active ribosomes are given as: # - protein_index (64-bit int): Index of the protein monomer that the # ribosome is translating # - peptide_length (64-bit int): Current length of the polypeptide that # the ribosome is translating, in number of amino acids # - mRNA_index (64-bit int): Unique index of the mRNA that the ribosome # is bound to # - pos_on_mRNA (64-bit int): Location of the ribosome on the bound # mRNA, in number of bases from the transcription start site ribosome_30S_mass = bulk_molecule_id_to_mass[ sim_data.molecule_ids.s30_full_complex ] ribosome_50S_mass = bulk_molecule_id_to_mass[ sim_data.molecule_ids.s50_full_complex ] ribosome_mass = ribosome_30S_mass + ribosome_50S_mass ribosome_attributes = { "protein_index": "i8", "peptide_length": "i8", "mRNA_index": "i8", "pos_on_mRNA": "i8", } self.unique_molecule.add_to_unique_state( "active_ribosome", ribosome_attributes, ribosome_mass ) # Active ribosomes are divided such that they always follow the mRNA # molecule they are bound to sim_data.molecule_groups.unique_molecules_active_ribosome_division.append( "active_ribosome" ) # Add full chromosomes # One full chromosome molecule is added when chromosome replication is # complete, and sets cell division to happen after a length of time # specified by the D period (if D_PERIOD_DIVISION is set to True). # The 'has_triggered_division' attribute is initially set to False, and # is reset to True when division_time was reached and the cell has # divided. The 'domain_index' keeps track of the index of the oldest # chromosome domain that is part of the full chromosome. full_chromosome_mass = (units.g / units.mol) * np.zeros_like(RNAP_mass) full_chromosome_mass[sim_data.submass_name_to_index["DNA"]] = ( sim_data.getter.get_mass(sim_data.molecule_ids.full_chromosome) ) full_chromosome_attributes = { "division_time": "f8", "has_triggered_division": "?", "domain_index": "i4", } self.unique_molecule.add_to_unique_state( "full_chromosome", full_chromosome_attributes, full_chromosome_mass ) # Full chromosomes are divided based on their domain index sim_data.molecule_groups.unique_molecules_domain_index_division.append( "full_chromosome" ) # Add chromosome domains # Chromosome domains are zero-mass molecules that accounts for the # structures of replicating chromosomes. Each replication initiation # event creates two new chromosome domains that are given a unique # integer 'domain_index'. These two new domains are child domains of # the original domain that the origin belonged to. chromosome_domain_mass = (units.g / units.mol) * np.zeros_like(RNAP_mass) chromosome_domain_attributes = { "domain_index": "i4", "child_domains": ("i4", 2), } self.unique_molecule.add_to_unique_state( "chromosome_domain", chromosome_domain_attributes, chromosome_domain_mass ) # Chromosome domains are divided based on their domain index sim_data.molecule_groups.unique_molecules_domain_index_division.append( "chromosome_domain" ) # Add active replisomes # Note that the replisome does not functionally replicate the # chromosome, but instead keeps track of the mass associated with # essential subunits of the replisome complex (if the mechanistic # replisome option is turned on). The list of essential subunits and # their stoichiometry were taken from Reyes-Lamothe et al., 2010. replisome_mass = (units.g / units.mol) * np.zeros_like(RNAP_mass) replisome_attributes = { "domain_index": "i4", "right_replichore": "?", "coordinates": "i8", } self.unique_molecule.add_to_unique_state( "active_replisome", replisome_attributes, replisome_mass ) # Active replisomes are divided based on their domain index sim_data.molecule_groups.unique_molecules_domain_index_division.append( "active_replisome" ) # Add origins of replication # Note that origins are conceptual molecules and have zero mass. The # chromosomeIndexes of oriC's determine the chromosomeIndexes of the # new partial chromosomes and replisomes initiated on the same oriC. originMass = (units.g / units.mol) * np.zeros_like(RNAP_mass) originAttributes = { "domain_index": "i4", } self.unique_molecule.add_to_unique_state("oriC", originAttributes, originMass) # oriC's are divided based on their domain index sim_data.molecule_groups.unique_molecules_domain_index_division.append("oriC") # Add promoters # Promoters are sequences on the DNA where RNA polymerases bind to and # initiate transcription. They can also be bound to transcription # factors(TFs), if the transcription unit associated with the promoter # is regulated by TFs. The promoter itself has zero mass but can hold # the mass of the transcription factor that it is bound to. Its # attributes are given as: # - TU_index (64-bit int): Index of the transcription unit that # the promoter is associated with. This determines which TFs the # promoter can bind to, and how the transcription probability is # calculated. # - coordinates (64-bit int): Location of the promoter on the # chromosome, in base pairs from origin. This value does not change # after the molecule is initialized. # - domain_index (32-bit int): Domain index of the chromosome domain # that the promoter belongs to. This value is used to split the # promoters at cell division. # - bound_TF (boolean array of length n_tf): A boolean array that # shows which TFs the promoter is bound to. Note that one promoter can # bind to multiple TFs. n_tf = len(sim_data.process.transcription_regulation.tf_ids) promoter_mass = (units.g / units.mol) * np.zeros_like(RNAP_mass) promoter_attributes = { "TU_index": "i8", "coordinates": "i8", "domain_index": "i4", "bound_TF": ("?", n_tf), } self.unique_molecule.add_to_unique_state( "promoter", promoter_attributes, promoter_mass ) # Promoters are divided based on their domain index sim_data.molecule_groups.unique_molecules_domain_index_division.append( "promoter" ) # Add genes # Genes are sequences on the DNA that encode for particular cistrons. # Its attributes are given as: # - cistron_index (64-bit int): Index of the cistron that the gene # encodes for. # - coordinates (64-bit int): Location of the gene on the chromosome, in # base pairs from origin. This value does not change after the molecule # is initialized. # - domain_index (32-bit int): Domain index of the chromosome domain # that the gene belongs to. This value is used to split the genes at # cell division. gene_mass = (units.g / units.mol) * np.zeros_like(RNAP_mass) gene_attributes = { "cistron_index": "i8", "coordinates": "i8", "domain_index": "i4", } self.unique_molecule.add_to_unique_state("gene", gene_attributes, gene_mass) # Genes are divided based on their domain index sim_data.molecule_groups.unique_molecules_domain_index_division.append("gene") # Add chromosomal segments # Chromosomal segments are segments of DNA that are topologically # constrained, such that the changes in the linking number of the # segment do not spread beyond its boundaries. For a relaxed DNA double # helix, the linking number is proportional to the length of the # chromosomal segment. Currently only active RNA polymerases and # replisomes serve as segment boundaries. Its attributes are given as: # - boundary_molecule_indexes (pair of 64-bit ints): Unique indexes # of molecules at the boundaries of the chromosomal segment # - boundary_coordinates (pair of 64-bit ints): Coordinates of the # boundaries of the chromosomal segment # - domain_index (32-bit int): Domain index of the chromosome domain # that the segment belongs to. # - linking_number (64-bit float): Linking number of the chromosomal # segment. chromosomal_segment_mass = (units.g / units.mol) * np.zeros_like(RNAP_mass) chromosomal_segment_attributes = { "boundary_molecule_indexes": ("i8", 2), "boundary_coordinates": ("i8", 2), "domain_index": "i4", "linking_number": "f8", } self.unique_molecule.add_to_unique_state( "chromosomal_segment", chromosomal_segment_attributes, chromosomal_segment_mass, ) # Chromosomal segments are divided based on their domain indexes, but # division occurs after all chromosome-bound molecules are divided to # properly reset the boundary_molecule_indexes attribute sim_data.molecule_groups.unique_molecules_chromosomal_segment_division.append( "chromosomal_segment" ) # Add DnaA boxes # DnaA boxes are 9-base sequence motifs on the DNA that bind to the # protein DnaA. Except for DnaA boxes close to the origin, these boxes # serve no functional role in replication initiation, but can # effectively titrate away free DnaA molecules and control its # concentration. The molecule itself has zero mass but it can hold the # mass of the DnaA protein that it is bound to. Its attributes are # given as: # - coordinates (64-bit int): Location of the middle base (5th base) of # the DnaA box, in base pairs from origin. This value does not change # after the molecule is initialized. # - domain_index (32-bit int): Domain index of the chromosome domain # that the DnaA box belongs to. This value is used to allocate DnaA # boxes to the two daughter cells at cell division. # - DnaA_bound (boolean): True if bound to a DnaA protein, False if not DnaA_box_mass = (units.g / units.mol) * np.zeros_like(RNAP_mass) DnaA_box_attributes = { "coordinates": "i8", "domain_index": "i4", "DnaA_bound": "?", } self.unique_molecule.add_to_unique_state( "DnaA_box", DnaA_box_attributes, DnaA_box_mass ) # DnaA boxes are divided based on their domain index sim_data.molecule_groups.unique_molecules_domain_index_division.append( "DnaA_box" )
[docs] def _build_compartments(self, raw_data, sim_data): _ = sim_data compartmentData = np.empty( len(raw_data.compartments), dtype=[("id", "U20"), ("compartmentAbbreviation", "U1")], ) compartmentData["id"] = [x["id"] for x in raw_data.compartments] compartmentData["compartmentAbbreviation"] = [ x["abbrev"] for x in raw_data.compartments ] self.compartments = compartmentData