# Source code for reconstruction.ecoli.dataclasses.molecule_groups

"""
SimulationData moleculeGroups
"""

POLYMERIZED_FRAGMENT_PREFIX = "polymerized_"


class MoleculeGroups(object):
    """
    Helper class to extract molecule IDs of "special" groups of molecules. All
    values returned are lists of strings.
    """

    def __init__(self, raw_data, sim_data):
        """
        Build every molecule group and expose each one as an instance
        attribute.

        Args:
            raw_data: Object providing the ``rnas``, ``proteins`` and
                ``complexation_reactions`` iterables read by
                ``_build_molecule_groups``.
            sim_data: Object providing the ``*_code_to_id_ordered`` mappings
                and ``molecule_ids`` read by ``_build_molecule_groups``.
        """
        self._build_molecule_groups(raw_data, sim_data)

    def _build_molecule_groups(self, raw_data, sim_data):
        """
        Assemble the dictionary of molecule groups and merge it into
        ``self.__dict__`` so that each key becomes an attribute.

        Groups are either derived from the inputs (monomer/nucleotide IDs,
        rRNAs, ribosomal proteins, RNA polymerase subunits) or hard-coded
        lists of IDs. The six ``*_division`` groups are created empty here.

        Args:
            raw_data: Reads ``rnas`` (dicts with ``"id"`` and ``"type"``),
                ``proteins`` (dicts with ``"id"``) and
                ``complexation_reactions`` (dicts with a ``"stoichiometry"``
                mapping of molecule ID to coefficient).
            sim_data: Reads ``amino_acid_code_to_id_ordered``,
                ``ntp_code_to_id_ordered``, ``nmp_code_to_id_ordered``,
                ``dntp_code_to_id_ordered`` and ``molecule_ids``
                (``s30_full_complex``, ``s50_full_complex``, ``full_RNAP``).

        Raises:
            AssertionError: If a complexation reaction does not have exactly
                one product, or if a protein monomer is a subunit of both
                the 30S and the 50S complex.
        """
        aa_ids = list(sim_data.amino_acid_code_to_id_ordered.values())
        ntp_ids = list(sim_data.ntp_code_to_id_ordered.values())
        nmp_ids = list(sim_data.nmp_code_to_id_ordered.values())
        dntp_ids = list(sim_data.dntp_code_to_id_ordered.values())

        polymerized_aa_ids = [
            POLYMERIZED_FRAGMENT_PREFIX + aa_id for aa_id in aa_ids
        ]
        polymerized_ntp_ids = [
            POLYMERIZED_FRAGMENT_PREFIX + ntp_id for ntp_id in ntp_ids
        ]
        polymerized_dntp_ids = [
            POLYMERIZED_FRAGMENT_PREFIX + dntp_id for dntp_id in dntp_ids
        ]

        def tagged_rna_ids_with_prefix(prefix):
            """Return "[c]"-suffixed IDs of all RNAs whose ID starts with prefix."""
            return [
                rna["id"] + "[c]"
                for rna in raw_data.rnas
                if rna["id"].startswith(prefix)
            ]

        # Build list of rRNA IDs from raw data
        s30_16s_rRNA = tagged_rna_ids_with_prefix("RRS")
        s50_23s_rRNA = tagged_rna_ids_with_prefix("RRL")
        s50_5s_rRNA = tagged_rna_ids_with_prefix("RRF")

        # Build list of rRNA cistron IDs from raw data. The rRNA filter is
        # applied once; each operon then selects on the fourth character of
        # the ID.
        rRNA_cistron_ids = [
            rna["id"] for rna in raw_data.rnas if rna["type"] == "rRNA"
        ]

        def cistron_ids_with_letter(letters):
            """Return rRNA cistron IDs whose fourth character is in letters."""
            return [
                rna_id for rna_id in rRNA_cistron_ids if rna_id[3] in letters
            ]

        rrnA_rRNA = cistron_ids_with_letter(("A",))
        rrnB_rRNA = cistron_ids_with_letter(("B",))
        rrnC_rRNA = cistron_ids_with_letter(("C",))
        # IDs with "F" in the fourth position are grouped with rrnD
        # (presumably because that cistron lies in the rrnD operon; verify
        # against the raw data).
        rrnD_rRNA = cistron_ids_with_letter(("D", "F"))
        rrnE_rRNA = cistron_ids_with_letter(("E",))
        rrnG_rRNA = cistron_ids_with_letter(("G",))
        rrnH_rRNA = cistron_ids_with_letter(("H",))

        # Build list of ribosomal proteins from raw data
        monomer_ids = {monomer["id"] for monomer in raw_data.proteins}
        complex_id_to_subunit_ids = {}

        for rxn in raw_data.complexation_reactions:
            complex_ids = []
            subunit_ids = []

            # Molecules with a missing (None) or negative coefficient are
            # treated as subunits; everything else is the formed complex.
            for mol, v in rxn["stoichiometry"].items():
                if v is None or v < 0:
                    subunit_ids.append(mol)
                else:
                    complex_ids.append(mol)

            assert len(complex_ids) == 1, (
                "Expected exactly one complex per complexation reaction, "
                "found %d: %r" % (len(complex_ids), complex_ids)
            )
            complex_id_to_subunit_ids[complex_ids[0]] = subunit_ids

        def find_protein_subunits(molecule_id):
            """
            Recursive function to find all protein monomers that are
            subunits of a given molecule.
            """
            subunits = []

            if molecule_id in monomer_ids:
                subunits.append(molecule_id)
            elif molecule_id in complex_id_to_subunit_ids:
                for mol in complex_id_to_subunit_ids[molecule_id]:
                    subunits.extend(find_protein_subunits(mol))

            # Molecules that are neither monomers nor known complexes
            # contribute nothing.
            return subunits

        # [:-3] drops the last three characters of the complex ID
        # (presumably a compartment tag such as "[c]") before the lookup;
        # "[c]" is then appended to every monomer found.
        s30_proteins = [
            mol + "[c]"
            for mol in find_protein_subunits(
                sim_data.molecule_ids.s30_full_complex[:-3]
            )
        ]
        s50_proteins = [
            mol + "[c]"
            for mol in find_protein_subunits(
                sim_data.molecule_ids.s50_full_complex[:-3]
            )
        ]
        shared_proteins = set(s30_proteins) & set(s50_proteins)
        assert len(shared_proteins) == 0, (
            "Proteins found in both the 30S and 50S subunits: %r"
            % sorted(shared_proteins)
        )
        ribosomal_proteins = sorted(s30_proteins + s50_proteins)

        # Build list of RNA polymerase subunits from raw data
        RNAP_subunits = [
            mol + "[c]"
            for mol in find_protein_subunits(
                sim_data.molecule_ids.full_RNAP[:-3]
            )
        ]

        molecule_groups = {
            "amino_acids": aa_ids,
            "ntps": ntp_ids,
            "nmps": nmp_ids,
            "dntps": dntp_ids,
            "polymerized_amino_acids": polymerized_aa_ids,
            "polymerized_ntps": polymerized_ntp_ids,
            "polymerized_dntps": polymerized_dntp_ids,
            "polymerized_subunits": polymerized_aa_ids
            + polymerized_ntp_ids
            + polymerized_dntp_ids,
            "s30_proteins": s30_proteins,
            "s30_16s_rRNA": s30_16s_rRNA,
            "s50_protein_complexes": ["CPLX0-3956[c]"],
            "s50_proteins": s50_proteins,
            "s50_23s_rRNA": s50_23s_rRNA,
            "s50_5s_rRNA": s50_5s_rRNA,
            "rrnA_rRNA": rrnA_rRNA,
            "rrnB_rRNA": rrnB_rRNA,
            "rrnC_rRNA": rrnC_rRNA,
            "rrnD_rRNA": rrnD_rRNA,
            "rrnE_rRNA": rrnE_rRNA,
            "rrnG_rRNA": rrnG_rRNA,
            "rrnH_rRNA": rrnH_rRNA,
            "rrn_operons": [
                "rrnA_rRNA",
                "rrnB_rRNA",
                "rrnC_rRNA",
                "rrnD_rRNA",
                "rrnE_rRNA",
                "rrnG_rRNA",
                "rrnH_rRNA",
            ],
            "lipids": ["CPD-8260[c]", "CPD-12819[c]", "CPD-12824[c]"],
            "polyamines": [
                "GAMMA-GLUTAMYL-PUTRESCINE[c]",
                "PUTRESCINE[c]",
                "GLUTATHIONYLSPERMIDINE[c]",
                "SPERMIDINE[c]",
                "N1-ACETYLSPERMINE[c]",
                "SPERMINE[c]",
            ],
            # TODO: 'EG10245-MONOMER[c]' (DNAP III subunit tau) should be added
            # to the list of trimer subunits once frame-shifting proteins are
            # produced.
            "replisome_trimer_subunits": ["CPLX0-2361[c]", "CPLX0-3761[c]"],
            "replisome_monomer_subunits": [
                "CPLX0-3621[c]",
                "EG10239-MONOMER[c]",
                "EG11500-MONOMER[c]",
                "EG11412-MONOMER[c]",
            ],
            "exoRNases": [
                "EG11620-MONOMER[c]",
                "G7175-MONOMER[c]",
                "EG10858-MONOMER[c]",
                "EG10863-MONOMER[c]",
                "EG11259-MONOMER[c]",
                "CPLX0-3602[c]",
                "EG10746-MONOMER[c]",
                "G7842-MONOMER[c]",
                "EG10743-MONOMER[c]",
            ],
            "endoRNase_rnas": [
                "EG10856_RNA",
                "EG10857_RNA",
                "EG10859_RNA",
                "EG10860_RNA",
                "EG10861_RNA",
                "EG10862_RNA",
                "EG11299_RNA",
                "G7175_RNA",
                "G7365_RNA",
            ],
            "exoRNase_rnas": [
                "EG11620_RNA",
                "G7175_RNA",
                "EG10858_RNA",
                "EG10863_RNA",
                "EG11259_RNA",
                "EG11547_RNA",
                "EG10746_RNA",
                "G7842_RNA",
                "EG10743_RNA",
            ],
            "RNAP_subunits": RNAP_subunits,
            "ribosomal_proteins": ribosomal_proteins,
            "carbon_sources": ["GLC[p]", "ACET[p]", "SUC[p]"],
            # Common names of the seven rRNA operons
            "rRNA_operons": [
                "rrnA",
                "rrnB",
                "rrnC",
                "rrnD",
                "rrnE",
                "rrnG",
                "rrnH",
            ],
            # IDs of the genes that make up each of the seven rRNA operons
            "rrnA": ["EG30070", "EG30077", "EG30008", "EG30043", "EG30084"],
            "rrnB": ["EG30071", "EG30078", "EG30032", "EG30085"],
            "rrnC": ["EG30072", "EG30079", "EG30033", "EG30086"],
            "rrnD": [
                "EG30091",
                "EG30103",
                "EG30073",
                "EG30080",
                "EG30009",
                "EG30044",
                "EG30087",
            ],
            "rrnE": ["EG30074", "EG30081", "EG30034", "EG30088"],
            "rrnG": ["EG30075", "EG30082", "EG30035", "EG30089"],
            "rrnH": ["EG30076", "EG30083", "EG30010", "EG30045", "EG30090"],
        }

        # Initialize molecule groups for how molecules are split between two
        # daughter cells at cell division (populated later by InternalState)
        molecule_groups["bulk_molecules_binomial_division"] = []
        molecule_groups["bulk_molecules_equal_division"] = []
        molecule_groups["unique_molecules_active_ribosome_division"] = []
        molecule_groups["unique_molecules_RNA_division"] = []
        molecule_groups["unique_molecules_domain_index_division"] = []
        molecule_groups["unique_molecules_chromosomal_segment_division"] = []

        # Expose every group as an attribute of this instance.
        self.__dict__.update(molecule_groups)