Source code for ecoli.processes.rna_degradation

"""
===============
RNA Degradation
===============

Mathematical formulations

* ``dr/dt = Kb - kcatEndoRNase * EndoRNase * r/Km / (1 + Sum(r/Km))``

where

* r = RNA counts
* Kb = RNA production given a RNAP synthesis rate
* kcatEndoRNase = enzymatic activity for EndoRNases
* Km = Michaelis-Menten constants fitted to recapitulate first-order
* RNA decay: ``kd * r = kcatEndoRNase * EndoRNase * r/Km / (1 + sum(r/Km))``

This sub-model encodes molecular simulation of RNA degradation as two main
steps guided by RNases, "endonucleolytic cleavage" and "exonucleolytic
digestion":

1. Compute total counts of RNA to be degraded (D) and total capacity for
   endo-cleavage (C) at each time point
2. Evaluate C and D. If C > D, then define a fraction of active endoRNases
3. Dissect RNA degraded into different species (mRNA, tRNA, and rRNA) by
   accounting endoRNases specificity
4. Update RNA fragments (assumption: fragments are represented as a pool of
   nucleotides) created because of endonucleolytic cleavage
5. Compute total capacity of exoRNases and determine fraction of nucleotides
   that can be digested
6. Update pool of metabolites (H and H2O) created because of exonucleolytic
   digestion
"""

import numpy as np

from ecoli.library.schema import (
    bulk_name_to_idx,
    counts,
    attrs,
    numpy_schema,
    listener_schema,
)

from wholecell.utils import units

from ecoli.processes.registries import topology_registry
from ecoli.processes.partition import PartitionedProcess


# Register default topology for this process, associating it with process name
NAME = "ecoli-rna-degradation"
TOPOLOGY = {
    "bulk": ("bulk",),
    "RNAs": ("unique", "RNA"),
    "active_ribosome": ("unique", "active_ribosome"),
    "listeners": ("listeners",),
    "timestep": ("timestep",),
}
topology_registry.register(NAME, TOPOLOGY)



[docs]
class RnaDegradation(PartitionedProcess):
    """RNA Degradation PartitionedProcess"""

    name = NAME
    topology = TOPOLOGY
    defaults = {
        "rna_ids": [],
        "mature_rna_ids": [],
        "cistron_ids": [],
        "cistron_tu_mapping_matrix": [],
        "mature_rna_cistron_indexes": [],
        "all_rna_ids": [],
        "n_total_RNAs": 0,
        "n_avogadro": 0.0,
        "cell_density": 1100 * units.g / units.L,
        "endoRNase_ids": [],
        "exoRNase_ids": [],
        "kcat_exoRNase": np.array([]) / units.s,
        "Kcat_endoRNases": np.array([]) / units.s,
        "charged_trna_names": [],
        "uncharged_trna_indexes": [],
        "rna_deg_rates": [],
        "is_mRNA": np.array([]),
        "is_rRNA": np.array([]),
        "is_tRNA": np.array([]),
        "is_miscRNA": np.array([]),
        "degrade_misc": False,
        "rna_lengths": np.array([]),
        "nt_counts": np.array([[]]),
        "polymerized_ntp_ids": [],
        "water_id": "h2o",
        "ppi_id": "ppi",
        "proton_id": "h+",
        "nmp_ids": [],
        "rrfa_idx": 0,
        "rrla_idx": 0,
        "rrsa_idx": 0,
        "ribosome30S": "ribosome30S",
        "ribosome50S": "ribosome50S",
        "Kms": np.array([]) * units.mol / units.L,
        "seed": 0,
        "emit_unique": False,
    }

    def __init__(self, parameters=None):
        super().__init__(parameters)
        self.rna_ids = self.parameters["rna_ids"]
        self.mature_rna_ids = self.parameters["mature_rna_ids"]
        self.n_transcribed_rnas = len(self.rna_ids)
        self.mature_rna_exists = len(self.mature_rna_ids) > 0
        self.cistron_ids = self.parameters["cistron_ids"]
        self.cistron_tu_mapping_matrix = self.parameters["cistron_tu_mapping_matrix"]
        self.mature_rna_cistron_indexes = self.parameters["mature_rna_cistron_indexes"]
        self.all_rna_ids = self.parameters["all_rna_ids"]
        self.n_total_RNAs = self.parameters["n_total_RNAs"]

        # Load constants
        self.n_avogadro = self.parameters["n_avogadro"]
        self.cell_density = self.parameters["cell_density"]

        # Load RNase kinetic data
        self.endoRNase_ids = self.parameters["endoRNase_ids"]
        self.exoRNase_ids = self.parameters["exoRNase_ids"]
        self.kcat_exoRNase = self.parameters["kcat_exoRNase"]
        self.Kcat_endoRNases = self.parameters["Kcat_endoRNases"]

        # Load information about uncharged/charged tRNA
        self.uncharged_trna_indexes = self.parameters["uncharged_trna_indexes"]
        self.charged_trna_names = self.parameters["charged_trna_names"]

        # Load first-order RNA degradation rates
        # (estimated by mRNA half-life data)
        self.rna_deg_rates = self.parameters["rna_deg_rates"]

        self.is_mRNA = self.parameters["is_mRNA"]
        self.is_rRNA = self.parameters["is_rRNA"]
        self.is_tRNA = self.parameters["is_tRNA"]

        # NEW to vivarium-ecoli
        self.is_miscRNA = self.parameters["is_miscRNA"]
        self.degrade_misc = self.parameters["degrade_misc"]

        self.rna_lengths = self.parameters["rna_lengths"]
        self.nt_counts = self.parameters["nt_counts"]

        # Build stoichiometric matrix
        self.polymerized_ntp_ids = self.parameters["polymerized_ntp_ids"]
        self.nmp_ids = self.parameters["nmp_ids"]
        self.water_id = self.parameters["water_id"]
        self.ppi_id = self.parameters["ppi_id"]
        self.proton_id = self.parameters["proton_id"]

        self.end_cleavage_metabolite_ids = self.polymerized_ntp_ids + [
            self.water_id,
            self.ppi_id,
            self.proton_id,
        ]
        nmp_idx = list(range(4))
        water_idx = self.end_cleavage_metabolite_ids.index(self.water_id)
        ppi_idx = self.end_cleavage_metabolite_ids.index(self.ppi_id)
        proton_idx = self.end_cleavage_metabolite_ids.index(self.proton_id)
        self.endo_degradation_stoich_matrix = np.zeros(
            (len(self.end_cleavage_metabolite_ids), self.n_total_RNAs), np.int64
        )
        self.endo_degradation_stoich_matrix[nmp_idx, :] = self.nt_counts.T
        self.endo_degradation_stoich_matrix[water_idx, :] = 0
        self.endo_degradation_stoich_matrix[ppi_idx, :] = 1
        self.endo_degradation_stoich_matrix[proton_idx, :] = 0

        # Load Michaelis-Menten constants fitted to recapitulate
        # first-order RNA decay model
        self.Kms = self.parameters["Kms"]

        self.seed = self.parameters["seed"]
        self.random_state = np.random.RandomState(seed=self.seed)

        # Numpy indices for bulk molecules
        self.water_idx = None


[docs]
    def ports_schema(self):
        return {
            "bulk": numpy_schema("bulk"),
            "active_ribosome": numpy_schema(
                "active_ribosome", emit=self.parameters["emit_unique"]
            ),
            "RNAs": numpy_schema("RNAs", emit=self.parameters["emit_unique"]),
            "listeners": {
                "mass": listener_schema({"cell_mass": 0.0, "dry_mass": 0.0}),
                "rna_degradation_listener": listener_schema(
                    {
                        "fraction_active_endornases": 0.0,
                        "diff_relative_first_order_decay": 0.0,
                        "fract_endo_rrna_counts": 0.0,
                        "count_rna_degraded": (
                            [0] * len(self.all_rna_ids),
                            self.all_rna_ids,
                        ),
                        "count_RNA_degraded_per_cistron": (
                            [0] * len(self.cistron_ids),
                            self.cistron_ids,
                        ),
                        "nucleotides_from_degradation": 0,
                        "fragment_bases_digested": 0,
                    }
                ),
            },
            "timestep": {"_default": self.parameters["time_step"]},
        }



[docs]
    def calculate_request(self, timestep, states):
        if self.water_idx is None:
            bulk_ids = states["bulk"]["id"]
            self.charged_trna_idx = bulk_name_to_idx(self.charged_trna_names, bulk_ids)
            self.bulk_rnas_idx = bulk_name_to_idx(self.all_rna_ids, bulk_ids)
            self.nmps_idx = bulk_name_to_idx(self.nmp_ids, bulk_ids)
            self.fragment_metabolites_idx = bulk_name_to_idx(
                self.end_cleavage_metabolite_ids, bulk_ids
            )
            self.fragment_bases_idx = bulk_name_to_idx(
                self.polymerized_ntp_ids, bulk_ids
            )
            self.endoRNase_idx = bulk_name_to_idx(self.endoRNase_ids, bulk_ids)
            self.exoRNase_idx = bulk_name_to_idx(self.exoRNase_ids, bulk_ids)
            self.water_idx = bulk_name_to_idx(self.water_id, bulk_ids)
            self.proton_idx = bulk_name_to_idx(self.proton_id, bulk_ids)

        # Compute factor that convert counts into concentration, and vice versa
        cell_mass = states["listeners"]["mass"]["cell_mass"] * units.fg
        cell_volume = cell_mass / self.cell_density
        counts_to_molar = 1 / (self.n_avogadro * cell_volume)

        # Get total counts of RNAs including free rRNAs, uncharged and charged tRNAs, and
        # active (translatable) unique mRNAs
        bulk_RNA_counts = counts(states["bulk"], self.bulk_rnas_idx)
        bulk_RNA_counts[self.uncharged_trna_indexes] += counts(
            states["bulk"], self.charged_trna_idx
        )

        TU_index, can_translate, is_full_transcript = attrs(
            states["RNAs"], ["TU_index", "can_translate", "is_full_transcript"]
        )

        TU_index_translatable_mRNAs = TU_index[can_translate]
        unique_RNA_counts = np.bincount(
            TU_index_translatable_mRNAs, minlength=self.n_total_RNAs
        )
        total_RNA_counts = bulk_RNA_counts + unique_RNA_counts

        # Compute RNA concentrations
        rna_conc_molar = counts_to_molar * total_RNA_counts

        # Get counts of endoRNases
        endoRNase_counts = counts(states["bulk"], self.endoRNase_idx)
        total_kcat_endoRNase = units.dot(self.Kcat_endoRNases, endoRNase_counts)

        # Calculate the fraction of active endoRNases for each RNA based on
        # Michaelis-Menten kinetics
        frac_endoRNase_saturated = (
            rna_conc_molar / self.Kms / (1 + units.sum(rna_conc_molar / self.Kms))
        ).asNumber()

        # Calculate difference in degradation rates from first-order decay
        # and the number of EndoRNases per one molecule of RNA
        total_endoRNase_counts = np.sum(endoRNase_counts)
        diff_relative_first_order_decay = units.sum(
            units.abs(
                self.rna_deg_rates * total_RNA_counts
                - total_kcat_endoRNase * frac_endoRNase_saturated
            )
        )
        endoRNase_per_rna = total_endoRNase_counts / np.sum(total_RNA_counts)

        requests = {"listeners": {"rna_degradation_listener": {}}}
        requests["listeners"]["rna_degradation_listener"][
            "fraction_active_endoRNases"
        ] = np.sum(frac_endoRNase_saturated)
        requests["listeners"]["rna_degradation_listener"][
            "diff_relative_first_order_decay"
        ] = diff_relative_first_order_decay.asNumber()
        requests["listeners"]["rna_degradation_listener"]["fract_endo_rrna_counts"] = (
            endoRNase_per_rna
        )

        # Dissect RNAse specificity into mRNA, tRNA, and rRNA
        # NEW to vivarium-ecoli: Degrade miscRNAs and mRNAs together
        if self.degrade_misc:
            is_transient_rna = self.is_mRNA | self.is_miscRNA
            mrna_specificity = np.dot(frac_endoRNase_saturated, is_transient_rna)
        else:
            mrna_specificity = np.dot(frac_endoRNase_saturated, self.is_mRNA)
        trna_specificity = np.dot(frac_endoRNase_saturated, self.is_tRNA)
        rrna_specificity = np.dot(frac_endoRNase_saturated, self.is_rRNA)

        n_total_mrnas_to_degrade = self._calculate_total_n_to_degrade(
            states["timestep"], mrna_specificity, total_kcat_endoRNase
        )
        n_total_trnas_to_degrade = self._calculate_total_n_to_degrade(
            states["timestep"], trna_specificity, total_kcat_endoRNase
        )
        n_total_rrnas_to_degrade = self._calculate_total_n_to_degrade(
            states["timestep"], rrna_specificity, total_kcat_endoRNase
        )

        # Compute RNAse specificity
        rna_specificity = frac_endoRNase_saturated / np.sum(frac_endoRNase_saturated)

        # Boolean variable that tracks existence of each RNA
        rna_exists = (total_RNA_counts > 0).astype(np.int64)

        # Compute degradation probabilities of each RNA: for mRNAs and rRNAs, this
        # is based on the specificity of each mRNA. For tRNAs and rRNAs,
        # this is distributed evenly.
        if self.degrade_misc:
            mrna_deg_probs = (
                1.0
                / np.dot(rna_specificity, is_transient_rna * rna_exists)
                * rna_specificity
                * is_transient_rna
                * rna_exists
            )
        else:
            mrna_deg_probs = (
                1.0
                / np.dot(rna_specificity, self.is_mRNA * rna_exists)
                * rna_specificity
                * self.is_mRNA
                * rna_exists
            )
        rrna_deg_probs = (
            1.0
            / np.dot(rna_specificity, self.is_rRNA * rna_exists)
            * rna_specificity
            * self.is_rRNA
            * rna_exists
        )
        trna_deg_probs = (
            1.0 / np.dot(self.is_tRNA, rna_exists) * self.is_tRNA * rna_exists
        )

        # Mask RNA counts into each class of RNAs
        if self.degrade_misc:
            mrna_counts = total_RNA_counts * is_transient_rna
        else:
            mrna_counts = total_RNA_counts * self.is_mRNA
        trna_counts = total_RNA_counts * self.is_tRNA
        rrna_counts = total_RNA_counts * self.is_rRNA

        # Determine number of individual RNAs to be degraded for each class
        # of RNA.
        n_mrnas_to_degrade = self._get_rnas_to_degrade(
            n_total_mrnas_to_degrade, mrna_deg_probs, mrna_counts
        )
        n_trnas_to_degrade = self._get_rnas_to_degrade(
            n_total_trnas_to_degrade, trna_deg_probs, trna_counts
        )
        n_rrnas_to_degrade = self._get_rnas_to_degrade(
            n_total_rrnas_to_degrade, rrna_deg_probs, rrna_counts
        )
        n_RNAs_to_degrade = n_mrnas_to_degrade + n_trnas_to_degrade + n_rrnas_to_degrade

        # Bulk RNAs (tRNAs and rRNAs) are degraded immediately. Unique RNAs
        # (mRNAs) are immediately deactivated (becomes unable to bind
        # ribosomes), but not degraded until transcription is finished and the
        # mRNA becomes a full transcript to simplify the transcript elongation
        # process.
        n_bulk_RNAs_to_degrade = n_RNAs_to_degrade.copy()
        n_bulk_RNAs_to_degrade[self.is_mRNA.astype(bool)] = 0
        self.n_unique_RNAs_to_deactivate = n_RNAs_to_degrade.copy()
        self.n_unique_RNAs_to_deactivate[np.logical_not(self.is_mRNA.astype(bool))] = 0

        requests.setdefault("bulk", []).extend(
            [
                (self.bulk_rnas_idx, n_bulk_RNAs_to_degrade),
                (
                    self.fragment_bases_idx,
                    counts(states["bulk"], self.fragment_bases_idx),
                ),
            ]
        )

        # Calculate the amount of water required for total RNA hydrolysis by
        # endo and exonucleases. We first calculate the number of unique RNAs
        # that should be degraded at this timestep.
        self.unique_mRNAs_to_degrade = np.logical_and(
            np.logical_not(can_translate), is_full_transcript
        )
        self.n_unique_RNAs_to_degrade = np.bincount(
            TU_index[self.unique_mRNAs_to_degrade], minlength=self.n_total_RNAs
        )

        # Assuming complete hydrolysis for now. Note that one additional water
        # molecule is needed for each RNA to hydrolyze the 5' diphosphate.
        water_for_degraded_rnas = np.dot(
            n_bulk_RNAs_to_degrade + self.n_unique_RNAs_to_degrade, self.rna_lengths
        )
        water_for_fragments = counts(states["bulk"], self.fragment_bases_idx).sum()
        requests["bulk"].append(
            (self.water_idx, water_for_degraded_rnas + water_for_fragments)
        )
        return requests



[docs]
    def evolve_state(self, timestep, states):
        # Get vector of numbers of RNAs to degrade for each RNA species
        n_degraded_bulk_RNA = counts(states["bulk"], self.bulk_rnas_idx)
        n_degraded_unique_RNA = self.n_unique_RNAs_to_degrade
        n_degraded_RNA = n_degraded_bulk_RNA + n_degraded_unique_RNA

        # Deactivate and degrade unique RNAs
        TU_index, can_translate = attrs(states["RNAs"], ["TU_index", "can_translate"])
        can_translate = can_translate.copy()
        n_deactivated_unique_RNA = self.n_unique_RNAs_to_deactivate

        # Deactive unique RNAs
        non_zero_deactivation = n_deactivated_unique_RNA > 0

        for index, n_degraded in zip(
            np.arange(n_deactivated_unique_RNA.size)[non_zero_deactivation],
            n_deactivated_unique_RNA[non_zero_deactivation],
        ):
            # Get mask for translatable mRNAs belonging to the degraded species
            mask = np.logical_and(TU_index == index, can_translate)

            # Choose n_degraded indexes randomly to deactivate
            can_translate[
                self.random_state.choice(
                    size=n_degraded, a=np.where(mask)[0], replace=False
                )
            ] = False

        count_RNA_degraded_per_cistron = self.cistron_tu_mapping_matrix.dot(
            n_degraded_RNA[: self.n_transcribed_rnas]
        )
        # Add degraded counts from mature RNAs
        if self.mature_rna_exists:
            count_RNA_degraded_per_cistron[self.mature_rna_cistron_indexes] += (
                n_degraded_RNA[self.n_transcribed_rnas :]
            )

        update = {
            "listeners": {
                "rna_degradation_listener": {
                    "count_rna_degraded": n_degraded_RNA,
                    "nucleotides_from_degradation": np.dot(
                        n_degraded_RNA, self.rna_lengths
                    ),
                    "count_RNA_degraded_per_cistron": count_RNA_degraded_per_cistron,
                }
            },
            # Degrade bulk RNAs
            "bulk": [(self.bulk_rnas_idx, -n_degraded_bulk_RNA)],
            "RNAs": {
                "set": {"can_translate": can_translate},
                # Degrade full mRNAs that are inactive
                "delete": np.where(self.unique_mRNAs_to_degrade)[0],
            },
        }

        # Modeling assumption: Once a RNA is cleaved by an endonuclease its
        # resulting nucleotides are lumped together as "polymerized fragments".
        # These fragments can carry over from previous timesteps. We are also
        # assuming that during endonucleolytic cleavage the 5'terminal
        # phosphate is removed. This is modeled as all of the fragments being
        # one long linear chain of "fragment bases".

        # Example:
        # PPi-Base-PO4(-)-Base-PO4(-)-Base-OH
        # ==>
        # Pi-FragmentBase-PO4(-)-FragmentBase-PO4(-)-FragmentBase + PPi
        # Note: Lack of -OH on 3' end of chain
        metabolites_endo_cleavage = np.dot(
            self.endo_degradation_stoich_matrix, n_degraded_RNA
        )

        # Increase polymerized fragment counts
        update["bulk"].append(
            (self.fragment_metabolites_idx, metabolites_endo_cleavage)
        )
        # fragment_metabolites overlaps with fragment_bases
        bulk_count_copy = states["bulk"].copy()
        if len(bulk_count_copy.dtype) > 1:
            bulk_count_copy = bulk_count_copy["count"]
        bulk_count_copy[self.fragment_metabolites_idx] += metabolites_endo_cleavage
        fragment_bases = bulk_count_copy[self.fragment_bases_idx]

        # Check if exonucleolytic digestion can happen
        if fragment_bases.sum() == 0:
            return update

        # Calculate exolytic cleavage events

        # Modeling assumption: We model fragments as one long fragment chain of
        # polymerized nucleotides. We are also assuming that there is no
        # sequence specificity or bias towards which nucleotides are
        # hydrolyzed.

        # Example:
        # Pi-FragmentBase-PO4(-)-FragmentBase-PO4(-)-FragmentBase + 3 H2O
        # ==>
        # 3 NMP + 3 H(+)
        # Note: Lack of -OH on 3' end of chain

        n_exoRNases = counts(states["bulk"], self.exoRNase_idx)
        n_fragment_bases = fragment_bases
        n_fragment_bases_sum = n_fragment_bases.sum()

        exornase_capacity = (
            n_exoRNases.sum() * self.kcat_exoRNase * (units.s * states["timestep"])
        )

        if exornase_capacity >= n_fragment_bases_sum:
            update["bulk"].extend(
                [
                    (self.nmps_idx, n_fragment_bases),
                    (self.water_idx, -n_fragment_bases_sum),
                    (self.proton_idx, n_fragment_bases_sum),
                    (self.fragment_bases_idx, -n_fragment_bases),
                ]
            )
            total_fragment_bases_digested = n_fragment_bases_sum

        else:
            fragment_specificity = n_fragment_bases / n_fragment_bases_sum
            possible_bases_to_digest = self.random_state.multinomial(
                exornase_capacity, fragment_specificity
            )
            n_fragment_bases_digested = n_fragment_bases - np.fmax(
                n_fragment_bases - possible_bases_to_digest, 0
            )

            total_fragment_bases_digested = n_fragment_bases_digested.sum()

            update["bulk"].extend(
                [
                    (self.nmps_idx, n_fragment_bases_digested),
                    (self.water_idx, -total_fragment_bases_digested),
                    (self.proton_idx, total_fragment_bases_digested),
                    (self.fragment_bases_idx, -n_fragment_bases_digested),
                ]
            )

        update["listeners"]["rna_degradation_listener"]["fragment_bases_digested"] = (
            total_fragment_bases_digested
        )

        # Note that once mRNAs have been degraded,
        # chromosome_structure.py will handle deleting the active
        # ribosomes that were translating those mRNAs.

        return update



[docs]
    def _calculate_total_n_to_degrade(
        self, timestep, specificity, total_kcat_endornase
    ):
        """
        Calculate the total number of RNAs to degrade for a specific class of
        RNAs, based on the specificity of endoRNases on that specific class and
        the total kcat value of the endoRNases.

        Args:
            specificity: Sum of fraction of active endoRNases for all RNAs
                in a given class
            total_kcat_endornase: The summed kcat of all existing endoRNases
        Returns:
            Total number of RNAs to degrade for the given class of RNAs
        """
        return np.round(
            (specificity * total_kcat_endornase * (units.s * timestep)).asNumber()
        )



[docs]
    def _get_rnas_to_degrade(self, n_total_rnas_to_degrade, rna_deg_probs, rna_counts):
        """
        Distributes the total count of RNAs to degrade for each class of RNAs
        into individual RNAs, based on the given degradation probabilities
        of individual RNAs. The upper bound is set by the current count of the
        specific RNA.

        Args:
            n_total_rnas_to_degrade: Total number of RNAs to degrade for the
                given class of RNAs (integer, scalar)
            rna_deg_probs: Degradation probabilities of each RNA (vector of
                equal length to the total number of different RNAs)
            rna_counts: Current counts of each RNA molecule (vector of equal
                length to the total number of different RNAs)
        Returns:
            Vector of equal length to rna_counts, specifying the number of
            molecules to degrade for each RNA
        """
        n_rnas_to_degrade = np.zeros_like(rna_counts)
        remaining_rna_counts = rna_counts

        while (
            n_rnas_to_degrade.sum() < n_total_rnas_to_degrade
            and remaining_rna_counts.sum() != 0
        ):
            n_rnas_to_degrade += np.fmin(
                self.random_state.multinomial(
                    n_total_rnas_to_degrade - n_rnas_to_degrade.sum(), rna_deg_probs
                ),
                remaining_rna_counts,
            )
            remaining_rna_counts = rna_counts - n_rnas_to_degrade

        return n_rnas_to_degrade