Source code for ecoli.analysis.antibiotics_colony.metrics

import ast
import os

import numpy as np
import pandas as pd
from scipy.stats import variation

from ecoli.analysis.antibiotics_colony import COUNTS_PER_FL_TO_NANOMOLAR

GLUCOSE_DATA = "data/colony_data/sim_dfs/2022-12-08_00-35-28_562633+0000.csv"
TET_DATA = "data/colony_data/sim_dfs/2023-01-05_01-00-44_215314+0000.csv"
AMP_DATA = "data/colony_data/sim_dfs/2022-12-08_17-03-56_357734+0000.csv"
PERIPLASMIC_VOLUME_FRACTION = 0.2



[docs]
def calculate_glucose_metrics(
    data, out="out/analysis/paper_figures/metrics/glc_metrics.txt"
):
    metrics = {}
    max_time = data.Time.max()

    # get birth, end time for each agent
    agent_lifetimes = data.groupby("Agent ID").agg(
        birth_time=("Time", "min"), end_time=("Time", "max")
    )

    # get lifetime for each agent, which generation it was in
    agent_lifetimes["gen_time"] = agent_lifetimes.end_time - agent_lifetimes.birth_time
    agent_lifetimes["generation"] = agent_lifetimes.index.map(len)

    # get mean, std. dev generation time
    # exclude cells present at end of sim and those that died
    non_final_agent_lifetimes = agent_lifetimes[
        [
            (agent_id + "0" in agent_lifetimes.index)
            for agent_id in agent_lifetimes.index
        ]
    ]
    metrics["Mean doubling time"] = (
        f"{non_final_agent_lifetimes.gen_time.mean() / 60} min"
    )
    metrics["Std dev doubling time"] = (
        f"{non_final_agent_lifetimes.gen_time.std() / 60} min"
    )

    # get standard deviation in start time for each generation
    for g, stddev, cv in zip(
        range(agent_lifetimes.generation.min(), agent_lifetimes.generation.max()),
        agent_lifetimes.groupby("generation").birth_time.std(),
        agent_lifetimes.groupby("generation").birth_time.agg(variation),
    ):
        metrics[f"Standard deviation in start time of generation {g}"] = (
            f"{stddev / 60} min"
        )
        metrics[f"CV in start time of generation {g}"] = cv

    # Get average proportion of cell lifetime in which ompF, tolC, ampC, or marR mRNAs are ever zero
    def prop_zero(series):
        return (series.values == 0).mean()

    def all_zero(series):
        return all(series.values == 0)

    prop_lifetime_zero = (
        data.groupby("Agent ID").agg(
            ompF=("ompF mRNA", prop_zero),
            tolC=("tolC mRNA", prop_zero),
            ampC=("ampC mRNA", prop_zero),
            marR=("marR mRNA", prop_zero),
        )
    ).mean()

    prop_all_zero = (
        data.groupby("Agent ID").agg(
            ompF=("ompF mRNA", all_zero),
            tolC=("tolC mRNA", all_zero),
            ampC=("ampC mRNA", all_zero),
            marR=("marR mRNA", all_zero),
        )
    ).mean()

    for mrna in ["ompF", "tolC", "ampC", "marR"]:
        metrics[f"Average proportion of cell lifetime in which {mrna} mRNA is zero"] = (
            prop_lifetime_zero[mrna]
        )

        metrics[f"Proportion of cells in which {mrna} was always zero"] = prop_all_zero[
            mrna
        ]

    # End-point expression
    monomers = ["OmpF monomer", "TolC monomer", "AmpC monomer", "MarR monomer"]
    endpoint_expression = variation(data[data.Time == max_time][monomers])

    for i, monomer in enumerate(monomers):
        metrics[f"CV for endpoint count of {monomer}"] = endpoint_expression[i]

    # Output results
    with open(out, "w") as f:
        for metric, value in metrics.items():
            f.write(f"{metric} = {value}\n")




[docs]
def calculate_tet_metrics(
    data, out="out/analysis/paper_figures/metrics/tet_metrics.txt"
):
    metrics = {}

    min_time = data.Time.min()

    # Mean, std dev, CV in OmpF, AcrAB-TolC, active ribosome, micF-ompF duplex
    # concentrations when tet first added
    data_t0 = data[data.Time == min_time]
    for molecule, periplasmic in {
        "OmpF complex": True,
        "AcrAB-TolC": True,
        "Active ribosomes": False,
        "micF-ompF duplex": False,
    }.items():
        molecule_t0 = (
            (
                data_t0[molecule]
                / (
                    (
                        PERIPLASMIC_VOLUME_FRACTION
                        if periplasmic
                        else 1 - PERIPLASMIC_VOLUME_FRACTION
                    )
                    * data_t0["Volume"]
                )
            )
            * COUNTS_PER_FL_TO_NANOMOLAR
            / 10**3
        )
        metrics[
            f"Mean {molecule} {'periplasmic' if periplasmic else 'cytoplasmic'} concentration at instant tet added (μM)"
        ] = molecule_t0.mean()
        metrics[
            f"Std. dev. in {molecule} {'periplasmic' if periplasmic else 'cytoplasmic'} concentration at instant tet added (μM)"
        ] = molecule_t0.std()
        metrics[
            f"CV in {molecule} {'periplasmic' if periplasmic else 'cytoplasmic'} concentration at instant tet added (μM)"
        ] = molecule_t0.std() / molecule_t0.mean()

    # Mean, std. dev., CV in outer membrane permeability when tet. first added
    om_perm = data.loc[data.Time == min_time, "Outer tet. permeability (cm/s)"]
    metrics["Mean OM perm. when tet. first added (nm/s)"] = om_perm.mean() * 1e7
    metrics["Std. dev. OM perm. when tet. first added (nm/s)"] = om_perm.std() * 1e7
    metrics["CV OM perm. when tet. first added"] = om_perm.std() / om_perm.mean()

    # Mean, std. dev., CV in outer membrane permeability 4 hours post-tet.
    om_perm = data.loc[
        data.Time == min_time + 3600 * 4, "Outer tet. permeability (cm/s)"
    ]
    metrics["Mean OM perm. 4 hrs after tet. added (nm/s)"] = om_perm.mean() * 1e7
    metrics["Std. dev. OM perm. 4 hrs after tet. added (nm/s)"] = om_perm.std() * 1e7
    metrics["CV OM perm. 4 hrs after tet. added"] = om_perm.std() / om_perm.mean()

    # Mean, std. dev., CV in periplasmic tetracycline conc. 8 sec after tet. added
    peri_tet = data.loc[data.Time == min_time + 8, "Periplasmic tetracycline"]
    metrics[
        "Mean periplasmic tetracycline concentration 8 sec. after tet. added (μM)"
    ] = peri_tet.mean()
    metrics[
        "Std. dev. periplasmic tetracycline concentration 8 sec. after tet. added (μM)"
    ] = peri_tet.std()
    metrics["CV periplasmic tetracycline concentration 8 sec. after tet. added"] = (
        peri_tet.std() / peri_tet.mean()
    )

    # Time to reach equilbrium in periplasmic, cytoplasmic tet conc
    epsilon = 1e-7
    DT = 2

    # (Throwing away first few seconds to account for time to actually start importing tetracycline)
    discard = 5
    metrics[
        f"Periplasmic tet equilibration time (abs(d[Tet]) <= {epsilon * 10**6:.1e} nM) (seconds)"
    ] = (
        (
            np.abs(
                data[data["Time"] <= min_time + 2 * 60]
                .groupby("Time")["Periplasmic tetracycline"]
                .mean()
                .diff()
            )
            < 1e-7
        )
        .values[discard:]
        .argmax()
        + discard
    ) * DT

    metrics[
        f"Cytoplasmic tet equilibration time (abs(d[Tet]) <= {epsilon * 10**6:.1e} nM) (seconds)"
    ] = (
        (
            np.abs(
                data[data["Time"] <= min_time + 2 * 60]
                .groupby("Time")["Cytoplasmic tetracycline"]
                .mean()
                .diff()
            )
            < 1e-7
        )
        .values[discard:]
        .argmax()
        + discard
    ) * DT

    # Mean, std. dev. ribosome concentration 2 minutes after tet added
    data_t2 = data[data.Time == data.Time.min() + 2 * 60]

    ribosomes_t2 = (
        (
            data_t2["Active ribosomes"]
            / ((1 - PERIPLASMIC_VOLUME_FRACTION) * data_t2["Volume"])
        )
        * COUNTS_PER_FL_TO_NANOMOLAR
        / 10**3
    )
    metrics["Mean ribosome concentration 2 mins after tet added (μM)"] = (
        ribosomes_t2.mean()
    )
    metrics["Std. dev. in ribosome concentration 2 mins after tet added (μM)"] = (
        ribosomes_t2.std()
    )

    # Mean, std. dev. in micF-ompF duplex concentration
    # 10 minutes after tet added
    data_t10 = data[data.Time == data.Time.min() + 10 * 60]

    micF_ompF_t10 = (
        (
            data_t10["micF-ompF duplex"]
            / ((1 - PERIPLASMIC_VOLUME_FRACTION) * data_t10["Volume"])
        )
        * COUNTS_PER_FL_TO_NANOMOLAR
        / 10**3
    )
    metrics["Mean micF-ompF duplex concentration 10 mins after tet added (μM)"] = (
        micF_ompF_t10.mean()
    )
    metrics[
        "Std. dev. in micF-ompF duplex concentration 10 mins after tet added (μM)"
    ] = micF_ompF_t10.std()

    # "Half-life" of ompF monomer, mRNA
    data["OmpF monomer conc"] = (
        data["OmpF monomer"] / (PERIPLASMIC_VOLUME_FRACTION * data.Volume)
    ) * COUNTS_PER_FL_TO_NANOMOLAR
    data["ompF mRNA conc"] = (
        data["ompF mRNA"] / (PERIPLASMIC_VOLUME_FRACTION * data.Volume)
    ) * COUNTS_PER_FL_TO_NANOMOLAR

    ompf_monomer_trace = data.groupby("Time")["OmpF monomer conc"].mean()
    ompf_mRNA_trace = data.groupby("Time")["ompF mRNA conc"].mean()

    metrics["OmpF monomer half-life (min)"] = (
        np.argmin(np.abs(ompf_monomer_trace - ompf_monomer_trace[min_time] / 2))
        * DT
        / 60
    )

    metrics["ompF mRNA half-life (min)"] = (
        np.argmin(np.abs(ompf_mRNA_trace - ompf_mRNA_trace[min_time] / 2)) * DT / 60
    )

    # Output results
    with open(out, "w") as f:
        for metric, value in metrics.items():
            f.write(f"{metric} = {value}\n")




[docs]
def calculate_amp_metrics(
    data, out="out/analysis/paper_figures/metrics/amp_metrics.txt"
):
    metrics = {}

    min_time = data.Time.min()
    data_t60 = data[data.Time == min_time + 60]

    # Mean, std. dev., CV in AmpC concentration 1 min. post-amp.
    ampc_data = (
        data_t60["AmpC monomer"]
        / (data_t60["Volume"] * PERIPLASMIC_VOLUME_FRACTION)
        * COUNTS_PER_FL_TO_NANOMOLAR
        / 1000
    )
    metrics["Mean AmpC when amp. first added (μM)"] = ampc_data.mean()
    metrics["Std. dev. AmpC when amp. first added (μM)"] = ampc_data.std()
    metrics["CV AmpC when amp. first added"] = ampc_data.std() / ampc_data.mean()

    # Mean, std. dev., CV in ampicillin 1 min. post-amp.
    amp_conc = data_t60["Periplasmic ampicillin"]
    metrics["Mean amp. 1 min after amp. added (μM)"] = amp_conc.mean() * 1000
    metrics["Std. dev. amp. 1 min after amp. added (μM)"] = amp_conc.std() * 1000
    metrics["CV amp. 1 min after amp. added"] = amp_conc.std() / amp_conc.mean()

    # Mean, std. dev., CV in PBP1A active frac. 1 min. post-amp.
    pbp1a = data_t60["Active fraction PBP1a"]
    metrics["Mean PBP1A active frac. 1 min after amp. added"] = pbp1a.mean()
    metrics["Std. dev. PBP1A active frac. 1 min after amp. added"] = pbp1a.std()
    metrics["CV PBP1A active frac. 1 min after amp. added"] = pbp1a.std() / pbp1a.mean()

    # Mean, std. dev., CV in PBP1B active frac. 1 min. post-amp.
    pbp1b = data_t60["Active fraction PBP1b"]
    metrics["Mean PBP1B active frac. 1 min after amp. added"] = pbp1b.mean()
    metrics["Std. dev. PBP1B active frac. 1 min after amp. added"] = pbp1b.std()
    metrics["CV PBP1B active frac. 1 min after amp. added"] = pbp1b.std() / pbp1b.mean()

    # Mean, std. dev., CV in ampicillin 15 min. post-amp.
    amp_conc = data.loc[data.Time == min_time + 60 * 15, "Periplasmic ampicillin"]
    metrics["Mean amp. 15 min after amp. added (μM)"] = amp_conc.mean() * 1000
    metrics["Std. dev. amp. 15 min after amp. added (μM)"] = amp_conc.std() * 1000
    metrics["CV amp. 15 min after amp. added"] = amp_conc.std() / amp_conc.mean()

    # Output results
    with open(out, "w") as f:
        for metric, value in metrics.items():
            f.write(f"{metric} = {value}\n")




[docs]
def main():
    os.makedirs("out/analysis/paper_figures/metrics/", exist_ok=True)
    print("Calculating glucose metrics...")
    glc_data = pd.read_csv(
        GLUCOSE_DATA, dtype={"Agent ID": str, "Seed": str}, index_col=0
    )
    glc_data["Boundary"] = glc_data["Boundary"].apply(ast.literal_eval)
    calculate_glucose_metrics(glc_data)
    del glc_data

    print("Calculating tetracycline metrics...")
    tet_data = pd.read_csv(TET_DATA, dtype={"Agent ID": str, "Seed": str}, index_col=0)
    tet_data["Boundary"] = tet_data["Boundary"].apply(ast.literal_eval)
    calculate_tet_metrics(tet_data)
    del tet_data

    print("Calculating ampicillin metrics...")
    amp_data = pd.read_csv(AMP_DATA, dtype={"Agent ID": str, "Seed": str}, index_col=0)
    amp_data["Boundary"] = amp_data["Boundary"].apply(ast.literal_eval)
    calculate_amp_metrics(amp_data)
    del amp_data



if __name__ == "__main__":
    main()