Source code for ecoli.analysis.antibiotics_colony.amp_plots

import argparse
import os
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from scipy.stats import ttest_ind, zscore
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

matplotlib.use("Agg")
import matplotlib.pyplot as plt

from ecoli.analysis.antibiotics_colony import COUNTS_PER_FL_TO_NANOMOLAR
from ecoli.analysis.antibiotics_colony.plot_utils import (
    BG_GRAY,
    HIGHLIGHT_BLUE,
    HIGHLIGHT_RED,
    prettify_axis,
)
from ecoli.analysis.antibiotics_colony.timeseries import (
    plot_timeseries,
)
from ecoli.library.parameters import param_store


# TODO: Tie this to its location in the model code rather than maintaining
# a separate value here.
PERIPLASMIC_VOLUME_FRACTION = 0.2
HIGHLIGHT_LINEAGE = "001111111"


[docs] def load_data( glc_data, amp_data, verbose=False, ): # Load glc data if verbose: print("Loading glucose data...") glc_data = pd.read_csv(glc_data, dtype={"Agent ID": str, "Seed": str}, index_col=0) # Load amp data if verbose: print("Loading ampicillin data...") amp_data = pd.read_csv(amp_data, dtype={"Agent ID": str, "Seed": str}, index_col=0) # Validate data: # - glc_data must be in Glucose condition, # - amp_data must be in (one of) ampicillin condition(s) # - seed must match across glc_data and amp_data if verbose: print("Validating and cleaning data...") assert isinstance(glc_data, pd.DataFrame) assert isinstance(amp_data, pd.DataFrame) glc_data_condition = glc_data["Condition"].unique()[0] amp_data_condition = amp_data["Condition"].unique()[0] assert "Glucose" in glc_data_condition, "glc_data was not from Glucose condition!" assert ( "Ampicillin" in amp_data_condition ), "amp_data was not from Ampicillin condition!" assert ( glc_data["Seed"].unique().size == amp_data["Seed"].unique().size == 1 ), "One of glc_data or amp_data has data for more than one seed!" glc_data_seed = glc_data["Seed"].unique()[0] amp_data_seed = amp_data["Seed"].unique()[0] assert ( glc_data_seed == amp_data_seed ), "Seeds do not match across glc_data and amp_data!" # Merge dataframes from before/after addition of ampicillin amp_data = pd.concat([glc_data[glc_data.Time < amp_data.Time.min()], amp_data]) # Figure out which cells died and when def died(lineage, agent_ids): d1, d2 = lineage + "0", lineage + "1" return (d1 not in agent_ids) and (d2 not in agent_ids) # Get all cell ids, set of cells that died, and time of death for those cells for data in [glc_data, amp_data]: unique_ids = data["Agent ID"].unique() dead_ids = list(filter(lambda id: died(id, unique_ids), unique_ids)) time_of_death = { id: data["Time"][data["Agent ID"] == id].max() for id in dead_ids } # Remove cells still present at sim end from dead time_of_death = { id: time for id, time in time_of_death.items() if time != data.Time.max() } dead_ids = list(time_of_death.keys()) # Create columns for whether a cell died, and the time of death where applicable data["Died"] = data["Agent ID"].isin(dead_ids) data["Time of Death"] = data["Agent ID"].map( lambda id: time_of_death.get(id, None) ) # Add additional columns for periplasmic volume, # concentration of AmpC in the periplasm for data in [glc_data, amp_data]: data["Periplasmic Volume"] = PERIPLASMIC_VOLUME_FRACTION * data["Volume"] data["AmpC conc"] = ( data["AmpC monomer"] / data["Periplasmic Volume"] ) * COUNTS_PER_FL_TO_NANOMOLAR return glc_data, amp_data
[docs] def agent_summary(data, var_summarizers): cols = [] for var, summarization in var_summarizers.items(): if summarization == "mean": cols.append(data.groupby("Agent ID")[var].mean()) elif summarization == "min": cols.append(data.groupby("Agent ID")[var].min()) elif summarization == "max": cols.append(data.groupby("Agent ID")[var].max()) elif callable(summarization): cols.append(summarization(data.groupby("Agent ID")[var])) else: raise ValueError(f"{summarization} is not a recognized summarization.") return pd.concat(cols, axis=1)
[docs] def make_figures( amp_data, verbose, as_svg=False, outdir="out/analysis/paper_figures/figure_4/", ): # Prepare to output figures os.makedirs(outdir, exist_ok=True) ext = "svg" if as_svg else "png" # Get time when ampicillin added amp_time = amp_data["Time"][ amp_data["Condition"].map(lambda s: "Ampicillin" in s) ].min() max_time = amp_data["Time"].max() # Plot colony mass vs time with deaths ======================================================== if verbose: print("Making colony mass vs. time (with marked deaths) plot...") fig, ax = plt.subplots() ax.tick_params("both", labelsize=8) timeseries_death_plot( amp_data, "Dry mass", highlight_lineage=str(amp_data["Agent ID"].iloc[-1]), ax=ax, ) ax.set_ylabel(ax.get_ylabel(), fontsize=9) yticks = ax.get_yticks() ax.set_yticks(yticks, [f"{y:.0f} fg" for y in yticks]) # Relabel time axis to use time after addition of ampicillin amp_time_hrs, max_time_hrs = amp_time / 3600, max_time / 3600 new_ticks = np.array([0, amp_time_hrs, max_time_hrs]) new_tick_labels = [f"{t - amp_time_hrs:.0f}" for t in new_ticks] new_tick_labels[0] = f"{-amp_time_hrs:.1f}" ax.set_xticks(new_ticks, labels=new_tick_labels) ax.set_xlabel("") fig.set_size_inches(4, 1.125) fig.tight_layout() fig.savefig(os.path.join(outdir, f"colony_mass_timeseries.{ext}")) # Plot minute-scale timeseries ================================================================ minute_vars = { "Periplasmic ampicillin": ("Periplasmic\n ampicillin (μM)", False), "AmpC conc": ("AmpC (μM)", False), "Active fraction PBP1a": ( r"$\frac{\mathrm{active}}{\mathrm{total}}$ PBP1A", False, ), # ("Frac active\n PBP1a", False), } # Scale periplasmic ampicillin, AmpC to be in micromolar minute_data = amp_data.transform( { **{c: lambda x: x for c in amp_data.columns}, **{"Periplasmic ampicillin": lambda s: s * 10**3}, **{"AmpC conc": lambda s: s / 10**3}, } ) fig, axs = plt.subplots(1, len(minute_vars)) for ax, (minute_var, (label, to_conc)) in zip(axs, minute_vars.items()): if verbose: print(f"Plotting {minute_var} timeseries...") minute_scale_plot( minute_data, minute_var, dt=2, ax=ax, to_conc=to_conc, highlight_lineage=HIGHLIGHT_LINEAGE, ) ticklabs = [lab.get_text() for lab in ax.get_xticklabels()] # Remove x label, make sure axis visible xaxis = ax.axes.get_xaxis() xaxis.set_visible(True) xaxis.get_label().set_visible(False) ax.set_ylabel(label) prettify_axis( ax, xticks=ax.get_xticks(), xlabel_as_tick=False, ylabel_as_tick=False, tick_format_y="{:.1f}", ) ax.set_xticklabels(ticklabs) ax.spines.bottom.set(bounds=ax.get_xlim(), visible=True, color="0", alpha=1) ax.spines.left.set(bounds=ax.get_ylim(), visible=True, color="0", alpha=1) fig.set_size_inches(5, 1.5) fig.tight_layout() fig.savefig(os.path.join(outdir, f"fig_4e_g_minute_scale_timeseries.{ext}")) # Plot hour-scale timeseries ================================================================== # Clean data amp_data["Max hole area (nm^2)"] = [len(x) for x in amp_data["Max hole size"]] basal_area = ( param_store.get(("cell_wall", "inter_strand_distance")) + param_store.get(("cell_wall", "disaccharide_width")) ) * param_store.get(("cell_wall", "disaccharide_height")) amp_data["Max hole area (nm^2)"] = ( amp_data["Max hole area (nm^2)"] * amp_data["Extension factor"] * basal_area ) amp_data["Max hole area (um^2)"] = amp_data["Max hole area (nm^2)"] / 10**3 hour_variables = { "Porosity": ("Porosity", "{:.2f}"), "Max hole area (um^2)": ("Max hole area ($\\mathrm{\\mu m^2}$)", "{:.1f}"), "Extension factor": ( r"$\frac{\mathrm{Length}}{\mathrm{Resting\ length}}$", "{:.1f}", ), } fig, axs = plt.subplots(1, len(hour_variables)) for ax, (hour_variable, (label, ytickformat)) in zip(axs, hour_variables.items()): if verbose: print(f"Plotting {hour_variable} timeseries...") # Remove artifacts - porosity and hole area take five timesteps (dt=10s) to # be recalculated after division. hour_plot_data = amp_data if hour_variable in ["Porosity", "Max hole area (um^2)"]: agent_initial_times = amp_data.groupby("Agent ID")["Time"].min().to_dict() hour_plot_data = amp_data[ amp_data.apply( lambda r: r["Time"] >= 10 + agent_initial_times[r["Agent ID"]], axis=1, ) ] hour_scale_plot( hour_plot_data, hour_variable, ax=ax, to_conc=False, highlight_lineage=HIGHLIGHT_LINEAGE, ) xaxis = ax.axes.get_xaxis() xaxis.get_label().set_visible(False) ax.set_ylabel(label) prettify_axis( ax, xticks=ax.get_xticks(), xlabel_as_tick=False, ylabel_as_tick=False, tick_format_y=ytickformat, ) new_ticks = np.array([0, amp_time_hrs, max_time_hrs]) new_tick_labels = [f"{t - amp_time_hrs:.0f}" for t in new_ticks] new_tick_labels[0] = f"{-amp_time_hrs:.1f}" ax.set_xticks(new_ticks, labels=new_tick_labels) ax.spines.bottom.set(bounds=ax.get_xlim(), visible=True, color="0", alpha=1) ax.spines.left.set(bounds=ax.get_ylim(), visible=True, color="0", alpha=1) fig.set_size_inches(5, 1.5) fig.tight_layout() fig.savefig(os.path.join(outdir, f"fig_4h_j_hour_scale_timeseries.{ext}")) # Plot beta-lactamase, AcrAB-TolC... vs. death ================================================ death_factor_formatting = { "AmpC conc": ("{:.1f}", False), "Periplasmic ampicillin": ("{:.2e}", False), "PBP1a complex": ("{:.1f}", True), "PBP1b gamma complex": ("{:.1f}", True), } for factor, (fmt, to_conc) in death_factor_formatting.items(): if verbose: print(f"Making histogram plot for {factor}...") fig, ax = plt.subplots() _, _, bins = hist_by_death_plot(amp_data, factor, counts_to_conc=to_conc, ax=ax) leg = ax.get_legend() if leg: leg.remove() fig.set_size_inches(1.25, 1.75) fig.savefig( os.path.join(outdir, f"{factor}.{ext}"), bbox_inches="tight", ) # Run logistic regression to explain death ==================================================== if verbose: print("Running logistic regression...") log_vars = { "AcrAB-TolC": "mean", "PBP1a complex": "mean", "PBP1b gamma complex": "mean", "AmpC conc": "mean", "Died": "max", } # Create summary data for each agent, # Excluding agents present at the end of the simulation # (since these may not have had enough time to die) present_at_end = ( amp_data.groupby("Agent ID")["Time"].max() == amp_data["Time"].max() ) final_cells = present_at_end.index[present_at_end] log_reg_data = agent_summary( amp_data[~np.isin(amp_data["Agent ID"], final_cells)], log_vars ) log_reg_data = sm.add_constant(log_reg_data) log_reg, confusion = logistic_explain_death(log_reg_data, random_state=1342534) # Output text log with open(os.path.join(outdir, "logistic_regression_full.txt"), "w") as f: f.write(str(log_reg.summary())) f.write("\n") # Confusion matrix f.write("Confusion matrix:\n") f.write(str(confusion)) # Run logistic regression again, with AmpC as only predictor # Create training data, test data log_reg_data_2 = log_reg_data[["const", "AmpC conc", "Died"]] log_reg_2, confusion = logistic_explain_death(log_reg_data_2, random_state=1342534) # Output text log with open(os.path.join(outdir, "logistic_regression_reduced.txt"), "w") as f: f.write(str(log_reg_2.summary())) f.write("\n") # Confusion matrix f.write("Confusion matrix:\n") f.write(str(confusion)) # Plot separation for AmpC fig, ax = plt.subplots() x = log_reg_data[["const", "AmpC conc"]] min_ampC, max_ampC = x[["AmpC conc"]].min(), x[["AmpC conc"]].max() ax.scatter( x[["AmpC conc"]], log_reg_data_2[["Died"]], color=HIGHLIGHT_BLUE, alpha=0.5, ) ampC = np.linspace(min_ampC, max_ampC, 100) ax.plot( ampC, log_reg_2.predict(sm.add_constant(ampC.reshape(-1, 1))), color=HIGHLIGHT_BLUE, ) ax.set_xlabel("AmpC (nM)") ax.set_ylabel("Died") prettify_axis(ax) ax.set_yticks([0, 1], labels=["0", "1"]) fig.set_size_inches(1.5, 1.75) fig.tight_layout() fig.savefig(os.path.join(outdir, f"logistic_regression.{ext}")) # Plot separation for a new regression on periplasmic ampicillin concentration, # comparing to a theoretical threshold from literature (Frere, 1989) AMPICILLIN_KILLING_RANGE = (0.7, 1.25) # uM amp_logreg_data = agent_summary( amp_data[~np.isin(amp_data["Agent ID"], final_cells)], {"Periplasmic ampicillin": "mean", "Died": "max"}, ) # Convert from nM to uM amp_logreg_data["Periplasmic ampicillin"] = ( 10**3 * amp_logreg_data["Periplasmic ampicillin"] ) amp_logreg_data = sm.add_constant(amp_logreg_data) log_reg_3, confusion = logistic_explain_death(amp_logreg_data, random_state=1342534) # Plot separation for Ampicillin fig, ax = plt.subplots() x = amp_logreg_data[["const", "Periplasmic ampicillin"]] min_amp, max_amp = ( x[["Periplasmic ampicillin"]].min().values[0], x[["Periplasmic ampicillin"]].max().values[0], ) # Shade literature theoretical killing region ax.axvspan(*AMPICILLIN_KILLING_RANGE, color=BG_GRAY, alpha=0.5) ax.text( AMPICILLIN_KILLING_RANGE[1] + 0.1, 0.1, "Frère 1989", fontdict={"fontsize": 8, "color": BG_GRAY}, ) # line for Nikaido value ax.vlines(1.7, 0, 1, colors=[BG_GRAY], linestyle="dashed") # Plot cell ampicillin concentration and fates ax.scatter( amp_logreg_data[amp_logreg_data["Died"] == 0]["Periplasmic ampicillin"], amp_logreg_data[amp_logreg_data["Died"] == 0]["Died"], color=HIGHLIGHT_BLUE, linewidths=0, alpha=0.5, ) ax.scatter( amp_logreg_data[amp_logreg_data["Died"] == 1]["Periplasmic ampicillin"], amp_logreg_data[amp_logreg_data["Died"] == 1]["Died"], color=HIGHLIGHT_RED, linewidths=0, alpha=0.5, ) # Plot logistic curve amp = np.linspace(min_amp, max_amp, 100) ax.plot( amp, log_reg_3.predict(sm.add_constant(amp.reshape(-1, 1))), color="k", ) # Plot 50% death probability point beta = log_reg_3.params.values x_mid = -beta[0] / beta[1] # ax.vlines(x_mid, 0, 1, color="k", linestyle="dashed") # ax.text(x_mid + 0.1, 0.5, f"{x_mid:.2f} uM", fontdict={"fontsize": 8}) # Format and save plot ax.set_xlabel("Periplasmic ampicillin (μM)") ax.set_ylabel("Died") prettify_axis(ax, xticks=[0, x_mid, max_amp], xlabel_as_tick=False) ax.set_yticks([0, 1], labels=["0", "1"]) fig.set_size_inches(2, 2) fig.tight_layout() fig.savefig(os.path.join(outdir, f"logistic_regression_ampicillin.{ext}")) # Violin plots showing death/survival ========================================================= if verbose: print("Making violin plot...") violin_vars = { "AcrAB-TolC": "mean", "PBP1a complex": "mean", "PBP1b gamma complex": "mean", "AmpC conc": "mean", "OmpF complex": "mean", } violin_cols = list(violin_vars.keys()) # divide by volume to get concentration violin_data = amp_data[violin_cols].div(amp_data["Volume"], axis=0) # Add back agent ID and survival violin_data = pd.concat( [amp_data["Agent ID"], violin_data, amp_data["Died"]], axis=1 ) # Exclude final cells, summarize violin_data = agent_summary( violin_data[~np.isin(violin_data["Agent ID"], final_cells)], {**violin_vars, **{"Died": "max"}}, ) # convert to concentrations for var in set(violin_cols) - {"AmpC conc"}: violin_data[var] *= COUNTS_PER_FL_TO_NANOMOLAR # Run statistical significance tests pvalues = {} with open(os.path.join(outdir, "violin_significance_tests.txt"), "w") as f: for var in violin_vars.keys(): t, p = ttest_ind( violin_data[~violin_data["Died"]][var], violin_data[violin_data["Died"]][var], ) p_bonferroni = p * len(violin_vars) pvalues[var] = p_bonferroni f.write( f"{var}\n" "========\n" "Two-sided t-test,\n" f"t = {t}, p = {p}\nBonferroni-corrected p = {p_bonferroni}\n" ) # center data for each molecule about its mean (use z-score) violin_data[violin_cols] = violin_data[violin_cols].apply(zscore) # pivot to long format, in preparation for plotting violin_data_long = violin_data.reset_index().melt( id_vars=["Agent ID", "Died"], var_name="Molecule", value_name="Concentration" ) # Variable aesthetics - put AmpC at the front, associate each with a human-friendly name reordered_vars = { "AmpC conc": "AmpC", "AcrAB-TolC": "AcrAB-TolC", "OmpF complex": "OmpF", "PBP1a complex": "PBP1A", "PBP1b gamma complex": "PBP1B", } fig, ax = plt.subplots() sns.violinplot( violin_data_long, x="Molecule", y="Concentration", order=list(reordered_vars), hue="Died", palette={True: HIGHLIGHT_RED, False: HIGHLIGHT_BLUE}, inner=None, split=True, legend=False, linewidth=0, ax=ax, ) # Add mean lines means = violin_data_long.groupby(["Molecule", "Died"])["Concentration"].mean() for x, var in enumerate(reordered_vars.keys()): molecule_means = means.loc[var] ax.hlines( y=molecule_means.values, xmin=[x - 0.5, x], xmax=[x, x + 0.5], colors="w", alpha=1, lw=0.5, ) # Add significance stars def convert_pvalue_to_asterisks(pvalue): return ["****", "***", "**", "*", ""][ np.digitize(pvalue, [0, 0.0001, 0.001, 0.01, 0.05, 100000]) - 1 ] for i, var in enumerate(reordered_vars.keys()): p = pvalues[var] print(f"{var}: {p} => {convert_pvalue_to_asterisks(p)}") ax.text( i, 4.95, convert_pvalue_to_asterisks(p), fontsize=8, horizontalalignment="center", ) # Remove legend ax.legend([], [], frameon=False) ax.set_ylabel("Concentration z-score") # Stagger labels tick_labels = [ ("\n" if i % 2 == 1 else "") + label for i, label in enumerate(reordered_vars.values()) ] prettify_axis( ax, xticks="all", tick_format_x="{}", xlabel_as_tick=False, ylabel_as_tick=False ) ax.set_xticklabels(tick_labels) fig.set_size_inches(3.25, 2.25) fig.tight_layout() fig.savefig(os.path.join(outdir, f"fig_4l_violin_plot.{ext}"))
[docs] def timeseries_death_plot(data, var, highlight_lineage=None, ax=None): if ax is None: _, ax = plt.subplots() data = data.copy() plot_timeseries( data, axes=[ax], columns_to_plot={var: HIGHLIGHT_BLUE}, highlight_lineage=highlight_lineage, conc=False, mark_death=True, ) return ax
[docs] def minute_scale_plot(data, var, dt=5, highlight_lineage=None, ax=None, to_conc=False): if not ax: _, ax = plt.subplots() amp_time = data["Time"][data["Condition"].map(lambda s: "Ampicillin" in s)].min() plot_data = data[ (amp_time - 60 * dt <= data["Time"]) & (data["Time"] <= amp_time + 60 * dt) ] plot_timeseries( data=plot_data, axes=[ax], columns_to_plot={var: HIGHLIGHT_BLUE}, highlight_lineage=( highlight_lineage if highlight_lineage is not None else str(plot_data["Agent ID"].iloc[-1]) ), filter_time=False, background_alpha=0.5, background_linewidth=0.3, conc=to_conc, ) ax.set_xticks([(amp_time + d) / 3600 for d in np.arange(-60 * dt, 60 * dt + 1, 60)]) ax.set_xticklabels([int(d) for d in range(-dt, dt + 1)]) return ax
[docs] def hour_scale_plot( data, var, highlight_lineage=None, ax=None, to_conc=False, mark_death=True ): if not ax: _, ax = plt.subplots() plot_timeseries( data=data, axes=[ax], columns_to_plot={var: HIGHLIGHT_BLUE}, highlight_lineage=( highlight_lineage if highlight_lineage else str(data["Agent ID"].iloc[-1]) ), filter_time=False, background_alpha=0.5, background_linewidth=0.3, conc=to_conc, mark_death=mark_death, ) return ax
[docs] def hist_by_death_plot( data, var, var_summarize=lambda var: var.mean(), counts_to_conc=True, ax=None, xlabel=None, ): data = data.copy() if ax is None: _, ax = plt.subplots() # Convert from counts to concentration if requested if counts_to_conc: data[var] = (data[var] / data["Volume"]) * COUNTS_PER_FL_TO_NANOMOLAR # Prepare data for plotting hist_data = pd.concat( [ var_summarize(data.groupby("Agent ID")[var]), data.groupby("Agent ID")[["Died"]].max(), ], axis=1, ) # Replace Died=True with "Lysed", Died=False with "Survived" hist_data["Died"] = hist_data["Died"].replace({False: "Survived", True: "Lysed"}) # Plot histogram sns.histplot( x=var, hue="Died", data=hist_data, palette={"Lysed": BG_GRAY, "Survived": HIGHLIGHT_BLUE}, ax=ax, edgecolor=None, ) if xlabel is None: xlabel = f"{var}{' (nM)' if counts_to_conc else ''}" ax.set_xlabel(xlabel) ax.set_ylabel("Cells") hist, bins = np.histogram(hist_data[var]) return ax, hist, bins
[docs] def ampicillin_pca( data, vars, var_summarizers={}, outcome="Died", counts_to_conc=True, ax=None, ): data = data.copy() if ax is None: _, ax = plt.subplots() # Convert from counts to concentration if requested if counts_to_conc: data[vars] = (data[vars] / data["Volume"]) * COUNTS_PER_FL_TO_NANOMOLAR # Summarize timeseries data for each agent using the specified R^T => R summarizer, # using maximum for the "outcome" variable unless specified var_summarizers[outcome] = var_summarizers.get(outcome, lambda x: x.max()) agent_summary_data = pd.concat( [ var_summarizers.get(var, lambda x: x.mean())(data.groupby("Agent ID")[var]) for var in vars + [outcome] ], axis=1, ) scaled_summary_data = StandardScaler().fit_transform( agent_summary_data.drop("Died", axis=1).values ) pca = PCA(n_components=2) X_r = pca.fit_transform(scaled_summary_data) sns.scatterplot(x=X_r[:, 0], y=X_r[:, 1], hue=agent_summary_data["Died"], ax=ax) return pca, X_r, ax
[docs] def logistic_explain_death(data, outcome="Died", test_size=0.3, random_state=None): # Create training data, test data x_data = data[[x for x in data.columns if x != outcome]] y_data = data[outcome] x_training_data, x_test_data, y_training_data, y_test_data = train_test_split( x_data, y_data, test_size=0.3, random_state=1342534 ) model = sm.Logit(y_training_data, x_training_data).fit() yhat = model.predict(x_test_data) predictions = list(map(round, yhat)) return model, confusion_matrix(y_test_data, predictions)
[docs] def pairplot( data, variable_transform_map={}, hue="Died", ): # Summarize timeseries data for each agent using the specified R^T => R summarizer, # using maximum for the hue variable unless specified variable_transform_map[hue] = variable_transform_map.get( hue, lambda x, data: data.groupby("Agent ID")[x].max() ) agent_summary_data = pd.concat( [summarizer(var, data) for var, summarizer in variable_transform_map.items()], axis=1, ) grid = sns.pairplot( agent_summary_data, vars=[v for v in variable_transform_map.keys() if v != hue], hue=hue, palette={False: HIGHLIGHT_BLUE, True: BG_GRAY}, corner=True, ) return grid
[docs] def amp_conc_sweep(glc_data, amp_data, add_amp_data, ax=None): if ax is None: _, ax = plt.subplots() amp_conc_vs_mass_data = pd.concat( [ glc_data.groupby(["Condition", "Time"]).sum("Dry mass"), amp_data.groupby(["Condition", "Time"]).sum("Dry mass"), add_amp_data.groupby(["Condition", "Time"]).sum("Dry mass"), ], join="inner", ) # Time to hours amp_conc_vs_mass_data = amp_conc_vs_mass_data.reset_index() amp_conc_vs_mass_data.Time /= 60 * 60 # Create color palette condition_palette = { cond: float(cond[len("Ampicillin (") : -len(" mg/L)")]) if "Ampicillin" in cond else 0 for cond in amp_conc_vs_mass_data["Condition"].unique() } max_color = max(condition_palette.values()) condition_palette = { cond: tuple((color / max_color) * np.array(HIGHLIGHT_BLUE)) for cond, color in condition_palette.items() } sns.lineplot( amp_conc_vs_mass_data, x="Time", y="Dry mass", hue="Condition", hue_order=list( dict(sorted(condition_palette.items(), key=lambda kv: kv[1])).keys() ), palette=condition_palette, ax=ax, ) ax.set_xlim(0, amp_conc_vs_mass_data.Time.max()) ax.set_ylim( amp_conc_vs_mass_data["Dry mass"].min(), amp_conc_vs_mass_data["Dry mass"].max() ) ax.set_xlabel("Time (hr)") ax.set_ylabel("Colony Dry Mass") return ax
[docs] def cli(): parser = argparse.ArgumentParser( "Generate analysis plots for ampicillin colony sims." ) parser.add_argument( "glc_data", type=str, help="Locally saved dataframe file for data from baseline glucose simulation.", ) parser.add_argument( "amp_data", type=str, help="Locally saved dataframe file for data following addition of ampicillin.", ) parser.add_argument( "--outdir", "-d", default="out/figure_4", help="Directory in which to output the generated figures.", ) parser.add_argument("--svg", "-s", action="store_true", help="Save as svg.") parser.add_argument("--verbose", "-v", action="store_true") args = parser.parse_args() return args
[docs] def main(): plt.rcParams["font.family"] = "Arial" plt.rcParams["svg.fonttype"] = "none" options = cli() amp_data = load_data( options.glc_data, options.amp_data, options.verbose, ) make_figures( amp_data, options.verbose, as_svg=options.svg, outdir=options.outdir, )
if __name__ == "__main__": main()