Source code for ecoli.analysis.single.selected_fluxes

import os
import pickle
import textwrap
import numpy as np
import matplotlib.pyplot as plt
import polars as pl

from typing import Any, TYPE_CHECKING, cast
from collections import defaultdict
from ecoli.library.parquet_emitter import (
    read_stacked_columns,
    field_metadata,
    named_idx,
    open_arbitrary_sim_data,
)

if TYPE_CHECKING:
    from duckdb import DuckDBPyConnection


[docs] def esc_latex(s): """Helper function to escape a string for LaTeX.""" return s.replace("_", r"\_")
[docs] def plot( params: dict[str, Any], conn: "DuckDBPyConnection", history_sql: str, config_sql: str, success_sql: str, sim_data_paths: dict[str, dict[int, str]], validation_data_paths: list[str], outdir: str, variant_metadata: dict[str, dict[int, Any]], variant_names: dict[str, str], ): """ All options have default values (do not need to be explicitly provided). Args: params: Dictionary of parameters given under analysis name in configuration JSON. Config options look like this: .. code-block:: json { // show_enzyme_counts: whether to show enzyme counts along with fluxes "show_enzyme_counts": true, // Height of each row and width of each column in inches. // Defaults to 3 and 4 inches, respectively. // Overidden if "figsize" is provided! "row_height": 3, "col_width": 4, // figsize: custom figure size "figsize": [12, 9], // plot_reactions: list of reaction-sets to plot in each row. "plot_reactions" : [ // "Reaction-sets" can be: // (1) single strings, for the ID of a single reaction "PGLUCISOM-RXN", // (2) lists of string IDs ["3PGAREARR-RXN", "RXN-15513"], // (3) dictionaries of string IDs paired with human-readable labels {"PEPDEPHOS-RXN" : "PEP kinase", "PEPSYNTH-RXN": "PEP synthase"} ] } """ # Marker symbols to loop through for enzyme counts MARKER_SYMBOLS = "ovs+*DX" # Overwrite default parameters with provided values defaults: dict[str, Any] = { "show_enzyme_counts": False, "figsize": None, "row_height": 3, "col_width": 4, "plot_reactions": [[]], } params = {**defaults, **params} # Set up subplots according to parameters n_rows = len(params["plot_reactions"]) n_cols = 1 if not params["show_enzyme_counts"] else 3 fig, axs = plt.subplots(n_rows, n_cols) # Ensure axs is expected shape if not isinstance(axs, np.ndarray): axs = np.array([axs]) if n_rows == 1: axs = np.array([axs]) # Group axes by what type of data we'll plot into them flux_axes = axs[:, 0] count_axes = [None] * n_rows if not params["show_enzyme_counts"] else axs[:, 1] legend_axes = [None] * n_rows if not params["show_enzyme_counts"] else axs[:, 2] # Retrieve reaction IDs from config metadata reaction_ids = np.array( field_metadata(conn, config_sql, "listeners__fba_results__base_reaction_fluxes") ) # If selected, pull necessary information to plot enzyme counts if params["show_enzyme_counts"]: # Get mappings from sim_data: reactions to catalysts, and fba reaction ids to base reaction ids. # # NOTE: Will NOT work in multivariant sims where the mapping from reactions to catalysts changes between variants # (currently, this is not something we've had to model). # If that becomes necessary in the future, we'd probably want to actually parse the sim_data_paths dictionary # to get those mappings for each variant. Since this is currently just a single analysis, using open_arbitrary_sim_data # is fine as it will just open the only possible option: the sim data corresponding to the variant for the given cell. with open_arbitrary_sim_data(sim_data_paths) as f: sim_data = pickle.load(f) reaction_catalyst_mapping = sim_data.process.metabolism.reaction_catalysts reaction_id_to_base_reaction_id = ( sim_data.process.metabolism.reaction_id_to_base_reaction_id ) # Invert reaction_id_to_base_reaction_id base_reaction_id_to_reaction_ids = defaultdict(list) for rxnid, baseid in reaction_id_to_base_reaction_id.items(): base_reaction_id_to_reaction_ids[baseid].append(rxnid) # Pull catalyst IDs catalyst_ids = field_metadata( conn, config_sql, "listeners__fba_results__catalyst_counts" ) # Plot each reaction set in a separate axis for reaction_set, ax, ax2, lax in zip( params["plot_reactions"], flux_axes, count_axes, legend_axes ): if isinstance(reaction_set, str): reaction_set = {reaction_set: reaction_set} elif isinstance(reaction_set, list): reaction_set = dict(zip(reaction_set, reaction_set)) # Complain if any reaction ids don't exist in the data for rxnid in reaction_set.keys(): if rxnid not in reaction_ids: raise KeyError( f"Reaction with ID {rxnid} was not found in the set of metabolic reactions!" ) # Get reactions indices and read flux values reaction_set_ids = list(reaction_set.keys()) reaction_idx = np.nonzero( np.array(reaction_set_ids)[:, np.newaxis] == reaction_ids )[1] flux_data = cast( pl.DataFrame, read_stacked_columns( history_sql, [ named_idx( "listeners__fba_results__base_reaction_fluxes", reaction_set_ids, [list(reaction_idx)], ) ], remove_first=True, conn=conn, ), ) # Plot flux traces for reaction_id, label in reaction_set.items(): ax.plot(flux_data["time"], flux_data[reaction_id], label=label) # Plot enzyme counts, if requested if params["show_enzyme_counts"]: # Get a tree mapping from catalysts in this subplot (root) # to base reactions (level 2) to fba reactions (level 3) catalysts_to_reactions_mapping: defaultdict[ str, defaultdict[str, list[str]] ] = defaultdict(lambda: defaultdict(list)) for baseid in reaction_set: # Get fba reactions of this base reaction fba_reactions = base_reaction_id_to_reaction_ids[baseid] # Collect catalysts of the fba reactions and store for rxnid in fba_reactions: catalysts = reaction_catalyst_mapping.get(rxnid, []) if len(catalysts) > 0: for catalyst in catalysts: catalysts_to_reactions_mapping[catalyst][baseid].append( rxnid ) # Get catalyst indices and read counts catalyst_set_ids = list(catalysts_to_reactions_mapping.keys()) catalyst_idx = np.nonzero( np.array(catalyst_set_ids)[:, np.newaxis] == catalyst_ids )[1] catalyst_counts = cast( pl.DataFrame, read_stacked_columns( history_sql, [ named_idx( "listeners__fba_results__catalyst_counts", catalyst_set_ids, [list(catalyst_idx)], ) ], remove_first=True, conn=conn, ), ) # Plot counts for each catalyst legend_handles = [] for i, (catalyst, base_to_fba_reaction_mapping) in enumerate( catalysts_to_reactions_mapping.items() ): # Build label based on base reactions and fba reactions of this catalyst label = f"$\\bf{{{esc_latex(catalyst)}}}$" for base_rxn, fba_rxns in base_to_fba_reaction_mapping.items(): base_rxn_label = reaction_set[base_rxn] label += f"\n {base_rxn}" + ( f" ({base_rxn_label})" if base_rxn_label != base_rxn else "" ) for fba_rxn in fba_rxns: # Get suffix distinguishing fba reaction from base reaction suffix = fba_rxn[len(base_rxn) :] if len(suffix) > 0: label += "\n + " + textwrap.fill( suffix, width=40, subsequent_indent=" " ) # Plot catalyst counts vs time # (Store color so markers plotted separately share color) lines = ax2.plot( catalyst_counts["time"], catalyst_counts[catalyst], linestyle="--", ) color = lines[0].get_color() # Plot markers separately to sub-sample timepoints N_POINTS = 20 len_t = len(catalyst_counts["time"]) step = max(1, len_t // N_POINTS) subsampled_times = catalyst_counts["time"][::step] subsampled_counts = catalyst_counts.filter( pl.col("time").is_in(subsampled_times) ) legend_handles.append( ax2.scatter( subsampled_counts["time"], subsampled_counts[catalyst], color=color, marker=MARKER_SYMBOLS[i % len(MARKER_SYMBOLS)], label=label, ) ) # Axis aesthetics ax2.set_ylabel("Enzyme count") lax.legend(handles=legend_handles) lax.axis("off") # Axis aesthetics ax.set_xlabel("Time") ax.set_ylabel("Flux") ax.legend() if params["figsize"] is None: fig.set_size_inches(params["col_width"] * n_cols, params["row_height"] * n_rows) else: fig.set_size_inches(*params["figsize"]) fig.tight_layout() fig.savefig(os.path.join(outdir, "selected_fluxes.svg")) fig.savefig(os.path.join(outdir, "selected_fluxes.png"))