Source code for ecoli.analysis.multivariant.enzyme_availability_flux

"""
Plot enzyme availability (bulk counts) vs unmet homeostatic need over
continuous simulation time for multivariant simulation.

One dual-y-axis Altair subplot per variant, stacked vertically. Within each
variant, lines represent the mean over all cells on a continuous time axis
(relative to the first timestep of each lineage seed); generation is used as
the detail encoding to draw separate line segments at each cell division
without resetting the time axis.

DISCLAIMER: This analysis is only meant for metabolism-redux and
metabolism-redux-classic. metabolism.py lacks necessary listeners due to differences
in problem formulation
"""

from __future__ import annotations

import os
from typing import TYPE_CHECKING, Any

import altair as alt
import plotly.express as px
import polars as pl

from ecoli.analysis.multivariant.utils import create_variant_label
from ecoli.library.parquet_emitter import (
    field_metadata,
    read_stacked_columns,
)

if TYPE_CHECKING:
    from duckdb import DuckDBPyConnection

alt.data_transformers.enable("vegafusion")

DEFAULT_ENZYME_NAME = "UDP-NACMURALA-GLU-LIG-MONOMER[c]"
DEFAULT_MET = "CPD-12261[p]"
PASTEL_COLOR = px.colors.qualitative.Pastel


[docs] def plot( params: dict[str, Any], conn: "DuckDBPyConnection", history_sql: str, config_sql: str, success_sql: str, sim_data_dict: dict[str, dict[int, str]], validation_data_paths: list[str], outdir: str, variant_metadata: dict[str, dict[int, Any]], variant_names: dict[str, str], ) -> None: """Plot mean enzyme counts and unmet need vs time, one subplot per variant.""" experiment_id = next(iter(variant_metadata.keys()), None) per_variant_params: dict[int, Any] = ( variant_metadata[experiment_id] if experiment_id else {} ) enzyme_name = params.get("enzyme_name", DEFAULT_ENZYME_NAME) met = params.get("met", DEFAULT_MET) enzyme_label = f"{enzyme_name} (counts)" need_label = f"{met} unmet need (normalized dmdt diff)" bulk_ids = field_metadata(conn, config_sql, "bulk") homeostatic_metabolite = field_metadata( conn, config_sql, "listeners__fba_results__estimated_homeostatic_dmdt" ) assert enzyme_name in bulk_ids, ( f"{enzyme_name} not found in bulk metadata; cannot proceed." ) assert met in homeostatic_metabolite, ( f"{met} not found in estimated_homeostatic_dmdt metadata; cannot proceed." ) enzyme_idx = bulk_ids.index(enzyme_name) met_idx = homeostatic_metabolite.index(met) query = [ f"list_select(bulk, [{enzyme_idx + 1}])[1] AS enzyme_count", f"listeners__fba_results__estimated_homeostatic_dmdt[{met_idx + 1}] AS met_estimated_dmdt", f"listeners__fba_results__target_homeostatic_dmdt[{met_idx + 1}] AS met_target_dmdt", f"listeners__fba_results__homeostatic_metabolite_counts[{met_idx + 1}] AS met_counts", ] raw = pl.DataFrame( read_stacked_columns( history_sql, query, order_results=True, conn=conn, remove_first=True, ) ) # Continuous time per lineage_seed: subtract the global minimum time for # each seed so the x-axis starts at 0 and runs unbroken across generations. min_t = raw.group_by(["lineage_seed"]).agg(pl.col("time").min().alias("t_min")) raw = raw.join(min_t, on=["lineage_seed"]) raw = raw.with_columns( ((pl.col("time") - pl.col("t_min")) / 60).alias("Time (min)") ) raw = raw.with_columns( ( (pl.col("met_target_dmdt") - pl.col("met_estimated_dmdt")) / pl.col("met_counts") ).alias("unmet_need") ) df_plot = pl.DataFrame( { "Time (min)": raw["Time (min)"], "variant": raw["variant"], "generation": raw["generation"], "lineage_seed": raw["lineage_seed"], "enzyme count": raw["enzyme_count"], "unmet need": raw["unmet_need"], } ) color_scale = alt.Scale( domain=[enzyme_label, need_label], range=[PASTEL_COLOR[0], PASTEL_COLOR[1]], ) line_enz = ( alt.Chart() .mark_line(strokeWidth=1.5) .transform_calculate(trace=f"'{enzyme_label}'") .encode( x=alt.X("Time (min):Q", title="Time (min)"), y=alt.Y( "mean(enzyme count):Q", title=enzyme_label, axis=alt.Axis(orient="left"), ), color=alt.Color( "trace:N", scale=color_scale, legend=alt.Legend(title="Trace"), ), detail=alt.Detail("generation:N"), ) ) line_need = ( alt.Chart() .mark_line(strokeWidth=1.5) .transform_calculate(trace=f"'{need_label}'") .encode( x=alt.X("Time (min):Q", title="Time (min)"), y=alt.Y( "mean(unmet need):Q", title=need_label, axis=alt.Axis(orient="right"), ), color=alt.Color( "trace:N", scale=color_scale, legend=alt.Legend(title="Trace"), ), detail=alt.Detail("generation:N"), ) ) variants = df_plot["variant"].unique().sort() plots = [] for variant_val in variants: variant_name = create_variant_label(variant_val, per_variant_params) variant_data = df_plot.filter(pl.col("variant") == variant_val).to_pandas() subplot = ( alt.layer(line_enz, line_need, data=variant_data) .resolve_scale(y="independent") .properties(width=600, height=300, title=variant_name) ) plots.append(subplot) final = ( alt.vconcat(*plots) .resolve_scale(x="independent", y="independent") .properties(title="Enzyme availability and unmet need by variant") ) safe_met = met.replace("[", "_").replace("]", "_") out_path = os.path.join(outdir, f"WC_{safe_met}_enzyme_availability.html") final.save(out_path) print(f"Saved multivariant enzyme availability vs unmet need to {out_path}")