Source code for runscripts.create_variants

import argparse
import copy
import importlib
import itertools
import json
import os
import pickle
import shutil
import subprocess
from pathlib import Path
from typing import TYPE_CHECKING, Any, cast

import numpy as np

from ecoli.composites.ecoli_configs import CONFIG_DIR_PATH
from ecoli.experiments.ecoli_master_sim import SimConfig

if TYPE_CHECKING:
    from reconstruction.ecoli.simulation_data import SimulationDataEcoli


[docs] def parse_variants( variant_config: dict[str, str | dict[str, Any]], ) -> list[dict[str, Any]]: """ Parse parameters for a variant specified under ``variants`` key of config. Args: variant_config: Dictionary of the form:: { # Define one or more parameters with any names EXCEPT `op` 'param_name': { # Each parameter defines only ONE of the following keys # A list of parameter values 'value': [...] # Numpy function that returns array of parameter values # Example: np.linspace 'linspace': { # Kwargs 'start': ..., 'stop': ..., 'num' (optional): ... } # Dictionary of parameters with same rules as this one 'nested': {...} }, # When more than one parameter is defined, an 'op' key # MUST define how to combine them. The three options are: # 'zip': Zip parameters (must have same length) # 'prod': Cartesian product of parameters # 'add': Concatenate parameter lists into single parameter # named {param_name_1}__{param_name_2}__... 'param_2': {...}, 'op': 'zip' } Returns: List of parameter dictionaries generated from variant config """ # Extract operation if more than one parameter operation = None if len(variant_config) > 1: operation = variant_config.pop("op") elif "op" in variant_config: raise TypeError( "Variant only has a single parameter " "and should not define op key." 
) # Perform pre-processing of parameters parsed = {} for param_name, param_conf in variant_config.items(): param_conf = cast(dict[str, Any], param_conf) if len(param_conf) > 1: raise TypeError(f"{param_name} should only have 1 type.") param_type = list(param_conf.keys())[0] param_vals = param_conf[param_type] if param_type == "value": if not isinstance(param_vals, list): raise TypeError(f"{param_name} should have a list value.") parsed[param_name] = param_vals elif param_type == "nested": param_vals = cast(dict[str, str | dict[str, Any]], param_vals) parsed[param_name] = parse_variants(param_vals) else: try: np_func = getattr(np, param_type) except AttributeError as e: raise TypeError(f"{param_name} is unknown type {param_type}.") from e parsed[param_name] = np_func(**param_vals) # Apply parameter operations if operation == "prod": param_tuples = itertools.product(*(parsed[k] for k in parsed)) param_dicts = [ {name: val for name, val in zip(parsed.keys(), param_tuple)} for param_tuple in param_tuples ] elif operation == "zip": n_combos = -1 for name, val in parsed.items(): if n_combos == -1: n_combos = len(val) if len(val) != n_combos: raise RuntimeError( f"At least 1 other parameter has a " f"different # of values than {name}." ) param_dicts = [ {name: val[i] for name, val in parsed.items()} for i in range(n_combos) ] elif operation == "add": combined_param_name = "__".join(parsed) param_dicts = [] for val in parsed.values(): param_dicts.extend({combined_param_name: i} for i in val) elif operation is None: param_name = list(parsed.keys())[0] param_vals = parsed[param_name] param_dicts = [{param_name: param_val} for param_val in param_vals] else: raise RuntimeError(f"Unknown operation {operation} in {variant_config}") return param_dicts
def _json_default(obj: Any) -> Any:
    """Convert Numpy scalars and arrays to builtins that ``json`` can write."""
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def apply_and_save_variants(
    sim_data: "SimulationDataEcoli",
    param_dicts: list[dict[str, Any]],
    variant_name: str,
    outdir: str,
    skip_baseline: bool,
):
    """
    Applies variant function to ``sim_data`` with each parameter dictionary
    in ``param_dicts``. Saves each variant as ``{i}.cPickle``
    in ``outdir``, where ``i`` is one more than the index of the parameter
    dictionary in ``param_dicts`` used to create that variant (index 0 is
    reserved for the baseline). Also saves ``metadata.json`` in ``outdir``
    that maps each ``{i}`` to the parameter dictionary used to create it.

    Args:
        sim_data: Simulation data object to modify. It is deep-copied before
            each variant is applied, so the passed object is not changed
            by this function.
        param_dicts: Return value of :py:func:`~.parse_variants`
        variant_name: Name of variant function file in ``ecoli/variants`` folder
        outdir: Path to folder where variant ``sim_data`` pickles are saved
        skip_baseline: If False, ``metadata.json`` gets a ``0: "baseline"``
            entry. The baseline pickle itself is not written here.
    """
    variant_mod = importlib.import_module(f"ecoli.variants.{variant_name}")
    variant_metadata: dict[int, str | dict[str, Any]] = {}
    if not skip_baseline:
        variant_metadata[0] = "baseline"
    # Variant indices start at 1 because 0 is reserved for the baseline
    for variant_idx, params in enumerate(param_dicts, start=1):
        sim_data_copy = copy.deepcopy(sim_data)
        variant_metadata[variant_idx] = params
        variant_sim_data = variant_mod.apply_variant(sim_data_copy, params)
        outpath = os.path.join(outdir, f"{variant_idx}.cPickle")
        with open(outpath, "wb") as f:
            pickle.dump(variant_sim_data, f)
    with open(os.path.join(outdir, "metadata.json"), "w") as f:
        # Parameters generated by Numpy functions may be Numpy scalars
        # (e.g. np.int64), which json cannot serialize without a converter
        json.dump({variant_name: variant_metadata}, f, default=_json_default)
def test_parse_variants():
    """
    Check that a ``prod`` of two value lists and one nested ``zip``
    parameter expands into every expected combination, in order.
    """
    zipped_conf = {"d": {"value": [3, 4]}, "e": {"value": [5, 6]}, "op": "zip"}
    config = {
        "a": {"value": [1, 2]},
        "b": {"value": ["one", "two"]},
        "c": {"nested": zipped_conf},
        "op": "prod",
    }
    # Cartesian product varies the last parameter fastest; "d" and "e"
    # advance together because they are zipped
    expected = [
        {"a": a_val, "b": b_val, "c": {"d": d_val, "e": e_val}}
        for a_val in (1, 2)
        for b_val in ("one", "two")
        for d_val, e_val in ((3, 5), (4, 6))
    ]
    assert parse_variants(config) == expected
class SimData:
    """
    Empty placeholder standing in for ``sim_data`` in this module's tests.
    """
def test_create_variants():
    """
    Test modification and saving of variant sim_data.

    Pickles a mock ``SimData`` into a scratch ``kb`` folder, runs
    ``runscripts/create_variants.py`` as a subprocess with the
    ``test_variant.json`` config, then checks that the attributes of every
    saved variant match the parameters recorded in ``metadata.json``.
    The scratch folder is always removed afterwards.

    NOTE(review): all paths are relative, so this presumably must be run
    from the repository root -- confirm.
    """
    # Local import: only needed to locate the running interpreter
    import sys

    try:
        os.makedirs("test_create_variants/kb", exist_ok=True)
        # Create mock sim_data pickle
        with open("test_create_variants/kb/simData.cPickle", "wb") as f:
            pickle.dump(SimData(), f)
        repo_dir = os.path.dirname(os.path.dirname(__file__))
        # Give the child its own environment instead of overwriting
        # PYTHONPATH for this whole test process
        child_env = {**os.environ, "PYTHONPATH": repo_dir}
        # Test script and config system with the same interpreter that is
        # running the tests rather than whichever "python" is first on PATH
        subprocess.run(
            [
                sys.executable,
                "runscripts/create_variants.py",
                "--config",
                "ecoli/composites/ecoli_configs/test_variant.json",
                "--kb",
                "test_create_variants/kb",
                "-o",
                "test_create_variants/out",
            ],
            check=True,
            env=child_env,
        )
        # Check that metadata aligns with variant sim_data attrs
        with open("test_create_variants/out/metadata.json") as f:
            variant_metadata = json.load(f)
        assert "variant_test" in variant_metadata
        variant_metadata = variant_metadata["variant_test"]
        out_path = Path("test_create_variants/out")
        for var_path in out_path.glob("*.cPickle"):
            # Skip baseline
            if var_path.stem == "0":
                continue
            with open(var_path, "rb") as f:
                variant_sim_data = pickle.load(f)
            # JSON keys are strings, matching the pickle file stem
            variant_params = variant_metadata[var_path.stem]
            assert variant_sim_data.a == variant_params["a"]
            assert variant_sim_data.b == variant_params["b"]
            assert variant_sim_data.d == variant_params["c"]["d"]
            assert variant_sim_data.e == variant_params["c"]["e"]
    finally:
        shutil.rmtree("test_create_variants", ignore_errors=True)
def main():
    """
    Command-line entry point: build variant ``sim_data`` pickles.

    Loads the default config, merges the file given by ``--config`` and then
    any explicitly supplied command-line options on top of it, and loads
    ``simData.cPickle`` from the ``kb`` folder. Unless ``skip_baseline`` is
    set, the unmodified sim_data is saved as ``0.cPickle`` in ``outdir``.
    If the config defines exactly one entry under ``variants``, its
    parameters are parsed and each resulting variant is saved alongside a
    ``metadata.json``; with no variants, only a baseline ``metadata.json``
    is written.

    Raises:
        RuntimeError: If more than one variant name is configured.
    """
    parser = argparse.ArgumentParser()
    default_config = os.path.join(CONFIG_DIR_PATH, "default.json")
    parser.add_argument(
        "--config",
        action="store",
        default=default_config,
        help=(
            "Path to configuration file for the simulation. "
            "All key-value pairs in this file will be applied on top "
            f"of the options defined in {default_config}. To configure "
            "variants, the config must include the `variants` key. Under the "
            "`variants` key should be a single key with the name of the "
            "variant module under `ecoli.variants` (for example, `variant_1` "
            "if imported as `ecoli.variants.variant_1` or `folder_1.variant_1` "
            "if imported as `ecoli.variants.folder_1.variant_1`). See "
            "`ecoli.variants.template` for variant template. Under the "
            "variant module name should be a parameter dictionary as "
            "described in the docstring for `parse_variants`."
        ),
    )
    parser.add_argument(
        "--kb", action="store", type=str, help="Path to kb folder generated by ParCa."
    )
    parser.add_argument(
        "--outdir",
        "-o",
        action="store",
        type=str,
        help="Path to folder where variant sim_data and metadata are written.",
    )
    args = parser.parse_args()

    with open(default_config, "r") as f:
        config = json.load(f)
    if args.config is not None:
        with open(args.config, "r") as f:
            SimConfig.merge_config_dicts(config, json.load(f))
    # Only merge options actually given on the command line so that unset
    # (None) options cannot replace values coming from the config files
    cli_options = {key: val for key, val in vars(args).items() if val is not None}
    SimConfig.merge_config_dicts(config, cli_options)

    print("Loading sim_data...")
    # NOTE: unpickling can execute arbitrary code; only point --kb at a
    # trusted folder
    with open(os.path.join(config["kb"], "simData.cPickle"), "rb") as f:
        sim_data = pickle.load(f)
    os.makedirs(config["outdir"], exist_ok=True)

    if config["skip_baseline"]:
        print("Skipping baseline sim_data...")
    else:
        print("Saving baseline sim_data...")
        with open(os.path.join(config["outdir"], "0.cPickle"), "wb") as f:
            pickle.dump(sim_data, f)

    variant_config = config.get("variants", {})
    if len(variant_config) > 1:
        raise RuntimeError(
            "Only one variant name allowed. Variants can "
            "be manually composed in Python by having one "
            "variant function internally call another."
        )
    elif len(variant_config) == 1:
        variant_name, variant_params = next(iter(variant_config.items()))
        print("Parsing variants...")
        parsed_params = parse_variants(variant_params)
        print("Applying variants and saving variant sim_data...")
        apply_and_save_variants(
            sim_data,
            parsed_params,
            variant_name,
            config["outdir"],
            config["skip_baseline"],
        )
    else:
        # No variant configured: the variant name is written as JSON "null"
        with open(os.path.join(config["outdir"], "metadata.json"), "w") as f:
            json.dump({None: {0: "baseline"}}, f)
    print("Done.")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__": main()