Source code for wholecell.utils.protein_counts

"""Utilities for counting simulated proteins"""

from typing import cast, List

import numpy as np
import numpy.typing as npt


[docs] def get_simulated_validation_counts( validation_counts: npt.NDArray[np.int64], monomer_counts: npt.NDArray[np.int64], validation_ids: npt.NDArray[np.str_], simulation_ids: npt.NDArray[np.str_], ) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: """ Get simulated counts and validation counts of monomers that exist in both the simulation and the validation data Arguments: validation_counts: Monomer counts from validation data. monomer_counts: Simulated monomer counts (from translation process). validation_ids: Monomer IDs from validation data. IDs must appear in same order as in validation_counts. simulation_ids: IDs of monomers in the same order as monomer_counts. Returns: The simulated counts of the monomers that appear in the validation data, and the validation counts of the monomers in the same order. """ avg_sim_counts = monomer_counts.mean(axis=0) sim_ids_lst = cast(List[str], simulation_ids.tolist()) val_ids_lst = cast(List[str], validation_ids.tolist()) overlapping_ids_set = set(sim_ids_lst) & set(val_ids_lst) sim_id_to_index_map = { sim_id: i for i, sim_id in enumerate(sim_ids_lst) if sim_id in overlapping_ids_set } val_id_to_index_map = { val_id: i for i, val_id in enumerate(val_ids_lst) if val_id in overlapping_ids_set } overlapping_ids_list = list(overlapping_ids_set) sim_filtered_idx = np.array( [sim_id_to_index_map[monomer_id] for monomer_id in overlapping_ids_list] ) val_filtered_idx = np.array( [val_id_to_index_map[monomer_id] for monomer_id in overlapping_ids_list] ) return avg_sim_counts[sim_filtered_idx], validation_counts[val_filtered_idx]