# Source code for ecoli.analysis.causality_network.network_components

"""
Classes for the Nodes and Edges of a causality network.
"""

import numpy as np
from typing import Optional


# Filenames
# Names of the files that hold the causality network (TSV and JSON forms).
# They are not referenced by the Node or Edge classes visible in this module;
# presumably the code that writes/reads the network imports them from here
# (TODO confirm against callers).
NODELIST_FILENAME = "causality_network_node_list.tsv"
EDGELIST_FILENAME = "causality_network_edge_list.tsv"
DYNAMICS_FILENAME = "causality_network_dynamics.tsv"
NODELIST_JSON = "nodes.json"
EDGELIST_JSON = "edges.json"

# Headers
# Tab-separated header rows for the three TSV files. The column order of each
# header matches the row emitted by Node.write_nodelist(),
# Edge.write_edgelist() and Node.write_dynamics() respectively. Note that the
# headers carry no trailing newline.
NODE_LIST_HEADER = "\t".join(
    ["ID", "class", "type", "name", "synonyms", "constants", "url"]
)
EDGE_LIST_HEADER = "\t".join(["src_node_id", "dst_node_id", "stoichiometry", "process"])
DYNAMICS_HEADER = "\t".join(["ID", "type", "units", "dynamics"])

# Special strings used as units to designate type of dynamics
# Node.write_dynamics() formats a series whose unit equals COUNT_UNITS as
# integers and one whose unit equals PROB_UNITS as fixed-point probabilities;
# the unit "s" selects time formatting and anything else is treated as a float.
COUNT_UNITS = "N"
PROB_UNITS = "prob"

# Precision settings for numbers in the dynamics file
# DYNAMICS_PRECISION is the precision of the general ("g") format used for
# floats, i.e. significant digits. PROBABILITY_PRECISION and TIME_PRECISION
# are the precisions of the fixed-point ("f") format, i.e. digits after the
# decimal point.
DYNAMICS_PRECISION = 6
PROBABILITY_PRECISION = 4
TIME_PRECISION = 2


class Node:
    """
    Class definition for a node in the causality network.

    Attributes:
        node_class: Class of node, string, either "State" or "Process"
        node_type: Type of node, string, e.g. "Gene", "Metabolism"
        node_id: Unique ID of node, string, e.g. "EG11274", "CPLX-125[c]"
        name: Generic name of node, string, e.g. "trpL", "pyruvate"
        synonyms: List of synonyms of node, list of strings
            e.g. ["anth", "tryD", "tryp-4"]
        constants: Dictionary with constant names as keys and constants as
            values, dictionary, e.g. {"reversibility": 0, "Km": 1e-6}
        dynamics: Dictionary with dynamics data type as keys and list of
            time-series data as values, dictionary. For example:

            .. code-block:: python

                {
                    "counts": [8151, 8525, ...],
                    "concentration": [1.151e-7, 1.155e-7, ...],
                }

        dynamics_units: Dictionary with dynamics data type as keys and its
            units as values (must share same keys with dynamics), dictionary,
            e.g. {"counts": "N", "concentration": "mol/L"}
        url: URL to EcoCyc page, string, e.g. "https://ecocyc.org/ECOLI/
            substring-search?type=NIL&object=EG11028&quickSearch=Quick+
            Search"
        location: Location value for the node. It is stored by
            read_attributes() and exported by to_dict(), but it is not
            written to the TSV node list.
    """

    def __init__(self):
        self.node_class = None
        self.node_type = None
        self.node_id = None
        self.name = None
        self.synonyms = None
        self.constants = None
        self.dynamics = {}
        self.dynamics_units = {}
        self.url = None
        self.location = None

    def get_node_id(self):
        """
        Return ID of node.
        """
        return self.node_id

    def read_attributes(
        self,
        node_class,
        node_type,
        node_id,
        name="",
        synonyms="",
        constants="",
        url="",
        location="",
    ):
        """
        Sets the attribute variables of the node.

        The arguments may also be supplied by unpacking a single dictionary
        whose keys are the argument names, e.g.
        ``node.read_attributes(**attributes)``.
        """
        self.node_class = node_class
        self.node_type = node_type
        self.node_id = node_id
        self.name = name
        self.synonyms = synonyms
        self.constants = constants
        self.url = url
        self.location = location

    def read_attributes_from_tsv(self, tsv_line):
        """
        Reads attributes (node type and node id) from a tab-delimited line
        in the node_list.tsv file.

        Args:
            tsv_line: One row of the node list, with or without its trailing
                line terminator.

        Returns:
            Tuple ``(node_id, node_type)`` taken from the first and third
            columns of the row.

        Raises:
            IndexError: If the row has fewer than three columns.
        """
        # Strip only the line terminator. Blindly dropping the last character
        # would truncate the final field of a line that has no trailing
        # newline (e.g. the last line of a file).
        fields = tsv_line.rstrip("\r\n").split("\t")
        self.node_type = fields[2]
        self.node_id = fields[0]
        return (self.node_id, self.node_type)

    def read_dynamics(self, dynamics, dynamics_units):
        """
        Sets the dynamics variable of the node.

        Args:
            dynamics: Dictionary mapping dynamics names to time-series data.
            dynamics_units: Dictionary mapping the same names to their units.
        """
        self.dynamics = dynamics
        self.dynamics_units = dynamics_units

    def write_nodelist(self, nodelist_file):
        """
        Writes a single row specifying the given node to the nodelist file.

        Args:
            nodelist_file: Object with a ``write`` method that accepts a
                string.
        """
        # Format single string with attributes of the node separated by tabs
        node_row = "%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
            self.node_id,
            self.node_class,
            self.node_type,
            self.name,
            self.synonyms,
            self.constants,
            self.url,
        )

        # Write line to nodelist file
        nodelist_file.write(node_row + "\n")

    def write_dynamics(self, dynamics_file):
        """
        Writes a single row of dynamics data for each dynamics variable
        associated with the node.

        Args:
            dynamics_file: Object with a ``write`` method that accepts a
                string.
        """
        # Iterate through all dynamics variables associated with the node
        for dynamics_name, dynamics_data in self.dynamics.items():
            # A dynamics variable without a registered unit gets an empty
            # unit and is therefore formatted as a float below.
            unit = self.dynamics_units.get(dynamics_name, "")

            # Format dynamics string depending on data type
            if unit == COUNT_UNITS:
                dynamics_string = self._format_dynamics_string(dynamics_data, "int")
            elif unit == PROB_UNITS:
                dynamics_string = self._format_dynamics_string(dynamics_data, "prob")
            elif unit == "s":
                dynamics_string = self._format_dynamics_string(dynamics_data, "time")
            else:
                dynamics_string = self._format_dynamics_string(dynamics_data, "float")

            # Format single string with dynamic attributes separated by tabs
            dynamics_row = "%s\t%s\t%s\t%s" % (
                self.node_id,
                dynamics_name,
                unit,
                dynamics_string,
            )

            # Write line to dynamics file
            dynamics_file.write(dynamics_row + "\n")

    def dynamics_dict(self):
        """
        Return the dynamics of the node as a list of dictionaries.

        Returns:
            One dictionary per dynamics variable with the keys "units",
            "type" (the name of the dynamics variable), "id" (the node ID)
            and "dynamics" (the data as a contiguous Numpy array).
        """
        all_dynamics = []
        for name, data in self.dynamics.items():
            unit = self.dynamics_units.get(name, "")
            dynamics = {
                "units": unit,
                "type": name,
                "id": self.node_id,
                # orjson requires contiguous Numpy arrays
                "dynamics": np.ascontiguousarray(data),
            }
            all_dynamics.append(dynamics)
        return all_dynamics

    def to_dict(self):
        """
        Return the attributes of the node as a dictionary.

        Returns:
            Dictionary with the keys "ID", "type", "name", "class",
            "synonyms", "constants", "url" and "location".
        """
        # Some of the synonyms are strings of list-like entities and some
        # are actual lists. Only actual lists are exported; anything else is
        # replaced by an empty list.
        synonyms = self.synonyms if isinstance(self.synonyms, list) else []

        return {
            "ID": self.node_id,
            "type": self.node_type,
            "name": self.name,
            "class": self.node_class,
            "synonyms": synonyms,
            "constants": self.constants,
            "url": self.url,
            "location": self.location,
        }

    def _format_dynamics_string(self, dynamics, datatype):
        """
        Formats the string of dynamics data that is printed out to the
        dynamics file.

        If datatype is "int", print all numbers as full decimal integers.
        If datatype is "float", print all numbers in the general format with
        the precision set by DYNAMICS_PRECISION.
        If datatype is "prob", print all numbers in the floating point format
        with the precision set by PROBABILITY_PRECISION.
        If datatype is "time", print all numbers in the floating point format
        with the precision set by TIME_PRECISION.
        For any other datatype the dynamics data is returned unchanged.

        Values are joined with ", ".
        """
        if datatype == "int":
            dynamics_string = ", ".join("{0:d}".format(val) for val in dynamics)

        elif datatype == "float":
            dynamics_string = ", ".join(
                "{0:.{1}g}".format(val, DYNAMICS_PRECISION) for val in dynamics
            )

        elif datatype == "prob":
            dynamics_string = ", ".join(
                "{0:.{1}f}".format(val, PROBABILITY_PRECISION) for val in dynamics
            )

        elif datatype == "time":
            dynamics_string = ", ".join(
                "{0:.{1}f}".format(val, TIME_PRECISION) for val in dynamics
            )

        else:
            # Unknown datatype: hand the data back as-is and let the caller's
            # string formatting deal with it.
            dynamics_string = dynamics

        return dynamics_string
[docs] class Edge(object): """ Class definition for an edge in the causality network. Attributes: edge_type: Type of edge (type of the process node the edge is attached to), string, e.g. "Complexation", "Metabolism" src_id: ID of the source node, string, e.g. "RXN0-2382" dst_id: ID of the destination node, string, e.g. "WATER[c]" stoichiometry: (Only for metabolism edges) Stoichiometric coefficient of reaction-metabolite pair, integer, e.g. 1 """ def __init__(self, process: str): self.process = process self.src_id: Optional[str] = None self.dst_id: Optional[str] = None self.stoichiometry: Optional[str | int] = None
[docs] def get_src_id(self): """ Return ID of source node. """ return self.src_id
[docs] def get_dst_id(self): """ Return ID of destination node. """ return self.dst_id
[docs] def get_process(self): """ Return process associated with the edge. """ return self.process
[docs] def read_attributes( self, src_id: str, dst_id: str, stoichiometry: Optional[str | int] = "" ): # TODO(jerry): A narrower type for stoichiometry? """ Sets the remaining attribute variables of the node. Argument can be in the form of a single dictionary with names of each argument names as keys. """ self.src_id = src_id self.dst_id = dst_id self.stoichiometry = stoichiometry
[docs] def write_edgelist(self, edgelist_file): """ Writes a single row specifying the given edge to the edgelist file. """ # Format single string with attributes of the edge separated by commas edge_row = "%s\t%s\t%s\t%s" % ( self.src_id, self.dst_id, self.stoichiometry, self.process, ) # Write line to edgelist file edgelist_file.write(edge_row + "\n")