"""
Classes for the Nodes and Edges of a causality network.
"""
import numpy as np
from typing import Optional
# Names of the files used for the causality network data
NODELIST_FILENAME = "causality_network_node_list.tsv"
EDGELIST_FILENAME = "causality_network_edge_list.tsv"
DYNAMICS_FILENAME = "causality_network_dynamics.tsv"
NODELIST_JSON = "nodes.json"
EDGELIST_JSON = "edges.json"

# Tab-delimited header rows, one column name per entry
NODE_LIST_HEADER = "\t".join(
    (
        "ID",
        "class",
        "type",
        "name",
        "synonyms",
        "constants",
        "url",
    )
)
EDGE_LIST_HEADER = "\t".join(
    (
        "src_node_id",
        "dst_node_id",
        "stoichiometry",
        "process",
    )
)
DYNAMICS_HEADER = "\t".join(
    (
        "ID",
        "type",
        "units",
        "dynamics",
    )
)

# Special unit strings that designate the type of a dynamics series
COUNT_UNITS = "N"
PROB_UNITS = "prob"

# Precision settings for numbers in the dynamics file
DYNAMICS_PRECISION = 6
PROBABILITY_PRECISION = 4
TIME_PRECISION = 2
class Node(object):
    """
    Class definition for a node in the causality network.

    Attributes:
        node_class: Class of node, string, either "State" or "Process"
        node_type: Type of node, string, e.g. "Gene", "Metabolism"
        node_id: Unique ID of node, string, e.g. "EG11274", "CPLX-125[c]"
        name: Generic name of node, string, e.g. "trpL", "pyruvate"
        synonyms: List of synonyms of node, list of strings
            e.g. ["anth", "tryD", "tryp-4"]
        constants: Dictionary with constant names as keys and constants as
            values, dictionary, e.g. {"reversibility": 0, "Km": 1e-6}
        dynamics: Dictionary with dynamics data type as keys and list of
            time-series data as values, dictionary. For example:

            .. code-block:: python

                {
                    "counts": [8151, 8525, ...],
                    "concentration": [1.151e-7, 1.155e-7, ...],
                }

        dynamics_units: Dictionary with dynamics data type as keys and its
            units as values (must share same keys with dynamics),
            dictionary, e.g. {"counts": "N", "concentration": "mol/L"}
        url: URL to EcoCyc page, string, eg. "https://ecocyc.org/ECOLI/
            substring-search?type=NIL&object=EG11028&quickSearch=Quick+
            Search"
        location: Location of the node; stored as given and exported by
            :py:meth:`to_dict` (it is not written to the TSV node list)
    """

    def __init__(self):
        self.node_class = None
        self.node_type = None
        self.node_id = None
        self.name = None
        self.synonyms = None
        self.constants = None
        self.dynamics = {}
        self.dynamics_units = {}
        self.url = None
        self.location = None

    def get_node_id(self):
        """
        Return ID of node.
        """
        return self.node_id

    def read_attributes(
        self,
        node_class,
        node_type,
        node_id,
        name="",
        synonyms="",
        constants="",
        url="",
        location="",
    ):
        """
        Sets the attribute variables of the node. The arguments may also be
        supplied by unpacking a single dictionary whose keys are the argument
        names (``node.read_attributes(**attributes)``).
        """
        self.node_class = node_class
        self.node_type = node_type
        self.node_id = node_id
        self.name = name
        self.synonyms = synonyms
        self.constants = constants
        self.url = url
        self.location = location

    def read_attributes_from_tsv(self, tsv_line):
        """
        Reads attributes (node type and node id) from a tab-delimited line in
        the node_list.tsv file.

        Args:
            tsv_line: One row of the node list; the ID is taken from the
                first column and the type from the third.

        Returns:
            Tuple of ``(node_id, node_type)``.

        Raises:
            IndexError: If the line has fewer than three columns.
        """
        # Strip only the line terminator. Blindly dropping the final character
        # would truncate the last field of a line that has no trailing newline
        # and would leave a stray "\r" on CRLF-terminated lines.
        split_tsv_line = tsv_line.rstrip("\r\n").split("\t")
        self.node_type = split_tsv_line[2]
        self.node_id = split_tsv_line[0]
        return (self.node_id, self.node_type)

    def read_dynamics(self, dynamics, dynamics_units):
        """
        Sets the dynamics variables of the node (the time-series data and
        their units).
        """
        self.dynamics = dynamics
        self.dynamics_units = dynamics_units

    def write_nodelist(self, nodelist_file):
        """
        Writes a single row specifying the given node to the nodelist file.

        Args:
            nodelist_file: Object with a ``write`` method that accepts a
                string (e.g. an open text file).
        """
        # Format single string with attributes of the node separated by tabs,
        # in the same order as the columns of NODE_LIST_HEADER
        node_row = "%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
            self.node_id,
            self.node_class,
            self.node_type,
            self.name,
            self.synonyms,
            self.constants,
            self.url,
        )

        # Write line to nodelist file
        nodelist_file.write(node_row + "\n")

    def write_dynamics(self, dynamics_file):
        """
        Writes a single row of dynamics data for each dynamics variable
        associated with the node.

        Args:
            dynamics_file: Object with a ``write`` method that accepts a
                string (e.g. an open text file).
        """
        # Iterate through all dynamics variables associated with the node
        for dynamics_name, dynamics_data in self.dynamics.items():
            unit = self.dynamics_units.get(dynamics_name, "")

            # The units decide how the numbers are formatted
            if unit == COUNT_UNITS:
                datatype = "int"
            elif unit == PROB_UNITS:
                datatype = "prob"
            elif unit == "s":
                datatype = "time"
            else:
                datatype = "float"
            dynamics_string = self._format_dynamics_string(dynamics_data, datatype)

            # Format single string with dynamic attributes separated by tabs,
            # in the same order as the columns of DYNAMICS_HEADER
            dynamics_row = "%s\t%s\t%s\t%s" % (
                self.node_id,
                dynamics_name,
                unit,
                dynamics_string,
            )

            # Write line to dynamics file
            dynamics_file.write(dynamics_row + "\n")

    def _format_dynamics_string(self, dynamics, datatype):
        """
        Formats a time series as the string written to the dynamics file.

        Args:
            dynamics: Iterable of numbers.
            datatype: One of

                * ``"int"``: every value is written as a decimal integer
                * ``"prob"``: fixed-point with PROBABILITY_PRECISION decimals
                * ``"time"``: fixed-point with TIME_PRECISION decimals
                * ``"float"``: general format with DYNAMICS_PRECISION
                  significant digits

        Returns:
            The formatted values joined by ``", "`` and wrapped in square
            brackets, e.g. ``"[1, 2, 3]"``.

        Raises:
            ValueError: If ``datatype`` is not one of the values above.
        """
        if datatype == "int":
            # int() also accepts counts that are stored as floats or as
            # NumPy scalars, which the "d" format would otherwise reject
            values = (format(int(value), "d") for value in dynamics)
        else:
            if datatype == "prob":
                spec = ".%df" % PROBABILITY_PRECISION
            elif datatype == "time":
                spec = ".%df" % TIME_PRECISION
            elif datatype == "float":
                spec = ".%dg" % DYNAMICS_PRECISION
            else:
                raise ValueError("Unknown dynamics datatype: %r" % (datatype,))
            values = (format(value, spec) for value in dynamics)

        return "[" + ", ".join(values) + "]"

    def dynamics_dict(self):
        """
        Return the dynamics of the node as a list of dictionaries, one per
        dynamics variable, each with the keys "units", "type", "id" and
        "dynamics".
        """
        all_dynamics = []
        for name, data in self.dynamics.items():
            unit = self.dynamics_units.get(name, "")
            dynamics = {
                "units": unit,
                "type": name,
                "id": self.node_id,
                # orjson requires contiguous Numpy arrays
                "dynamics": np.ascontiguousarray(data),
            }
            all_dynamics.append(dynamics)
        return all_dynamics

    def to_dict(self):
        """
        Return the attributes of the node as a dictionary with the keys "ID",
        "type", "name", "class", "synonyms", "constants", "url" and
        "location".
        """
        # Some of the synonyms are strings of list-like entities and some are
        # actual lists. Only actual lists are exported; anything else is
        # replaced by an empty list.
        synonyms = []
        if isinstance(self.synonyms, list):
            synonyms = self.synonyms

        return {
            "ID": self.node_id,
            "type": self.node_type,
            "name": self.name,
            "class": self.node_class,
            "synonyms": synonyms,
            "constants": self.constants,
            "url": self.url,
            "location": self.location,
        }
[docs]
class Edge(object):
"""
Class definition for an edge in the causality network.
Attributes:
edge_type: Type of edge (type of the process node the edge is
attached to), string, e.g. "Complexation", "Metabolism"
src_id: ID of the source node, string, e.g. "RXN0-2382"
dst_id: ID of the destination node, string, e.g. "WATER[c]"
stoichiometry: (Only for metabolism edges) Stoichiometric
coefficient of reaction-metabolite pair, integer, e.g. 1
"""
def __init__(self, process: str):
self.process = process
self.src_id: Optional[str] = None
self.dst_id: Optional[str] = None
self.stoichiometry: Optional[str | int] = None
[docs]
def get_src_id(self):
"""
Return ID of source node.
"""
return self.src_id
[docs]
def get_dst_id(self):
"""
Return ID of destination node.
"""
return self.dst_id
[docs]
def get_process(self):
"""
Return process associated with the edge.
"""
return self.process
[docs]
def read_attributes(
self, src_id: str, dst_id: str, stoichiometry: Optional[str | int] = ""
):
# TODO(jerry): A narrower type for stoichiometry?
"""
Sets the remaining attribute variables of the node. Argument can be
in the form of a single dictionary with names of each argument names as
keys.
"""
self.src_id = src_id
self.dst_id = dst_id
self.stoichiometry = stoichiometry
[docs]
def write_edgelist(self, edgelist_file):
"""
Writes a single row specifying the given edge to the edgelist file.
"""
# Format single string with attributes of the edge separated by commas
edge_row = "%s\t%s\t%s\t%s" % (
self.src_id,
self.dst_id,
self.stoichiometry,
self.process,
)
# Write line to edgelist file
edgelist_file.write(edge_row + "\n")