Source code for reconstruction.ecoli.scripts.metabolite_concentrations.merge_files

#! /usr/bin/env python

"""
Merge two or more concentration flat files with a single row for each metabolite
and "NaN" entries for missing concentration data.

Usage with paths to tsv files to merge:
        ./merge_files [TSV1 TSV2 ...]
"""

import io
import os
import sys
import time


from wholecell.io import tsv


FILE_LOCATION = os.path.realpath(os.path.dirname(__file__))
OUTPUT_FILE = os.path.join(FILE_LOCATION, "metabolite_concentrations.tsv")


def load_conc(filename: str) -> tuple[str, dict[str, str]]:
    """
    Load concentration data from a tsv file. First column should be metabolite
    ID and second column should be concentration. Does not handle more than one
    concentration column at this point.

    Leading rows whose first field starts with "#" are treated as comments and
    skipped, as are blank rows, until the header row is found. Data rows with
    fewer than two columns or with an empty concentration field are ignored.

    Args:
        filename: path to concentration tsv file

    Returns:
        label: header describing the concentration data (second column of the
            header row)
        conc: metabolite ID to concentration, with any surrounding '#', space
            and '"' characters stripped from the ID

    Raises:
        ValueError: if the file has no header row with at least two columns
    """

    conc: dict[str, str] = {}
    with io.open(filename, "rb") as f:
        reader = tsv.reader(f)

        # Find the header: the first row that is neither blank nor a "#"
        # comment. Using a default with next() avoids leaking a bare
        # StopIteration when the file is empty or contains only comments.
        headers = next(reader, None)
        while headers is not None and (
            not headers or headers[0].startswith("#")
        ):
            headers = next(reader, None)

        if headers is None or len(headers) < 2:
            raise ValueError(
                "No header row with a concentration column found in {}".format(
                    filename
                )
            )
        label = headers[1]

        for line in reader:
            # A row with fewer than two columns (e.g. the empty list a
            # csv-style reader yields for a blank line) carries no
            # concentration value, so there is nothing to record.
            if len(line) < 2:
                continue

            # IDs may be quoted and/or prefixed with '#'; normalize them so
            # the same metabolite matches across files.
            mol_id = line[0].strip('# "')
            mol_conc = line[1]
            if mol_conc:
                conc[mol_id] = mol_conc

    return label, conc
def save_conc(conc: list[tuple[str, dict[str, str]]]):
    """
    Write the merged concentration table to OUTPUT_FILE.

    The file starts with a "# Created with ..." comment row recording the
    command line and the current time, followed by a header row and then one
    row per metabolite in sorted ID order. A metabolite that is absent from a
    given data set gets the literal string "NaN" in that data set's column.

    Args:
        conc: one (label, mapping) pair per input data set, where label is the
            column header to write and mapping goes from metabolite ID to
            concentration
    """

    # Union of every metabolite ID seen in any of the data sets.
    all_ids: set[str] = set()
    for _, id_to_conc in conc:
        all_ids.update(id_to_conc)

    # Header cells are wrapped in double quotes by hand; the writer below uses
    # a single quote as its quotechar.
    header_row = ['"Metabolite"']
    for label, _ in conc:
        header_row.append('"{}"'.format(label))

    with io.open(OUTPUT_FILE, "wb") as out:
        writer = tsv.writer(out, quotechar="'", lineterminator="\n")

        provenance = "# Created with {} on {}".format(
            " ".join(sys.argv), time.ctime()
        )
        writer.writerow([provenance])
        writer.writerow(header_row)

        for met_id in sorted(all_ids):
            row = ['"{}"'.format(met_id)]
            for _, id_to_conc in conc:
                row.append(id_to_conc.get(met_id, "NaN"))
            writer.writerow(row)
if __name__ == "__main__":
    # sys.argv[0] is the script itself, so two input files means at least
    # three entries in sys.argv.
    if len(sys.argv) <= 2:
        usage = "Expecting two or more files to merge. {} [TSV1 TSV2 ...]".format(
            sys.argv[0]
        )
        raise ValueError(usage)

    # Load every input file in command-line order, then write the merged table.
    conc_ = list(map(load_conc, sys.argv[1:]))
    save_conc(conc_)