Source code for pyms_nist_search.reference_data

#!/usr/bin/env python
#
#  reference_data.py
"""
Class to store reference data from NIST MS Search.
"""
#
#  This file is part of PyMassSpec NIST Search
#  Python interface to the NIST MS Search DLL
#
#  Copyright (c) 2020-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  PyMassSpec NIST Search is free software; you can redistribute it and/or modify
#  it under the terms of the GNU Lesser General Public License as
#  published by the Free Software Foundation; either version 3 of
#  the License, or (at your option) any later version.
#
#  PyMassSpec NIST Search is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public
#  License along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#  PyMassSpec NIST Search includes the redistributable binaries for NIST MS Search in
#  the x86 and x64 directories. Available from
#  ftp://chemdata.nist.gov/mass-spc/v1_7/NISTDLL3.zip .
#  ctnt66.dll and ctnt66_64.dll copyright 1984-1996 FairCom Corporation.
#  "FairCom" and "c-tree Plus" are trademarks of FairCom Corporation
#  and are registered in the United States and other countries.
#  All Rights Reserved.

# stdlib
import copy
import json
import warnings
from typing import Any, Dict, List, Optional, Sequence, Type, Union

# 3rd party
import sdjson
from domdf_python_tools.doctools import prettify_docstrings
from domdf_python_tools.iterative import chunks
from domdf_python_tools.paths import PathPlus
from domdf_python_tools.typing import PathLike
from pyms.Spectrum import MassSpectrum, normalize_mass_spec
from pyms.Utils.jcamp import JcampTagWarning, header_info_fields, xydata_tags
from pyms.Utils.Math import is_float

# this package
from pyms_nist_search.base import NISTBase
from pyms_nist_search.mona_tools import mass_spec_from_mona, parse_metadata
from pyms_nist_search.templates import *
from pyms_nist_search.utils import parse_name_chars

__all__ = ("ReferenceData", )


[docs]@prettify_docstrings class ReferenceData(NISTBase): """ Class to store reference data from NIST MS Search. :param name: The name of the compound. :param cas: The CAS number of the compound. :param nist_no: :param id: :param mw: :param formula: The formula of the compound. :param contributor: The contributor to the library. :param mass_spec: The reference mass spectrum. :param synonyms: List of synonyms for the compound. .. latex:vspace:: 60px """ _exact_mass: float _mass_spec: Optional[MassSpectrum] _synonyms: List[str] def __init__( self, name: str = '', cas: Union[str, int] = "---", nist_no: Union[int, str] = 0, id: Union[str, int] = '', # noqa: A002 # pylint: disable=redefined-builtin mw: Union[float, str] = 0.0, formula: str = '', contributor: str = '', mass_spec: Optional[MassSpectrum] = None, synonyms: Optional[Sequence[str]] = None, exact_mass: Optional[Any] = None, ) -> None: NISTBase.__init__(self, name, cas) self._formula: str = str(formula) self._contributor: str = str(contributor) self._nist_no: int = int(nist_no) self._id: str = str(id) self._mw: int = int(mw) if not exact_mass: self._exact_mass = float(mw) else: self._exact_mass = float(exact_mass) if mass_spec is None: self._mass_spec = None elif isinstance(mass_spec, dict): self._mass_spec = MassSpectrum(**mass_spec) else: self._mass_spec = copy.copy(mass_spec) if synonyms is None: self._synonyms = [] else: self._synonyms = [str(synonym) for synonym in synonyms] @property def formula(self) -> str: """ The formula of the compound. """ return self._formula @property def contributor(self) -> str: """ The name of the contributor to the library. """ return self._contributor @property def nist_no(self) -> int: """ The NIST number of the compund. """ return self._nist_no @property def id(self) -> str: """ The ID of the compound. """ return self._id @property def mw(self) -> int: """ The molecular weight of the compound. """ return self._mw @property def exact_mass(self) -> float: """ The exact mass of the compound. """ return self._exact_mass @property def mass_spec(self) -> Optional[MassSpectrum]: """ The mass spectrum of the compound. """ return copy.copy(self._mass_spec) @property def synonyms(self) -> List[str]: """ A list of synonyms for the compound. """ return self._synonyms[:]
[docs] @classmethod def from_pynist(cls, pynist_dict: Dict[str, Any]) -> "ReferenceData": """ Create a :class:`ReferenceData` object from the raw data returned by the C extension. :param pynist_dict: """ return cls( name=parse_name_chars(pynist_dict["name_chars"]), cas=pynist_dict["cas"], formula=pynist_dict["formula"], contributor=pynist_dict["contributor"], nist_no=pynist_dict["nist_no"], id=pynist_dict["id"], mw=pynist_dict["mw"], mass_spec=MassSpectrum(pynist_dict["mass_list"], pynist_dict["intensity_list"]), synonyms=[parse_name_chars(synonym) for synonym in pynist_dict["synonyms_chars"]], )
[docs] def __repr__(self) -> str: return f"Reference Data: {self.name} \t({self.cas})"
[docs] def to_dict(self) -> Dict[str, Any]: """ Convert the object to a dictionary. .. versionadded:: 0.6.0 """ return dict( name=self.name, cas=self.cas, formula=self.formula, contributor=self.contributor, nist_no=self.nist_no, id=self.id, mw=self.mw, exact_mass=self.exact_mass, synonyms=self.synonyms[:], mass_spec=self.mass_spec, )
@property def __dict__(self): # noqa: MAN002 return self.to_dict()
[docs] @classmethod def from_jcamp(cls, file_name: PathLike, ignore_warnings: bool = True) -> "ReferenceData": """ Create a ReferenceData object from a JCAMP-DX file. :param file_name: Path of the file to read. :param ignore_warnings: Whether warnings about invalid tags should be shown. :authors: Qiao Wang, Andrew Isaac, Vladimir Likic, David Kainer, Dominic Davis-Foster """ with warnings.catch_warnings(): if ignore_warnings: warnings.simplefilter("ignore", JcampTagWarning) file_name = PathPlus(file_name) # Commented this line because it also gets printed when the MassSpectrum is created # print(f" -> Reading JCAMP file '{file_name}'") lines_list = file_name.read_lines() last_tag = None header_info: Dict[str, Any] = {} # Dictionary containing header information for line in lines_list: if len(line.strip()): if line.startswith("##"): # key word or information fields = line.split('=', 1) current_tag = fields[0] = fields[0].lstrip("##").upper() last_tag = fields[0] fields[1] = fields[1].strip() if current_tag.upper().startswith("END"): break elif current_tag in xydata_tags: continue elif current_tag in header_info_fields: if fields[1].isdigit(): header_info[current_tag] = int(fields[1]) elif is_float(fields[1]): header_info[current_tag] = float(fields[1]) else: header_info[current_tag] = fields[1] else: warnings.warn(current_tag, JcampTagWarning) else: if last_tag in header_info: header_info[last_tag] += f"{line}" return cls( name=header_info["TITLE"], cas=header_info["CAS REGISTRY NO"], nist_no=header_info["$NIST MASS SPEC NO"], contributor=header_info["ORIGIN"], formula=header_info["MOLFORM"], mw=header_info["MW"], mass_spec=MassSpectrum.from_jcamp(file_name), )
[docs] def to_json(self) -> str: """ Convert the object to JSON. """ return sdjson.dumps(self.to_dict())
[docs] @classmethod def from_json(cls: Type["ReferenceData"], json_data: str) -> "ReferenceData": """ Construct an object from JSON data. :param json_data: """ peak_dict = json.loads(json_data) # peak_dict["mass_spec"] = MassSpectrum.from_dict(peak_dict["mass_spec"]) return cls.from_dict(peak_dict)
[docs] @classmethod def from_mona_dict(cls, mona_data: Dict) -> "ReferenceData": """ Construct an object from Massbank of North America json data that has been loaded into a dictionary. :param mona_data: dict """ # noqa: D400 compound: Dict = mona_data["compound"][0] names: List = compound["names"] name: str = names[0]["name"] synonyms: List = [name for name in names[1:]] properties_dict = parse_metadata(mona_data) # Remove unwanted properties del properties_dict["license"] mass_spec = mass_spec_from_mona(mona_data["spectrum"]) return cls( name=name, mass_spec=mass_spec, synonyms=synonyms, **properties_dict, )
[docs] def to_msp(self) -> str: """ Returns the ReferenceData object as an MSP file similar to that produced by NIST MS Search's export function. """ # noqa: D400 if not self.mass_spec: raise ValueError("No mass spectrum included in the reference data.") normalized_ms = normalize_mass_spec(self.mass_spec, max_intensity=999) num_peaks = len(self.mass_spec) mz_int_pairs = [f"{mz} {intensity}" for mz, intensity in normalized_ms.iter_peaks()] spec_block = [] for row in list(chunks(mz_int_pairs, 5)): spec_block.append("; ".join(x for x in row)) msp_text = msp_template.render( ref_data=self, num_peaks=num_peaks, spec_block='\n'.join(spec_block), ) return msp_text
@sdjson.register_encoder(ReferenceData) def encode_reference_data(obj: ReferenceData) -> Dict[str, Any]: return dict(obj)