#!/usr/bin/env python
#
# utils.py
"""
General utilities.
"""
#
# This file is part of PyMassSpec NIST Search
# Python interface to the NIST MS Search DLL
#
# Copyright (c) 2020 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
# PyMassSpec NIST Search is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 3 of
# the License, or (at your option) any later version.
#
# PyMassSpec NIST Search is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
# PyMassSpec NIST Search includes the redistributable binaries for NIST MS Search in
# the x86 and x64 directories. Available from
# ftp://chemdata.nist.gov/mass-spc/v1_7/NISTDLL3.zip .
# ctnt66.dll and ctnt66_64.dll copyright 1984-1996 FairCom Corporation.
# "FairCom" and "c-tree Plus" are trademarks of FairCom Corporation
# and are registered in the United States and other countries.
# All Rights Reserved.
#
# stdlib
import warnings
from typing import Sequence
# 3rd party
from pyms.Spectrum import MassSpectrum
__all__ = ["pack", "parse_name_chars"]
def pack(mass_spec: MassSpectrum, top: int = 20) -> str:
    """
    Convert a pyms.Spectrum.MassSpectrum object into a string.

    Adapted from https://sourceforge.net/projects/mzapi-live/

    The ``top`` most intense peaks are kept, their intensities are rescaled so
    that the most intense peak in the spectrum has an intensity of 999, and the
    peaks are written out in order of ascending mass as ``mass<TAB>intensity``
    pairs (two decimal places each), with a ``*`` after every pair.

    An empty spectrum gives the string ``'*'``. If the most intense peak has
    an intensity of zero, every intensity is written as ``0.00``.

    :param mass_spec: The spectrum to convert. Its ``mass_list`` and
        ``intensity_list`` attributes are paired up element by element.
    :param top: The number of largest peaks to identify

    :return: The packed representation of the spectrum.
    """

    # Most intense peak first. The sort is stable, so peaks of equal
    # intensity keep the order they have in the spectrum.
    peaks = sorted(
        zip(mass_spec.mass_list, mass_spec.intensity_list),
        key=lambda peak: peak[1],
        reverse=True,
    )

    if not peaks:
        # No base peak to normalise against; same output as for ``top=0``.
        return '*'

    base_intensity = peaks[0][1]

    if base_intensity:
        spectrum = [(mass, 999.0 * intensity / base_intensity) for (mass, intensity) in peaks[:top]]
    else:
        # Every intensity is zero, so normalising would divide by zero.
        spectrum = [(mass, 0.0) for (mass, _) in peaks[:top]]

    # Output is ordered by mass, not by intensity.
    spectrum.sort()

    return '*'.join([f"{mass:.2f}\t{intensity:.2f}" for (mass, intensity) in spectrum]) + '*'
# Character codes that stand for a different character than ``chr(code)``
# in the names handled by ``parse_name_chars``.
_SPECIAL_CHARS = {
    224: 'α',
    225: 'β',
    231: 'γ',
    235: 'δ',
    238: 'ε',
    227: 'π',
    229: 'σ',
    230: 'μ',
    234: 'ω',
    241: '±',
    252: 'η',
}


def parse_name_chars(name_char_list: Sequence[int]) -> str:
    """
    Takes a list of Unicode character codes and converts them to characters,
    taking into account the special codes used by the NIST DLL.

    Parsing stops at the first ``0`` code, and the final element of the list
    is never read. Codes in the range -128 to -1 are also matched against the
    special codes after adding 256, so that e.g. ``-26`` is read as ``230``
    (μ). Any code which still cannot be converted is replaced with ``'�'``,
    and a single warning listing all such codes is issued at the end.

    :param name_char_list: The character codes making up the name.

    :return: The parsed name.
    """  # noqa: D400

    chars = []
    errors = []  # Buffer the errors to display at the end

    # TODO: can we do away with the -1?
    for dec in name_char_list[:-1]:
        if dec == 0:
            # Null terminator; anything after it is not part of the name.
            break

        # -26 has been encountered where a μ (230) was expected, which is what
        # 230 looks like when read as a signed byte. Undo that wrap-around for
        # the special-code lookup only, so other negative codes are still reported.
        unsigned = dec + 256 if -128 <= dec < 0 else dec
        char = _SPECIAL_CHARS.get(unsigned)

        if char is None:
            try:
                char = chr(dec)
            except (ValueError, OverflowError):
                # chr() rejects codes outside the Unicode range
                # (OverflowError for very large integers).
                errors.append(dec)
                char = '�'

        chars.append(char)

    hit_name = ''.join(chars)

    if errors:
        warnings.warn(f"Unable to parse the following character codes for string {hit_name}: {errors}.")

    return hit_name