Skip to content

Commit

Permalink
Merge pull request #2 from VinzentRisch/1_mzml_formats
Browse files Browse the repository at this point in the history
ENH: Added mzML formats and type
  • Loading branch information
VinzentRisch authored Jan 15, 2025
2 parents 8357be4 + 25bc5b2 commit b9045d0
Show file tree
Hide file tree
Showing 13 changed files with 456 additions and 42 deletions.
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ repos:
rev: 4.0.1
hooks:
- id: flake8
args:
- --max-line-length=88
additional_dependencies:
- pycodestyle==2.8.0
- pyflakes==2.4.0
Expand Down
1 change: 1 addition & 0 deletions conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ requirements:
- versioningit

run:
- pymzml
- qiime2 {{ qiime2_epoch }}.*
- q2-types {{ qiime2_epoch }}.*
- q2templates {{ qiime2_epoch }}.*
Expand Down
4 changes: 3 additions & 1 deletion q2_ms/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# flake8: noqa
# ----------------------------------------------------------------------------
# Copyright (c) 2024, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import importlib

# Use the version exposed by the ``_version`` module when it is importable;
# otherwise fall back to a placeholder version string.
try:
    from ._version import __version__
except ModuleNotFoundError:
    __version__ = "0.0.0+notfound"

# Import the ``q2_ms.types`` subpackage whenever ``q2_ms`` itself is imported.
importlib.import_module("q2_ms.types")
10 changes: 10 additions & 0 deletions q2_ms/citations.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
@article{kosters2018pymzml,
title={pymzML v2.0: introducing a highly compressed and seekable gzip format},
author={K{\"o}sters, M and Leufken, Johannes and Schulze, Stefan and Sugimoto, K and Klein, Joshua and Zahedi, RP and Hippler, Michael and Leidel, SA and Fufezan, Christian},
journal={Bioinformatics},
volume={34},
number={14},
pages={2513--2514},
year={2018},
publisher={Oxford University Press}
}
16 changes: 14 additions & 2 deletions q2_ms/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from q2_types.sample_data import SampleData
from qiime2.plugin import Citations, Plugin

from q2_ms import __version__
from q2_ms.types import mzML, mzMLDirFmt, mzMLFormat

citations = Citations.load("citations.bib", package="q2_ms")

Expand All @@ -19,5 +20,16 @@
package="q2_ms",
description="A QIIME 2 plugin for MS data processing.",
short_description="A QIIME 2 plugin for MS data processing.",
citations=[],
)

# Registrations
plugin.register_semantic_types(mzML)

plugin.register_semantic_type_to_format(SampleData[mzML], artifact_format=mzMLDirFmt)

# The mzML formats are validated with pymzml, so attach the pymzML citation
# loaded from citations.bib instead of leaving it unused.
plugin.register_formats(
    mzMLFormat,
    mzMLDirFmt,
    citations=[citations["kosters2018pymzml"]],
)
31 changes: 0 additions & 31 deletions q2_ms/tests/data/table-1.biom

This file was deleted.

5 changes: 4 additions & 1 deletion q2_ms/tests/__init__.py → q2_ms/types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# flake8: noqa
# ----------------------------------------------------------------------------
# Copyright (c) 2024, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from q2_ms.types._format import mzMLDirFmt, mzMLFormat
from q2_ms.types._type import mzML

__all__ = ["mzMLFormat", "mzMLDirFmt", "mzML"]
37 changes: 37 additions & 0 deletions q2_ms/types/_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2024, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
import os
import sys

import pymzml
from qiime2.core.exceptions import ValidationError
from qiime2.plugin import model


class mzMLFormat(model.TextFileFormat):
    """File format for a single mzML file.

    A file is considered valid when ``pymzml.run.Reader`` can open it without
    raising an exception.
    """

    def _validate(self, n_records=None):
        """Try to open the file with pymzml.

        Parameters
        ----------
        n_records : optional
            Not used by the check; kept so existing callers keep working.

        Raises
        ------
        ValidationError
            If opening the file with ``pymzml.run.Reader`` raises any exception.
        """
        # Local import keeps this block self-contained.
        from contextlib import redirect_stdout

        try:
            # Silence the warning pymzml prints ("Not index found and
            # build_index_from_scratch is False"). Setting
            # build_index_from_scratch=True would avoid the message as well, but
            # it builds the index and slows down validation.
            # redirect_stdout restores the previous sys.stdout even when the
            # reader raises, and the devnull handle is closed on exit.
            with open(os.devnull, "w") as devnull, redirect_stdout(devnull):
                pymzml.run.Reader(str(self))
        except Exception as e:
            raise ValidationError(e) from e

    def _validate_(self, level):
        """Run the validation check; ``level`` is ignored."""
        self._validate()


class mzMLDirFmt(model.DirectoryFormat):
    """Directory format made up of a collection of mzML files."""

    # Files whose names match the pattern below, each handled as mzMLFormat.
    mzml = model.FileCollection(r".*\.mzML$", format=mzMLFormat)

    @mzml.set_path_maker
    def mzml_path_maker(self, sample_id):
        """Build the file name for ``sample_id`` by appending ``.mzML``."""
        file_name = f"{sample_id}.mzML"
        return file_name
10 changes: 3 additions & 7 deletions q2_ms/tests/test_methods.py → q2_ms/types/_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,7 @@
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from qiime2.plugin.testing import TestPluginBase
from q2_types.sample_data import SampleData
from qiime2.core.type import SemanticType


class Test(TestPluginBase):
package = "q2_ms.tests"

def test(self):
pass
# Semantic type named "mzML", declared as a variant of the "type" field of
# SampleData.
mzML = SemanticType("mzML", variant_of=SampleData.field["type"])
File renamed without changes.
15 changes: 15 additions & 0 deletions q2_ms/types/tests/data/mzML_invalid/invalid.mzML
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0_idx.xsd">
<mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" id="urn:lsid:psidev.info:mzML.instanceDocuments.tiny.pwiz" version="1.1.0">
<cvList count="2">
<cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="2.26.0" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/>
<cv id="UO" fullName="Unit Ontology" version="14:07:2009" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo"/>
</cvList>
<fileDescription>
<fileContent>
<cvParam cvRef="MS" accession="MS:1000580" name="MSn spectrum" value=""/>
<cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value=""/>
</fileContent>
<sourceFileList count="3">
<sourceFile id="tiny1.yep" name="tiny1.yep" location="file://F:/data/Exp01">
<cvParam cvRef="MS" accession="MS:1000567" name="Bruker/Agilent YEP file" value=""/>
Loading

0 comments on commit b9045d0

Please sign in to comment.